diff --git a/exercises/06/.gitignore b/exercises/06/.gitignore new file mode 100644 index 0000000..ea8acb0 --- /dev/null +++ b/exercises/06/.gitignore @@ -0,0 +1,17 @@ +# Buildfiles +*.o +vslc/src/vslc +parser.c +scanner.c +y.tab.h + +# Submission +*.tar.xz + +# VSL treefiles +*.ast +*.sast +*.sym +*.bin +*.s +*.S \ No newline at end of file diff --git a/exercises/06/PS6.pdf b/exercises/06/PS6.pdf new file mode 100644 index 0000000..a0127c8 Binary files /dev/null and b/exercises/06/PS6.pdf differ diff --git a/exercises/06/PS6_recitation.pdf b/exercises/06/PS6_recitation.pdf new file mode 100644 index 0000000..89da84a Binary files /dev/null and b/exercises/06/PS6_recitation.pdf differ diff --git a/exercises/06/ps6_skeleton.tar.gz b/exercises/06/ps6_skeleton.tar.gz new file mode 100644 index 0000000..f3df610 Binary files /dev/null and b/exercises/06/ps6_skeleton.tar.gz differ diff --git a/exercises/06/vslc/Makefile b/exercises/06/vslc/Makefile new file mode 100644 index 0000000..511e561 --- /dev/null +++ b/exercises/06/vslc/Makefile @@ -0,0 +1,12 @@ +LEX=flex +YACC=bison +YFLAGS+=--defines=src/y.tab.h -o y.tab.c +CFLAGS+=-std=c99 -g -Isrc -Iinclude -D_POSIX_C_SOURCE=200809L -DYYSTYPE="node_t *" + +src/vslc: src/vslc.c src/parser.o src/scanner.o src/nodetypes.o src/tree.o src/ir.o src/generator.o src/tlhash.c +src/y.tab.h: src/parser.c +src/scanner.c: src/y.tab.h src/scanner.l +clean: + -rm -f src/parser.c src/scanner.c src/*.tab.* src/*.o +purge: clean + -rm -f src/vslc diff --git a/exercises/06/vslc/include/ir.h b/exercises/06/vslc/include/ir.h new file mode 100644 index 0000000..78150e1 --- /dev/null +++ b/exercises/06/vslc/include/ir.h @@ -0,0 +1,30 @@ +#ifndef IR_H +#define IR_H + +/* This is the tree node structure */ +typedef struct n { + node_index_t type; + void *data; + struct s *entry; + uint64_t n_children; + struct n **children; +} node_t; + +// Export the initializer function, it is needed by the parser +void node_init ( + node_t *nd, node_index_t type, void *data, uint64_t n_children, ... +); + +typedef enum { + SYM_GLOBAL_VAR, SYM_FUNCTION, SYM_PARAMETER, SYM_LOCAL_VAR +} symtype_t; + +typedef struct s { + char *name; + symtype_t type; + node_t *node; + size_t seq; + size_t nparms; + tlhash_t *locals; +} symbol_t; +#endif diff --git a/exercises/06/vslc/include/nodetypes.h b/exercises/06/vslc/include/nodetypes.h new file mode 100644 index 0000000..5d302cc --- /dev/null +++ b/exercises/06/vslc/include/nodetypes.h @@ -0,0 +1,37 @@ +#ifndef NODETYPES_H +#define NODETYPES_H +typedef enum { + PROGRAM, + GLOBAL_LIST, + GLOBAL, + STATEMENT_LIST, + PRINT_LIST, + EXPRESSION_LIST, + VARIABLE_LIST, + ARGUMENT_LIST, + PARAMETER_LIST, + DECLARATION_LIST, + FUNCTION, + STATEMENT, + BLOCK, + ASSIGNMENT_STATEMENT, + ADD_STATEMENT, + SUBTRACT_STATEMENT, + MULTIPLY_STATEMENT, + DIVIDE_STATEMENT, + RETURN_STATEMENT, + PRINT_STATEMENT, + NULL_STATEMENT, + IF_STATEMENT, + WHILE_STATEMENT, + EXPRESSION, + RELATION, + DECLARATION, + PRINT_ITEM, + IDENTIFIER_DATA, + NUMBER_DATA, + STRING_DATA +} node_index_t; + +extern char *node_string[26]; +#endif diff --git a/exercises/06/vslc/include/tlhash.h b/exercises/06/vslc/include/tlhash.h new file mode 100644 index 0000000..889e535 --- /dev/null +++ b/exercises/06/vslc/include/tlhash.h @@ -0,0 +1,28 @@ +#ifndef TLHASH_H +#define TLHASH_H +#include +typedef struct el { + void *key, *value; + size_t key_length; + struct el *next; +} tlhash_element_t; + +typedef struct { + size_t n_buckets, size; + tlhash_element_t **buckets; +} tlhash_t; + +int tlhash_init ( tlhash_t *tab, size_t n_buckets ); +int tlhash_finalize ( tlhash_t *tab ); +int tlhash_insert ( tlhash_t *tab, void *key, size_t keylen, void *val ); +int tlhash_lookup ( tlhash_t *tab, void *key, size_t keylen, void **val ); +int tlhash_remove ( tlhash_t *tab, void *key, size_t key_length ); +size_t tlhash_size ( tlhash_t *tab ); +void tlhash_keys ( tlhash_t *tab, void **keys ); +void tlhash_values ( tlhash_t *tab, void **values ); + +#define TLHASH_SUCCESS 0 /* Success */ +#define TLHASH_ENOMEM 1 /* No memory available */ +#define TLHASH_ENOENT 2 /* No such table entry */ +#define TLHASH_EEXIST 3 /* Table entry already exists */ +#endif diff --git a/exercises/06/vslc/include/vslc.h b/exercises/06/vslc/include/vslc.h new file mode 100644 index 0000000..1e4d9aa --- /dev/null +++ b/exercises/06/vslc/include/vslc.h @@ -0,0 +1,50 @@ +#ifndef VSLC_H +#define VSLC_H +#include +#include +#include +#include +#include +#include + +// Prototypes for the hash table functions +#include "tlhash.h" + +// Numbers and names for the types of syntax tree nodes +#include "nodetypes.h" + +// Definition of the tree node type +#include "ir.h" + +// Token definitions and other things from bison, needs def. of node type +#include "y.tab.h" + +/* This is generated from the bison grammar, calls on the flex specification */ +int yyerror ( const char *error ); + +/* These are defined in the parser generated by bison */ +extern int yylineno; +extern int yylex ( void ); +extern char yytext[]; + +/* Global state */ +extern node_t *root; + +// Moving global defs to global header + +extern tlhash_t *global_names; // Defined in ir.c, used by generator.c +extern char **string_list; // Defined in ir.c, used by generator.c +extern size_t stringc; // Defined in ir.c, used by generator.c + +/* Global routines, called from main in vslc.c */ +void simplify_syntax_tree ( void ); +void print_syntax_tree ( void ); +void destroy_syntax_tree ( void ); + +void create_symbol_table ( void ); +void print_symbol_table ( void ); +void destroy_symbol_table ( void ); + +void generate_program ( void ); + +#endif diff --git a/exercises/06/vslc/src/generator.c b/exercises/06/vslc/src/generator.c new file mode 100644 index 0000000..ec87fb4 --- /dev/null +++ b/exercises/06/vslc/src/generator.c @@ -0,0 +1,454 @@ +#include + +void generate_stringtable ( void ); +void generate_global_variables ( void ); +void generate_function ( symbol_t *function ); + +static void generate_node ( node_t *node ); +void generate_main ( symbol_t *first ); +static void generate_function_call ( node_t *call ); + +#define MIN(a,b) (((a)<(b)) ? (a):(b)) + +static const char *record[6] = { + "%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9" +}; + +static symbol_t *current_function = NULL; + + +void +generate_program ( void ) +{ + + size_t n_globals = tlhash_size(global_names); + symbol_t *global_list[n_globals]; + tlhash_values ( global_names, (void **)&global_list ); + + symbol_t *first_function; + for ( size_t i=0; itype == SYM_FUNCTION ) + { + // Allows the use of main as name to override entry point + if (!strcmp(global_list[i]->name, "main")) { + first_function = global_list[i]; + break; + } + else if (global_list[i]->seq == 0) { + first_function = global_list[i]; + } + } + + generate_stringtable(); + generate_global_variables(); + generate_main ( first_function ); + for ( size_t i=0; itype == SYM_FUNCTION ) + generate_function ( global_list[i] ); + +} + + +void +generate_stringtable ( void ) +{ + puts ( ".section .rodata" ); + puts ( ".intout: .string \"\%ld \"" ); + puts ( ".strout: .string \"\%s \"" ); + puts ( ".errout: .string \"Wrong number of arguments\"" ); + for ( size_t s=0; stype == SYM_GLOBAL_VAR ) + printf ( "._%s: .zero 8\n", syms[n]->name ); + } +} + + +void +generate_main ( symbol_t *first ) +{ + puts ( ".globl main" ); + puts ( ".section .text" ); + puts ( "main:" ); + puts ( "\tpushq %rbp" ); + puts ( "\tmovq %rsp, %rbp" ); + + printf ( "\tsubq\t$1,%%rdi\n" ); + printf ( "\tcmpq\t$%zu,%%rdi\n", first->nparms ); + printf ( "\tjne\tABORT\n" ); + printf ( "\tcmpq\t$0,%%rdi\n" ); + printf ( "\tjz\tSKIP_ARGS\n" ); + + printf ( "\tmovq\t%%rdi,%%rcx\n" ); + printf ( "\taddq $%zu, %%rsi\n", 8*first->nparms ); + printf ( "PARSE_ARGV:\n" ); + printf ( "\tpushq %%rcx\n" ); + printf ( "\tpushq %%rsi\n" ); + + printf ( "\tmovq\t(%%rsi),%%rdi\n" ); + printf ( "\tmovq\t$0,%%rsi\n" ); + printf ( "\tmovq\t$10,%%rdx\n" ); + printf ( "\tcall\tstrtol\n" ); + + /* Now a new argument is an integer in rax */ + + printf ( "\tpopq %%rsi\n" ); + printf ( "\tpopq %%rcx\n" ); + printf ( "\tpushq %%rax\n" ); + printf ( "\tsubq $8, %%rsi\n" ); + printf ( "\tloop PARSE_ARGV\n" ); + + /* Now the arguments are in order on stack */ + for ( int arg=0; argnparms); arg++ ) + printf ( "\tpopq\t%s\n", record[arg] ); + + printf ( "SKIP_ARGS:\n" ); + printf ( "\tcall\t_%s\n", first->name ); + printf ( "\tjmp\tEND\n" ); + printf ( "ABORT:\n" ); + printf ( "\tmovq\t$.errout, %%rdi\n" ); + printf ( "\tcall puts\n" ); + + printf ( "END:\n" ); + puts ( "\tmovq %rax, %rdi" ); + puts ( "\tcall exit" ); + +} + + +static void +generate_identifier ( node_t *ident ) +{ + symbol_t *symbol = ident->entry; + int64_t argument_offset; + switch ( symbol->type ) + { + case SYM_GLOBAL_VAR: + /* Global variables called by name */ + printf ( "._%s", symbol->name ); + break; + case SYM_PARAMETER: + if ( symbol->seq > 5 ) + /* Extra parameters pushed in decreasing order */ + printf ( "%ld(%%rbp)", 8+8*(symbol->seq-5) ); + else + /* First six parameters directly after base poiter */ + printf ( "%ld(%%rbp)", -8*(symbol->seq+1) ); + break; + case SYM_LOCAL_VAR: + /* Local variables places after parameters in stack */ + argument_offset = -8*MIN(6,current_function->nparms); + printf ( "%ld(%%rbp)", -8*(symbol->seq+1) + argument_offset ); + break; + } +} + + +static void +generate_expression ( node_t *expr ) +{ + if ( expr->type == IDENTIFIER_DATA ) + { + printf ( "\tmovq\t" ); + generate_identifier ( expr ); + printf ( ", %%rax\n" ); + } + else if ( expr->type == NUMBER_DATA ) + { + printf ( "\tmovq\t$%ld, %%rax\n", *(int64_t *)expr->data ); + } + else if ( expr->n_children == 1 ) + { + switch ( *((char*)(expr->data)) ) + { + case '-': + generate_expression ( expr->children[0] ); + printf ( "\tnegq\t%%rax\n" ); + break; + case '~': + generate_expression ( expr->children[0] ); + printf ( "\tnotq\t%%rax\n" ); + break; + } + } + else if ( expr->n_children == 2 ) + { + if ( expr->data != NULL ) + { + switch ( *((char *)expr->data) ) + { + case '+': + generate_expression ( expr->children[0] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[1] ); + printf ( "\taddq\t%%rax, (%%rsp)\n" ); + printf ( "\tpopq\t%%rax\n" ); + break; + case '-': + generate_expression ( expr->children[0] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[1] ); + printf ( "\tsubq\t%%rax, (%%rsp)\n" ); + printf ( "\tpopq\t%%rax\n" ); + break; + case '*': + printf ( "\tpushq\t%%rdx\n" ); + generate_expression ( expr->children[1] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[0] ); + printf ( "\tmulq\t(%%rsp)\n" ); + printf ( "\tpopq\t%%rdx\n" ); + printf ( "\tpopq\t%%rdx\n" ); + break; + case '/': + printf ( "\tpushq\t%%rdx\n" ); + generate_expression ( expr->children[1] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[0] ); + printf ( "\tcqo\n" ); + printf ( "\tidivq\t(%%rsp)\n" ); + printf ( "\tpopq\t%%rdx\n" ); + printf ( "\tpopq\t%%rdx\n" ); + break; + case '|': + generate_expression ( expr->children[0] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[1] ); + printf ( "\torq\t%%rax, (%%rsp)\n" ); + printf ( "\tpopq\t%%rax\n" ); + break; + case '^': + generate_expression ( expr->children[0] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[1] ); + printf ( "\txorq\t%%rax, (%%rsp)\n" ); + printf ( "\tpopq\t%%rax\n" ); + break; + case '&': + generate_expression ( expr->children[0] ); + printf ( "\tpushq\t%%rax\n" ); + generate_expression ( expr->children[1] ); + printf ( "\tandq\t%%rax, (%%rsp)\n" ); + printf ( "\tpopq\t%%rax\n" ); + break; + } + } else { + generate_function_call ( expr ); + } + } +} + + +static void +generate_function_call ( node_t *call ) +{ + /* Check function call */ + size_t n_arguments = 0; + if ( call->children[1] != NULL ) + n_arguments = call->children[1]->n_children; + symbol_t *function = call->children[0]->entry; + if ( n_arguments != function->nparms ) + { + fprintf ( stderr, + "Function %s has %zu parameters, called with %zu arguments\n", + (char *) call->children[0]->data, + (size_t) call->children[0]->entry->nparms, + n_arguments + ); + exit ( EXIT_FAILURE ); + } + + /* Generate function call: */ + + /* Push all the arguments */ + node_t *arglist = call->children[1]; + if ( arglist != NULL ) + { + for ( size_t p=arglist->n_children; p>0; p-- ) + { + generate_expression ( arglist->children[(p-1)] ); + if ( (p-1)>5 ) + printf ( "\tpushq\t%%rax\n" ); + else + printf ( "\tmovq\t%%rax, %s\n", record[(p-1)] ); + } + } + /* Call the function */ + printf ( "\tcall _%s\n", (char *)call->children[0]->data ); +} + + +static void +generate_assignment_statement ( node_t *statement ) +{ + switch ( statement->type ) + { + case ASSIGNMENT_STATEMENT: + generate_expression ( statement->children[1] ); + printf ( "\tmovq\t%%rax, " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + break; + case ADD_STATEMENT: + generate_expression ( statement->children[1] ); + printf ( "\taddq\t%%rax, " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + break; + case SUBTRACT_STATEMENT: + generate_expression ( statement->children[1] ); + printf ( "\tsubq\t%%rax, " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + break; + case MULTIPLY_STATEMENT: + generate_expression ( statement->children[1] ); + printf ( "\tmulq\t " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + printf ( "\tmovq\t%%rax, " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + break; + case DIVIDE_STATEMENT: + generate_expression ( statement->children[1] ); + printf ( "\txchgq\t%%rax, " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + printf ( "\tcqo\n" ); + printf ( "\tidivq\t" ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + printf ( "\txchgq\t%%rax, " ); + generate_identifier ( statement->children[0] ); + printf ( "\n" ); + break; + } +} + + +static void +generate_print_statement ( node_t *statement ) +{ + for ( size_t i=0; in_children; i++ ) + { + node_t *item = statement->children[i]; + switch ( item->type ) + { + case STRING_DATA: + printf ( "\tmovq\t$.STR%zu, %%rsi\n", *((size_t *)item->data) ); + printf ( "\tmovq\t$.strout, %%rdi\n" ); + break; + case NUMBER_DATA: + printf ("\tmovq\t$%ld, %%rsi\n", *((int64_t *)item->data) ); + printf ( "\tmovq\t$.intout, %%rdi\n" ); + break; + case IDENTIFIER_DATA: + printf ( "\tmovq\t" ); + generate_identifier ( item ); + printf ( ", %%rsi\n" ); + printf ( "\tmovq\t$.intout, %%rdi\n" ); + break; + case EXPRESSION: + generate_expression ( item ); + printf ( "\tmovq\t%%rax, %%rsi\n" ); + printf ( "\tmovq\t$.intout, %%rdi\n" ); + break; + } + puts ( "\tmovq\t$0, %rax\n" // Clear rax to indicate not to use SSE instructions + "\tcall\tprintf" ); + } + printf ( "\tmovq\t$0x0A, %%rdi\n" ); // Finish statement by inserting a newline + puts ( "\tcall\tputchar" ); +} + + +static void +generate_if_statement ( node_t *statement ) +{ + // TODO: Handle if statement + // statement->nodetype = IF_STATEMENT +} + + +static void +generate_while_statement ( node_t *statement ) +{ + // TODO: Handle while statement + // statement->nodetype = WHILE_STATEMENT +} + + +static void +generate_node ( node_t *node ) +{ + switch (node->type) + { + case PRINT_STATEMENT: + generate_print_statement ( node ); + break; + case ASSIGNMENT_STATEMENT: + case ADD_STATEMENT: + case SUBTRACT_STATEMENT: + case MULTIPLY_STATEMENT: + case DIVIDE_STATEMENT: + generate_assignment_statement ( node ); + break; + case RETURN_STATEMENT: + generate_expression ( node->children[0] ); + printf ( "\tleave\n" ); + printf ( "\tret\n" ); + break; + case IF_STATEMENT: + // TODO: Implement + break; + case WHILE_STATEMENT: + // TODO: Implement + break; + case NULL_STATEMENT: + // TODO: Implement + break; + default: + for ( size_t i=0; in_children; i++ ) + generate_node ( node->children[i] ); + break; + } +} + + +void +generate_function ( symbol_t *function ) +{ + current_function = function; + printf ( "_%s:\n", function->name ); + puts ( "\tpushq %rbp" ); + puts ( "\tmovq %rsp, %rbp" ); + + /* Save arguments in local stack frame */ + for ( size_t arg=1; arg<=MIN(6,function->nparms); arg++ ) + printf ( "\tpushq\t%s\n", record[arg-1] ); + /* Make space for locals in local stack frame */ + size_t local_vars = tlhash_size(function->locals) - function->nparms; + if ( local_vars > 0 ) + printf ( "\tsubq $%zu, %%rsp\n", 8*local_vars ); + if ( (tlhash_size(function->locals)&1) == 1 ) + puts ( "\tpushq\t$0 /* Stack padding for 16-byte alignment */" ); + generate_node ( function->node ); + printf( "\tmovq\t%%rbp, %%rsp\n" // movq %rbp, %rsp // restore stack pointer + "\tmovq\t$0, %%rax\n" // movq $0, %rax // return 0 if nothing else + "\tpopq\t%%rbp\n" // popq %rbp // restore base pointer + "\tret\n"); // ret + current_function = NULL; +} diff --git a/exercises/06/vslc/src/ir.c b/exercises/06/vslc/src/ir.c new file mode 100644 index 0000000..ae5ea61 --- /dev/null +++ b/exercises/06/vslc/src/ir.c @@ -0,0 +1,336 @@ +#include + +// Externally visible, for the generator +extern tlhash_t *global_names; +extern char **string_list; +extern size_t n_string_list, stringc; + + +// Implementation choices, only relevant internally +static void find_globals ( void ); +static void bind_names ( symbol_t *function, node_t *root ); +static void print_symbols ( tlhash_t *table ); +static void destroy_symtab ( void ); + +// Internal details of name resolution +static size_t n_scopes = 1, scope_depth = 0; +static tlhash_t **scopes = NULL; + +/* External interface */ + +void +create_symbol_table ( void ) +{ + find_globals(); + size_t n_globals = tlhash_size ( global_names ); + symbol_t *global_list[n_globals]; + tlhash_values ( global_names, (void **)&global_list ); + for ( size_t i=0; itype == SYM_FUNCTION ) + bind_names ( global_list[i], global_list[i]->node ); +} + + +void +print_symbol_table ( void ) +{ + print_symbols ( global_names ); +} + + +void +destroy_symbol_table ( void ) +{ + destroy_symtab(); +} + +/* Internal matters */ + + +static void +print_symbols ( tlhash_t *table ) +{ + if ( table == NULL ) + return; + size_t n_entries = tlhash_size(table); + symbol_t *entry_list[n_entries]; + tlhash_values ( table, (void **)&entry_list ); + for ( size_t e=0; etype ) + { + case SYM_FUNCTION: + fprintf ( stderr, "function: %s\n", entry_list[e]->name ); + if ( entry_list[e]->type == SYM_FUNCTION ) + print_symbols ( entry_list[e]->locals ); + break; + case SYM_GLOBAL_VAR: + fprintf ( stderr, "global var: %s\n", entry_list[e]->name ); + break; + case SYM_PARAMETER: + fprintf ( stderr, "parameter: %s\n", entry_list[e]->name ); + break; + case SYM_LOCAL_VAR: + fprintf ( stderr, "local var: %s\n", entry_list[e]->name ); + break; + default: + /* This should never happen if all symbols have correct type */ + fprintf ( stderr, + "** Unknown symbol: %s\n", entry_list[e]->name + ); + break; + } + } +} + + +static void +add_global ( symbol_t *symbol ) +{ + tlhash_insert ( + global_names, symbol->name, strlen(symbol->name), symbol + ); +} + + +static void +find_globals ( void ) +{ + global_names = malloc ( sizeof(tlhash_t) ); + tlhash_init ( global_names, 32 ); + string_list = malloc ( n_string_list * sizeof(char * ) ); + size_t n_functions = 0; + + node_t *global_list = root->children[0]; + for ( uint64_t g=0; gn_children; g++ ) + { + node_t *global = global_list->children[g], *namelist; + symbol_t *symbol; + switch ( global->type ) + { + case FUNCTION: + symbol = malloc ( sizeof(symbol_t) ); + *symbol = (symbol_t) { + .type = SYM_FUNCTION, + .name = global->children[0]->data, + .node = global->children[2], + .seq = n_functions, + .nparms = 0, + .locals = malloc ( sizeof(tlhash_t) ) + }; + n_functions++; + + tlhash_init ( symbol->locals, 32 ); + if ( global->children[1] != NULL ) + { + symbol->nparms = global->children[1]->n_children; + for ( int p=0; pnparms; p++ ) + { + node_t *param = global->children[1]->children[p]; + symbol_t *psym = malloc ( sizeof(symbol_t) ); + *psym = (symbol_t) { + .type = SYM_PARAMETER, + .name = param->data, + .node = NULL, + .seq = p, + .nparms = 0, + .locals = NULL + }; + tlhash_insert ( + symbol->locals, psym->name, strlen(psym->name), psym + ); + } + } + add_global ( symbol ); + break; + case DECLARATION: + namelist = global->children[0]; + for ( uint64_t d=0; dn_children; d++ ) + { + symbol = malloc ( sizeof(symbol_t) ); + *symbol = (symbol_t) { + .type = SYM_GLOBAL_VAR, + .name = namelist->children[d]->data, + .node = NULL, + .seq = 0, + .nparms = 0, + .locals = NULL + }; + add_global(symbol); + } + break; + } + } +} + + +static void +push_scope ( void ) +{ + if ( scopes == NULL ) + scopes = malloc ( n_scopes * sizeof(tlhash_t *) ); + tlhash_t *new_scope = malloc ( sizeof(tlhash_t) ); + tlhash_init ( new_scope, 32 ); + scopes[scope_depth] = new_scope; + + scope_depth += 1; + if ( scope_depth >= n_scopes ) + { + n_scopes *= 2; + scopes = realloc ( scopes, n_scopes*sizeof(tlhash_t **) ); + } + +} + + +static void +add_local ( symbol_t *local ) +{ + tlhash_insert ( + scopes[scope_depth-1],local->name,strlen(local->name),local + ); +} + + +static symbol_t * +lookup_local ( char *name ) +{ + symbol_t *result = NULL; + size_t depth = scope_depth; + while ( result == NULL && depth > 0 ) + { + depth -= 1; + tlhash_lookup ( scopes[depth], name, strlen(name), (void **)&result ); + } + return result; +} + + +static void +pop_scope ( void ) +{ + scope_depth -= 1; + tlhash_finalize ( scopes[scope_depth] ); + free ( scopes[scope_depth] ); + scopes[scope_depth] = NULL; +} + + +static void +add_string ( node_t *string ) +{ + string_list[stringc] = string->data; + string->data = malloc ( sizeof(size_t) ); + *((size_t *)string->data) = stringc; + stringc++; + if ( stringc >= n_string_list ) + { + n_string_list *= 2; + string_list = realloc ( string_list, n_string_list * sizeof(char *) ); + } + +} + + +static void +bind_names ( symbol_t *function, node_t *root ) +{ + if ( root == NULL ) + return; + else switch ( root->type ) + { + node_t *namelist; + symbol_t *entry; + + case BLOCK: + push_scope(); + for ( size_t c=0; cn_children; c++ ) + bind_names ( function, root->children[c] ); + pop_scope(); + break; + + case DECLARATION: + namelist = root->children[0]; + for ( uint64_t d=0; dn_children; d++ ) + { + node_t *varname = namelist->children[d]; + size_t local_num = + tlhash_size(function->locals) - function->nparms; + symbol_t *symbol = malloc ( sizeof(symbol_t) ); + *symbol = (symbol_t) { + .type = SYM_LOCAL_VAR, + .name = varname->data, + .node = NULL, + .seq = local_num, + .nparms = 0, + .locals = NULL + }; + tlhash_insert ( + function->locals, &local_num, sizeof(size_t), symbol + ); + add_local ( symbol ); + } + break; + + case IDENTIFIER_DATA: + entry = lookup_local ( root->data ); + if ( entry == NULL ) + tlhash_lookup ( + function->locals, root->data, + strlen(root->data), (void**)&entry + ); + if ( entry == NULL ) + tlhash_lookup ( + global_names,root->data,strlen(root->data),(void**)&entry + ); + if ( entry == NULL ) + { + fprintf ( stderr, "Identifier '%s' does not exist in scope\n", + (char *)root->data + ); + exit ( EXIT_FAILURE ); + } + root->entry = entry; + break; + + case STRING_DATA: + add_string ( root ); + break; + + default: + for ( size_t c=0; cn_children; c++ ) + bind_names ( function, root->children[c] ); + break; + } +} + + +void +destroy_symtab ( void ) +{ + for ( size_t i=0; ilocals != NULL ) + { + size_t n_locals = tlhash_size ( glob->locals ); + symbol_t *locals[n_locals]; + tlhash_values ( glob->locals, (void **)&locals ); + for ( size_t l=0; llocals ); + free ( glob->locals ); + } + free ( glob ); + } + tlhash_finalize ( global_names ); + free ( global_names ); + free ( scopes ); +} diff --git a/exercises/06/vslc/src/nodetypes.c b/exercises/06/vslc/src/nodetypes.c new file mode 100644 index 0000000..dd90315 --- /dev/null +++ b/exercises/06/vslc/src/nodetypes.c @@ -0,0 +1,34 @@ +#define STRING(x) #x +char *node_string[30] = { + STRING(PROGRAM), + STRING(GLOBAL_LIST), + STRING(GLOBAL), + STRING(STATEMENT_LIST), + STRING(PRINT_LIST), + STRING(EXPRESSION_LIST), + STRING(VARIABLE_LIST), + STRING(ARGUMENT_LIST), + STRING(PARAMETER_LIST), + STRING(DECLARATION_LIST), + STRING(FUNCTION), + STRING(STATEMENT), + STRING(BLOCK), + STRING(ASSIGNMENT_STATEMENT), + STRING(ADD_STATEMENT), + STRING(SUBTRACT_STATEMENT), + STRING(MULTIPLY_STATEMENT), + STRING(DIVIDE_STATEMENT), + STRING(RETURN_STATEMENT), + STRING(PRINT_STATEMENT), + STRING(NULL_STATEMENT), + STRING(IF_STATEMENT), + STRING(WHILE_STATEMENT), + STRING(EXPRESSION), + STRING(RELATION), + STRING(DECLARATION), + STRING(PRINT_ITEM), + STRING(IDENTIFIER_DATA), + STRING(NUMBER_DATA), + STRING(STRING_DATA) +}; +#undef STRING diff --git a/exercises/06/vslc/src/parser.y b/exercises/06/vslc/src/parser.y new file mode 100644 index 0000000..f1abcc8 --- /dev/null +++ b/exercises/06/vslc/src/parser.y @@ -0,0 +1,181 @@ +%{ +#include + +#define N0C(n,t,d) do { \ + node_init ( n = malloc(sizeof(node_t)), t, d, 0 ); \ +} while ( false ) +#define N1C(n,t,d,a) do { \ + node_init ( n = malloc(sizeof(node_t)), t, d, 1, a ); \ +} while ( false ) +#define N2C(n,t,d,a,b) do { \ + node_init ( n = malloc(sizeof(node_t)), t, d, 2, a, b ); \ +} while ( false ) +#define N3C(n,t,d,a,b,c) do { \ + node_init ( n = malloc(sizeof(node_t)), t, d, 3, a, b, c ); \ +} while ( false ) + +%} + +%left '|' +%left '^' +%left '&' +%left '+' '-' +%left '*' '/' +%nonassoc UMINUS +%right '~' +%expect 1 + +%token FUNC PRINT RETURN CONTINUE IF THEN ELSE WHILE DO OPENBLOCK CLOSEBLOCK +%token VAR NUMBER IDENTIFIER STRING + +%% +program : + global_list { N1C ( root, PROGRAM, NULL, $1 ); } + ; +global_list : + global { N1C ( $$, GLOBAL_LIST, NULL, $1 ); } + | global_list global { N2C ( $$, GLOBAL_LIST, NULL, $1, $2 ); } + ; +global: + function { N1C ( $$, GLOBAL, NULL, $1 ); } + | declaration { N1C ( $$, GLOBAL, NULL, $1 ); } + ; +statement_list : + statement { N1C ( $$, STATEMENT_LIST, NULL, $1 ); } + | statement_list statement { N2C ( $$, STATEMENT_LIST, NULL, $1, $2 ); } + ; +print_list : + print_item { N1C ( $$, PRINT_LIST, NULL, $1 ); } + | print_list ',' print_item { N2C ( $$, PRINT_LIST, NULL, $1, $3 ); } + ; +expression_list : + expression { N1C ( $$, EXPRESSION_LIST, NULL, $1 ); } + | expression_list ',' expression { N2C($$, EXPRESSION_LIST, NULL, $1, $3); } + ; +variable_list : + identifier { N1C ( $$, VARIABLE_LIST, NULL, $1 ); } + | variable_list ',' identifier { N2C ( $$, VARIABLE_LIST, NULL, $1, $3 ); } + ; +argument_list : + expression_list { N1C ( $$, ARGUMENT_LIST, NULL, $1 ); } + | /* epsilon */ { $$ = NULL; } + ; +parameter_list : + variable_list { N1C ( $$, PARAMETER_LIST, NULL, $1 ); } + | /* epsilon */ { $$ = NULL; } + ; +declaration_list : + declaration { N1C ( $$, DECLARATION_LIST, NULL, $1 ); } + | declaration_list declaration { N2C ($$, DECLARATION_LIST, NULL, $1, $2); } + ; +function : + FUNC identifier '(' parameter_list ')' statement + { N3C ( $$, FUNCTION, NULL, $2, $4, $6 ); } + ; +statement : + assignment_statement { N1C ( $$, STATEMENT, NULL, $1 ); } + | return_statement { N1C ( $$, STATEMENT, NULL, $1 ); } + | print_statement { N1C ( $$, STATEMENT, NULL, $1 ); } + | if_statement { N1C ( $$, STATEMENT, NULL, $1 ); } + | while_statement { N1C ( $$, STATEMENT, NULL, $1 ); } + | null_statement { N1C ( $$, STATEMENT, NULL, $1 ); } + | block { N1C ( $$, STATEMENT, NULL, $1 ); } + ; +block : + OPENBLOCK declaration_list statement_list CLOSEBLOCK + { N2C ($$, BLOCK, NULL, $2, $3); } + | OPENBLOCK statement_list CLOSEBLOCK { N1C ($$, BLOCK, NULL, $2 ); } + ; +assignment_statement : + identifier ':' '=' expression + { N2C ( $$, ASSIGNMENT_STATEMENT, NULL, $1, $4 ); } + | identifier '+' '=' expression + { N2C ( $$, ADD_STATEMENT, NULL, $1, $4 ); } + | identifier '-' '=' expression + { N2C ( $$, SUBTRACT_STATEMENT, NULL, $1, $4 ); } + | identifier '*' '=' expression + { N2C ( $$, MULTIPLY_STATEMENT, NULL, $1, $4 ); } + | identifier '/' '=' expression + { N2C ( $$, DIVIDE_STATEMENT, NULL, $1, $4 ); } + ; +return_statement : + RETURN expression + { N1C ( $$, RETURN_STATEMENT, NULL, $2 ); } + ; +print_statement : + PRINT print_list + { N1C ( $$, PRINT_STATEMENT, NULL, $2 ); } + ; +null_statement : + CONTINUE + { N0C ( $$, NULL_STATEMENT, NULL ); } + ; +if_statement : + IF relation THEN statement + { N2C ( $$, IF_STATEMENT, NULL, $2, $4 ); } + | IF relation THEN statement ELSE statement + { N3C ( $$, IF_STATEMENT, NULL, $2, $4, $6 ); } + ; +while_statement : + WHILE relation DO statement + { N2C ( $$, WHILE_STATEMENT, NULL, $2, $4 ); } + ; +relation: + expression '=' expression + { N2C ( $$, RELATION, strdup("="), $1, $3 ); } + | expression '<' expression + { N2C ( $$, RELATION, strdup("<"), $1, $3 ); } + | expression '>' expression + { N2C ( $$, RELATION, strdup(">"), $1, $3 ); } + ; +expression : + expression '|' expression + { N2C ( $$, EXPRESSION, strdup("|"), $1, $3 ); } + | expression '^' expression + { N2C ( $$, EXPRESSION, strdup("^"), $1, $3 ); } + | expression '&' expression + { N2C ( $$, EXPRESSION, strdup("&"), $1, $3 ); } + | expression '+' expression + { N2C ( $$, EXPRESSION, strdup("+"), $1, $3 ); } + | expression '-' expression + { N2C ( $$, EXPRESSION, strdup("-"), $1, $3 ); } + | expression '*' expression + { N2C ( $$, EXPRESSION, strdup("*"), $1, $3 ); } + | expression '/' expression + { N2C ( $$, EXPRESSION, strdup("/"), $1, $3 ); } + | '-' expression %prec UMINUS + { N1C ( $$, EXPRESSION, strdup("-"), $2 ); } + | '~' expression %prec UMINUS + { N1C ( $$, EXPRESSION, strdup("~"), $2 ); } + | '(' expression ')' { $$ = $2; } + | number { N1C ( $$, EXPRESSION, NULL, $1 ); } + | identifier + { N1C ( $$, EXPRESSION, NULL, $1 ); } + | identifier '(' argument_list ')' + { N2C ( $$, EXPRESSION, NULL, $1, $3 ); } + ; +declaration : + VAR variable_list { N1C ( $$, DECLARATION, NULL, $2 ); } + ; +print_item : + expression + { N1C ( $$, PRINT_ITEM, NULL, $1 ); } + | string + { N1C ( $$, PRINT_ITEM, NULL, $1 ); } + ; +identifier: IDENTIFIER { N0C($$, IDENTIFIER_DATA, strdup(yytext) ); } +number: NUMBER + { + int64_t *value = malloc ( sizeof(int64_t) ); + *value = strtol ( yytext, NULL, 10 ); + N0C($$, NUMBER_DATA, value ); + } +string: STRING { N0C($$, STRING_DATA, strdup(yytext) ); } +%% + +int +yyerror ( const char *error ) +{ + fprintf ( stderr, "%s on line %d\n", error, yylineno ); + exit ( EXIT_FAILURE ); +} diff --git a/exercises/06/vslc/src/scanner.l b/exercises/06/vslc/src/scanner.l new file mode 100644 index 0000000..ffa93ff --- /dev/null +++ b/exercises/06/vslc/src/scanner.l @@ -0,0 +1,30 @@ +%{ +#include +%} +%option noyywrap +%option array +%option yylineno + +WHITESPACE [\ \t\v\r\n] +COMMENT \/\/[^\n]+ +QUOTED \"([^\"\n]|\\\")*\" +%% +{WHITESPACE}+ { /* Eliminate whitespace */ } +{COMMENT} { /* Eliminate comments */ } +func { return FUNC; } +print { return PRINT; } +return { return RETURN; } +continue { return CONTINUE; } +if { return IF; } +then { return THEN; } +else { return ELSE; } +while { return WHILE; } +do { return DO; } +begin { return OPENBLOCK; } +end { return CLOSEBLOCK; } +var { return VAR; } +[0-9]+ { return NUMBER; } +[A-Za-z_][0-9A-Za-z_]* { return IDENTIFIER; } +{QUOTED} { return STRING; } +. { return yytext[0]; } +%% diff --git a/exercises/06/vslc/src/tlhash.c b/exercises/06/vslc/src/tlhash.c new file mode 100644 index 0000000..7936ece --- /dev/null +++ b/exercises/06/vslc/src/tlhash.c @@ -0,0 +1,281 @@ +#include +#include +#include + +#include + +/********************************************************************* + * Declarations of the utility functions for obtaining hashes, found * + * at the bottom of this file. * + *********************************************************************/ + +/* Little-endian, for x86-s */ +#define CRC32_IEEE802_3 (0xedb88320) +static const uint32_t crc32_ieee802_3[256]; +static const uint32_t *crc_table = (uint32_t *)crc32_ieee802_3; + +static uint32_t crc32 ( void *input, size_t length ); + + +/******************************** + * External interface functions * + ********************************/ + + +/* Initializer + * Returns + * ENOMEM - if allocation of table entries fails. + */ +int +tlhash_init ( tlhash_t *tab, size_t n_buckets ) +{ + size_t i; + tab->n_buckets = n_buckets; + tab->size = 0; + tab->buckets = (tlhash_element_t **) calloc ( + n_buckets, sizeof(tlhash_element_t *) + ); + if ( tab->buckets == NULL ) + return TLHASH_ENOMEM; + for ( i=0; ibuckets[i] = NULL; + return TLHASH_SUCCESS; +} + + +/* Finalizer + * Returns + * ENOENT - if there is no table to free. + */ +int +tlhash_finalize ( tlhash_t *tab ) +{ + size_t i; + if ( tab == NULL ) + return TLHASH_ENOENT; + for ( i=0; in_buckets; i++ ) + { + tlhash_element_t *element = tab->buckets[i], *next; + while ( element != NULL ) + { + next = element->next; + free ( element->key ); + free ( element ); + tab->size -= 1; + element = next; + } + } + + free ( tab->buckets ); + return TLHASH_SUCCESS; +} + + +/* Insert - find hash value, modulate over buckets, append to linked list + * Returns + * EEXIST - if an element is already indexed by this key + * ENOMEM - if allocation of element or key copy fails + */ +int +tlhash_insert ( + tlhash_t *tab, void *key, size_t key_length, void *value +) +{ + void *test_entry; + int test = tlhash_lookup ( tab, key, key_length, &test_entry ); + if ( test != TLHASH_ENOENT ) + return TLHASH_EEXIST; + uint32_t hash = crc32 ( key, key_length ); + size_t bucket = hash % tab->n_buckets; + tlhash_element_t *element = malloc ( sizeof(tlhash_element_t) ); + if ( element == NULL ) + return TLHASH_ENOMEM; + void *key_copy = malloc ( key_length ); + if ( key_copy == NULL ) + { + free ( element ); + return TLHASH_ENOMEM; + } + memcpy ( key_copy, key, key_length ); + element->key = key_copy; + element->key_length = key_length; + element->value = value; + element->next = tab->buckets[bucket]; + tab->buckets[bucket] = element; + tab->size += 1; + return TLHASH_SUCCESS; +} + + +/* Lookup - find hash value, modulate over buckets, search linked list + * Returns + * ENOENT - if no element is indexed by this key + */ +int +tlhash_lookup ( + tlhash_t *tab, void *key, size_t key_length, void **value +) +{ + uint32_t hash = crc32 ( key, key_length ); + size_t bucket = hash % tab->n_buckets; + tlhash_element_t *el = tab->buckets[bucket]; + + *value = NULL; + while ( el != NULL ) + { + if ( el->key_length == key_length && ! memcmp(el->key,key,key_length) ) + { + *value = el->value; + break; + } + el = el->next; + } + if ( el != NULL ) + return TLHASH_SUCCESS; + else + return TLHASH_ENOENT; +} + + +/* Removal - find hash value, modulate over buckets, delete entry + * Returns + * ENOENT - no such element to remove was found. + */ +int +tlhash_remove ( tlhash_t *tab, void *key, size_t key_length ) +{ + uint32_t hash = crc32 ( key, key_length ); + size_t bucket = hash % tab->n_buckets; + tlhash_element_t *el = tab->buckets[bucket], *prev = NULL; + + while ( el != NULL ) + { + if ( el->key_length == key_length && ! memcmp(el->key,key,key_length) ) + { + /* We have a match. */ + if ( prev != NULL ) /* Remove from list if it's not the head */ + prev->next = (void *)el->next; + else /* Substitute it if it IS the head */ + tab->buckets[bucket] = el->next; + /* Free the container and key copy allocated by this lib */ + free ( el->key ); + free ( el ); + break; + } + prev = el; + el = el->next; + } + if ( el == NULL ) + return TLHASH_ENOENT; + else + { + tab->size -= 1; + return TLHASH_SUCCESS; + } +} + + +size_t +tlhash_size ( tlhash_t *tab ) +{ + return tab->size; +} + + +void +tlhash_keys ( tlhash_t *tab, void **keys ) +{ + size_t b, i = 0; + for ( b=0; bn_buckets; b++ ) + { + tlhash_element_t *el = tab->buckets[b]; + while ( el != NULL ) + { + keys[i] = el->key; + i += 1; + el = el->next; + } + } +} + + +void +tlhash_values ( tlhash_t *tab, void **values ) +{ + size_t b, i = 0; + for ( b=0; bn_buckets; b++ ) + { + tlhash_element_t *el = tab->buckets[b]; + while ( el != NULL ) + { + values[i] = el->value; + i += 1; + el = el->next; + } + } +} + + +/*************************************** + * Hashing function and IEEE data blob * + ***************************************/ + + +static uint32_t +crc32 ( void *input, size_t length ) +{ + const uint8_t *data = (uint8_t *)input; + size_t i = 0; + uint32_t hash = 0xFFFFFFFF; + for ( i = 0; i>8) ^ crc_table [ data[i] ^ (uint8_t)hash ]; + return (hash^0xFFFFFFFF); +} + + +static const uint32_t +crc32_ieee802_3[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; diff --git a/exercises/06/vslc/src/tree.c b/exercises/06/vslc/src/tree.c new file mode 100644 index 0000000..ef7658c --- /dev/null +++ b/exercises/06/vslc/src/tree.c @@ -0,0 +1,232 @@ +#include + +static void node_print ( node_t *root, int nesting ); +static void simplify_tree ( node_t **simplified, node_t *root ); +static void node_finalize ( node_t *discard ); + +typedef struct stem_t *stem; +struct stem_t { const char *str; stem next; }; +static void +tree_print(node_t* root, stem head); + + +static void destroy_subtree ( node_t *discard ); + + +/* External interface */ +void +destroy_syntax_tree ( void ) +{ + destroy_subtree ( root ); +} + + +void +simplify_syntax_tree ( void ) +{ + simplify_tree ( &root, root ); +} + + +extern bool new_print_style; +void +print_syntax_tree ( void ) +{ + if (new_print_style) + tree_print ( root, 0 ); + // Old tree printing + else + node_print ( root, 0 ); +} + + +void +node_init (node_t *nd, node_index_t type, void *data, uint64_t n_children, ...) +{ + va_list child_list; + *nd = (node_t) { + .type = type, + .data = data, + .entry = NULL, + .n_children = n_children, + .children = (node_t **) malloc ( n_children * sizeof(node_t *) ) + }; + va_start ( child_list, n_children ); + for ( uint64_t i=0; ichildren[i] = va_arg ( child_list, node_t * ); + va_end ( child_list ); +} + + +static void +tree_print(node_t* root, stem head) +{ + static const char *sdown = " │", *slast = " └", *snone = " "; + struct stem_t col = {0, 0}, *tail; + + // Print stems of branches coming further down + for (tail = head; tail; tail = tail->next) { + if (!tail->next) { + if (!strcmp(sdown, tail->str)) + printf(" ├"); + else + printf("%s", tail->str); + break; + } + printf("%s", tail->str); + } + + if (root == NULL) { + // Secure against null pointers sent as root + printf("─(nil)\n"); + return; + } + printf("─%s", node_string[root->type]); + if ( root->type == IDENTIFIER_DATA || + root->type == STRING_DATA || + root->type == EXPRESSION ) + printf("(%s)", (char *) root->data); + else if (root->type == NUMBER_DATA) + printf("(%ld)", *((int64_t *)root->data)); + putchar('\n'); + + if (!root->n_children) return; + + if (tail && tail->str == slast) + tail->str = snone; + + if (!tail) tail = head = &col; + else tail->next = &col; + + for ( int64_t i=0; i < root->n_children; i++ ) { + col.str = root->n_children - i - 1 ? sdown : slast; + tree_print(root->children[i], head); + } + tail->next = 0; +} + +/* Internal choices */ +static void +node_print ( node_t *root, int nesting ) +{ + if ( root != NULL ) + { + printf ( "%*c%s", nesting, ' ', node_string[root->type] ); + if ( root->type == IDENTIFIER_DATA || + root->type == STRING_DATA || + root->type == EXPRESSION ) + printf ( "(%s)", (char *) root->data ); + else if ( root->type == NUMBER_DATA ) + printf ( "(%ld)", *((int64_t *)root->data) ); + putchar ( '\n' ); + for ( int64_t i=0; in_children; i++ ) + node_print ( root->children[i], nesting+1 ); + } + else + printf ( "%*c%p\n", nesting, ' ', root ); +} + + +static void +node_finalize ( node_t *discard ) +{ + if ( discard != NULL ) + { + free ( discard->data ); + free ( discard->children ); + free ( discard ); + } +} + + +static void +destroy_subtree ( node_t *discard ) +{ + if ( discard != NULL ) + { + for ( uint64_t i=0; in_children; i++ ) + destroy_subtree ( discard->children[i] ); + node_finalize ( discard ); + } +} + + +static void +simplify_tree ( node_t **simplified, node_t *root ) +{ + if ( root == NULL ) + return; + + /* Simplify subtrees before examining this node */ + for ( uint64_t i=0; in_children; i++ ) + simplify_tree ( &root->children[i], root->children[i] ); + + node_t *discard, *result = root; + switch ( root->type ) + { + /* Structures of purely syntactic function */ + case PARAMETER_LIST: case ARGUMENT_LIST: + case STATEMENT: case PRINT_ITEM: case GLOBAL: + result = root->children[0]; + node_finalize ( root ); + break; + case PRINT_STATEMENT: + result = root->children[0]; + result->type = PRINT_STATEMENT; + node_finalize(root); + /* Flatten lists: + * Take left child, append right child, substitute left for root. + */ + case STATEMENT_LIST: case DECLARATION_LIST: case GLOBAL_LIST: + case PRINT_LIST: case EXPRESSION_LIST: case VARIABLE_LIST: + if ( root->n_children >= 2 ) + { + result = root->children[0]; + result->n_children += 1; + result->children = realloc ( + result->children, result->n_children * sizeof(node_t *) + ); + result->children[result->n_children-1] = root->children[1]; + node_finalize ( root ); + } + break; + case EXPRESSION: + switch ( root->n_children ) + { + case 1: + if ( root->children[0]->type == NUMBER_DATA ) + { + result = root->children[0]; + if ( root->data != NULL ) + *((int64_t *)result->data) *= -1; + node_finalize (root); + } + else if ( root->data == NULL ) + { + result = root->children[0]; + node_finalize (root); + } + break; + case 2: + if ( root->children[0]->type == NUMBER_DATA && + root->children[1]->type == NUMBER_DATA + ) { + result = root->children[0]; + int64_t + *x = result->data, + *y = root->children[1]->data; + switch ( *((char *)root->data) ) + { + case '+': *x += *y; break; + case '-': *x -= *y; break; + case '*': *x *= *y; break; + case '/': *x /= *y; break; + } + node_finalize ( root->children[1] ); + node_finalize ( root ); + } + break; + } + } + *simplified = result; +} diff --git a/exercises/06/vslc/src/vslc.c b/exercises/06/vslc/src/vslc.c new file mode 100644 index 0000000..3d239a4 --- /dev/null +++ b/exercises/06/vslc/src/vslc.c @@ -0,0 +1,82 @@ +#include +#include +#include +#include + + +/* Global state */ + +node_t *root; // Syntax tree +tlhash_t *global_names; // Symbol table +char **string_list; // List of strings in the source +size_t n_string_list = 8; // Initial string list capacity (grow on demand) +size_t stringc = 0; // Initial string count + +/* Command line option parsing for the main function */ +static void options ( int argc, char **argv ); +bool + print_full_tree = false, + print_simplified_tree = false, + print_symbol_table_contents = false, + print_generated_program = true, + new_print_style = true; + + +/* Entry point */ +int +main ( int argc, char **argv ) +{ + options ( argc, argv ); + + yyparse(); // Generated from grammar/bison, constructs syntax tree + + if ( print_full_tree ) + print_syntax_tree (); + simplify_syntax_tree (); // In tree.c + if ( print_simplified_tree ) + print_syntax_tree (); + + create_symbol_table (); // In ir.c + if ( print_symbol_table_contents ) + print_symbol_table(); + + if ( print_generated_program ) + generate_program (); // In generator.c + + + destroy_syntax_tree (); // In tree.c + destroy_symbol_table (); // In ir.c +} + + +static const char *usage = +"Command line options\n" +"\t-h\tOutput this text and halt\n" +"\t-t\tOutput the full syntax tree\n" +"\t-T\tOutput the simplified syntax tree\n" +"\t-s\tOutput the symbol table contents\n" +"\t-q\tQuiet: suppress output from the code generator\n" +"\t-u\tDo not use print style more like the tree command\n"; + + +static void +options ( int argc, char **argv ) +{ + int o; + while ( (o=getopt(argc,argv,"htTsqu")) != -1 ) + { + switch ( o ) + { + case 'h': + printf ( "%s:\n%s", argv[0], usage ); + exit ( EXIT_FAILURE ); + break; + case 't': print_full_tree = true; break; + case 'T': print_simplified_tree = true; break; + case 's': print_symbol_table_contents = true; break; + case 'q': print_generated_program = false; break; + case 'u': new_print_style = false; break; + } + } +} + diff --git a/exercises/06/vslc/vsl_programs/Makefile b/exercises/06/vslc/vsl_programs/Makefile new file mode 100644 index 0000000..57f099e --- /dev/null +++ b/exercises/06/vslc/vsl_programs/Makefile @@ -0,0 +1,45 @@ +VSLC := ../src/vslc +AS := gcc + +# This updated makefile calls the relevant print for each assignment +# The added target compile will attempt to compile all vsl source files, +# for ps5 and ps6. Other programs are mostly not interesting after the their +# targeted assignment. + +# Call `vslc -h` to see the available flags, and call `vslc [flags] < file.vsl` +# to compile a single file. + +PS2_EXAMPLES := $(patsubst ps2-parser/%.vsl, ps2-parser/%.ast, $(wildcard ps2-parser/*.vsl)) +PS3_EXAMPLES := $(patsubst ps3-simplify/%.vsl, ps3-simplify/%.sast, $(wildcard ps3-simplify/*.vsl)) +PS4_EXAMPLES := $(patsubst ps4-symtab/%.vsl, ps4-symtab/%.sym, $(wildcard ps4-symtab/*.vsl)) +PS5_EXAMPLES := $(patsubst ps5-codegen1/%.vsl, ps5-codegen1/%.S, $(wildcard ps5-codegen1/*.vsl)) +PS6_EXAMPLES := $(patsubst ps6-codegen2/%.vsl, ps6-codegen2/%.S, $(wildcard ps6-codegen2/*.vsl)) + +PS5_OBJECTS := $(PS5_EXAMPLES:.S=.bin) +PS6_OBJECTS := $(PS6_EXAMPLES:.S=.bin) +# OBJECTS := $(PS5_OBJECTS) $(PS4_EXAMPLES:.sym=.bin) $(PS3_EXAMPLES:.sast=.bin) $(PS2_EXAMPLES:.ast=.bin) +OBJECTS := $(PS5_OBJECTS) $(PS6_OBJECTS) +all: $(PS2_EXAMPLES) $(PS3_EXAMPLES) $(PS4_EXAMPLES) $(PS5_EXAMPLES) $(PS6_EXAMPLES) +ps2: $(PS2_EXAMPLES) +ps3: $(PS3_EXAMPLES) +ps4: $(PS4_EXAMPLES) +ps5: $(PS5_EXAMPLES) + +%.ast: %.vsl + $(VSLC) -t -q < $^ > $@ 2> $@ +%.sast: %.vsl + $(VSLC) -T -q < $^ > $@ 2> $@ +%.sym: %.vsl + $(VSLC) -s -q < $^ > $@ 2> $@ +%.S: %.vsl + $(VSLC) < $^ > $@ +# This target is only tested on x86-linux +%.bin: %.S + $(AS) -no-pie -o $@ $^ + +ps5-compile: $(PS5_OBJECTS) +ps6-compile: $(PS6_OBJECTS) +compile: $(OBJECTS) + +clean: + -rm -r */*.ast */*.sast */*.sym */*.bin */*.S \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps2-parser/assignments.vsl b/exercises/06/vslc/vsl_programs/ps2-parser/assignments.vsl new file mode 100644 index 0000000..b2f9227 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps2-parser/assignments.vsl @@ -0,0 +1,14 @@ +// checking that comments are ignored + +// This program checks the assignment operators + +func main() +begin + var a + a := 3 + a += 1 + a /= 2 + a *= 32 + a -= 2 + print a +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps2-parser/function_call.vsl b/exercises/06/vslc/vsl_programs/ps2-parser/function_call.vsl new file mode 100644 index 0000000..d5c75a5 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps2-parser/function_call.vsl @@ -0,0 +1,8 @@ +func add(a, b) begin + return a + b +end + +func main() +begin + print add(40, 2) +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps2-parser/helloworld.vsl b/exercises/06/vslc/vsl_programs/ps2-parser/helloworld.vsl new file mode 100644 index 0000000..49219f8 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps2-parser/helloworld.vsl @@ -0,0 +1,3 @@ +func main() begin + print "Hello, World!" +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps2-parser/if_else.vsl b/exercises/06/vslc/vsl_programs/ps2-parser/if_else.vsl new file mode 100644 index 0000000..7d4fbe3 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps2-parser/if_else.vsl @@ -0,0 +1,25 @@ +func main() +begin + var a, b, c, d + c := 1 + a := 3 + b := a + c // 4 + d := a * 100 + 50 + print "a", a + print "b", b + print "c", c + print "d", d + if a = 14 then + print 1, "N", d / 5 + a, "RPR", a, "TERS " + else + print "COMP", c, "L", "ERS " + + print b, "R", a, " " + + if a < b then + if d > 42 then + print b, "W", d, "ME" + else + print "L", b, "M", c + // A dangling else, what could go wrong? +end diff --git a/exercises/06/vslc/vsl_programs/ps2-parser/variables.vsl b/exercises/06/vslc/vsl_programs/ps2-parser/variables.vsl new file mode 100644 index 0000000..abd12ec --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps2-parser/variables.vsl @@ -0,0 +1,19 @@ + +var global_var + +func my_func(param) +begin + var local_var, local_var2 + local_var := 1 +end + +var glob1, glob2 + +func main() +begin + var main_local_var + begin + var main_local_nested_var + main_local_nested_var := main_local_var + end +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps2-parser/while.vsl b/exercises/06/vslc/vsl_programs/ps2-parser/while.vsl new file mode 100644 index 0000000..8d6b024 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps2-parser/while.vsl @@ -0,0 +1,10 @@ +// check parsing of do-while loop + +func main() +begin + var i + i := 2 + while i < 9000 do + i := i * i + print i +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps3-simplify/constants.vsl b/exercises/06/vslc/vsl_programs/ps3-simplify/constants.vsl new file mode 100644 index 0000000..8843eb6 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps3-simplify/constants.vsl @@ -0,0 +1,9 @@ + +func main() begin + var a, b + a := 1 + 2 + 4 + 5 + 6 + 7 + 8 + 9 + b := (10 + 10 * 4) * (2 + 2 * (1 + 1)) / 10 + 2 * 5 + 6 / 3 + + if a = b then + print "The answer is", b +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps3-simplify/lists.vsl b/exercises/06/vslc/vsl_programs/ps3-simplify/lists.vsl new file mode 100644 index 0000000..d18d54d --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps3-simplify/lists.vsl @@ -0,0 +1,16 @@ + +func my_func(a, b, c, d, e, f, g, h) begin + var i, j, k, l, m + + i := a + b + d + + if i = f then begin + print "hmmm" + end +end + +func main() begin + var n, o, p, q, r, s, t, u, v, w + n := 5 + n += my_func(1, 2, 3, 5, 8, 13, 21, 34) +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps4-symtab/globals.vsl b/exercises/06/vslc/vsl_programs/ps4-symtab/globals.vsl new file mode 100644 index 0000000..b6f6be5 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps4-symtab/globals.vsl @@ -0,0 +1,11 @@ +var global_var0, global_var1 + +func my_func(param0, param1) begin + var a + return 0 +end + +func main() begin + var a + print "a string" +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps4-symtab/shadow.vsl b/exercises/06/vslc/vsl_programs/ps4-symtab/shadow.vsl new file mode 100644 index 0000000..7995200 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps4-symtab/shadow.vsl @@ -0,0 +1,24 @@ + +func main() begin + var a, b + a := 1 + begin + var a + a := 2 + b := 40 + begin + var a + a := b + 2 + print a, b + end + print a + begin + var b + b := 38 + a := b + 3 + print a, b + end + print a + end + print b +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps5-codegen1/function_call.vsl b/exercises/06/vslc/vsl_programs/ps5-codegen1/function_call.vsl new file mode 100644 index 0000000..5bd29ec --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps5-codegen1/function_call.vsl @@ -0,0 +1,9 @@ +func add(a, b) begin + print "adding", a, "and", b + return a + b +end + +func main() +begin + print 2 + add(40, 2) + 2 +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps5-codegen1/helloworld.vsl b/exercises/06/vslc/vsl_programs/ps5-codegen1/helloworld.vsl new file mode 100644 index 0000000..49219f8 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps5-codegen1/helloworld.vsl @@ -0,0 +1,3 @@ +func main() begin + print "Hello, World!" +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps5-codegen1/ps5.vsl b/exercises/06/vslc/vsl_programs/ps5-codegen1/ps5.vsl new file mode 100644 index 0000000..55d33aa --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps5-codegen1/ps5.vsl @@ -0,0 +1,30 @@ +// This program tests activation records, function call and return +func funcall () +begin + var x,y,z + x := 5 + y := 10 + print "Calling my_function with parameters", x, y + z := my_function ( x, y ) + print "The returned result is", z + z := my_other_function () + print "The other returned result is", z + return 0 +end + +func my_function ( s, t ) +begin + var u + u := s*s + t*t + print "Parameter s is", s + print "Parameter t is", t + print "The sum of their squares is", u + return u +end + +func my_other_function () +begin + var x + x := 42 + return x +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps5-codegen1/shadow.vsl b/exercises/06/vslc/vsl_programs/ps5-codegen1/shadow.vsl new file mode 100644 index 0000000..7995200 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps5-codegen1/shadow.vsl @@ -0,0 +1,24 @@ + +func main() begin + var a, b + a := 1 + begin + var a + a := 2 + b := 40 + begin + var a + a := b + 2 + print a, b + end + print a + begin + var b + b := 38 + a := b + 3 + print a, b + end + print a + end + print b +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps6-codegen2/euclid.vsl b/exercises/06/vslc/vsl_programs/ps6-codegen2/euclid.vsl new file mode 100644 index 0000000..e7cc91d --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps6-codegen2/euclid.vsl @@ -0,0 +1,20 @@ +func euclid ( a, b ) +begin + if a < 0 then a := -a + if b < 0 then b := -b + if gcd ( a, b ) > 1 then + print "Greatest common divisor of", a, "and", b, "is", gcd ( a, b ) + else + print a, "and", b, "are relative primes" + return 0 +end + +func gcd( a, b ) +begin + var g + if b > 0 then + g := gcd ( b, a - ((a/b)*b) ) + else + g := a + return g +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps6-codegen2/newton.vsl b/exercises/06/vslc/vsl_programs/ps6-codegen2/newton.vsl new file mode 100644 index 0000000..2554332 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps6-codegen2/newton.vsl @@ -0,0 +1,21 @@ +// Approximate square root by the Newton/Raphson method for f(x) = x^2 - n +// f(x) = x^2 - n = 0 +// f'(x) = 2x +// x{n+1} = x{n} - (x^2-n) / 2x + +func newton ( n ) +begin + print "The square root of", n, "is", improve ( n, 1 ) + return 0 +end + +func improve ( n, estimate ) +begin + var next + next := estimate - ( (estimate * estimate - n) / ( 2 * estimate ) ) + if next - estimate = 0 then + // Integer precision converges at smallest int greater than the square + return next-1 + else + return improve ( n, next ) +end \ No newline at end of file diff --git a/exercises/06/vslc/vsl_programs/ps6-codegen2/while_test.vsl b/exercises/06/vslc/vsl_programs/ps6-codegen2/while_test.vsl new file mode 100644 index 0000000..6b1c322 --- /dev/null +++ b/exercises/06/vslc/vsl_programs/ps6-codegen2/while_test.vsl @@ -0,0 +1,23 @@ +// This program is a simple test of while loops, counting down from 19 to 0 +// and skipping 10 (if continue is implemented) + +func while_test () +begin + var a + a := 20 + print a + if a > 0 then print "foobar" + while a > 0 do + begin + if a = 10 then + begin + a -= 1 + print "Skip..." + continue + end + else + a -= 1 + print a + end + return 0 +end \ No newline at end of file