diff --git a/exercises/06/vslc/include/ir.h b/exercises/06/vslc/include/ir.h index 78150e1..66224e9 100644 --- a/exercises/06/vslc/include/ir.h +++ b/exercises/06/vslc/include/ir.h @@ -10,21 +10,42 @@ typedef struct n { struct n **children; } node_t; -// Export the initializer function, it is needed by the parser -void node_init ( - node_t *nd, node_index_t type, void *data, uint64_t n_children, ... +/**Export the initializer function, it is needed by the parser + * @param *nd node to initialize + * @param type type of node (see nodetype.h) + * @param *data associated data. Declared void to allow any type + * @param n_children number of children + * @param ... variable argument list of child nodes (node_t *) + * + * @return Pointer to the initialized node + * */ +node_t* node_init ( + node_t* nd, + node_index_t type, + void* data, + uint64_t n_children, + ... ); typedef enum { - SYM_GLOBAL_VAR, SYM_FUNCTION, SYM_PARAMETER, SYM_LOCAL_VAR + SYM_GLOBAL_VAR, + SYM_FUNCTION, + SYM_PARAMETER, + SYM_LOCAL_VAR } symtype_t; typedef struct s { - char *name; - symtype_t type; - node_t *node; - size_t seq; - size_t nparms; - tlhash_t *locals; + char* name; + symtype_t type; + node_t* node; + size_t seq; + size_t nparms; + tlhash_t* locals; } symbol_t; #endif + +#define GLOBAL_BUCKET_SIZE 32 +#define LOCAL_BUCKET_SIZE 16 + +#define DEFAULT_STRING_LIST_SIZE 8 +#define DEFAULT_NO_SCOPES 1 diff --git a/exercises/06/vslc/include/vslc.h b/exercises/06/vslc/include/vslc.h index 1e4d9aa..8907763 100644 --- a/exercises/06/vslc/include/vslc.h +++ b/exercises/06/vslc/include/vslc.h @@ -25,6 +25,7 @@ int yyerror ( const char *error ); /* These are defined in the parser generated by bison */ extern int yylineno; extern int yylex ( void ); +extern int yylex_destroy( void ); extern char yytext[]; /* Global state */ diff --git a/exercises/06/vslc/src/generator.c b/exercises/06/vslc/src/generator.c index ec87fb4..63d121e 100644 --- a/exercises/06/vslc/src/generator.c +++ b/exercises/06/vslc/src/generator.c @@ -1,84 +1,556 @@ #include -void generate_stringtable ( void ); -void generate_global_variables ( void ); -void generate_function ( symbol_t *function ); +#define ASM(opcode, args...) puts("\t"#opcode"\t"#args) +#define LABEL(label) printf("_%s:\n", (char*)label) +#define NO_REG_RECORD 6 +#define NO_CALLE_SAVED_REG 10 + +/**Generate table of strings in a rodata section. */ +void generate_stringtable ( void ); +/**Declare global variables in a bss section */ +void generate_global_variables ( void ); +/**Generate function entry code + * @param function symbol table entry of function */ +void generate_function ( symbol_t *function ); +/**Generate code for a node in the AST, to be called recursively from + * generate_function + * @param node root node of current code block */ static void generate_node ( node_t *node ); +/**Initializes program (already implemented) */ void generate_main ( symbol_t *first ); -static void generate_function_call ( node_t *call ); #define MIN(a,b) (((a)<(b)) ? (a):(b)) -static const char *record[6] = { +static const char *record[NO_REG_RECORD] = { "%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9" }; -static symbol_t *current_function = NULL; +static const char *calle_saved_reg[NO_CALLE_SAVED_REG] = { + "%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%rsp", "%r8", "%r9", "%r10", "%r11" +}; +// Helper funcs for generating different nodes + +/** + * Generates assembly for printing + * + * @param node print statement node with children to print + */ +static void generate_print(node_t* node); + +/** + * Generate identfier for a variable in memory + * + * @param node identifier we want the addres of + */ +static void generate_var_ident(node_t *node); + +/** + * Main function to calculate and solve the expressions. + * Based on a stack machine. Result is stored on stack. + * + * @param node root node for expression + */ +static void solve_expressions(node_t *node); + +/** + * Generates a funciton call + * + * @param node root node for function + */ +static void generate_function_call(node_t *node); + +/** + * Generates the return part of a function + * + * @param node node containing the return statement + */ +static void generate_function_return(node_t *node); + +/** + * Used for calculating and evaluating the add/sub/mul/div statements. + * Turns the statement into an expression, adds the result of rhs to lhs + * and stores the value back to the indentifier + * + * @param node node to the statement + * @param operator one of the following +, -, *, / + */ +static void solve_statements(node_t *node, char *operator); + +/** + * Generate assembly to fetch a variable on stack + * + * @param node node to the variable to be fetched + * @param dest where to put the value + */ +static void fetch_variable(node_t *node, const char* dest); + +/** + * Same as fetch_variable, but stores it back to memory. + * + * @param node variable to be stored + * @param src from where should the data come from + */ +static void writeback_variable(node_t *node, char* src); + +// Helper func for fetching all symbols in a table +static uint64_t fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list); void generate_program ( void ) { - - size_t n_globals = tlhash_size(global_names); - symbol_t *global_list[n_globals]; - tlhash_values ( global_names, (void **)&global_list ); - - symbol_t *first_function; - for ( size_t i=0; itype == SYM_FUNCTION ) - { - // Allows the use of main as name to override entry point - if (!strcmp(global_list[i]->name, "main")) { - first_function = global_list[i]; - break; - } - else if (global_list[i]->seq == 0) { - first_function = global_list[i]; - } - } - generate_stringtable(); generate_global_variables(); - generate_main ( first_function ); - for ( size_t i=0; itype == SYM_FUNCTION ) - generate_function ( global_list[i] ); + symbol_t **global_list; + uint64_t no_globals = fetch_symbols(global_names, &global_list); + + bool main_generated = false; + uint64_t seq0_index = -1; + for (uint64_t g = 0; g < no_globals; g++) + { + if (global_list[g]->type != SYM_FUNCTION) + continue; + + // If the name of the function is main + if (!strcmp(global_list[g]->name, "main")) + { + generate_main(global_list[g]); + main_generated = true; + } + + if (!global_list[g]->seq) + seq0_index = g; + } + + // If no main was found, use the first function instead. + // That means the function with seq = 0 + if (!main_generated) + generate_main(global_list[seq0_index]); + + for (uint64_t g = 0; g < no_globals; g++) + { + if (global_list[g]->type == SYM_FUNCTION) + generate_function(global_list[g]); + } + + free(global_list); } - void generate_stringtable ( void ) { - puts ( ".section .rodata" ); - puts ( ".intout: .string \"\%ld \"" ); - puts ( ".strout: .string \"\%s \"" ); - puts ( ".errout: .string \"Wrong number of arguments\"" ); - for ( size_t s=0; stype == SYM_GLOBAL_VAR) + printf(".%s:\n", global_list[g]->name); + } + putchar('\n'); + free(global_list); + +} + +void +generate_function ( symbol_t *function ) +{ + // TODO: Generate code for declaring and entering function, then generate its body + + printf("# func %s(nparams: %ld)\n", function->name, function->nparms); + puts(".text"); + printf(".global _%s\n", function->name); + LABEL(function->name); + ASM(pushq, %rbp); + ASM(movq, %rsp, %rbp); + + // Push params to stack + for (int arg = 0; arg < MIN(NO_REG_RECORD,function->nparms); arg++) + printf("\tpushq\t%s\n", record[arg] ); + + // How many local variables are inside function + uint64_t no_locals = function->locals->size - function->nparms; + + // IF the stack alignment is not 16 bytes, + // add one now as all local var also is 0 + if ((MIN(6,function->nparms) + no_locals) % 2) + ASM(pushq, $0); + + // Make room for the local vars + while(no_locals--) + ASM(pushq, $0); + + // Now the stack ptr should be 16 byte aligned. + + generate_node(function->node); + + putchar('\n'); +} + + +void +generate_node ( node_t *node) +{ + // TODO: Generate code corresponding to node + + // All statements have the same structure. + // [0] is the lhs, needs to be identifier, parser ensures this + // [1] is thr rhs + switch (node->type) { - if ( syms[n]->type == SYM_GLOBAL_VAR ) - printf ( "._%s: .zero 8\n", syms[n]->name ); + case ASSIGNMENT_STATEMENT: + solve_expressions(node->children[1]); + ASM(popq, %rax); + writeback_variable(node->children[0], "%rax"); + break; + + case ADD_STATEMENT: + // The following way is the naive way of doing an assignment + /* + fetch_variable(node->children[0], "%rax"); + ASM(pushq, %rax); + solve_expressions(node->children[1]); + ASM(popq, %r10); + ASM(popq, %rax); + ASM(addq, %r10, %rax); + writeback_variable(node->children[0], "%rax"); + */ + /* The thing is that add/sub/mul/div assignments + have the same structure as expressions. + We can therefore just say that the assignment is + an expression, but remembering to do the writeback afterwards. + */ + puts("# Add statement"); + solve_statements(node, "+"); + break; + + case SUBTRACT_STATEMENT: + puts("# Subtract statement"); + solve_statements(node, "-"); + break; + + case MULTIPLY_STATEMENT: + puts("# Multiply statement"); + solve_statements(node, "*"); + break; + + case DIVIDE_STATEMENT: + puts("# Divide statement"); + solve_statements(node, "/"); + break; + + case PRINT_STATEMENT: + puts("# Print statement"); + generate_print(node); + break; + + case RETURN_STATEMENT: + puts("# Return statement"); + generate_function_return(node); + break; + + + + case IF_STATEMENT: + case WHILE_STATEMENT: + /* DO NOTHING YET */ + break; + case NULL_STATEMENT: + /* USED IN WHILE/IF */ + break; + + + case DECLARATION_LIST: + /* List of blocks we dont need to traverse */ + break; + + default: + for (int c = 0; c < node->n_children; c++) + generate_node(node->children[c]); + break; + } } +void +generate_print(node_t* node) +{ + // Push rdi and rsi to stack incase there are data in them + //ASM(pushq, %rdi); + //ASM(pushq, %rsi); + for (uint64_t p = 0; p < node->n_children; p++) + { + node_t *curr_print = node->children[p]; + + switch (curr_print->type) + { + case EXPRESSION: + solve_expressions(curr_print); + ASM(popq, %rax); + ASM(movq, $.intout, %rdi); + ASM(movq, %rax, %rsi); + break; + + case STRING_DATA: + ASM(movq, $.strout, %rdi); + printf("\tmovq\t$.STR%03ld, %%rsi\n", *(uint64_t*)curr_print->data); + break; + + case IDENTIFIER_DATA: + ASM(movq, $.intout, %rdi); + fetch_variable(curr_print, "%rsi"); + break; + default: + break; + } + ASM(call, printf); + } + // Adds a newline + ASM(movq, $'\n', %rdi); + ASM(call, putchar); + + //ASM(popq, %rsi); + //ASM(popq, %rdi); +} + + +// This will put the value of var in node in dest +void +fetch_variable(node_t *node, const char* dest) +{ + printf("\tmovq\t"); + generate_var_ident(node); + printf(", %s\t\t# Fetched: %s\n", dest, node->entry->name); +} + +// This will put the value in dest to the var in node +void +writeback_variable(node_t *node, char* src) +{ + printf("\tmovq\t%s,", src); + generate_var_ident(node); + printf("\t\t# Writeback: %s\n", node->entry->name); +} + + +void +generate_var_ident(node_t *node) +{ + symbol_t *ident_sym = node->entry; + switch (ident_sym->type) + { + case SYM_GLOBAL_VAR: + printf("$.%s", ident_sym->name); + break; + + case SYM_PARAMETER: + // If it is a paramter is one of the first 6, seacrch below bp + if (ident_sym->seq < 6) + printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1)); + else + // This requires that the parameters on + // stack is in reversed order... easier to implement + printf("%ld(%%rbp)", 8 * (ident_sym->seq - 6 + 1 )); + break; + + case SYM_LOCAL_VAR: + printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1)); + break; + } +} + + +// This should allways push the result to stack +void +solve_expressions(node_t *node) +{ + if (node->data) + { // Check if the expression is a function call + bool is_function_call = !strcmp(node->data, "function_call"); + if (is_function_call) + { + generate_function_call(node); + return; + } + } + + + switch (node->n_children) + { + case 0: + switch (node->type) + { + case IDENTIFIER_DATA: + fetch_variable(node, "%rax"); + ASM(pushq, %rax); + break; + case NUMBER_DATA: + printf("\tmovq\t$%ld,%%rax\n",*(int64_t*)node->data); + ASM(pushq, %rax); + break; + } + break; + case 1: + solve_expressions(node->children[0]); + ASM(popq, %rax); + + switch (*(char*)node->data) + { + case '-': + ASM(negq, %rax); + break; + case '~': + ASM(notq, %rax); + break; + } + ASM(pushq, %rax); + + break; + case 2: + + // First fetch lhs of expr and then rhs + // Push results on stack + for (int i = 0; i < 2; i++) + solve_expressions(node->children[i]); + + + // Put rhs in %r10 + ASM(popq, %r10); + // put lhs in %rax + ASM(popq, %rax); + + switch (*(char*)node->data) + { + /* Assignments */ + case '|': ASM(orq, %r10, %rax); break; // Bitwise or of %rax and %r10 + case '^': ASM(xorq, %r10, %rax); break; // Bitwise xor of %rax and %r10 + case '&': ASM(andq, %r10, %rax); break; // Bitwise and of %rax and %r10 + case '+': ASM(addq, %r10, %rax); break; // Add %rax and %r10 + case '-': ASM(subq, %r10, %rax); break; // Subtract %r10 from %rax + case '*': ASM(imulq, %r10); break; // Mulitply %rax with %r10 + case '/': + ASM(cqto); // Convert rax to octaword, %rdx:%rax + ASM(idivq, %r10); // Divide %rdx:%rax by %r10 + break; + } + + // Push result to stack. + ASM(pushq, %rax); + break; + } +} + +void +generate_function_call(node_t *node) +{ + printf("# Function call\n"); + + node_t *arg_list = node->children[1]; + if (arg_list->n_children) + arg_list = arg_list->children[0]; + + for (int arg = 0; arg < MIN(NO_REG_RECORD, arg_list->n_children); arg++) + { + if (arg_list->children[arg]->type == NUMBER_DATA) + printf("\tmovq\t$%ld, %s\n", + *(int64_t*)arg_list->children[arg]->data, + record[arg] + ); + else + fetch_variable(arg_list->children[arg], record[arg]); + } + + if (arg_list->n_children > NO_REG_RECORD) + { + for (int arg = arg_list->n_children - 1; arg >= NO_REG_RECORD; arg--) + { + if (arg_list->children[arg]->type == NUMBER_DATA) + printf("\tpushq\t$%ld\n", + *(int64_t*)arg_list->children[arg]->data + ); + else + { + printf("\tpushq\t"); + generate_var_ident(arg_list->children[arg]); + putchar('\n'); + } + } + if (arg_list->n_children % 2) + ASM(pushq, $0); + } + + printf("\tcall\t_%s\n", (char*)node->children[0]->data); + ASM(pushq, %rax); + printf("# End of function call\n"); + + /* + for (int reg = 0; reg < NO_CALLE_SAVED_REG; reg++) + printf("\tpushq\t%s \t\t# Pushing %s to stack\n", + calle_saved_reg[reg], + calle_saved_reg[reg] + ); + + for (int reg = NO_CALLE_SAVED_REG; reg > 0; reg--) + printf("\tpopq\t%s \t\t# Poping %s from stack\n", + calle_saved_reg[reg], + calle_saved_reg[reg] + );*/ + +} + +void +generate_function_return(node_t *node) +{ + solve_expressions(node->children[0]); + ASM(popq, %rax); + ASM(leave); + ASM(ret); +} + +void +solve_statements(node_t *node, char *operator) +{ + node->type = EXPRESSION; + node->data = strdup(operator); + + solve_expressions(node); + + ASM(popq, %rax); + writeback_variable(node->children[0], "%rax"); +} + +/**Generates the main function with argument parsing and calling of our + * main function (first, if no function is named main) + * @param first Symbol table entry of our main function */ void generate_main ( symbol_t *first ) { + puts("###### Entry point for GAS #####"); puts ( ".globl main" ); puts ( ".section .text" ); puts ( "main:" ); @@ -87,368 +559,54 @@ generate_main ( symbol_t *first ) printf ( "\tsubq\t$1,%%rdi\n" ); printf ( "\tcmpq\t$%zu,%%rdi\n", first->nparms ); - printf ( "\tjne\tABORT\n" ); + printf ( "\tjne \tABORT\n" ); printf ( "\tcmpq\t$0,%%rdi\n" ); - printf ( "\tjz\tSKIP_ARGS\n" ); + printf ( "\tjz \tSKIP_ARGS\n" ); printf ( "\tmovq\t%%rdi,%%rcx\n" ); - printf ( "\taddq $%zu, %%rsi\n", 8*first->nparms ); + printf ( "\taddq\t$%zu, %%rsi\n", 8*first->nparms ); printf ( "PARSE_ARGV:\n" ); - printf ( "\tpushq %%rcx\n" ); - printf ( "\tpushq %%rsi\n" ); + printf ( "\tpushq\t%%rcx\n" ); + printf ( "\tpushq\t%%rsi\n" ); printf ( "\tmovq\t(%%rsi),%%rdi\n" ); printf ( "\tmovq\t$0,%%rsi\n" ); printf ( "\tmovq\t$10,%%rdx\n" ); printf ( "\tcall\tstrtol\n" ); - /* Now a new argument is an integer in rax */ +/* Now a new argument is an integer in rax */ - printf ( "\tpopq %%rsi\n" ); - printf ( "\tpopq %%rcx\n" ); - printf ( "\tpushq %%rax\n" ); - printf ( "\tsubq $8, %%rsi\n" ); - printf ( "\tloop PARSE_ARGV\n" ); + printf ( "\tpopq\t%%rsi\n" ); + printf ( "\tpopq\t%%rcx\n" ); + printf ( "\tpushq\t%%rax\n" ); + printf ( "\tsubq\t$8, %%rsi\n" ); + printf ( "\tloop\tPARSE_ARGV\n" ); /* Now the arguments are in order on stack */ - for ( int arg=0; argnparms); arg++ ) + for (int arg = 0; arg < MIN(6,first->nparms); arg++) printf ( "\tpopq\t%s\n", record[arg] ); printf ( "SKIP_ARGS:\n" ); printf ( "\tcall\t_%s\n", first->name ); - printf ( "\tjmp\tEND\n" ); + printf ( "\tjmp \tEND\n" ); printf ( "ABORT:\n" ); printf ( "\tmovq\t$.errout, %%rdi\n" ); - printf ( "\tcall puts\n" ); + printf ( "\tcall\tputs\n" ); printf ( "END:\n" ); - puts ( "\tmovq %rax, %rdi" ); - puts ( "\tcall exit" ); + puts ( "\tmovq \t%rax, %rdi" ); + puts ( "\tcall \texit" ); + puts("###### FUNCTIONS FROM VSL BELOW #####"); + putchar('\n'); } - -static void -generate_identifier ( node_t *ident ) +static uint64_t +fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list) { - symbol_t *symbol = ident->entry; - int64_t argument_offset; - switch ( symbol->type ) - { - case SYM_GLOBAL_VAR: - /* Global variables called by name */ - printf ( "._%s", symbol->name ); - break; - case SYM_PARAMETER: - if ( symbol->seq > 5 ) - /* Extra parameters pushed in decreasing order */ - printf ( "%ld(%%rbp)", 8+8*(symbol->seq-5) ); - else - /* First six parameters directly after base poiter */ - printf ( "%ld(%%rbp)", -8*(symbol->seq+1) ); - break; - case SYM_LOCAL_VAR: - /* Local variables places after parameters in stack */ - argument_offset = -8*MIN(6,current_function->nparms); - printf ( "%ld(%%rbp)", -8*(symbol->seq+1) + argument_offset ); - break; - } -} - - -static void -generate_expression ( node_t *expr ) -{ - if ( expr->type == IDENTIFIER_DATA ) - { - printf ( "\tmovq\t" ); - generate_identifier ( expr ); - printf ( ", %%rax\n" ); - } - else if ( expr->type == NUMBER_DATA ) - { - printf ( "\tmovq\t$%ld, %%rax\n", *(int64_t *)expr->data ); - } - else if ( expr->n_children == 1 ) - { - switch ( *((char*)(expr->data)) ) - { - case '-': - generate_expression ( expr->children[0] ); - printf ( "\tnegq\t%%rax\n" ); - break; - case '~': - generate_expression ( expr->children[0] ); - printf ( "\tnotq\t%%rax\n" ); - break; - } - } - else if ( expr->n_children == 2 ) - { - if ( expr->data != NULL ) - { - switch ( *((char *)expr->data) ) - { - case '+': - generate_expression ( expr->children[0] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[1] ); - printf ( "\taddq\t%%rax, (%%rsp)\n" ); - printf ( "\tpopq\t%%rax\n" ); - break; - case '-': - generate_expression ( expr->children[0] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[1] ); - printf ( "\tsubq\t%%rax, (%%rsp)\n" ); - printf ( "\tpopq\t%%rax\n" ); - break; - case '*': - printf ( "\tpushq\t%%rdx\n" ); - generate_expression ( expr->children[1] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[0] ); - printf ( "\tmulq\t(%%rsp)\n" ); - printf ( "\tpopq\t%%rdx\n" ); - printf ( "\tpopq\t%%rdx\n" ); - break; - case '/': - printf ( "\tpushq\t%%rdx\n" ); - generate_expression ( expr->children[1] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[0] ); - printf ( "\tcqo\n" ); - printf ( "\tidivq\t(%%rsp)\n" ); - printf ( "\tpopq\t%%rdx\n" ); - printf ( "\tpopq\t%%rdx\n" ); - break; - case '|': - generate_expression ( expr->children[0] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[1] ); - printf ( "\torq\t%%rax, (%%rsp)\n" ); - printf ( "\tpopq\t%%rax\n" ); - break; - case '^': - generate_expression ( expr->children[0] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[1] ); - printf ( "\txorq\t%%rax, (%%rsp)\n" ); - printf ( "\tpopq\t%%rax\n" ); - break; - case '&': - generate_expression ( expr->children[0] ); - printf ( "\tpushq\t%%rax\n" ); - generate_expression ( expr->children[1] ); - printf ( "\tandq\t%%rax, (%%rsp)\n" ); - printf ( "\tpopq\t%%rax\n" ); - break; - } - } else { - generate_function_call ( expr ); - } - } -} - - -static void -generate_function_call ( node_t *call ) -{ - /* Check function call */ - size_t n_arguments = 0; - if ( call->children[1] != NULL ) - n_arguments = call->children[1]->n_children; - symbol_t *function = call->children[0]->entry; - if ( n_arguments != function->nparms ) - { - fprintf ( stderr, - "Function %s has %zu parameters, called with %zu arguments\n", - (char *) call->children[0]->data, - (size_t) call->children[0]->entry->nparms, - n_arguments - ); - exit ( EXIT_FAILURE ); - } - - /* Generate function call: */ - - /* Push all the arguments */ - node_t *arglist = call->children[1]; - if ( arglist != NULL ) - { - for ( size_t p=arglist->n_children; p>0; p-- ) - { - generate_expression ( arglist->children[(p-1)] ); - if ( (p-1)>5 ) - printf ( "\tpushq\t%%rax\n" ); - else - printf ( "\tmovq\t%%rax, %s\n", record[(p-1)] ); - } - } - /* Call the function */ - printf ( "\tcall _%s\n", (char *)call->children[0]->data ); -} - - -static void -generate_assignment_statement ( node_t *statement ) -{ - switch ( statement->type ) - { - case ASSIGNMENT_STATEMENT: - generate_expression ( statement->children[1] ); - printf ( "\tmovq\t%%rax, " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - break; - case ADD_STATEMENT: - generate_expression ( statement->children[1] ); - printf ( "\taddq\t%%rax, " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - break; - case SUBTRACT_STATEMENT: - generate_expression ( statement->children[1] ); - printf ( "\tsubq\t%%rax, " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - break; - case MULTIPLY_STATEMENT: - generate_expression ( statement->children[1] ); - printf ( "\tmulq\t " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - printf ( "\tmovq\t%%rax, " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - break; - case DIVIDE_STATEMENT: - generate_expression ( statement->children[1] ); - printf ( "\txchgq\t%%rax, " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - printf ( "\tcqo\n" ); - printf ( "\tidivq\t" ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - printf ( "\txchgq\t%%rax, " ); - generate_identifier ( statement->children[0] ); - printf ( "\n" ); - break; - } -} - - -static void -generate_print_statement ( node_t *statement ) -{ - for ( size_t i=0; in_children; i++ ) - { - node_t *item = statement->children[i]; - switch ( item->type ) - { - case STRING_DATA: - printf ( "\tmovq\t$.STR%zu, %%rsi\n", *((size_t *)item->data) ); - printf ( "\tmovq\t$.strout, %%rdi\n" ); - break; - case NUMBER_DATA: - printf ("\tmovq\t$%ld, %%rsi\n", *((int64_t *)item->data) ); - printf ( "\tmovq\t$.intout, %%rdi\n" ); - break; - case IDENTIFIER_DATA: - printf ( "\tmovq\t" ); - generate_identifier ( item ); - printf ( ", %%rsi\n" ); - printf ( "\tmovq\t$.intout, %%rdi\n" ); - break; - case EXPRESSION: - generate_expression ( item ); - printf ( "\tmovq\t%%rax, %%rsi\n" ); - printf ( "\tmovq\t$.intout, %%rdi\n" ); - break; - } - puts ( "\tmovq\t$0, %rax\n" // Clear rax to indicate not to use SSE instructions - "\tcall\tprintf" ); - } - printf ( "\tmovq\t$0x0A, %%rdi\n" ); // Finish statement by inserting a newline - puts ( "\tcall\tputchar" ); -} - - -static void -generate_if_statement ( node_t *statement ) -{ - // TODO: Handle if statement - // statement->nodetype = IF_STATEMENT -} - - -static void -generate_while_statement ( node_t *statement ) -{ - // TODO: Handle while statement - // statement->nodetype = WHILE_STATEMENT -} - - -static void -generate_node ( node_t *node ) -{ - switch (node->type) - { - case PRINT_STATEMENT: - generate_print_statement ( node ); - break; - case ASSIGNMENT_STATEMENT: - case ADD_STATEMENT: - case SUBTRACT_STATEMENT: - case MULTIPLY_STATEMENT: - case DIVIDE_STATEMENT: - generate_assignment_statement ( node ); - break; - case RETURN_STATEMENT: - generate_expression ( node->children[0] ); - printf ( "\tleave\n" ); - printf ( "\tret\n" ); - break; - case IF_STATEMENT: - // TODO: Implement - break; - case WHILE_STATEMENT: - // TODO: Implement - break; - case NULL_STATEMENT: - // TODO: Implement - break; - default: - for ( size_t i=0; in_children; i++ ) - generate_node ( node->children[i] ); - break; - } -} - - -void -generate_function ( symbol_t *function ) -{ - current_function = function; - printf ( "_%s:\n", function->name ); - puts ( "\tpushq %rbp" ); - puts ( "\tmovq %rsp, %rbp" ); - - /* Save arguments in local stack frame */ - for ( size_t arg=1; arg<=MIN(6,function->nparms); arg++ ) - printf ( "\tpushq\t%s\n", record[arg-1] ); - /* Make space for locals in local stack frame */ - size_t local_vars = tlhash_size(function->locals) - function->nparms; - if ( local_vars > 0 ) - printf ( "\tsubq $%zu, %%rsp\n", 8*local_vars ); - if ( (tlhash_size(function->locals)&1) == 1 ) - puts ( "\tpushq\t$0 /* Stack padding for 16-byte alignment */" ); - generate_node ( function->node ); - printf( "\tmovq\t%%rbp, %%rsp\n" // movq %rbp, %rsp // restore stack pointer - "\tmovq\t$0, %%rax\n" // movq $0, %rax // return 0 if nothing else - "\tpopq\t%%rbp\n" // popq %rbp // restore base pointer - "\tret\n"); // ret - current_function = NULL; + uint64_t no_symbols = tlhash_size(symbol_table); + *symbol_list = malloc(no_symbols * sizeof(symbol_t)); + tlhash_values(symbol_table, (void **)*symbol_list ); + + return no_symbols; } diff --git a/exercises/06/vslc/src/ir.c b/exercises/06/vslc/src/ir.c index ae5ea61..33243ab 100644 --- a/exercises/06/vslc/src/ir.c +++ b/exercises/06/vslc/src/ir.c @@ -1,336 +1,602 @@ #include +#define ERRPRT(format, args...) {fprintf(stderr, "[ERROR] "); fprintf(stderr ,format, ##args);} + + // Externally visible, for the generator extern tlhash_t *global_names; extern char **string_list; extern size_t n_string_list, stringc; +// Functions from the skeleton -// Implementation choices, only relevant internally -static void find_globals ( void ); +static uint64_t find_globals ( void ); static void bind_names ( symbol_t *function, node_t *root ); -static void print_symbols ( tlhash_t *table ); -static void destroy_symtab ( void ); -// Internal details of name resolution -static size_t n_scopes = 1, scope_depth = 0; -static tlhash_t **scopes = NULL; +// Helper functions, see description in the definition -/* External interface */ +static void print_global_tree(symbol_t* global); +static void print_string_list(void); +static void destroy_global(symbol_t* global); +static void push_scope(void); +static void pop_scope(void); +static void insert_symbol(tlhash_t *hash_table, symbol_t* symbol); +static void insert_local_to_scope(symbol_t *local); +static void insert_local_to_func(symbol_t *function, symbol_t *root); +static void insert_local_var(symbol_t *function, node_t *root); +static void collect_string(node_t *root); +static symbol_t* lookup_var(symbol_t *function, char* var); +// Local "global" variables +static const char *symbol_names[4] = { + "GLOBAL_VAR", + "FUNCTION", + "PARAMETER", + "LOCAL_VAR" +}; +static uint64_t no_scopes, cur_scope_depth; +static tlhash_t **scopes; + + +/** + * Gather information and create a symbol table. + * + * Used in vslc.c + */ void create_symbol_table ( void ) { - find_globals(); - size_t n_globals = tlhash_size ( global_names ); - symbol_t *global_list[n_globals]; - tlhash_values ( global_names, (void **)&global_list ); - for ( size_t i=0; itype == SYM_FUNCTION ) - bind_names ( global_list[i], global_list[i]->node ); + + // Initialize string array + n_string_list = DEFAULT_STRING_LIST_SIZE; + string_list = malloc(n_string_list * sizeof(char*)); + stringc = 0; + + // Initialize scope array + no_scopes = DEFAULT_NO_SCOPES; + scopes = malloc(no_scopes * sizeof(tlhash_t)); + cur_scope_depth = 0; + + // Traverse the root node for globals + uint64_t no_globals = find_globals(); + + // Prepare a temp list of globals and fetch all globals + symbol_t **global_list = malloc(no_globals * sizeof(symbol_t)); + tlhash_values(global_names, (void **)global_list ); + + /* Iterate over the temporary list, bind names in each function */ + for (uint64_t g = 0; g < no_globals; g++ ) + { + if (global_list[g]->type == SYM_FUNCTION) + bind_names(global_list[g], global_list[g]->node); + } + // Free the temp list + free(global_list); } +/** + * Prints the symbol table and the string array + * + * Used in vslc.c + */ void print_symbol_table ( void ) { - print_symbols ( global_names ); + /* Get the number of symbols, size up a temporary list and fill it */ + uint64_t no_globals = tlhash_size(global_names); + symbol_t **global_list = malloc(no_globals * sizeof(symbol_t)); + tlhash_values(global_names, (void **)global_list ); + + /* Iterate over the temporary list, printing entries */ + for (uint64_t g = 0; g < no_globals; g++ ) + // Print the tree structure for each global + print_global_tree(global_list[g]); + + free(global_list); + + // Print strings + print_string_list(); } +/** + * Prins the tree of a global + * + * @param global pointer to the global to be printed + */ +static void +print_global_tree(symbol_t* global) +{ + // Check if null ptr + if (!global) + return; + + // Print global root + printf("─%s: %-16s [nparams=%2ld, seq=%2ld, node=%p]\n", + symbol_names[global->type], + global->name, + global->nparms, + global->seq, + global->node + ); + + // If the global does not have params or locals, return + if (!global->nparms && !global->locals) + {putchar('\n');return;} + + // Need to fetch the whole size, since nparams + // only count the params, not all locals + uint64_t no_locals = tlhash_size(global->locals); + symbol_t **locals_list = malloc(no_locals * sizeof(symbol_t)); + tlhash_values(global->locals, (void **)locals_list ); + // Go through all locals + for (int l = 0; l < no_locals; l++) + { // Do some simple sorting, so seq num is in order + for (int ll = 0; ll < no_locals; ll++) + { + if (locals_list[ll]->seq == l) + { + printf(" %s─[%s]: %-22s\t[seq=%2ld, node=%p]\n", + (l < (no_locals - 1)) ? "├" : "└", + symbol_names[locals_list[ll]->type], + locals_list[ll]->name, + locals_list[ll]->seq, + locals_list[ll]->node + ); + break; + } + } + } + putchar('\n'); + free(locals_list); +} + + +/** + * Prints the array of strings + * + */ +static void +print_string_list(void) +{ // Print out all the collected strings + printf("─STRINGS [%ld]\n", stringc); + for (uint64_t i = 0; i < stringc; i++) + printf(" %s─[%ld]: %s\n", + (i < (stringc - 1)) ? "├" : "└", + i, + string_list[i] + ); +} + + +/** + * Destroys all the dynamicly allocated memory and all the hash tables. + * Frees up the array of strings as well. + * + * Used in vslc.c + */ void destroy_symbol_table ( void ) { - destroy_symtab(); -} + // FREE STRINGS + // Free all strings that are kept in the array + for (uint64_t c = 0; c < stringc; c++) + free(string_list[c]); + // Free the actual list + free(string_list); -/* Internal matters */ + // FREE SCOPES + // At the end of program, all scopes have to be popped + // Therefore only free the list + free(scopes); - -static void -print_symbols ( tlhash_t *table ) -{ - if ( table == NULL ) + // FREE GLOBAL NAMES + if (!global_names) return; - size_t n_entries = tlhash_size(table); - symbol_t *entry_list[n_entries]; - tlhash_values ( table, (void **)&entry_list ); - for ( size_t e=0; etype ) - { - case SYM_FUNCTION: - fprintf ( stderr, "function: %s\n", entry_list[e]->name ); - if ( entry_list[e]->type == SYM_FUNCTION ) - print_symbols ( entry_list[e]->locals ); - break; - case SYM_GLOBAL_VAR: - fprintf ( stderr, "global var: %s\n", entry_list[e]->name ); - break; - case SYM_PARAMETER: - fprintf ( stderr, "parameter: %s\n", entry_list[e]->name ); - break; - case SYM_LOCAL_VAR: - fprintf ( stderr, "local var: %s\n", entry_list[e]->name ); - break; - default: - /* This should never happen if all symbols have correct type */ - fprintf ( stderr, - "** Unknown symbol: %s\n", entry_list[e]->name - ); - break; - } - } + + // Fetch list of globals + uint64_t no_globals = tlhash_size(global_names); + symbol_t **global_list = malloc(no_globals * sizeof(symbol_t)); + tlhash_values(global_names, (void **)global_list ); + + // Destroy all global elements + for (uint64_t g = 0; g < no_globals; g++) + destroy_global(global_list[g]); + + // Destory the global hash table + tlhash_finalize(global_names); + // Free the global hash table + free(global_names); + + // Free the temp list + free(global_list); } -static void -add_global ( symbol_t *symbol ) +/** + * Destroys the supplied global symbol by + * finalizing each of the local tables + * + * @param global pointer to the global symbol to be destroyed + */ +static void +destroy_global(symbol_t* global) { - tlhash_insert ( - global_names, symbol->name, strlen(symbol->name), symbol - ); + if (!global) + return; + if (!global->locals) + { + free(global); + return; + } + + uint64_t no_locals = tlhash_size(global->locals); + symbol_t **locals_list = malloc(no_locals * sizeof(symbol_t)); + tlhash_values(global->locals, (void **)locals_list ); + + for (int l = 0; l < no_locals; l++) + free(locals_list[l]); + + tlhash_finalize(global->locals); + free(global->locals); + free(global); + + free(locals_list); } -static void +/** + * Goes trough the root node and finds all global variables and functions + * + * @return Returns the number of globals found (functions + variables) + */ +static uint64_t find_globals ( void ) { - global_names = malloc ( sizeof(tlhash_t) ); - tlhash_init ( global_names, 32 ); - string_list = malloc ( n_string_list * sizeof(char * ) ); - size_t n_functions = 0; + tlhash_init(global_names = malloc(sizeof(tlhash_t)), GLOBAL_BUCKET_SIZE); - node_t *global_list = root->children[0]; - for ( uint64_t g=0; gn_children; g++ ) + uint64_t no_functions = 0, no_global_vars = 0; + node_t *global_list = root; + + // Check if not nullptr + if (!global_list) + return 0; + + symbol_t* global_symbol; + for (uint64_t global_i = 0; global_i < global_list->n_children; global_i++) { - node_t *global = global_list->children[g], *namelist; - symbol_t *symbol; - switch ( global->type ) + node_t *current_global = global_list->children[global_i]; + switch (current_global->type) { - case FUNCTION: - symbol = malloc ( sizeof(symbol_t) ); - *symbol = (symbol_t) { - .type = SYM_FUNCTION, - .name = global->children[0]->data, - .node = global->children[2], - .seq = n_functions, - .nparms = 0, - .locals = malloc ( sizeof(tlhash_t) ) - }; - n_functions++; + case VARIABLE_LIST: - tlhash_init ( symbol->locals, 32 ); - if ( global->children[1] != NULL ) - { - symbol->nparms = global->children[1]->n_children; - for ( int p=0; pnparms; p++ ) - { - node_t *param = global->children[1]->children[p]; - symbol_t *psym = malloc ( sizeof(symbol_t) ); - *psym = (symbol_t) { - .type = SYM_PARAMETER, - .name = param->data, - .node = NULL, - .seq = p, - .nparms = 0, - .locals = NULL - }; - tlhash_insert ( - symbol->locals, psym->name, strlen(psym->name), psym - ); - } - } - add_global ( symbol ); - break; - case DECLARATION: - namelist = global->children[0]; - for ( uint64_t d=0; dn_children; d++ ) - { - symbol = malloc ( sizeof(symbol_t) ); - *symbol = (symbol_t) { - .type = SYM_GLOBAL_VAR, - .name = namelist->children[d]->data, - .node = NULL, - .seq = 0, - .nparms = 0, - .locals = NULL - }; - add_global(symbol); - } - break; - } - } -} - - -static void -push_scope ( void ) -{ - if ( scopes == NULL ) - scopes = malloc ( n_scopes * sizeof(tlhash_t *) ); - tlhash_t *new_scope = malloc ( sizeof(tlhash_t) ); - tlhash_init ( new_scope, 32 ); - scopes[scope_depth] = new_scope; - - scope_depth += 1; - if ( scope_depth >= n_scopes ) - { - n_scopes *= 2; - scopes = realloc ( scopes, n_scopes*sizeof(tlhash_t **) ); - } - -} - - -static void -add_local ( symbol_t *local ) -{ - tlhash_insert ( - scopes[scope_depth-1],local->name,strlen(local->name),local - ); -} - - -static symbol_t * -lookup_local ( char *name ) -{ - symbol_t *result = NULL; - size_t depth = scope_depth; - while ( result == NULL && depth > 0 ) - { - depth -= 1; - tlhash_lookup ( scopes[depth], name, strlen(name), (void **)&result ); - } - return result; -} - - -static void -pop_scope ( void ) -{ - scope_depth -= 1; - tlhash_finalize ( scopes[scope_depth] ); - free ( scopes[scope_depth] ); - scopes[scope_depth] = NULL; -} - - -static void -add_string ( node_t *string ) -{ - string_list[stringc] = string->data; - string->data = malloc ( sizeof(size_t) ); - *((size_t *)string->data) = stringc; - stringc++; - if ( stringc >= n_string_list ) - { - n_string_list *= 2; - string_list = realloc ( string_list, n_string_list * sizeof(char *) ); - } - -} - - -static void -bind_names ( symbol_t *function, node_t *root ) -{ - if ( root == NULL ) - return; - else switch ( root->type ) - { - node_t *namelist; - symbol_t *entry; - - case BLOCK: - push_scope(); - for ( size_t c=0; cn_children; c++ ) - bind_names ( function, root->children[c] ); - pop_scope(); - break; - - case DECLARATION: - namelist = root->children[0]; - for ( uint64_t d=0; dn_children; d++ ) + // Go through the variable list and get all the global variables + for (uint64_t var_i = 0; var_i < current_global->n_children; var_i++) { - node_t *varname = namelist->children[d]; - size_t local_num = - tlhash_size(function->locals) - function->nparms; - symbol_t *symbol = malloc ( sizeof(symbol_t) ); - *symbol = (symbol_t) { - .type = SYM_LOCAL_VAR, - .name = varname->data, - .node = NULL, - .seq = local_num, + global_symbol = malloc(sizeof(symbol_t)); + *global_symbol = (symbol_t){ + .type = SYM_GLOBAL_VAR, + .name = current_global->children[var_i]->data, + .node = current_global->children[var_i], + .seq = 0, .nparms = 0, .locals = NULL }; - tlhash_insert ( - function->locals, &local_num, sizeof(size_t), symbol - ); - add_local ( symbol ); + insert_symbol(global_names, global_symbol); + no_global_vars++; } break; + case FUNCTION: + node_t *function = current_global; - case IDENTIFIER_DATA: - entry = lookup_local ( root->data ); - if ( entry == NULL ) - tlhash_lookup ( - function->locals, root->data, - strlen(root->data), (void**)&entry - ); - if ( entry == NULL ) - tlhash_lookup ( - global_names,root->data,strlen(root->data),(void**)&entry - ); - if ( entry == NULL ) + // Function node allways have the same structure, + // [0] are the identifier + // [1] are the variable list, within a paramerer_list + // [2] are the actual block + if (!function->children[0]) + break; + + // Create the function symbol + global_symbol = malloc(sizeof(symbol_t)); + *global_symbol = (symbol_t){ + .type = SYM_FUNCTION, + .name = current_global->children[0]->data, + .node = current_global->children[2], + .seq = no_functions++, + .nparms = 0, + .locals = malloc(sizeof(tlhash_t)) + }; + + // Initialize the local variable table + tlhash_init(global_symbol->locals, LOCAL_BUCKET_SIZE); + + // Insert the pointer to the newly created symbol + insert_symbol(global_names, global_symbol); + + // If there are no parameters in function, break. + if (!current_global->children[1]->n_children) + break; + + + // Find all params and insert into hash table in global_symbol + symbol_t *param_sym; + node_t *param_list = current_global->children[1]->children[0]; + global_symbol->nparms = param_list->n_children; + + for (uint64_t param_i = 0; param_i < param_list->n_children; param_i++) { - fprintf ( stderr, "Identifier '%s' does not exist in scope\n", - (char *)root->data - ); - exit ( EXIT_FAILURE ); + param_sym = malloc(sizeof(symbol_t)); + *param_sym = (symbol_t){ + .type = SYM_PARAMETER, + .name = param_list->children[param_i]->data, + .node = param_list->children[param_i], + .seq = param_i, + .nparms = 0, + .locals = NULL + }; + + insert_symbol(global_symbol->locals, param_sym); } - root->entry = entry; break; - - case STRING_DATA: - add_string ( root ); - break; - - default: - for ( size_t c=0; cn_children; c++ ) - bind_names ( function, root->children[c] ); - break; - } -} - - -void -destroy_symtab ( void ) -{ - for ( size_t i=0; ilocals != NULL ) - { - size_t n_locals = tlhash_size ( glob->locals ); - symbol_t *locals[n_locals]; - tlhash_values ( glob->locals, (void **)&locals ); - for ( size_t l=0; llocals ); - free ( glob->locals ); } - free ( glob ); } - tlhash_finalize ( global_names ); - free ( global_names ); - free ( scopes ); + return no_functions + no_global_vars; +} + + +/** + * Inserts a symbol into a hash table, key is defined in the name field in the symbol supplied. + * + * @param hash_table pointer to the hash table the symbol is inserted into + * @param symbol pointer to the symbol to be inserted + */ +void +insert_symbol(tlhash_t *hash_table, symbol_t* symbol) +{ + tlhash_insert(hash_table, symbol->name, strlen(symbol->name), symbol); +} + + +/** + * @brief Traverse a node root, and find all variables and strings + * + * @param function pointer to the current function + * @param root pointer to the root node + */ +static void +bind_names ( symbol_t *function, node_t *root ) +{ // NULL check + if (!function) + return; + if (!root) + return; + + // Can't declare variables inside switch unless + // it is in a new scope + node_t *declarations; + // We want do top to bottom traverse, so do not + // call recusivly unless we need to go deeper + switch (root->type) + { + // If new BLOCK start, push the scope and recurse from here. + case BLOCK: + push_scope(); + for (uint64_t i = 0; i < root->n_children; i++) + bind_names(function, root->children[i]); + pop_scope(); + break; + + // If DECLARATION_LIST, find all the identifiers + // and insert local into scope and function + case DECLARATION_LIST: + if (!root->children[0]) + break; + + declarations = root->children[0]; + for (uint64_t i = 0; i < declarations->n_children; i++) + // Insert each of the local variables in the declaration + insert_local_var(function, declarations->children[i]); + + break; + + // If IDENTIFIER_DATA, look up the identifier in all the scopes. + // If not found (NULL), crash the compiler with a somewhat helpful message. + case IDENTIFIER_DATA: + if (!root->data) + break; + + if (!(root->entry = lookup_var(function, root->data))) + { + ERRPRT("Could not find %s in scope!\n", (char*)root->data) + exit(EXIT_FAILURE); + } + break; + + // If STRING_DATA, collect the string and point the + // data in the corresponding node to the array index + case STRING_DATA: + collect_string(root); + break; + + // If none of the above, go deeper if possible. + default: + for (uint64_t i = 0; i < root->n_children; i++) + bind_names(function, root->children[i]); + break; + } +} + +/** + * Pushes a new hash table to the scope stack. + * + * Increases the size of the stack if too small. + * + */ +static void +push_scope(void) +{ + // Allocate memory for the hash table and initialize + scopes[cur_scope_depth] = malloc(sizeof(tlhash_t)); + tlhash_init(scopes[cur_scope_depth++], LOCAL_BUCKET_SIZE); + + // Grow the amount of scopes if not enough + if (cur_scope_depth >= no_scopes) + { + no_scopes *= 2; + tlhash_t **new_scopes = realloc(scopes, no_scopes * sizeof(tlhash_t)); + if (!new_scopes) + { + ERRPRT("Could not realloc scopes!\n"); + exit(EXIT_FAILURE); + } + scopes = new_scopes; + } +} + + +/** + * Pops the dynamicy allocated hash table for the current scope depth + * + */ +static void +pop_scope(void) +{ + tlhash_finalize(scopes[--cur_scope_depth]); + free(scopes[cur_scope_depth]); +} + + +/** + * Allocates and inserts a local symbol into + * the scope stack and into the function + * + * @param function pointer to the current function + * @param root pointer to the root node for the symbol + */ +static void +insert_local_var(symbol_t *function, node_t *root) +{ // Null ptr check + if (!root->data) + return; + + // Get the sequence num, is the size + size_t sequence = tlhash_size(function->locals); + + symbol_t *variable = malloc(sizeof(symbol_t)); + *variable = (symbol_t){ + .type = SYM_LOCAL_VAR, + .name = root->data, + .node = root, + .seq = sequence, //! Use sequence as name in var list of function, strictly growing + .nparms = 0, + .locals = NULL + }; + insert_local_to_scope(variable); + insert_local_to_func(function, variable); +} + + +/** + * Inserts a symbol to the top most scope in stack + * + * @param local pointer to the local to be inserted + */ +static void +insert_local_to_scope(symbol_t *local) +{ + insert_symbol(scopes[cur_scope_depth - 1], local); +} + + +/** + * Insert local symbol to the functions table of local variables + * uses the seq num as key as this is strictly growing + * + * @param function pointer to the function to insert the symbol + * @param local pointer to the symbol to be inserted in the table + */ +void +insert_local_to_func(symbol_t *function, symbol_t *local) +{ + tlhash_insert( + function->locals, //! Insert local to the function var table + &local->seq, //! The key is a number, unique, strictly growing + sizeof(local->seq), //! Size of key + local //! The local symbol + ); +} + + +/** + * Collects strings to the string array and + * points the data in the associated node + * to the array position + * + * @param root pointer to the root node of the string + */ +static void +collect_string(node_t *root) +{ // Null ptr check + if (!root->data) + return; + + // Get the string and allocate room for array index of string + string_list[stringc] = root->data; + root->data = malloc(sizeof(size_t)); + // Set the data ptr + *((size_t*)root->data) = stringc++; + + // Grow string array if nessecary + if (stringc >= n_string_list) + { + n_string_list *= 2; + char **new_string_list = realloc(string_list, n_string_list * sizeof(char*)); + if (!new_string_list) + { + ERRPRT("Could not realloc string list!\n"); + exit(EXIT_FAILURE); + } + string_list = new_string_list; + } +} + + +/** + * Looks up a variable identifier in all the scopes. + * Start with the scopes, then the parameters and + * the the globals + * + * @param function pointer to the function + * @param var identifier to the variable + * @return Returns the pointer to the "closest" matched identifier. NULL if not found. + */ +static symbol_t* +lookup_var(symbol_t *function, char* var) +{ + // Symbol to store the stymbol to be found + symbol_t* symbol = NULL; + // Result stores the result of the hash lookups + int result; + + // Try the local scopes first + for (int64_t d = cur_scope_depth - 1; d >= 0; d--) + { + result = tlhash_lookup(scopes[d], var, strlen(var), (void**)&symbol); + if (result == TLHASH_SUCCESS) + return symbol; + } + + // Then move to parameters + result = tlhash_lookup(function->locals, var, strlen(var), (void**)&symbol); + if (result == TLHASH_SUCCESS) + return symbol; + + // Last try global parameters + result = tlhash_lookup(global_names, var, strlen(var), (void**)&symbol); + if (result == TLHASH_SUCCESS) + return symbol; + + // If nothing is found, return NULL + return NULL; } diff --git a/exercises/06/vslc/src/parser.y b/exercises/06/vslc/src/parser.y index f1abcc8..1057d47 100644 --- a/exercises/06/vslc/src/parser.y +++ b/exercises/06/vslc/src/parser.y @@ -1,176 +1,303 @@ %{ #include -#define N0C(n,t,d) do { \ - node_init ( n = malloc(sizeof(node_t)), t, d, 0 ); \ -} while ( false ) -#define N1C(n,t,d,a) do { \ - node_init ( n = malloc(sizeof(node_t)), t, d, 1, a ); \ -} while ( false ) -#define N2C(n,t,d,a,b) do { \ - node_init ( n = malloc(sizeof(node_t)), t, d, 2, a, b ); \ -} while ( false ) -#define N3C(n,t,d,a,b,c) do { \ - node_init ( n = malloc(sizeof(node_t)), t, d, 3, a, b, c ); \ -} while ( false ) - +#define NODE(type, data, n_children, children...) node_init(malloc(sizeof(node_t)), type, data, n_children, ##children) %} -%left '|' -%left '^' -%left '&' +%define api.value.type {node_t} +%token FUNC PRINT RETURN CONTINUE IF THEN ELSE WHILE DO OPENBLOCK CLOSEBLOCK +%token VAR NUMBER IDENTIFIER STRING + +%left '|' '&' '^' %left '+' '-' %left '*' '/' %nonassoc UMINUS %right '~' -%expect 1 +//%expect 1 -%token FUNC PRINT RETURN CONTINUE IF THEN ELSE WHILE DO OPENBLOCK CLOSEBLOCK -%token VAR NUMBER IDENTIFIER STRING +%nonassoc IF THEN +%nonassoc ELSE + +/* Tried fixing vscode complaining about the type for the non-terminals, didn't work +%union { + node_t* node; +} + +%type global_list global +%type statement_list print_list expression_list variable_list argument_list parameter_list declaration_list +%type function statement block +%type assignment_statement return_statement print_statement null_statement if_statement while_statement +%type relation expression declaration print_item identifier number string +*/ %% -program : - global_list { N1C ( root, PROGRAM, NULL, $1 ); } - ; -global_list : - global { N1C ( $$, GLOBAL_LIST, NULL, $1 ); } - | global_list global { N2C ( $$, GLOBAL_LIST, NULL, $1, $2 ); } - ; + +program: + global_list { + root = NODE(PROGRAM, NULL, 1, $1); + } + ; + +global_list: + global { + $$ = NODE(GLOBAL_LIST, NULL, 1, $1); + } + | global_list global { + $$ = NODE(GLOBAL_LIST, NULL, 2, $1, $2); + } + ; + global: - function { N1C ( $$, GLOBAL, NULL, $1 ); } - | declaration { N1C ( $$, GLOBAL, NULL, $1 ); } - ; -statement_list : - statement { N1C ( $$, STATEMENT_LIST, NULL, $1 ); } - | statement_list statement { N2C ( $$, STATEMENT_LIST, NULL, $1, $2 ); } - ; -print_list : - print_item { N1C ( $$, PRINT_LIST, NULL, $1 ); } - | print_list ',' print_item { N2C ( $$, PRINT_LIST, NULL, $1, $3 ); } - ; -expression_list : - expression { N1C ( $$, EXPRESSION_LIST, NULL, $1 ); } - | expression_list ',' expression { N2C($$, EXPRESSION_LIST, NULL, $1, $3); } - ; -variable_list : - identifier { N1C ( $$, VARIABLE_LIST, NULL, $1 ); } - | variable_list ',' identifier { N2C ( $$, VARIABLE_LIST, NULL, $1, $3 ); } - ; -argument_list : - expression_list { N1C ( $$, ARGUMENT_LIST, NULL, $1 ); } - | /* epsilon */ { $$ = NULL; } - ; -parameter_list : - variable_list { N1C ( $$, PARAMETER_LIST, NULL, $1 ); } - | /* epsilon */ { $$ = NULL; } - ; -declaration_list : - declaration { N1C ( $$, DECLARATION_LIST, NULL, $1 ); } - | declaration_list declaration { N2C ($$, DECLARATION_LIST, NULL, $1, $2); } - ; -function : - FUNC identifier '(' parameter_list ')' statement - { N3C ( $$, FUNCTION, NULL, $2, $4, $6 ); } - ; -statement : - assignment_statement { N1C ( $$, STATEMENT, NULL, $1 ); } - | return_statement { N1C ( $$, STATEMENT, NULL, $1 ); } - | print_statement { N1C ( $$, STATEMENT, NULL, $1 ); } - | if_statement { N1C ( $$, STATEMENT, NULL, $1 ); } - | while_statement { N1C ( $$, STATEMENT, NULL, $1 ); } - | null_statement { N1C ( $$, STATEMENT, NULL, $1 ); } - | block { N1C ( $$, STATEMENT, NULL, $1 ); } - ; -block : - OPENBLOCK declaration_list statement_list CLOSEBLOCK - { N2C ($$, BLOCK, NULL, $2, $3); } - | OPENBLOCK statement_list CLOSEBLOCK { N1C ($$, BLOCK, NULL, $2 ); } - ; -assignment_statement : - identifier ':' '=' expression - { N2C ( $$, ASSIGNMENT_STATEMENT, NULL, $1, $4 ); } - | identifier '+' '=' expression - { N2C ( $$, ADD_STATEMENT, NULL, $1, $4 ); } - | identifier '-' '=' expression - { N2C ( $$, SUBTRACT_STATEMENT, NULL, $1, $4 ); } - | identifier '*' '=' expression - { N2C ( $$, MULTIPLY_STATEMENT, NULL, $1, $4 ); } - | identifier '/' '=' expression - { N2C ( $$, DIVIDE_STATEMENT, NULL, $1, $4 ); } - ; -return_statement : - RETURN expression - { N1C ( $$, RETURN_STATEMENT, NULL, $2 ); } - ; -print_statement : - PRINT print_list - { N1C ( $$, PRINT_STATEMENT, NULL, $2 ); } - ; -null_statement : - CONTINUE - { N0C ( $$, NULL_STATEMENT, NULL ); } - ; -if_statement : - IF relation THEN statement - { N2C ( $$, IF_STATEMENT, NULL, $2, $4 ); } - | IF relation THEN statement ELSE statement - { N3C ( $$, IF_STATEMENT, NULL, $2, $4, $6 ); } - ; -while_statement : - WHILE relation DO statement - { N2C ( $$, WHILE_STATEMENT, NULL, $2, $4 ); } - ; + function { + $$ = NODE(GLOBAL, NULL, 1, $1); + } + | declaration { + $$ = NODE(GLOBAL, NULL, 1, $1); + } + ; + +statement_list: + statement { + $$ = NODE(STATEMENT_LIST, NULL, 1, $1); + } + | statement_list statement { + $$ = NODE(STATEMENT_LIST, NULL, 2, $1, $2); + } + ; + +print_list: + print_item { + $$ = NODE(PRINT_LIST, NULL, 1, $1); + } + | print_list ',' print_item { + $$ = NODE(PRINT_LIST, NULL, 2, $1, $3); + } + ; + +expression_list: + expression { + $$ = NODE(EXPRESSION_LIST, NULL, 1, $1); + } + | expression_list ',' expression { + $$ = NODE(EXPRESSION_LIST, NULL, 2, $1, $3); + } + ; + +variable_list: + identifier { + $$ = NODE(VARIABLE_LIST, NULL, 1, $1); + } + | variable_list ',' identifier { + $$ = NODE(VARIABLE_LIST, NULL, 2, $1, $3); + } + ; + +argument_list: + expression_list { + $$ = NODE(ARGUMENT_LIST, NULL, 1, $1); + } + | /* epsilon */ { + $$ = NODE(ARGUMENT_LIST, NULL, 0); + } + ; + +parameter_list: + variable_list { + $$ = NODE(PARAMETER_LIST, NULL, 1, $1); + } + | /* epsilon */ { + $$ = NODE(PARAMETER_LIST, NULL, 0); + } + ; + +declaration_list: + declaration { + $$ = NODE(DECLARATION_LIST, NULL, 1, $1); + } + | declaration_list declaration { + $$ = NODE(DECLARATION_LIST, NULL, 2, $1, $2); + } + ; + +function: + FUNC identifier '(' parameter_list ')' statement { + $$ = NODE(FUNCTION, NULL, 3, $2, $4, $6); + } + ; + +statement: + assignment_statement { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + | return_statement { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + | print_statement { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + | if_statement { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + | while_statement { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + | null_statement { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + | block { + $$ = NODE(STATEMENT, NULL, 1, $1); + } + ; + +block: + OPENBLOCK declaration_list statement_list CLOSEBLOCK { + $$ = NODE(BLOCK, NULL, 2, $2, $3); + } + | OPENBLOCK statement_list CLOSEBLOCK { + $$ = NODE(BLOCK, NULL, 1, $2); + } + ; + +assignment_statement: + identifier ':' '=' expression { + $$ = NODE(ASSIGNMENT_STATEMENT, NULL, 2, $1, $4); + } + | identifier '+' '=' expression { + $$ = NODE(ADD_STATEMENT, NULL, 2, $1, $4); + } + | identifier '-' '=' expression { + $$ = NODE(SUBTRACT_STATEMENT, NULL, 2, $1, $4); + } + | identifier '*' '=' expression { + $$ = NODE(MULTIPLY_STATEMENT, NULL, 2, $1, $4); + } + | identifier '/' '=' expression { + $$ = NODE(DIVIDE_STATEMENT, NULL, 2, $1, $4); + } + ; + +return_statement: + RETURN expression { + $$ = NODE(RETURN_STATEMENT, NULL, 1, $2); + } + ; + +print_statement: + PRINT print_list { + $$ = NODE(PRINT_STATEMENT, NULL, 1, $2); + } + ; + +null_statement: + CONTINUE { + $$ = NODE(NULL_STATEMENT, NULL, 0); + } + ; + +if_statement: + IF relation THEN statement { + $$ = NODE(IF_STATEMENT, NULL, 2, $2, $4); + } + | IF relation THEN statement ELSE statement { + $$ = NODE(IF_STATEMENT, NULL, 3, $2, $4, $6); + } + ; + +while_statement: + WHILE relation DO statement { + $$ = NODE(WHILE_STATEMENT, NULL, 2, $2, $4); + } + ; + relation: - expression '=' expression - { N2C ( $$, RELATION, strdup("="), $1, $3 ); } - | expression '<' expression - { N2C ( $$, RELATION, strdup("<"), $1, $3 ); } - | expression '>' expression - { N2C ( $$, RELATION, strdup(">"), $1, $3 ); } - ; -expression : - expression '|' expression - { N2C ( $$, EXPRESSION, strdup("|"), $1, $3 ); } - | expression '^' expression - { N2C ( $$, EXPRESSION, strdup("^"), $1, $3 ); } - | expression '&' expression - { N2C ( $$, EXPRESSION, strdup("&"), $1, $3 ); } - | expression '+' expression - { N2C ( $$, EXPRESSION, strdup("+"), $1, $3 ); } - | expression '-' expression - { N2C ( $$, EXPRESSION, strdup("-"), $1, $3 ); } - | expression '*' expression - { N2C ( $$, EXPRESSION, strdup("*"), $1, $3 ); } - | expression '/' expression - { N2C ( $$, EXPRESSION, strdup("/"), $1, $3 ); } - | '-' expression %prec UMINUS - { N1C ( $$, EXPRESSION, strdup("-"), $2 ); } - | '~' expression %prec UMINUS - { N1C ( $$, EXPRESSION, strdup("~"), $2 ); } - | '(' expression ')' { $$ = $2; } - | number { N1C ( $$, EXPRESSION, NULL, $1 ); } - | identifier - { N1C ( $$, EXPRESSION, NULL, $1 ); } - | identifier '(' argument_list ')' - { N2C ( $$, EXPRESSION, NULL, $1, $3 ); } - ; -declaration : - VAR variable_list { N1C ( $$, DECLARATION, NULL, $2 ); } - ; -print_item : - expression - { N1C ( $$, PRINT_ITEM, NULL, $1 ); } - | string - { N1C ( $$, PRINT_ITEM, NULL, $1 ); } - ; -identifier: IDENTIFIER { N0C($$, IDENTIFIER_DATA, strdup(yytext) ); } -number: NUMBER - { - int64_t *value = malloc ( sizeof(int64_t) ); - *value = strtol ( yytext, NULL, 10 ); - N0C($$, NUMBER_DATA, value ); - } -string: STRING { N0C($$, STRING_DATA, strdup(yytext) ); } + expression '=' expression { + $$ = NODE(RELATION, strdup("="), 2, $1, $3); + } + | expression '<' expression { + $$ = NODE(RELATION, strdup("<"), 2, $1, $3); + } + | expression '>' expression { + $$ = NODE(RELATION, strdup(">"), 2, $1, $3); + } + ; + +expression: + expression '|' expression { + $$ = NODE(EXPRESSION, strdup("|"), 2, $1, $3); + } + | expression '^' expression { + $$ = NODE(EXPRESSION, strdup("^"), 2, $1, $3); + } + | expression '&' expression { + $$ = NODE(EXPRESSION, strdup("&"), 2, $1, $3); + } + | expression '+' expression { + $$ = NODE(EXPRESSION, strdup("+"), 2, $1, $3); + } + | expression '-' expression { + $$ = NODE(EXPRESSION, strdup("-"), 2, $1, $3); + } + | expression '*' expression { + $$ = NODE(EXPRESSION, strdup("*"), 2, $1, $3); + } + | expression '/' expression { + $$ = NODE(EXPRESSION, strdup("/"), 2, $1, $3); + } + | '-' expression %prec UMINUS { + $$ = NODE(EXPRESSION, strdup("-"), 1, $2); + } + | '~' expression { + $$ = NODE(EXPRESSION, strdup("~"), 1, $2); + } + | '(' expression ')' { + $$ = NODE(EXPRESSION, NULL /*strdup("group")*/, 1, $2); + } + | number { + $$ = NODE(EXPRESSION, NULL /*strdup("number")*/, 1, $1); + } + | identifier { + $$ = NODE(EXPRESSION, NULL /*strdup("identifier")*/, 1, $1); + } + | identifier '(' argument_list ')' { + $$ = NODE(EXPRESSION, /*NULL*/ strdup("function_call"), 2, $1, $3); + } + ; + +declaration: + VAR variable_list { + $$ = NODE(DECLARATION, NULL, 1, $2); + } + ; + +print_item: + expression { + $$ = NODE(PRINT_ITEM, NULL, 1, $1); + } + | string { + $$ = NODE(PRINT_ITEM, NULL, 1, $1); + } + ; + +identifier: + IDENTIFIER { + $$ = NODE(IDENTIFIER_DATA, strdup(yytext), 0); // Zero children + } + ; + +number: + NUMBER { + uint64_t* p_number = malloc(sizeof(uint64_t)); + *p_number = strtol(yytext, NULL, 10); + $$ = NODE(NUMBER_DATA, p_number, 0); // Zero children + } + ; + +string: + STRING { + $$ = NODE(STRING_DATA, strdup(yytext), 0); // Zero children + } + ; + %% int diff --git a/exercises/06/vslc/src/tree.c b/exercises/06/vslc/src/tree.c index ef7658c..b17d8cd 100644 --- a/exercises/06/vslc/src/tree.c +++ b/exercises/06/vslc/src/tree.c @@ -12,6 +12,10 @@ tree_print(node_t* root, stem head); static void destroy_subtree ( node_t *discard ); +static void prune_children(node_t **simplified, node_t *root); +static void resolve_constant_expressions(node_t **simplified, node_t *root); +static void flatten(node_t **simplified, node_t *root); + /* External interface */ void @@ -40,7 +44,8 @@ print_syntax_tree ( void ) } -void +// Changed so it returns the pointer to the new node, can be used as before, but makes the parser file cleaner +node_t* node_init (node_t *nd, node_index_t type, void *data, uint64_t n_children, ...) { va_list child_list; @@ -55,6 +60,8 @@ node_init (node_t *nd, node_index_t type, void *data, uint64_t n_children, ...) for ( uint64_t i=0; ichildren[i] = va_arg ( child_list, node_t * ); va_end ( child_list ); + + return nd; } @@ -82,9 +89,10 @@ tree_print(node_t* root, stem head) return; } printf("─%s", node_string[root->type]); - if ( root->type == IDENTIFIER_DATA || - root->type == STRING_DATA || - root->type == EXPRESSION ) + if ( root->type == IDENTIFIER_DATA || + root->type == STRING_DATA || + root->type == EXPRESSION || + root->type == RELATION) printf("(%s)", (char *) root->data); else if (root->type == NUMBER_DATA) printf("(%ld)", *((int64_t *)root->data)); @@ -152,81 +160,241 @@ destroy_subtree ( node_t *discard ) static void -simplify_tree ( node_t **simplified, node_t *root ) +flatten(node_t **simplified, node_t *root) { - if ( root == NULL ) + /* This will flatten left-expanded lists */ + if (!root) return; - /* Simplify subtrees before examining this node */ - for ( uint64_t i=0; in_children; i++ ) - simplify_tree ( &root->children[i], root->children[i] ); + /* Do this recursivly */ + for (int i = 0; i < root->n_children; i++) + flatten(&root->children[i], root->children[i]); - node_t *discard, *result = root; - switch ( root->type ) + node_t **new_children, *result = root; + switch (root->type) { - /* Structures of purely syntactic function */ - case PARAMETER_LIST: case ARGUMENT_LIST: - case STATEMENT: case PRINT_ITEM: case GLOBAL: - result = root->children[0]; - node_finalize ( root ); + case GLOBAL_LIST: + case STATEMENT_LIST: + case PRINT_LIST: + case EXPRESSION_LIST: + case VARIABLE_LIST: + case DECLARATION_LIST: + // Check if node have more than two children + if (root->n_children < 2) break; - case PRINT_STATEMENT: - result = root->children[0]; + + result = root->children[0]; + result->n_children++; + + // Realloc the array of children to the new size + if (!(new_children = realloc(result->children, result->n_children * sizeof(node_t*)))) + break; + // if successs, insert the new array + result->children = new_children; + + // Insert child at the end + result->children[result->n_children - 1] = root->children[1]; + + node_finalize(root); + break; + } + + *simplified = result; +} + + +static void +prune_children(node_t **simplified, node_t *root) +{ + if (!root) + return; + + /* Do this recursivly */ + for (int i = 0; i < root->n_children; i++) + prune_children(&root->children[i], root->children[i]); + + node_t *result = root; + switch (root->type) + { + case PROGRAM: + case GLOBAL: + //case ARGUMENT_LIST: // For this to work, need to change order of operations + //case PARAMETER_LIST: // For this to work, need to change order of operations + //case VARIABLE_LIST: + //case EXPRESSION_LIST: + case DECLARATION: + case STATEMENT: + case PRINT_ITEM: + case PRINT_STATEMENT: + result = root->children[0]; + + // The print_statement only contains a print_list, still need a print_statement. + if (root->type == PRINT_STATEMENT) result->type = PRINT_STATEMENT; + + node_finalize(root); + break; + } + + *simplified = result; +} + + +static void +resolve_constant_expressions(node_t **simplified, node_t *root) +{ + if (!root) + return; + + /* Do this recursivly */ + for (int i = 0; i < root->n_children; i++) + resolve_constant_expressions(&root->children[i], root->children[i]); + + if (root->type != EXPRESSION) + return; + + node_t *result = root; + + switch (root->n_children) + { + case 1: + result = root->children[0]; + + if (root->data && + result->type == NUMBER_DATA && + result->data) + { + switch (*((char*)root->data)) + { + case '-': + *((int64_t*)result->data) *= -1; + break; + case '~': + *((int64_t*)result->data) = ~*((int64_t*)result->data); + break; + } node_finalize(root); - /* Flatten lists: - * Take left child, append right child, substitute left for root. - */ - case STATEMENT_LIST: case DECLARATION_LIST: case GLOBAL_LIST: - case PRINT_LIST: case EXPRESSION_LIST: case VARIABLE_LIST: - if ( root->n_children >= 2 ) + } + else if (!root->data) + node_finalize(root); + else + result = root; + + break; + + case 2: + // Both children needs to be numbers to resolve constants + if (root->children[0]->type == NUMBER_DATA && + root->children[1]->type == NUMBER_DATA) + { + // Check if children does not contain null pointers + if (!root->children[0]->data) + break; + if (!root->children[1]->data) + break; + + // Check if data field is not null pointer + if (!root->data) + break; + + result = root->children[0]; + int64_t + *lhs = result->data, + *rhs = root->children[1]->data; + + switch (*(char*)root->data) { - result = root->children[0]; - result->n_children += 1; - result->children = realloc ( - result->children, result->n_children * sizeof(node_t *) - ); - result->children[result->n_children-1] = root->children[1]; - node_finalize ( root ); - } - break; - case EXPRESSION: - switch ( root->n_children ) - { - case 1: - if ( root->children[0]->type == NUMBER_DATA ) - { - result = root->children[0]; - if ( root->data != NULL ) - *((int64_t *)result->data) *= -1; - node_finalize (root); - } - else if ( root->data == NULL ) - { - result = root->children[0]; - node_finalize (root); - } - break; - case 2: - if ( root->children[0]->type == NUMBER_DATA && - root->children[1]->type == NUMBER_DATA - ) { - result = root->children[0]; - int64_t - *x = result->data, - *y = root->children[1]->data; - switch ( *((char *)root->data) ) - { - case '+': *x += *y; break; - case '-': *x -= *y; break; - case '*': *x *= *y; break; - case '/': *x /= *y; break; - } - node_finalize ( root->children[1] ); - node_finalize ( root ); - } - break; + /* Assignments */ + case '|': *lhs |= *rhs; break; + case '^': *lhs ^= *rhs; break; + case '&': *lhs &= *rhs; break; + case '+': *lhs += *rhs; break; + case '-': *lhs -= *rhs; break; + case '*': *lhs *= *rhs; break; + case '/': *lhs /= *rhs; break; } + + node_finalize(root->children[1]); + node_finalize(root); + } + + + break; } *simplified = result; } + + +static void +resolve_constant_relations( node_t** simplified, node_t* root) +{ + if (!root) + return; + + /* Do this recursivly */ + for (int i = 0; i < root->n_children; i++) + resolve_constant_relations(&root->children[i], root->children[i]); + + if (root->type != RELATION)//|| root->type != RELATION) + return; + + node_t *result = root; + + if (root->n_children != 2) + return; + + // Both children must be constant numbers + if (root->children[0]->type != NUMBER_DATA || + root->children[1]->type != NUMBER_DATA) + return; + + // Check if children does not contain null pointers + if (!root->children[0]->data) + return; + if (!root->children[1]->data) + return; + + // Check if data field is not null pointer + if (!root->data) + return; + + result = root->children[0]; + int64_t + *lhs = result->data, + *rhs = root->children[1]->data; + + switch (*(char*)root->data) + { + /* Relations */ + case '=': *lhs = (*lhs == *rhs); break; + case '<': *lhs = (*lhs < *rhs); break; + case '>': *lhs = (*lhs > *rhs); break; + } + + node_finalize(root->children[1]); + node_finalize(root); + + *simplified = result; +} + + +static void +simplify_tree ( node_t **simplified, node_t *root ) +{ + if (!root) + return; + + /* + Each of the functions do their operations recursivly. + This opens up for a lot more flexibility, like removing + variable list after it is flatten + */ + flatten(&root, root); + prune_children(&root, root); + resolve_constant_expressions(&root, root); + + // The following is experimental, will resolve the constant relations + resolve_constant_relations(&root, root); + + *simplified = root; +} diff --git a/exercises/06/vslc/src/vslc.c b/exercises/06/vslc/src/vslc.c index 3d239a4..0d6ad36 100644 --- a/exercises/06/vslc/src/vslc.c +++ b/exercises/06/vslc/src/vslc.c @@ -6,20 +6,20 @@ /* Global state */ -node_t *root; // Syntax tree -tlhash_t *global_names; // Symbol table -char **string_list; // List of strings in the source -size_t n_string_list = 8; // Initial string list capacity (grow on demand) -size_t stringc = 0; // Initial string count +node_t *root; // Syntax tree +tlhash_t *global_names; // Symbol table +char **string_list; // List of strings in the source +size_t n_string_list = 8; // Initial string list capacity (grow on demand) +size_t stringc = 0; // Initial string count /* Command line option parsing for the main function */ static void options ( int argc, char **argv ); bool - print_full_tree = false, - print_simplified_tree = false, + print_full_tree = false, + print_simplified_tree = false, print_symbol_table_contents = false, - print_generated_program = true, - new_print_style = true; + print_generated_program = true, + new_print_style = true; /* Entry point */ @@ -28,7 +28,8 @@ main ( int argc, char **argv ) { options ( argc, argv ); - yyparse(); // Generated from grammar/bison, constructs syntax tree + yyparse(); // Generated from grammar/bison, constructs syntax tree + yylex_destroy(); // Free heap used by flex if ( print_full_tree ) print_syntax_tree (); @@ -36,7 +37,7 @@ main ( int argc, char **argv ) if ( print_simplified_tree ) print_syntax_tree (); - create_symbol_table (); // In ir.c + create_symbol_table (); // In ir.c if ( print_symbol_table_contents ) print_symbol_table();