From 454d642f80907f60e7e6905a4c75321f3931537c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind?= Date: Sun, 3 Apr 2022 22:41:37 +0200 Subject: [PATCH] Generates all the nesessary code for running - can compile simple stuff --- exercises/05/vslc/src/generator.c | 412 +++++++++++++++++++++++++----- 1 file changed, 353 insertions(+), 59 deletions(-) diff --git a/exercises/05/vslc/src/generator.c b/exercises/05/vslc/src/generator.c index 27d5f3e..63d121e 100644 --- a/exercises/05/vslc/src/generator.c +++ b/exercises/05/vslc/src/generator.c @@ -3,6 +3,9 @@ #define ASM(opcode, args...) puts("\t"#opcode"\t"#args) #define LABEL(label) printf("_%s:\n", (char*)label) +#define NO_REG_RECORD 6 +#define NO_CALLE_SAVED_REG 10 + /**Generate table of strings in a rodata section. */ void generate_stringtable ( void ); /**Declare global variables in a bss section */ @@ -19,56 +22,133 @@ void generate_main ( symbol_t *first ); #define MIN(a,b) (((a)<(b)) ? (a):(b)) -static const char *record[6] = { +static const char *record[NO_REG_RECORD] = { "%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9" }; +static const char *calle_saved_reg[NO_CALLE_SAVED_REG] = { + "%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%rsp", "%r8", "%r9", "%r10", "%r11" +}; + // Helper funcs for generating different nodes + +/** + * Generates assembly for printing + * + * @param node print statement node with children to print + */ static void generate_print(node_t* node); + +/** + * Generate identfier for a variable in memory + * + * @param node identifier we want the addres of + */ static void generate_var_ident(node_t *node); + +/** + * Main function to calculate and solve the expressions. + * Based on a stack machine. Result is stored on stack. + * + * @param node root node for expression + */ static void solve_expressions(node_t *node); +/** + * Generates a funciton call + * + * @param node root node for function + */ +static void generate_function_call(node_t *node); + +/** + * Generates the return part of a function + * + * @param node node containing the return statement + */ +static void generate_function_return(node_t *node); + +/** + * Used for calculating and evaluating the add/sub/mul/div statements. + * Turns the statement into an expression, adds the result of rhs to lhs + * and stores the value back to the indentifier + * + * @param node node to the statement + * @param operator one of the following +, -, *, / + */ +static void solve_statements(node_t *node, char *operator); + +/** + * Generate assembly to fetch a variable on stack + * + * @param node node to the variable to be fetched + * @param dest where to put the value + */ +static void fetch_variable(node_t *node, const char* dest); + +/** + * Same as fetch_variable, but stores it back to memory. + * + * @param node variable to be stored + * @param src from where should the data come from + */ +static void writeback_variable(node_t *node, char* src); + // Helper func for fetching all symbols in a table static uint64_t fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list); void generate_program ( void ) { - /* TODO: Emit assembly instructions for functions, function calls, - * print statements and expressions. - * The provided function 'generate_main' creates a program entry point - * for the function symbol it is given as argument. - */ - - // TODO: Implement - // - Generate code for all functions - // - Generate main (function already implemented) by assigning either the - // function named main or the first function of the source file if no - // main exists. generate_stringtable(); generate_global_variables(); symbol_t **global_list; uint64_t no_globals = fetch_symbols(global_names, &global_list); + bool main_generated = false; + uint64_t seq0_index = -1; + for (uint64_t g = 0; g < no_globals; g++) + { + if (global_list[g]->type != SYM_FUNCTION) + continue; + + // If the name of the function is main + if (!strcmp(global_list[g]->name, "main")) + { + generate_main(global_list[g]); + main_generated = true; + } + + if (!global_list[g]->seq) + seq0_index = g; + } + + // If no main was found, use the first function instead. + // That means the function with seq = 0 + if (!main_generated) + generate_main(global_list[seq0_index]); + for (uint64_t g = 0; g < no_globals; g++) { if (global_list[g]->type == SYM_FUNCTION) generate_function(global_list[g]); } + + free(global_list); } void generate_stringtable ( void ) { - /* These can be used to emit numbers, strings and a run-time - * error msg. from main - */ + /* These can be used to emit numbers, strings and a run-time + * error msg. from main + */ puts("# DATA SECTION"); - puts(".data"); - puts(".intout:\t.asciz \"\%ld \""); - puts(".strout:\t.asciz \"\%s \""); - puts(".errout:\t.asciz \"Wrong number of arguments\""); + puts(".data"); + puts(".intout:\t.asciz \"\%ld \""); + puts(".strout:\t.asciz \"\%s \""); + puts(".errout:\t.asciz \"Wrong number of arguments\""); for (uint64_t s = 0; s < stringc; s++) { @@ -109,7 +189,7 @@ generate_function ( symbol_t *function ) ASM(movq, %rsp, %rbp); // Push params to stack - for (int arg = 0; arg < MIN(6,function->nparms); arg++) + for (int arg = 0; arg < MIN(NO_REG_RECORD,function->nparms); arg++) printf("\tpushq\t%s\n", record[arg] ); // How many local variables are inside function @@ -137,38 +217,82 @@ generate_node ( node_t *node) { // TODO: Generate code corresponding to node + // All statements have the same structure. + // [0] is the lhs, needs to be identifier, parser ensures this + // [1] is thr rhs switch (node->type) { case ASSIGNMENT_STATEMENT: + solve_expressions(node->children[1]); + ASM(popq, %rax); + writeback_variable(node->children[0], "%rax"); break; + case ADD_STATEMENT: + // The following way is the naive way of doing an assignment + /* + fetch_variable(node->children[0], "%rax"); + ASM(pushq, %rax); + solve_expressions(node->children[1]); + ASM(popq, %r10); + ASM(popq, %rax); + ASM(addq, %r10, %rax); + writeback_variable(node->children[0], "%rax"); + */ + /* The thing is that add/sub/mul/div assignments + have the same structure as expressions. + We can therefore just say that the assignment is + an expression, but remembering to do the writeback afterwards. + */ + puts("# Add statement"); + solve_statements(node, "+"); break; + case SUBTRACT_STATEMENT: + puts("# Subtract statement"); + solve_statements(node, "-"); break; + case MULTIPLY_STATEMENT: + puts("# Multiply statement"); + solve_statements(node, "*"); break; + case DIVIDE_STATEMENT: + puts("# Divide statement"); + solve_statements(node, "/"); break; + case PRINT_STATEMENT: + puts("# Print statement"); generate_print(node); break; + case RETURN_STATEMENT: - ASM(movq, %rbp, %rsp); - ASM(popq, %rbp); - ASM(ret); - break; - case NULL_STATEMENT: + puts("# Return statement"); + generate_function_return(node); break; + + case IF_STATEMENT: case WHILE_STATEMENT: /* DO NOTHING YET */ break; + case NULL_STATEMENT: + /* USED IN WHILE/IF */ + break; + + case DECLARATION_LIST: + /* List of blocks we dont need to traverse */ + break; + default: for (int c = 0; c < node->n_children; c++) generate_node(node->children[c]); break; + } } @@ -176,19 +300,19 @@ generate_node ( node_t *node) void generate_print(node_t* node) { + // Push rdi and rsi to stack incase there are data in them + //ASM(pushq, %rdi); + //ASM(pushq, %rsi); for (uint64_t p = 0; p < node->n_children; p++) { node_t *curr_print = node->children[p]; - // Push rdi and rsi to stack incase there are data in them - //ASM(pushq, %rdi); - //ASM(pushq, %rsi); switch (curr_print->type) { case EXPRESSION: + solve_expressions(curr_print); + ASM(popq, %rax); ASM(movq, $.intout, %rdi); - // Since this is a stack machine, rax allways - // contain the result of expression ASM(movq, %rax, %rsi); break; @@ -199,28 +323,41 @@ generate_print(node_t* node) case IDENTIFIER_DATA: ASM(movq, $.intout, %rdi); - // Need to find the sequence number of var - printf("\tmovq\t"); - generate_var_ident(curr_print); - printf(", %%rsi\n"); + fetch_variable(curr_print, "%rsi"); break; default: break; } ASM(call, printf); - - - // Adds a newline - ASM(movq, $'\n', %rdi); - ASM(call, putchar); - - //ASM(popq, %rsi); - //ASM(popq, %rdi); - - } + // Adds a newline + ASM(movq, $'\n', %rdi); + ASM(call, putchar); + + //ASM(popq, %rsi); + //ASM(popq, %rdi); } + +// This will put the value of var in node in dest +void +fetch_variable(node_t *node, const char* dest) +{ + printf("\tmovq\t"); + generate_var_ident(node); + printf(", %s\t\t# Fetched: %s\n", dest, node->entry->name); +} + +// This will put the value in dest to the var in node +void +writeback_variable(node_t *node, char* src) +{ + printf("\tmovq\t%s,", src); + generate_var_ident(node); + printf("\t\t# Writeback: %s\n", node->entry->name); +} + + void generate_var_ident(node_t *node) { @@ -237,7 +374,7 @@ generate_var_ident(node_t *node) printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1)); else // This requires that the parameters on - // stack is in reversed order + // stack is in reversed order... easier to implement printf("%ld(%%rbp)", 8 * (ident_sym->seq - 6 + 1 )); break; @@ -248,10 +385,163 @@ generate_var_ident(node_t *node) } +// This should allways push the result to stack void solve_expressions(node_t *node) { + if (node->data) + { // Check if the expression is a function call + bool is_function_call = !strcmp(node->data, "function_call"); + if (is_function_call) + { + generate_function_call(node); + return; + } + } + + switch (node->n_children) + { + case 0: + switch (node->type) + { + case IDENTIFIER_DATA: + fetch_variable(node, "%rax"); + ASM(pushq, %rax); + break; + case NUMBER_DATA: + printf("\tmovq\t$%ld,%%rax\n",*(int64_t*)node->data); + ASM(pushq, %rax); + break; + } + break; + case 1: + solve_expressions(node->children[0]); + ASM(popq, %rax); + + switch (*(char*)node->data) + { + case '-': + ASM(negq, %rax); + break; + case '~': + ASM(notq, %rax); + break; + } + ASM(pushq, %rax); + + break; + case 2: + + // First fetch lhs of expr and then rhs + // Push results on stack + for (int i = 0; i < 2; i++) + solve_expressions(node->children[i]); + + + // Put rhs in %r10 + ASM(popq, %r10); + // put lhs in %rax + ASM(popq, %rax); + + switch (*(char*)node->data) + { + /* Assignments */ + case '|': ASM(orq, %r10, %rax); break; // Bitwise or of %rax and %r10 + case '^': ASM(xorq, %r10, %rax); break; // Bitwise xor of %rax and %r10 + case '&': ASM(andq, %r10, %rax); break; // Bitwise and of %rax and %r10 + case '+': ASM(addq, %r10, %rax); break; // Add %rax and %r10 + case '-': ASM(subq, %r10, %rax); break; // Subtract %r10 from %rax + case '*': ASM(imulq, %r10); break; // Mulitply %rax with %r10 + case '/': + ASM(cqto); // Convert rax to octaword, %rdx:%rax + ASM(idivq, %r10); // Divide %rdx:%rax by %r10 + break; + } + + // Push result to stack. + ASM(pushq, %rax); + break; + } +} + +void +generate_function_call(node_t *node) +{ + printf("# Function call\n"); + + node_t *arg_list = node->children[1]; + if (arg_list->n_children) + arg_list = arg_list->children[0]; + + for (int arg = 0; arg < MIN(NO_REG_RECORD, arg_list->n_children); arg++) + { + if (arg_list->children[arg]->type == NUMBER_DATA) + printf("\tmovq\t$%ld, %s\n", + *(int64_t*)arg_list->children[arg]->data, + record[arg] + ); + else + fetch_variable(arg_list->children[arg], record[arg]); + } + + if (arg_list->n_children > NO_REG_RECORD) + { + for (int arg = arg_list->n_children - 1; arg >= NO_REG_RECORD; arg--) + { + if (arg_list->children[arg]->type == NUMBER_DATA) + printf("\tpushq\t$%ld\n", + *(int64_t*)arg_list->children[arg]->data + ); + else + { + printf("\tpushq\t"); + generate_var_ident(arg_list->children[arg]); + putchar('\n'); + } + } + if (arg_list->n_children % 2) + ASM(pushq, $0); + } + + printf("\tcall\t_%s\n", (char*)node->children[0]->data); + ASM(pushq, %rax); + printf("# End of function call\n"); + + /* + for (int reg = 0; reg < NO_CALLE_SAVED_REG; reg++) + printf("\tpushq\t%s \t\t# Pushing %s to stack\n", + calle_saved_reg[reg], + calle_saved_reg[reg] + ); + + for (int reg = NO_CALLE_SAVED_REG; reg > 0; reg--) + printf("\tpopq\t%s \t\t# Poping %s from stack\n", + calle_saved_reg[reg], + calle_saved_reg[reg] + );*/ + +} + +void +generate_function_return(node_t *node) +{ + solve_expressions(node->children[0]); + ASM(popq, %rax); + ASM(leave); + ASM(ret); +} + +void +solve_statements(node_t *node, char *operator) +{ + node->type = EXPRESSION; + node->data = strdup(operator); + + solve_expressions(node); + + ASM(popq, %rax); + writeback_variable(node->children[0], "%rax"); } /**Generates the main function with argument parsing and calling of our @@ -260,6 +550,7 @@ solve_expressions(node_t *node) void generate_main ( symbol_t *first ) { + puts("###### Entry point for GAS #####"); puts ( ".globl main" ); puts ( ".section .text" ); puts ( "main:" ); @@ -268,15 +559,15 @@ generate_main ( symbol_t *first ) printf ( "\tsubq\t$1,%%rdi\n" ); printf ( "\tcmpq\t$%zu,%%rdi\n", first->nparms ); - printf ( "\tjne\tABORT\n" ); + printf ( "\tjne \tABORT\n" ); printf ( "\tcmpq\t$0,%%rdi\n" ); - printf ( "\tjz\tSKIP_ARGS\n" ); + printf ( "\tjz \tSKIP_ARGS\n" ); printf ( "\tmovq\t%%rdi,%%rcx\n" ); - printf ( "\taddq $%zu, %%rsi\n", 8*first->nparms ); + printf ( "\taddq\t$%zu, %%rsi\n", 8*first->nparms ); printf ( "PARSE_ARGV:\n" ); - printf ( "\tpushq %%rcx\n" ); - printf ( "\tpushq %%rsi\n" ); + printf ( "\tpushq\t%%rcx\n" ); + printf ( "\tpushq\t%%rsi\n" ); printf ( "\tmovq\t(%%rsi),%%rdi\n" ); printf ( "\tmovq\t$0,%%rsi\n" ); @@ -285,11 +576,11 @@ generate_main ( symbol_t *first ) /* Now a new argument is an integer in rax */ - printf ( "\tpopq %%rsi\n" ); - printf ( "\tpopq %%rcx\n" ); - printf ( "\tpushq %%rax\n" ); - printf ( "\tsubq $8, %%rsi\n" ); - printf ( "\tloop PARSE_ARGV\n" ); + printf ( "\tpopq\t%%rsi\n" ); + printf ( "\tpopq\t%%rcx\n" ); + printf ( "\tpushq\t%%rax\n" ); + printf ( "\tsubq\t$8, %%rsi\n" ); + printf ( "\tloop\tPARSE_ARGV\n" ); /* Now the arguments are in order on stack */ for (int arg = 0; arg < MIN(6,first->nparms); arg++) @@ -297,14 +588,17 @@ generate_main ( symbol_t *first ) printf ( "SKIP_ARGS:\n" ); printf ( "\tcall\t_%s\n", first->name ); - printf ( "\tjmp\tEND\n" ); + printf ( "\tjmp \tEND\n" ); printf ( "ABORT:\n" ); printf ( "\tmovq\t$.errout, %%rdi\n" ); - printf ( "\tcall puts\n" ); + printf ( "\tcall\tputs\n" ); printf ( "END:\n" ); - puts ( "\tmovq %rax, %rdi" ); - puts ( "\tcall exit" ); + puts ( "\tmovq \t%rax, %rdi" ); + puts ( "\tcall \texit" ); + puts("###### FUNCTIONS FROM VSL BELOW #####"); + putchar('\n'); + } static uint64_t