#include #define ERRPRT(format, args...) fprintf(stderr, "[ERROR] "); fprintf(stderr ,format, ##args) #define ASM(opcode, args...) puts("\t"#opcode"\t"#args) #define LABEL(label) printf("_%s:\n", (char*)label) #define COMMENT(format, args...) printf("# "format"\n", ##args) // The PUSH and POP macros also increments/decrements the stack_depth to keep track of the stack #define PUSH(param) printf("\tpushq\t%s\t\t\t\t# PUSH: %ld\n", #param, ++stack_depth) #define POP(param) printf("\tpopq\t%s\t\t\t\t# POP: %ld\n", #param, --stack_depth) #define NO_REG_RECORD 6 // Keep track of sequence of stack depth, ifs and whiles static uint64_t stack_depth, if_seq, while_seq, closest_while; /**Generate table of strings in a rodata section. */ void generate_stringtable ( void ); /**Declare global variables in a bss section */ void generate_global_variables ( void ); /**Generate function entry code * @param function symbol table entry of function */ void generate_function ( symbol_t *function ); /**Generate code for a node in the AST, to be called recursively from * generate_function * @param node root node of current code block */ static void generate_node ( node_t *node ); /**Initializes program (already implemented) */ void generate_main ( symbol_t *first ); #define MIN(a,b) (((a)<(b)) ? (a):(b)) static const char *record[NO_REG_RECORD] = { "%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9" }; // Helper funcs for generating different nodes /** * Generates assembly for printing * * @param node print statement node with children to print */ static void generate_print(node_t* node); /** * Generate identfier for a variable in memory * * @param node identifier we want the addres of */ static void generate_var_ident(node_t *node); /** * Main function to calculate and solve the expressions. * Based on a stack machine. Result is stored on stack. * * @param node root node for expression */ static void solve_expressions(node_t *node); /** * Generates a funciton call * * @param node root node for function */ static void generate_function_call(node_t *node); /** * Generates the return part of a function * * @param node node containing the return statement */ static void generate_function_return(node_t *node); /** * Used for calculating and evaluating the add/sub/mul/div statements. * Turns the statement into an expression, adds the result of rhs to lhs * and stores the value back to the indentifier * * @param node node to the statement * @param operator one of the following +, -, *, / */ static void solve_statements(node_t *node, char *operator); /** * Solves a relation and leaves the result (1 or 0, True or False) in %rax * If the relation node is just a number, just put the number in %rax. * This is used with a cmp instruction to check if the relation is true. * * This makes it possible to compute the constant relations. * * @param node pointer to the node the suspected relation is in */ static void solve_relations(node_t *node); /** * Solves a node that is an if statement * Uses the solve realtions function to solve the relations * * @param node pointer to the if node */ static void generate_if_statement(node_t *node); /** * Solves a node that is an while statement * Uses the solve realtions function to solve the relations * * @param node pointer to the while node */ static void generate_while_statement(node_t *node); /** * Inserts a jump to the inner most while. * This also works when multiple whiles are nested. * */ static void solve_continue_statement(); /** * Generate assembly to fetch a variable on stack * * @param node node to the variable to be fetched * @param dest where to put the value */ static void fetch_variable(node_t *node, const char* dest); /** * Same as fetch_variable, but stores it back to memory. * * @param node variable to be stored * @param src from where should the data come from */ static void writeback_variable(node_t *node, char* src); // Helper func for fetching all symbols in a table static uint64_t fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list); void generate_program ( void ) { // Generate the string table at the top generate_stringtable(); // Fetch all the global elements (functions and global vars) symbol_t **global_list; uint64_t no_globals = fetch_symbols(global_names, &global_list); // Find the number of actual global variables uint64_t no_global_vars = 0; for (uint64_t g = 0; g < no_globals; g++) if (global_list[g]->type == SYM_GLOBAL_VAR) no_global_vars++; // Generate globbal variables if there are any if (no_global_vars) generate_global_variables(); // Find the function called main and keep track of if it found an generated bool main_generated = false; uint64_t seq0_index = -1; for (uint64_t g = 0; g < no_globals; g++) { if (global_list[g]->type != SYM_FUNCTION) continue; // If the name of the function is main if (!strcmp(global_list[g]->name, "main")) { generate_main(global_list[g]); main_generated = true; } if (!global_list[g]->seq) seq0_index = g; } // If no main was found, use the first function instead. // That means the function with seq = 0 if (!main_generated) generate_main(global_list[seq0_index]); // Then generate all the functions from vsl for (uint64_t g = 0; g < no_globals; g++) { if (global_list[g]->type == SYM_FUNCTION) generate_function(global_list[g]); } free(global_list); } void generate_stringtable ( void ) { /* These can be used to emit numbers, strings and a run-time * error msg. from main */ puts("# DATA SECTION"); puts(".section .data"); puts(".intout:\t.asciz \"\%ld \""); puts(".strout:\t.asciz \"\%s \""); puts(".errout:\t.asciz \"Wrong number of arguments\""); for (uint64_t s = 0; s < stringc; s++) { printf(".STR%03ld:\t.asciz %s\n", s, string_list[s]); } putchar('\n'); } void generate_global_variables ( void ) { symbol_t **global_list; uint64_t no_globals = fetch_symbols(global_names, &global_list); puts("# GLOBAL VARIABLES"); puts(".bss"); puts(".align 8"); for (uint64_t g = 0; g < no_globals; g++) if (global_list[g]->type == SYM_GLOBAL_VAR) printf(".%s:\n", global_list[g]->name); putchar('\n'); free(global_list); } void generate_function ( symbol_t *function ) { // Keep track of the stack size in each of the functions stack_depth = 0; printf("# func %s(nparams: %ld)\n", function->name, function->nparms); puts(".section .text"); printf(".global _%s\n", function->name); LABEL(function->name); PUSH(%rbp); ASM(movq, %rsp, %rbp); // Push params to stack for (int arg = 0; arg < MIN(NO_REG_RECORD,function->nparms); arg++) printf("\tpushq\t%s\t\t\t\t# PUSH: %ld\n", record[arg], ++stack_depth ); // How many local variables are inside function uint64_t no_locals = function->locals->size - function->nparms; // Make room for the local vars while(no_locals--) PUSH($0); // IF the stack alignment is not 16 bytes, // add one now as all local var also is 0 if (stack_depth % 2) PUSH($0); // Now the stack ptr should be 16 byte aligned. // Then generate the body of the function generate_node(function->node); putchar('\n'); } void generate_node ( node_t *node) { // All statements have the same structure. // [0] is the lhs, needs to be identifier, parser ensures this // [1] is thr rhs switch (node->type) { case ASSIGNMENT_STATEMENT: // First solve the rhs solve_expressions(node->children[1]); // Then store in lhs writeback_variable(node->children[0], "%rax"); break; case ADD_STATEMENT: // The following way is the naive way of doing an assignment /* fetch_variable(node->children[0], "%rax"); ASM(pushq, %rax); solve_expressions(node->children[1]); ASM(popq, %r10); ASM(popq, %rax); ASM(addq, %r10, %rax); writeback_variable(node->children[0], "%rax"); */ /* The thing is that add/sub/mul/div assignments have the same structure as expressions. We can therefore just say that the assignment is an expression, but remembering to do the writeback afterwards. */ puts("# Add statement"); solve_statements(node, "+"); break; case SUBTRACT_STATEMENT: puts("# Subtract statement"); solve_statements(node, "-"); break; case MULTIPLY_STATEMENT: puts("# Multiply statement"); solve_statements(node, "*"); break; case DIVIDE_STATEMENT: puts("# Divide statement"); solve_statements(node, "/"); break; case PRINT_STATEMENT: puts("# Print statement"); generate_print(node); break; case RETURN_STATEMENT: puts("# Return statement"); generate_function_return(node); break; case IF_STATEMENT: generate_if_statement(node); break; case WHILE_STATEMENT: generate_while_statement(node); break; case NULL_STATEMENT: solve_continue_statement(); break; case DECLARATION_LIST: /* List of blocks we dont need to traverse */ break; default: // Otherwise, generate the traverse for (int c = 0; c < node->n_children; c++) generate_node(node->children[c]); break; } } // Generate the print node void generate_print(node_t* node) { for (uint64_t p = 0; p < node->n_children; p++) { node_t *curr_print = node->children[p]; switch (curr_print->type) { case EXPRESSION: solve_expressions(curr_print); ASM(movq, $.intout, %rdi); ASM(movq, %rax, %rsi); break; case STRING_DATA: ASM(movq, $.strout, %rdi); printf("\tmovq\t$.STR%03ld, %%rsi\n", *(uint64_t*)curr_print->data); break; case IDENTIFIER_DATA: ASM(movq, $.intout, %rdi); fetch_variable(curr_print, "%rsi"); break; default: break; } COMMENT("printf call"); ASM(call, printf); } // Adds a newline ASM(movq, $'\n', %rdi); ASM(call, putchar); } // This will put the value of var in node in dest void fetch_variable(node_t *node, const char* dest) { printf("\tmovq\t"); generate_var_ident(node); printf(", %s\t\t# Fetched: %s and put in %s\n", dest, node->entry->name, dest ); } // This will put the value in dest to the var in node void writeback_variable(node_t *node, char* src) { printf("\tmovq\t%s,", src); generate_var_ident(node); printf("\t\t# Writeback '%s' from %s\n", node->entry->name, src ); } // Generate variable identifier, // if local var -> find offset from sb // if parameter -> find offset from sb // if global -> insert global tag void generate_var_ident(node_t *node) { symbol_t *ident_sym = node->entry; switch (ident_sym->type) { case SYM_GLOBAL_VAR: printf("$.%s", ident_sym->name); break; case SYM_PARAMETER: // If it is a paramter is one of the first 6, seacrch below bp if (ident_sym->seq < 6) printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1)); else // This requires that the parameters on // stack is in reversed order. easier to implement... printf("%ld(%%rbp)", 8 * (ident_sym->seq - 6 + 1 )); break; case SYM_LOCAL_VAR: printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1)); break; } } // This should allways push the result to stack // no no no noooo, it should leave it in rax! As it does :)))) void solve_expressions(node_t *node) { if (node->data) { // Check if the expression is a function call bool is_function_call = !strcmp(node->data, "function_call"); if (is_function_call) { generate_function_call(node); return; } } switch (node->n_children) { case 0: switch (node->type) { case IDENTIFIER_DATA: fetch_variable(node, "%rax"); break; case NUMBER_DATA: printf("\tmovq\t$%ld,%%rax\n",*(int64_t*)node->data); break; } break; case 1: solve_expressions(node->children[0]); switch (*(char*)node->data) { case '-': ASM(negq, %rax); break; case '~': ASM(notq, %rax); break; } break; case 2: // First fetch lhs of expr and then rhs // Push results on stack for (int i = 0; i < 2; i++) { solve_expressions(node->children[i]); PUSH(%rax); } // Put rhs in %r10 POP(%r10); // put lhs in %rax POP(%rax); // All operators below leaves result in %rax switch (*(char*)node->data) { /* Assignments */ case '|': ASM(orq, %r10, %rax); break; // Bitwise or of %rax and %r10 case '^': ASM(xorq, %r10, %rax); break; // Bitwise xor of %rax and %r10 case '&': ASM(andq, %r10, %rax); break; // Bitwise and of %rax and %r10 case '+': ASM(addq, %r10, %rax); break; // Add %rax and %r10 case '-': ASM(subq, %r10, %rax); break; // Subtract %r10 from %rax case '*': ASM(imulq, %r10); break; // Mulitply %rax with %r10 case '/': ASM(cqto); // Convert rax to octaword, %rdx:%rax ASM(idivq, %r10); // Divide %rdx:%rax by %r10 break; } break; } } void generate_function_call(node_t *node) { printf("# Function call\n"); bool isStack16ByteAligned = !(stack_depth % 2); // If the stack is 16 byte alligned here, offset // by 1 because call pushes return addr to stack // Therefore after call, the stack is 16 byte aligned if (isStack16ByteAligned) PUSH($0); // Arg list is allways the second children, lies within a parameter list // Type of this is therefore PARAMETER_LIST node_t *arg_list = node->children[1]; if (arg_list->n_children) arg_list = arg_list->children[0]; // This is the acutal parameter list for (int arg = 0; arg < MIN(NO_REG_RECORD, arg_list->n_children); arg++) { switch (arg_list->children[arg]->type) { case NUMBER_DATA: printf("\tmovq\t$%ld, %s\n", *(int64_t*)arg_list->children[arg]->data, record[arg] ); break; case EXPRESSION: solve_expressions(arg_list->children[arg]); printf("\tmovq\t%%rax, %s\n",record[arg]); break; default: fetch_variable(arg_list->children[arg], record[arg]); break; } } if (arg_list->n_children > NO_REG_RECORD) { // If there is an odd number of args to push to stack, add 1 if (arg_list->n_children % 2) PUSH($0); for (int arg = arg_list->n_children - 1; arg >= NO_REG_RECORD; arg--) { switch (arg_list->children[arg]->type) { case NUMBER_DATA: printf("\tpushq\t$%ld\t\t\t\t# PUSH: %ld\n", *(int64_t*)arg_list->children[arg]->data, ++stack_depth ); break; // Arg can be an expression case EXPRESSION: solve_expressions(arg_list->children[arg]); PUSH(%rax); break; default: printf("\tpushq\t"); generate_var_ident(arg_list->children[arg]); printf("\t\t\t\t# PUSH: %ld", ++stack_depth); putchar('\n'); break; } } } printf("\tcall\t_%s\n", (char*)node->children[0]->data); // Aaaand pop the stack to return back to stack alignment if (isStack16ByteAligned) POP(%rcx); printf("# End of function call\n"); } void generate_function_return(node_t *node) { solve_expressions(node->children[0]); ASM(leave); ASM(ret); } void solve_statements(node_t *node, char *operator) { node->type = EXPRESSION; node->data = strdup(operator); solve_expressions(node); writeback_variable(node->children[0], "%rax"); } // Takes in a relation/number node and sets %rax to true if the statement is true void solve_relations(node_t *relation_root) { switch (relation_root->type) { case NUMBER_DATA: // Numberdata is boring, just leave value in %rax, only 1 is interperted as true solve_expressions(relation_root); break; case RELATION: if (relation_root->n_children != 2) { ERRPRT("Relation requires two expressions, one lhs and one rhs\n"); exit(EXIT_FAILURE); } for (int i = 0; i < relation_root->n_children; i++) { solve_expressions(relation_root->children[i]); PUSH(%rax); } POP(%r10); POP(%rax); ASM(cmp, %r10, %rax); ASM(movq, $0, %rax); switch (*(char*)relation_root->data) { case '=': ASM(sete, %al); break; // Set %al (0th byte of %rax) to 1 if lhs == rhs case '>': ASM(setg, %al); break; // Set %al (0th byte of %rax) to 1 if lhs > rhs case '<': ASM(setl, %al); break; // Set %al (0th byte of %rax) to 1 if lhs < rhs } break; } } void generate_if_statement(node_t *node) { uint64_t current_if_seq = if_seq++; COMMENT("Begin IF %ld", current_if_seq); // The realtion is allways in the first part of the IF solve_relations(node->children[0]); // Compare to 0 (False) ASM(cmp, $0, %rax); // If False, jump to either ELSE or ENDIF based on no children in if printf("\tje \t%s%03ld\n", (node->n_children > 2) ? "ELSE" : "ENDIF", current_if_seq); // Then generate body generate_node(node->children[1]); // If else block aswell, add that if (node->n_children > 2) { printf("\tjmp \tENDIF%03ld\n", current_if_seq); printf("ELSE%03ld:\n", current_if_seq); // Generate ELSE body generate_node(node->children[2]); } COMMENT("End IF %ld", current_if_seq); printf("ENDIF%03ld:\n", current_if_seq); } void generate_while_statement(node_t *node) { // Keep local var of which WHILE this is uint64_t current_while_seq = while_seq++; // Also keep the previous closest while // in case a continue happens after // this (and this is inside an another while loop) uint64_t prev_closest_while = closest_while; closest_while = current_while_seq; COMMENT("Begin WHILE %ld", current_while_seq); printf("WHILE%03ld:\n", current_while_seq); // Relation is allways the first entry in a while solve_relations(node->children[0]); // Compare to 0 (False) ASM(cmp, $0, %rax); // If false, then exit while printf("\tje \tENDWHILE%03ld\n", current_while_seq); // Generate body generate_node(node->children[1]); // Restore the previous while closest_while = prev_closest_while; printf("\tjmp \tWHILE%03ld\n", current_while_seq); printf("ENDWHILE%03ld:\n", current_while_seq); } void solve_continue_statement() { COMMENT("Continue to WHILE%03ld", closest_while); printf("\tjmp \tWHILE%03ld\n", closest_while); } static uint64_t fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list) { uint64_t no_symbols = tlhash_size(symbol_table); *symbol_list = malloc(no_symbols * sizeof(symbol_t)); tlhash_values(symbol_table, (void **)*symbol_list ); return no_symbols; } /**Generates the main function with argument parsing and calling of our * main function (first, if no function is named main) * @param first Symbol table entry of our main function */ void generate_main ( symbol_t *first ) { puts("###### Entry point for GAS #####"); puts ( ".globl main" ); puts ( ".section .text" ); puts ( "main:" ); puts ( "\tpushq %rbp" ); // Added this for stack alignment puts ( "\tpushq %rbp" ); puts ( "\tmovq %rsp, %rbp" ); printf ( "\tsubq\t$1,%%rdi\n" ); printf ( "\tcmpq\t$%zu,%%rdi\n", first->nparms ); printf ( "\tjne \tABORT\n" ); printf ( "\tcmpq\t$0,%%rdi\n" ); printf ( "\tjz \tSKIP_ARGS\n" ); printf ( "\tmovq\t%%rdi,%%rcx\n" ); printf ( "\taddq\t$%zu, %%rsi\n", 8*first->nparms ); // Modification to mail call, remove this if no params supplied if (first->nparms) { printf ( "PARSE_ARGV:\n" ); printf ( "\tpushq\t%%rcx\n" ); printf ( "\tpushq\t%%rsi\n" ); printf ( "\tmovq\t(%%rsi),%%rdi\n" ); printf ( "\tmovq\t$0,%%rsi\n" ); printf ( "\tmovq\t$10,%%rdx\n" ); printf ( "\tcall\tstrtol\n" ); /* Now a new argument is an integer in rax */ printf ( "\tpopq\t%%rsi\n" ); printf ( "\tpopq\t%%rcx\n" ); printf ( "\tpushq\t%%rax\n" ); printf ( "\tsubq\t$8, %%rsi\n" ); printf ( "\tloop\tPARSE_ARGV\n" ); /* Now the arguments are in order on stack */ for (int arg = 0; arg < MIN(6,first->nparms); arg++) printf ( "\tpopq\t%s\n", record[arg] ); } printf ( "SKIP_ARGS:\n" ); printf ( "\tcall\t_%s\n", first->name ); printf ( "\tjmp \tEND\n" ); printf ( "ABORT:\n" ); printf ( "\tmovq\t$.errout, %%rdi\n" ); printf ( "\tcall\tputs\n" ); printf ( "END:\n" ); puts ( "\tmovq \t%rax, %rdi" ); puts ( "\tcall \texit" ); puts("###### FUNCTIONS FROM VSL BELOW #####"); putchar('\n'); }