Generates all the necessary code for running

- can compile simple stuff
main
Øyvind Skaaden 2022-04-03 22:41:37 +02:00
parent 1594d045fd
commit 454d642f80
1 changed files with 353 additions and 59 deletions

View File

@ -3,6 +3,9 @@
#define ASM(opcode, args...) puts("\t"#opcode"\t"#args) #define ASM(opcode, args...) puts("\t"#opcode"\t"#args)
#define LABEL(label) printf("_%s:\n", (char*)label) #define LABEL(label) printf("_%s:\n", (char*)label)
#define NO_REG_RECORD 6
#define NO_CALLE_SAVED_REG 10
/**Generate table of strings in a rodata section. */ /**Generate table of strings in a rodata section. */
void generate_stringtable ( void ); void generate_stringtable ( void );
/**Declare global variables in a bss section */ /**Declare global variables in a bss section */
@ -19,56 +22,133 @@ void generate_main ( symbol_t *first );
#define MIN(a,b) (((a)<(b)) ? (a):(b)) #define MIN(a,b) (((a)<(b)) ? (a):(b))
static const char *record[6] = { static const char *record[NO_REG_RECORD] = {
"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9" "%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"
}; };
// Registers intended to be spilled around a function call.
// NOTE(review): despite the name, most entries (%rax, %rcx, %rdx, %rdi, %rsi,
// %r8-%r11) are caller-saved under the System V AMD64 ABI, and %rsp is the
// stack pointer itself, so pushing/popping it is unlikely to be what is wanted.
// No live code in the visible part of this file reads this table - confirm
// before relying on it.
static const char *calle_saved_reg[NO_CALLE_SAVED_REG] = {
"%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%rsp", "%r8", "%r9", "%r10", "%r11"
};
// Helper funcs for generating different nodes // Helper funcs for generating different nodes
/**
* Generates assembly for printing
*
* @param node print statement node with children to print
*/
static void generate_print(node_t* node); static void generate_print(node_t* node);
/**
* Generate identifier for a variable in memory
*
* @param node identifier we want the address of
*/
static void generate_var_ident(node_t *node); static void generate_var_ident(node_t *node);
/**
* Main function to calculate and solve the expressions.
* Based on a stack machine. Result is stored on stack.
*
* @param node root node for expression
*/
static void solve_expressions(node_t *node); static void solve_expressions(node_t *node);
/**
* Generates a function call
*
* @param node root node for function
*/
static void generate_function_call(node_t *node);
/**
* Generates the return part of a function
*
* @param node node containing the return statement
*/
static void generate_function_return(node_t *node);
/**
* Used for calculating and evaluating the add/sub/mul/div statements.
* Turns the statement into an expression, adds the result of rhs to lhs
* and stores the value back to the identifier
*
* @param node node to the statement
* @param operator one of the following +, -, *, /
*/
static void solve_statements(node_t *node, char *operator);
/**
* Generate assembly to fetch a variable on stack
*
* @param node node to the variable to be fetched
* @param dest where to put the value
*/
static void fetch_variable(node_t *node, const char* dest);
/**
* Same as fetch_variable, but stores it back to memory.
*
* @param node variable to be stored
* @param src from where should the data come from
*/
static void writeback_variable(node_t *node, char* src);
// Helper func for fetching all symbols in a table // Helper func for fetching all symbols in a table
static uint64_t fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list); static uint64_t fetch_symbols(tlhash_t* symbol_table, symbol_t*** symbol_list);
void void
generate_program ( void ) generate_program ( void )
{ {
/* TODO: Emit assembly instructions for functions, function calls,
* print statements and expressions.
* The provided function 'generate_main' creates a program entry point
* for the function symbol it is given as argument.
*/
// TODO: Implement
// - Generate code for all functions
// - Generate main (function already implemented) by assigning either the
// function named main or the first function of the source file if no
// main exists.
generate_stringtable(); generate_stringtable();
generate_global_variables(); generate_global_variables();
symbol_t **global_list; symbol_t **global_list;
uint64_t no_globals = fetch_symbols(global_names, &global_list); uint64_t no_globals = fetch_symbols(global_names, &global_list);
bool main_generated = false;
uint64_t seq0_index = -1;
for (uint64_t g = 0; g < no_globals; g++)
{
if (global_list[g]->type != SYM_FUNCTION)
continue;
// If the name of the function is main
if (!strcmp(global_list[g]->name, "main"))
{
generate_main(global_list[g]);
main_generated = true;
}
if (!global_list[g]->seq)
seq0_index = g;
}
// If no main was found, use the first function instead.
// That means the function with seq = 0
if (!main_generated)
generate_main(global_list[seq0_index]);
for (uint64_t g = 0; g < no_globals; g++) for (uint64_t g = 0; g < no_globals; g++)
{ {
if (global_list[g]->type == SYM_FUNCTION) if (global_list[g]->type == SYM_FUNCTION)
generate_function(global_list[g]); generate_function(global_list[g]);
} }
free(global_list);
} }
void void
generate_stringtable ( void ) generate_stringtable ( void )
{ {
/* These can be used to emit numbers, strings and a run-time /* These can be used to emit numbers, strings and a run-time
* error msg. from main * error msg. from main
*/ */
puts("# DATA SECTION"); puts("# DATA SECTION");
puts(".data"); puts(".data");
puts(".intout:\t.asciz \"\%ld \""); puts(".intout:\t.asciz \"\%ld \"");
puts(".strout:\t.asciz \"\%s \""); puts(".strout:\t.asciz \"\%s \"");
puts(".errout:\t.asciz \"Wrong number of arguments\""); puts(".errout:\t.asciz \"Wrong number of arguments\"");
for (uint64_t s = 0; s < stringc; s++) for (uint64_t s = 0; s < stringc; s++)
{ {
@ -109,7 +189,7 @@ generate_function ( symbol_t *function )
ASM(movq, %rsp, %rbp); ASM(movq, %rsp, %rbp);
// Push params to stack // Push params to stack
for (int arg = 0; arg < MIN(6,function->nparms); arg++) for (int arg = 0; arg < MIN(NO_REG_RECORD,function->nparms); arg++)
printf("\tpushq\t%s\n", record[arg] ); printf("\tpushq\t%s\n", record[arg] );
// How many local variables are inside function // How many local variables are inside function
@ -137,38 +217,82 @@ generate_node ( node_t *node)
{ {
// TODO: Generate code corresponding to node // TODO: Generate code corresponding to node
// All statements have the same structure.
// [0] is the lhs, needs to be identifier, parser ensures this
// [1] is thr rhs
switch (node->type) switch (node->type)
{ {
case ASSIGNMENT_STATEMENT: case ASSIGNMENT_STATEMENT:
solve_expressions(node->children[1]);
ASM(popq, %rax);
writeback_variable(node->children[0], "%rax");
break; break;
case ADD_STATEMENT: case ADD_STATEMENT:
// The following way is the naive way of doing an assignment
/*
fetch_variable(node->children[0], "%rax");
ASM(pushq, %rax);
solve_expressions(node->children[1]);
ASM(popq, %r10);
ASM(popq, %rax);
ASM(addq, %r10, %rax);
writeback_variable(node->children[0], "%rax");
*/
/* The thing is that add/sub/mul/div assignments
have the same structure as expressions.
We can therefore just say that the assignment is
an expression, but remembering to do the writeback afterwards.
*/
puts("# Add statement");
solve_statements(node, "+");
break; break;
case SUBTRACT_STATEMENT: case SUBTRACT_STATEMENT:
puts("# Subtract statement");
solve_statements(node, "-");
break; break;
case MULTIPLY_STATEMENT: case MULTIPLY_STATEMENT:
puts("# Multiply statement");
solve_statements(node, "*");
break; break;
case DIVIDE_STATEMENT: case DIVIDE_STATEMENT:
puts("# Divide statement");
solve_statements(node, "/");
break; break;
case PRINT_STATEMENT: case PRINT_STATEMENT:
puts("# Print statement");
generate_print(node); generate_print(node);
break; break;
case RETURN_STATEMENT: case RETURN_STATEMENT:
ASM(movq, %rbp, %rsp); puts("# Return statement");
ASM(popq, %rbp); generate_function_return(node);
ASM(ret);
break;
case NULL_STATEMENT:
break; break;
case IF_STATEMENT: case IF_STATEMENT:
case WHILE_STATEMENT: case WHILE_STATEMENT:
/* DO NOTHING YET */ /* DO NOTHING YET */
break; break;
case NULL_STATEMENT:
/* USED IN WHILE/IF */
break;
case DECLARATION_LIST:
/* List of blocks we dont need to traverse */
break;
default: default:
for (int c = 0; c < node->n_children; c++) for (int c = 0; c < node->n_children; c++)
generate_node(node->children[c]); generate_node(node->children[c]);
break; break;
} }
} }
@ -176,19 +300,19 @@ generate_node ( node_t *node)
void void
generate_print(node_t* node) generate_print(node_t* node)
{ {
// Push rdi and rsi to stack incase there are data in them
//ASM(pushq, %rdi);
//ASM(pushq, %rsi);
for (uint64_t p = 0; p < node->n_children; p++) for (uint64_t p = 0; p < node->n_children; p++)
{ {
node_t *curr_print = node->children[p]; node_t *curr_print = node->children[p];
// Push rdi and rsi to stack incase there are data in them
//ASM(pushq, %rdi);
//ASM(pushq, %rsi);
switch (curr_print->type) switch (curr_print->type)
{ {
case EXPRESSION: case EXPRESSION:
solve_expressions(curr_print);
ASM(popq, %rax);
ASM(movq, $.intout, %rdi); ASM(movq, $.intout, %rdi);
// Since this is a stack machine, rax allways
// contain the result of expression
ASM(movq, %rax, %rsi); ASM(movq, %rax, %rsi);
break; break;
@ -199,28 +323,41 @@ generate_print(node_t* node)
case IDENTIFIER_DATA: case IDENTIFIER_DATA:
ASM(movq, $.intout, %rdi); ASM(movq, $.intout, %rdi);
// Need to find the sequence number of var fetch_variable(curr_print, "%rsi");
printf("\tmovq\t");
generate_var_ident(curr_print);
printf(", %%rsi\n");
break; break;
default: default:
break; break;
} }
ASM(call, printf); ASM(call, printf);
// Adds a newline
ASM(movq, $'\n', %rdi);
ASM(call, putchar);
//ASM(popq, %rsi);
//ASM(popq, %rdi);
} }
// Adds a newline
ASM(movq, $'\n', %rdi);
ASM(call, putchar);
//ASM(popq, %rsi);
//ASM(popq, %rdi);
} }
/**
 * Emits a single `movq` that loads the variable referred to by `node`
 * into `dest`, followed by an assembly comment naming the variable.
 *
 * @param node identifier node; its symbol entry supplies the name
 * @param dest destination operand text, e.g. "%rax"
 */
void
fetch_variable(node_t *node, const char* dest)
{
    // Instruction mnemonic first; the source operand is printed by
    // generate_var_ident, the destination and comment come last.
    fputs("\tmovq\t", stdout);
    generate_var_ident(node);
    printf(", %s\t\t# Fetched: %s\n", dest, node->entry->name);
}
/**
 * Emits a single `movq` that stores `src` into the variable referred to
 * by `node`, followed by an assembly comment naming the variable.
 *
 * @param node identifier node; its symbol entry supplies the name
 * @param src  source operand text, e.g. "%rax"
 */
void
writeback_variable(node_t *node, char* src)
{
    // Mnemonic and source operand, then the destination printed by
    // generate_var_ident, then the trailing comment.
    fputs("\tmovq\t", stdout);
    printf("%s,", src);
    generate_var_ident(node);
    printf("\t\t# Writeback: %s\n", node->entry->name);
}
void void
generate_var_ident(node_t *node) generate_var_ident(node_t *node)
{ {
@ -237,7 +374,7 @@ generate_var_ident(node_t *node)
printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1)); printf("%ld(%%rbp)", -8 * (ident_sym->seq + 1));
else else
// This requires that the parameters on // This requires that the parameters on
// stack is in reversed order // stack is in reversed order... easier to implement
printf("%ld(%%rbp)", 8 * (ident_sym->seq - 6 + 1 )); printf("%ld(%%rbp)", 8 * (ident_sym->seq - 6 + 1 ));
break; break;
@ -248,10 +385,163 @@ generate_var_ident(node_t *node)
} }
/**
 * Emits stack-machine code that evaluates the expression rooted at `node`.
 *
 * For every node kind handled here (identifier, number, unary and binary
 * operators, function calls) the emitted code leaves exactly one new
 * 8-byte value - the result - on top of the run-time stack.
 *
 * @param node root node of the expression
 */
void
solve_expressions(node_t *node)
{
    // Only operator/call nodes carry an operator string in `data`.
    // A number's `data` is an int64_t and an identifier's `data` is a
    // user-chosen name, so neither may be compared against the
    // "function_call" marker.
    bool has_operator_string = node->data != NULL
        && node->type != NUMBER_DATA
        && node->type != IDENTIFIER_DATA;

    if (has_operator_string && !strcmp(node->data, "function_call"))
    {
        generate_function_call(node);
        return;
    }

    switch (node->n_children)
    {
    case 0:
        // Leaf: load the value into %rax and push it.
        switch (node->type)
        {
        case IDENTIFIER_DATA:
            fetch_variable(node, "%rax");
            ASM(pushq, %rax);
            break;

        case NUMBER_DATA:
            printf("\tmovq\t$%lld,%%rax\n",
                (long long)*(int64_t*)node->data);
            ASM(pushq, %rax);
            break;

        default:
            break;
        }
        break;

    case 1:
        // Unary operator: evaluate the operand, transform it in %rax.
        solve_expressions(node->children[0]);
        ASM(popq, %rax);

        switch (*(char*)node->data)
        {
        case '-':
            ASM(negq, %rax);
            break;
        case '~':
            ASM(notq, %rax);
            break;
        default:
            break;
        }
        ASM(pushq, %rax);
        break;

    case 2:
        // Binary operator: evaluate lhs then rhs, each pushes its result.
        for (int i = 0; i < 2; i++)
            solve_expressions(node->children[i]);

        // rhs was pushed last, so it is popped first.
        ASM(popq, %r10);
        ASM(popq, %rax);

        switch (*(char*)node->data)
        {
        case '|': ASM(orq,  %r10, %rax); break;  // %rax |= %r10
        case '^': ASM(xorq, %r10, %rax); break;  // %rax ^= %r10
        case '&': ASM(andq, %r10, %rax); break;  // %rax &= %r10
        case '+': ASM(addq, %r10, %rax); break;  // %rax += %r10
        case '-': ASM(subq, %r10, %rax); break;  // %rax -= %r10
        case '*':
            // One-operand form: %rdx:%rax = %rax * %r10 (clobbers %rdx).
            ASM(imulq, %r10);
            break;
        case '/':
            ASM(cqto);          // sign-extend %rax into %rdx:%rax
            ASM(idivq, %r10);   // quotient ends up in %rax
            break;
        default:
            // Unrecognised operator: the left operand stays in %rax.
            break;
        }

        ASM(pushq, %rax);
        break;

    default:
        break;
    }
}
/**
 * Emits a call to the function named by `node->children[0]` and pushes
 * the returned value (%rax) so the call can be used as an expression.
 *
 * Every argument is evaluated with solve_expressions, so arguments may be
 * numbers, variables, arbitrary expressions or nested calls. Arguments
 * are evaluated right to left onto the stack; the first NO_REG_RECORD of
 * them are then popped into the argument registers, which leaves the
 * remaining ones on the stack with the 7th argument on top at the time of
 * the call.
 *
 * When the number of stack-passed arguments is odd, one padding slot is
 * pushed *before* the arguments so that this call adds an even number of
 * 8-byte slots, without the padding sitting where the callee expects an
 * argument. After the call, the stack-passed arguments and the padding
 * are removed again so the expression stack is left balanced.
 *
 * @param node root node of the call: [0] function identifier,
 *             [1] argument list
 */
void
generate_function_call(node_t *node)
{
    printf("# Function call\n");

    // As in the original: a non-empty argument-list node is unwrapped one
    // level to reach the node whose children are the actual arguments.
    node_t *arg_list = node->children[1];
    if (arg_list != NULL && arg_list->n_children)
        arg_list = arg_list->children[0];

    size_t n_args = (arg_list != NULL) ? arg_list->n_children : 0;
    size_t n_reg_args = MIN(NO_REG_RECORD, n_args);
    size_t n_stack_args = n_args - n_reg_args;
    size_t n_pad = n_stack_args % 2;

    // Padding goes underneath the arguments, never on top of them.
    if (n_pad)
        ASM(pushq, $0);

    // Evaluate everything onto the stack first: evaluating one argument
    // may clobber registers that already hold an earlier one.
    for (size_t arg = n_args; arg-- > 0; )
        solve_expressions(arg_list->children[arg]);

    // First argument is now on top; move the leading ones to registers.
    for (size_t arg = 0; arg < n_reg_args; arg++)
        printf("\tpopq\t%s\n", record[arg]);

    printf("\tcall\t_%s\n", (char*)node->children[0]->data);

    // Caller cleans up what it pushed for the callee.
    size_t cleanup_bytes = 8 * (n_stack_args + n_pad);
    if (cleanup_bytes)
        printf("\taddq\t$%zu, %%rsp\n", cleanup_bytes);

    ASM(pushq, %rax);
    printf("# End of function call\n");
}
/**
 * Emits the code for a return statement: the returned expression is
 * evaluated, its value is moved from the stack into %rax, and the
 * function is left with `leave` / `ret`.
 *
 * @param node return statement node; [0] is the expression to return
 */
void
generate_function_return(node_t *node)
{
    node_t *return_value = node->children[0];

    // Result of the expression ends up on top of the stack.
    solve_expressions(return_value);
    ASM(popq, %rax);

    // Tear down the stack frame and return to the caller.
    ASM(leave);
    ASM(ret);
}
/**
 * Emits code for a compound assignment (`lhs op= rhs`).
 *
 * The statement has the same shape as a binary expression ([0] lhs,
 * [1] rhs), so it is evaluated as one and the result is written back to
 * the lhs variable. The evaluation uses a shallow, stack-allocated copy of
 * the node retagged as an expression, so the syntax tree itself is left
 * untouched and nothing is heap-allocated.
 *
 * @param node     the statement node
 * @param operator one of "+", "-", "*", "/"
 */
void
solve_statements(node_t *node, char *operator)
{
    // Same children as the statement, but viewed as `lhs <operator> rhs`.
    node_t as_expression = *node;
    as_expression.type = EXPRESSION;
    as_expression.data = operator;

    solve_expressions(&as_expression);
    ASM(popq, %rax);
    writeback_variable(node->children[0], "%rax");
}
/**Generates the main function with argument parsing and calling of our /**Generates the main function with argument parsing and calling of our
@ -260,6 +550,7 @@ solve_expressions(node_t *node)
void void
generate_main ( symbol_t *first ) generate_main ( symbol_t *first )
{ {
puts("###### Entry point for GAS #####");
puts ( ".globl main" ); puts ( ".globl main" );
puts ( ".section .text" ); puts ( ".section .text" );
puts ( "main:" ); puts ( "main:" );
@ -268,15 +559,15 @@ generate_main ( symbol_t *first )
printf ( "\tsubq\t$1,%%rdi\n" ); printf ( "\tsubq\t$1,%%rdi\n" );
printf ( "\tcmpq\t$%zu,%%rdi\n", first->nparms ); printf ( "\tcmpq\t$%zu,%%rdi\n", first->nparms );
printf ( "\tjne\tABORT\n" ); printf ( "\tjne \tABORT\n" );
printf ( "\tcmpq\t$0,%%rdi\n" ); printf ( "\tcmpq\t$0,%%rdi\n" );
printf ( "\tjz\tSKIP_ARGS\n" ); printf ( "\tjz \tSKIP_ARGS\n" );
printf ( "\tmovq\t%%rdi,%%rcx\n" ); printf ( "\tmovq\t%%rdi,%%rcx\n" );
printf ( "\taddq $%zu, %%rsi\n", 8*first->nparms ); printf ( "\taddq\t$%zu, %%rsi\n", 8*first->nparms );
printf ( "PARSE_ARGV:\n" ); printf ( "PARSE_ARGV:\n" );
printf ( "\tpushq %%rcx\n" ); printf ( "\tpushq\t%%rcx\n" );
printf ( "\tpushq %%rsi\n" ); printf ( "\tpushq\t%%rsi\n" );
printf ( "\tmovq\t(%%rsi),%%rdi\n" ); printf ( "\tmovq\t(%%rsi),%%rdi\n" );
printf ( "\tmovq\t$0,%%rsi\n" ); printf ( "\tmovq\t$0,%%rsi\n" );
@ -285,11 +576,11 @@ generate_main ( symbol_t *first )
/* Now a new argument is an integer in rax */ /* Now a new argument is an integer in rax */
printf ( "\tpopq %%rsi\n" ); printf ( "\tpopq\t%%rsi\n" );
printf ( "\tpopq %%rcx\n" ); printf ( "\tpopq\t%%rcx\n" );
printf ( "\tpushq %%rax\n" ); printf ( "\tpushq\t%%rax\n" );
printf ( "\tsubq $8, %%rsi\n" ); printf ( "\tsubq\t$8, %%rsi\n" );
printf ( "\tloop PARSE_ARGV\n" ); printf ( "\tloop\tPARSE_ARGV\n" );
/* Now the arguments are in order on stack */ /* Now the arguments are in order on stack */
for (int arg = 0; arg < MIN(6,first->nparms); arg++) for (int arg = 0; arg < MIN(6,first->nparms); arg++)
@ -297,14 +588,17 @@ generate_main ( symbol_t *first )
printf ( "SKIP_ARGS:\n" ); printf ( "SKIP_ARGS:\n" );
printf ( "\tcall\t_%s\n", first->name ); printf ( "\tcall\t_%s\n", first->name );
printf ( "\tjmp\tEND\n" ); printf ( "\tjmp \tEND\n" );
printf ( "ABORT:\n" ); printf ( "ABORT:\n" );
printf ( "\tmovq\t$.errout, %%rdi\n" ); printf ( "\tmovq\t$.errout, %%rdi\n" );
printf ( "\tcall puts\n" ); printf ( "\tcall\tputs\n" );
printf ( "END:\n" ); printf ( "END:\n" );
puts ( "\tmovq %rax, %rdi" ); puts ( "\tmovq \t%rax, %rdi" );
puts ( "\tcall exit" ); puts ( "\tcall \texit" );
puts("###### FUNCTIONS FROM VSL BELOW #####");
putchar('\n');
} }
static uint64_t static uint64_t