// NOTE(review): the header name that followed this #include is missing in
// this copy of the file. Restore it -- presumably the project header that
// declares tlhash_t, symbol_t and node_t; confirm against the repository.
#include

// Prints a printf-style message to stderr, prefixed with "[ERROR] ".
// The expansion is a brace-enclosed block, so call sites compile with or
// without a trailing semicolon. `args...` and `##args` are GNU extensions.
#define ERRPRT(format, args...) {fprintf(stderr, "[ERROR] "); fprintf(stderr ,format, ##args);}

// Externally visible, for the generator
// (only declared here; the definitions are not in this part of the file)
extern tlhash_t *global_names;          // Table of all globals, keyed by name
extern char **string_list;              // Strings collected from the tree
extern size_t n_string_list, stringc;   // Capacity of / entries used in string_list

// Functions from the skeleton
static uint64_t find_globals ( void );
static void bind_names ( symbol_t *function, node_t *root );

// Helper functions, see description in the definition
static void print_global_tree(symbol_t* global);
static void print_string_list(void);
static void destroy_global(symbol_t* global);
static void push_scope(void);
static void pop_scope(void);
static void insert_symbol(tlhash_t *hash_table, symbol_t* symbol);
static void insert_local_to_scope(symbol_t *local);
static void insert_local_to_func(symbol_t *function, symbol_t *root);
static void insert_local_var(symbol_t *function, node_t *root);
static void collect_string(node_t *root);
static symbol_t* lookup_var(symbol_t *function, char* var);

// Local "global" variables
// Printable names, indexed by a symbol's type field when printing.
// NOTE(review): assumes the order matches the symbol type enum -- confirm.
static const char *symbol_names[4] = { "GLOBAL_VAR", "FUNCTION", "PARAMETER", "LOCAL_VAR" };
// Capacity of the scope stack, and the number of scopes currently pushed
static uint64_t no_scopes, cur_scope_depth;
// The scope stack: one hash table per open block, innermost last
static tlhash_t **scopes;

/**
 * Gather information and create a symbol table.
* * Used in vslc.c */ void create_symbol_table ( void ) { // Initialize string array n_string_list = DEFAULT_STRING_LIST_SIZE; string_list = malloc(n_string_list * sizeof(char*)); stringc = 0; // Initialize scope array no_scopes = DEFAULT_NO_SCOPES; scopes = malloc(no_scopes * sizeof(tlhash_t)); cur_scope_depth = 0; // Traverse the root node for globals uint64_t no_globals = find_globals(); // Prepare a temp list of globals and fetch all globals symbol_t **global_list = malloc(no_globals * sizeof(symbol_t)); tlhash_values(global_names, (void **)global_list ); /* Iterate over the temporary list, bind names in each function */ for (uint64_t g = 0; g < no_globals; g++ ) { if (global_list[g]->type == SYM_FUNCTION) bind_names(global_list[g], global_list[g]->node); } // Free the temp list free(global_list); } /** * Prints the symbol table and the string array * * Used in vslc.c */ void print_symbol_table ( void ) { /* Get the number of symbols, size up a temporary list and fill it */ uint64_t no_globals = tlhash_size(global_names); symbol_t **global_list = malloc(no_globals * sizeof(symbol_t)); tlhash_values(global_names, (void **)global_list ); /* Iterate over the temporary list, printing entries */ for (uint64_t g = 0; g < no_globals; g++ ) // Print the tree structure for each global print_global_tree(global_list[g]); free(global_list); // Print strings print_string_list(); } /** * Prins the tree of a global * * @param global pointer to the global to be printed */ static void print_global_tree(symbol_t* global) { // Check if null ptr if (!global) return; // Print global root printf("─%s: %-16s [nparams=%2ld, seq=%2ld, node=%p]\n", symbol_names[global->type], global->name, global->nparms, global->seq, global->node ); // If the global does not have params or locals, return if (!global->nparms && !global->locals) {putchar('\n');return;} // Need to fetch the whole size, since nparams // only count the params, not all locals uint64_t no_locals = tlhash_size(global->locals); 
symbol_t **locals_list = malloc(no_locals * sizeof(symbol_t)); tlhash_values(global->locals, (void **)locals_list ); // Go through all locals for (int l = 0; l < no_locals; l++) { // Do some simple sorting, so seq num is in order for (int ll = 0; ll < no_locals; ll++) { if (locals_list[ll]->seq == l) { printf(" %s─[%s]: %-22s\t[seq=%2ld, node=%p]\n", (l < (no_locals - 1)) ? "├" : "└", symbol_names[locals_list[ll]->type], locals_list[ll]->name, locals_list[ll]->seq, locals_list[ll]->node ); break; } } } putchar('\n'); free(locals_list); } /** * Prints the array of strings * */ static void print_string_list(void) { // Print out all the collected strings printf("─STRINGS [%ld]\n", stringc); for (uint64_t i = 0; i < stringc; i++) printf(" %s─[%ld]: %s\n", (i < (stringc - 1)) ? "├" : "└", i, string_list[i] ); } /** * Destroys all the dynamicly allocated memory and all the hash tables. * Frees up the array of strings as well. * * Used in vslc.c */ void destroy_symbol_table ( void ) { // FREE STRINGS // Free all strings that are kept in the array for (uint64_t c = 0; c < stringc; c++) free(string_list[c]); // Free the actual list free(string_list); // FREE SCOPES // At the end of program, all scopes have to be popped // Therefore only free the list free(scopes); // FREE GLOBAL NAMES if (!global_names) return; // Fetch list of globals uint64_t no_globals = tlhash_size(global_names); symbol_t **global_list = malloc(no_globals * sizeof(symbol_t)); tlhash_values(global_names, (void **)global_list ); // Destroy all global elements for (uint64_t g = 0; g < no_globals; g++) destroy_global(global_list[g]); // Destory the global hash table tlhash_finalize(global_names); // Free the global hash table free(global_names); // Free the temp list free(global_list); } /** * Destroys the supplied global symbol by * finalizing each of the local tables * * @param global pointer to the global symbol to be destroyed */ static void destroy_global(symbol_t* global) { if (!global) return; if 
(!global->locals) { free(global); return; } uint64_t no_locals = tlhash_size(global->locals); symbol_t **locals_list = malloc(no_locals * sizeof(symbol_t)); tlhash_values(global->locals, (void **)locals_list ); for (int l = 0; l < no_locals; l++) free(locals_list[l]); tlhash_finalize(global->locals); free(global->locals); free(global); free(locals_list); } /** * Goes trough the root node and finds all global variables and functions * * @return Returns the number of globals found (functions + variables) */ static uint64_t find_globals ( void ) { tlhash_init(global_names = malloc(sizeof(tlhash_t)), GLOBAL_BUCKET_SIZE); uint64_t no_functions = 0, no_global_vars = 0; node_t *global_list = root; // Check if not nullptr if (!global_list) return 0; symbol_t* global_symbol; for (uint64_t global_i = 0; global_i < global_list->n_children; global_i++) { node_t *current_global = global_list->children[global_i]; switch (current_global->type) { case VARIABLE_LIST: // Go through the variable list and get all the global variables for (uint64_t var_i = 0; var_i < current_global->n_children; var_i++) { global_symbol = malloc(sizeof(symbol_t)); *global_symbol = (symbol_t){ .type = SYM_GLOBAL_VAR, .name = current_global->children[var_i]->data, .node = current_global->children[var_i], .seq = 0, .nparms = 0, .locals = NULL }; insert_symbol(global_names, global_symbol); no_global_vars++; } break; case FUNCTION: node_t *function = current_global; // Function node allways have the same structure, // [0] are the identifier // [1] are the variable list, within a paramerer_list // [2] are the actual block if (!function->children[0]) break; // Create the function symbol global_symbol = malloc(sizeof(symbol_t)); *global_symbol = (symbol_t){ .type = SYM_FUNCTION, .name = current_global->children[0]->data, .node = current_global->children[2], .seq = no_functions++, .nparms = 0, .locals = malloc(sizeof(tlhash_t)) }; // Initialize the local variable table tlhash_init(global_symbol->locals, 
LOCAL_BUCKET_SIZE); // Insert the pointer to the newly created symbol insert_symbol(global_names, global_symbol); // If there are no parameters in function, break. if (!current_global->children[1]->n_children) break; // Find all params and insert into hash table in global_symbol symbol_t *param_sym; node_t *param_list = current_global->children[1]->children[0]; global_symbol->nparms = param_list->n_children; for (uint64_t param_i = 0; param_i < param_list->n_children; param_i++) { param_sym = malloc(sizeof(symbol_t)); *param_sym = (symbol_t){ .type = SYM_PARAMETER, .name = param_list->children[param_i]->data, .node = param_list->children[param_i], .seq = param_i, .nparms = 0, .locals = NULL }; insert_symbol(global_symbol->locals, param_sym); } break; } } return no_functions + no_global_vars; } /** * Inserts a symbol into a hash table, key is defined in the name field in the symbol supplied. * * @param hash_table pointer to the hash table the symbol is inserted into * @param symbol pointer to the symbol to be inserted */ void insert_symbol(tlhash_t *hash_table, symbol_t* symbol) { tlhash_insert(hash_table, symbol->name, strlen(symbol->name), symbol); } /** * @brief Traverse a node root, and find all variables and strings * * @param function pointer to the current function * @param root pointer to the root node */ static void bind_names ( symbol_t *function, node_t *root ) { // NULL check if (!function) return; if (!root) return; // Can't declare variables inside switch unless // it is in a new scope node_t *declarations; // We want do top to bottom traverse, so do not // call recusivly unless we need to go deeper switch (root->type) { // If new BLOCK start, push the scope and recurse from here. 
case BLOCK: push_scope(); for (uint64_t i = 0; i < root->n_children; i++) bind_names(function, root->children[i]); pop_scope(); break; // If DECLARATION_LIST, find all the identifiers // and insert local into scope and function case DECLARATION_LIST: if (!root->children[0]) break; declarations = root->children[0]; for (uint64_t i = 0; i < declarations->n_children; i++) // Insert each of the local variables in the declaration insert_local_var(function, declarations->children[i]); break; // If IDENTIFIER_DATA, look up the identifier in all the scopes. // If not found (NULL), crash the compiler with a somewhat helpful message. case IDENTIFIER_DATA: if (!root->data) break; if (!(root->entry = lookup_var(function, root->data))) { ERRPRT("Could not find %s in scope!\n", (char*)root->data) exit(EXIT_FAILURE); } break; // If STRING_DATA, collect the string and point the // data in the corresponding node to the array index case STRING_DATA: collect_string(root); break; // If none of the above, go deeper if possible. default: for (uint64_t i = 0; i < root->n_children; i++) bind_names(function, root->children[i]); break; } } /** * Pushes a new hash table to the scope stack. * * Increases the size of the stack if too small. 
* */ static void push_scope(void) { // Allocate memory for the hash table and initialize scopes[cur_scope_depth] = malloc(sizeof(tlhash_t)); tlhash_init(scopes[cur_scope_depth++], LOCAL_BUCKET_SIZE); // Grow the amount of scopes if not enough if (cur_scope_depth >= no_scopes) { no_scopes *= 2; tlhash_t **new_scopes = realloc(scopes, no_scopes * sizeof(tlhash_t)); if (!new_scopes) { ERRPRT("Could not realloc scopes!\n"); exit(EXIT_FAILURE); } scopes = new_scopes; } } /** * Pops the dynamicy allocated hash table for the current scope depth * */ static void pop_scope(void) { tlhash_finalize(scopes[--cur_scope_depth]); free(scopes[cur_scope_depth]); } /** * Allocates and inserts a local symbol into * the scope stack and into the function * * @param function pointer to the current function * @param root pointer to the root node for the symbol */ static void insert_local_var(symbol_t *function, node_t *root) { // Null ptr check if (!root->data) return; // Get the sequence num, is the size size_t sequence = tlhash_size(function->locals); symbol_t *variable = malloc(sizeof(symbol_t)); *variable = (symbol_t){ .type = SYM_LOCAL_VAR, .name = root->data, .node = root, .seq = sequence, //! Use sequence as name in var list of function, strictly growing .nparms = 0, .locals = NULL }; insert_local_to_scope(variable); insert_local_to_func(function, variable); } /** * Inserts a symbol to the top most scope in stack * * @param local pointer to the local to be inserted */ static void insert_local_to_scope(symbol_t *local) { insert_symbol(scopes[cur_scope_depth - 1], local); } /** * Insert local symbol to the functions table of local variables * uses the seq num as key as this is strictly growing * * @param function pointer to the function to insert the symbol * @param local pointer to the symbol to be inserted in the table */ void insert_local_to_func(symbol_t *function, symbol_t *local) { tlhash_insert( function->locals, //! Insert local to the function var table &local->seq, //! 
The key is a number, unique, strictly growing sizeof(local->seq), //! Size of key local //! The local symbol ); } /** * Collects strings to the string array and * points the data in the associated node * to the array position * * @param root pointer to the root node of the string */ static void collect_string(node_t *root) { // Null ptr check if (!root->data) return; // Get the string and allocate room for array index of string string_list[stringc] = root->data; root->data = malloc(sizeof(size_t)); // Set the data ptr *((size_t*)root->data) = stringc++; // Grow string array if nessecary if (stringc >= n_string_list) { n_string_list *= 2; char **new_string_list = realloc(string_list, n_string_list * sizeof(char*)); if (!new_string_list) { ERRPRT("Could not realloc string list!\n"); exit(EXIT_FAILURE); } string_list = new_string_list; } } /** * Looks up a variable identifier in all the scopes. * Start with the scopes, then the parameters and * the the globals * * @param function pointer to the function * @param var identifier to the variable * @return Returns the pointer to the "closest" matched identifier. NULL if not found. */ static symbol_t* lookup_var(symbol_t *function, char* var) { // Symbol to store the stymbol to be found symbol_t* symbol = NULL; // Result stores the result of the hash lookups int result; // Try the local scopes first for (int64_t d = cur_scope_depth - 1; d >= 0; d--) { result = tlhash_lookup(scopes[d], var, strlen(var), (void**)&symbol); if (result == TLHASH_SUCCESS) return symbol; } // Then move to parameters result = tlhash_lookup(function->locals, var, strlen(var), (void**)&symbol); if (result == TLHASH_SUCCESS) return symbol; // Last try global parameters result = tlhash_lookup(global_names, var, strlen(var), (void**)&symbol); if (result == TLHASH_SUCCESS) return symbol; // If nothing is found, return NULL return NULL; }