Moved my files into project

main
Øyvind Skaaden 2022-03-31 21:33:52 +02:00
parent 9f31718671
commit 71f757ecdc
8 changed files with 1125 additions and 532 deletions

14
exercises/05/.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
# Buildfiles
*.o
vslc/src/vslc
parser.c
scanner.c
y.tab.h
# Submission
*.tar.xz
# VSL treefiles
*.ast
*.sast
*.sym

View File

@ -10,21 +10,42 @@ typedef struct n {
struct n **children;
} node_t;
// Export the initializer function, it is needed by the parser
void node_init (
node_t *nd, node_index_t type, void *data, uint64_t n_children, ...
/**Export the initializer function, it is needed by the parser
* @param *nd node to initialize
* @param type type of node (see nodetype.h)
* @param *data associated data. Declared void to allow any type
* @param n_children number of children
* @param ... variable argument list of child nodes (node_t *)
*
* @return Pointer to the initialized node
* */
node_t* node_init (
node_t* nd,
node_index_t type,
void* data,
uint64_t n_children,
...
);
typedef enum {
SYM_GLOBAL_VAR, SYM_FUNCTION, SYM_PARAMETER, SYM_LOCAL_VAR
SYM_GLOBAL_VAR,
SYM_FUNCTION,
SYM_PARAMETER,
SYM_LOCAL_VAR
} symtype_t;
typedef struct s {
char *name;
symtype_t type;
node_t *node;
size_t seq;
size_t nparms;
tlhash_t *locals;
char* name;
symtype_t type;
node_t* node;
size_t seq;
size_t nparms;
tlhash_t* locals;
} symbol_t;
#endif
#define GLOBAL_BUCKET_SIZE 32
#define LOCAL_BUCKET_SIZE 16
#define DEFAULT_STRING_LIST_SIZE 8
#define DEFAULT_NO_SCOPES 1

View File

@ -25,6 +25,7 @@ int yyerror ( const char *error );
/* These are defined in the parser generated by bison */
extern int yylineno;
extern int yylex ( void );
extern int yylex_destroy( void );
extern char yytext[];
/* Global state */

View File

@ -1,336 +1,602 @@
#include <vslc.h>
#define ERRPRT(format, args...) {fprintf(stderr, "[ERROR] "); fprintf(stderr ,format, ##args);}
// Externally visible, for the generator
extern tlhash_t *global_names;
extern char **string_list;
extern size_t n_string_list, stringc;
// Functions from the skeleton
// Implementation choices, only relevant internally
static void find_globals ( void );
static uint64_t find_globals ( void );
static void bind_names ( symbol_t *function, node_t *root );
static void print_symbols ( tlhash_t *table );
static void destroy_symtab ( void );
// Internal details of name resolution
static size_t n_scopes = 1, scope_depth = 0;
static tlhash_t **scopes = NULL;
// Helper functions, see description in the definition
/* External interface */
static void print_global_tree(symbol_t* global);
static void print_string_list(void);
static void destroy_global(symbol_t* global);
static void push_scope(void);
static void pop_scope(void);
static void insert_symbol(tlhash_t *hash_table, symbol_t* symbol);
static void insert_local_to_scope(symbol_t *local);
static void insert_local_to_func(symbol_t *function, symbol_t *root);
static void insert_local_var(symbol_t *function, node_t *root);
static void collect_string(node_t *root);
static symbol_t* lookup_var(symbol_t *function, char* var);
// Local "global" variables
static const char *symbol_names[4] = {
"GLOBAL_VAR",
"FUNCTION",
"PARAMETER",
"LOCAL_VAR"
};
static uint64_t no_scopes, cur_scope_depth;
static tlhash_t **scopes;
/**
* Gather information and create a symbol table.
*
* Used in vslc.c
*/
void
create_symbol_table ( void )
{
find_globals();
size_t n_globals = tlhash_size ( global_names );
symbol_t *global_list[n_globals];
tlhash_values ( global_names, (void **)&global_list );
for ( size_t i=0; i<n_globals; i++ )
if ( global_list[i]->type == SYM_FUNCTION )
bind_names ( global_list[i], global_list[i]->node );
// Initialize string array
n_string_list = DEFAULT_STRING_LIST_SIZE;
string_list = malloc(n_string_list * sizeof(char*));
stringc = 0;
// Initialize scope array
no_scopes = DEFAULT_NO_SCOPES;
scopes = malloc(no_scopes * sizeof(tlhash_t));
cur_scope_depth = 0;
// Traverse the root node for globals
uint64_t no_globals = find_globals();
// Prepare a temp list of globals and fetch all globals
symbol_t **global_list = malloc(no_globals * sizeof(symbol_t));
tlhash_values(global_names, (void **)global_list );
/* Iterate over the temporary list, bind names in each function */
for (uint64_t g = 0; g < no_globals; g++ )
{
if (global_list[g]->type == SYM_FUNCTION)
bind_names(global_list[g], global_list[g]->node);
}
// Free the temp list
free(global_list);
}
/**
* Prints the symbol table and the string array
*
* Used in vslc.c
*/
void
print_symbol_table ( void )
{
print_symbols ( global_names );
/* Get the number of symbols, size up a temporary list and fill it */
uint64_t no_globals = tlhash_size(global_names);
symbol_t **global_list = malloc(no_globals * sizeof(symbol_t));
tlhash_values(global_names, (void **)global_list );
/* Iterate over the temporary list, printing entries */
for (uint64_t g = 0; g < no_globals; g++ )
// Print the tree structure for each global
print_global_tree(global_list[g]);
free(global_list);
// Print strings
print_string_list();
}
/**
 * Prints one global symbol and, when it owns a table of locals, every entry
 * of that table in ascending sequence-number order.
 *
 * Entries whose sequence number is not below the table size are not printed,
 * and if several entries share a sequence number only the first one returned
 * by tlhash_values is printed.
 *
 * @param global pointer to the global to be printed; NULL is ignored
 */
static void
print_global_tree(symbol_t* global)
{
    if (!global)
        return;

    // Header line for the global itself. nparms and seq are size_t, hence
    // %zu; %p requires a void pointer argument.
    printf("─%s: %-16s [nparams=%2zu, seq=%2zu, node=%p]\n",
        symbol_names[global->type],
        global->name,
        global->nparms,
        global->seq,
        (void *)global->node
    );

    // nparms only counts parameters, so the table size is used to cover all
    // locals. Without a table (or with an empty one) nothing follows.
    size_t no_locals = global->locals ? tlhash_size(global->locals) : 0;
    if (no_locals == 0)
    {
        putchar('\n');
        return;
    }

    // Both arrays hold symbol pointers, so each element is pointer sized.
    symbol_t **locals_list = malloc(no_locals * sizeof *locals_list);
    symbol_t **by_seq = calloc(no_locals, sizeof *by_seq);
    if (!locals_list || !by_seq)
    {
        fprintf(stderr, "[ERROR] Could not allocate list of locals!\n");
        exit(EXIT_FAILURE);
    }
    tlhash_values(global->locals, (void **)locals_list);

    // Index the entries by sequence number in a single pass instead of
    // rescanning the whole list once per sequence number.
    for (size_t i = 0; i < no_locals; i++)
    {
        size_t seq = locals_list[i]->seq;
        if (seq < no_locals && !by_seq[seq])
            by_seq[seq] = locals_list[i];
    }

    for (size_t s = 0; s < no_locals; s++)
    {
        if (!by_seq[s])
            continue;
        // NOTE(review): both branches of the prefix selection are the same
        // literal here; confirm whether distinct prefixes were intended.
        printf(" %s─[%s]: %-22s\t[seq=%2zu, node=%p]\n",
            (s < (no_locals - 1)) ? "" : "",
            symbol_names[by_seq[s]->type],
            by_seq[s]->name,
            by_seq[s]->seq,
            (void *)by_seq[s]->node
        );
    }
    putchar('\n');

    free(by_seq);
    free(locals_list);
}
/**
 * Prints the number of collected strings followed by one line per string,
 * each showing its index in string_list and its contents.
 */
static void
print_string_list(void)
{
    // stringc and the index are size_t, so %zu is the matching conversion.
    printf("─STRINGS [%zu]\n", stringc);
    for (size_t i = 0; i < stringc; i++)
        // NOTE(review): both branches of the prefix selection are the same
        // literal here; confirm whether distinct prefixes were intended.
        printf(" %s─[%zu]: %s\n",
            (i + 1 < stringc) ? "" : "",
            i,
            string_list[i]
        );
}
/**
* Destroys all the dynamically allocated memory and all the hash tables.
* Frees up the array of strings as well.
*
* Used in vslc.c
*/
void
destroy_symbol_table ( void )
{
destroy_symtab();
}
// FREE STRINGS
// Free all strings that are kept in the array
for (uint64_t c = 0; c < stringc; c++)
free(string_list[c]);
// Free the actual list
free(string_list);
/* Internal matters */
// FREE SCOPES
// At the end of program, all scopes have to be popped
// Therefore only free the list
free(scopes);
static void
print_symbols ( tlhash_t *table )
{
if ( table == NULL )
// FREE GLOBAL NAMES
if (!global_names)
return;
size_t n_entries = tlhash_size(table);
symbol_t *entry_list[n_entries];
tlhash_values ( table, (void **)&entry_list );
for ( size_t e=0; e<n_entries; e++ )
{
switch ( entry_list[e]->type )
{
case SYM_FUNCTION:
fprintf ( stderr, "function: %s\n", entry_list[e]->name );
if ( entry_list[e]->type == SYM_FUNCTION )
print_symbols ( entry_list[e]->locals );
break;
case SYM_GLOBAL_VAR:
fprintf ( stderr, "global var: %s\n", entry_list[e]->name );
break;
case SYM_PARAMETER:
fprintf ( stderr, "parameter: %s\n", entry_list[e]->name );
break;
case SYM_LOCAL_VAR:
fprintf ( stderr, "local var: %s\n", entry_list[e]->name );
break;
default:
/* This should never happen if all symbols have correct type */
fprintf ( stderr,
"** Unknown symbol: %s\n", entry_list[e]->name
);
break;
}
}
// Fetch list of globals
uint64_t no_globals = tlhash_size(global_names);
symbol_t **global_list = malloc(no_globals * sizeof(symbol_t));
tlhash_values(global_names, (void **)global_list );
// Destroy all global elements
for (uint64_t g = 0; g < no_globals; g++)
destroy_global(global_list[g]);
// Destroy the global hash table
tlhash_finalize(global_names);
// Free the global hash table
free(global_names);
// Free the temp list
free(global_list);
}
static void
add_global ( symbol_t *symbol )
/**
* Destroys the supplied global symbol by
* finalizing each of the local tables
*
* @param global pointer to the global symbol to be destroyed
*/
static void
destroy_global(symbol_t* global)
{
tlhash_insert (
global_names, symbol->name, strlen(symbol->name), symbol
);
if (!global)
return;
if (!global->locals)
{
free(global);
return;
}
uint64_t no_locals = tlhash_size(global->locals);
symbol_t **locals_list = malloc(no_locals * sizeof(symbol_t));
tlhash_values(global->locals, (void **)locals_list );
for (int l = 0; l < no_locals; l++)
free(locals_list[l]);
tlhash_finalize(global->locals);
free(global->locals);
free(global);
free(locals_list);
}
static void
/**
* Goes through the root node and finds all global variables and functions
*
* @return Returns the number of globals found (functions + variables)
*/
static uint64_t
find_globals ( void )
{
global_names = malloc ( sizeof(tlhash_t) );
tlhash_init ( global_names, 32 );
string_list = malloc ( n_string_list * sizeof(char * ) );
size_t n_functions = 0;
tlhash_init(global_names = malloc(sizeof(tlhash_t)), GLOBAL_BUCKET_SIZE);
node_t *global_list = root->children[0];
for ( uint64_t g=0; g<global_list->n_children; g++ )
uint64_t no_functions = 0, no_global_vars = 0;
node_t *global_list = root;
// Check if not nullptr
if (!global_list)
return 0;
symbol_t* global_symbol;
for (uint64_t global_i = 0; global_i < global_list->n_children; global_i++)
{
node_t *global = global_list->children[g], *namelist;
symbol_t *symbol;
switch ( global->type )
node_t *current_global = global_list->children[global_i];
switch (current_global->type)
{
case FUNCTION:
symbol = malloc ( sizeof(symbol_t) );
*symbol = (symbol_t) {
.type = SYM_FUNCTION,
.name = global->children[0]->data,
.node = global->children[2],
.seq = n_functions,
.nparms = 0,
.locals = malloc ( sizeof(tlhash_t) )
};
n_functions++;
case VARIABLE_LIST:
tlhash_init ( symbol->locals, 32 );
if ( global->children[1] != NULL )
{
symbol->nparms = global->children[1]->n_children;
for ( int p=0; p<symbol->nparms; p++ )
{
node_t *param = global->children[1]->children[p];
symbol_t *psym = malloc ( sizeof(symbol_t) );
*psym = (symbol_t) {
.type = SYM_PARAMETER,
.name = param->data,
.node = NULL,
.seq = p,
.nparms = 0,
.locals = NULL
};
tlhash_insert (
symbol->locals, psym->name, strlen(psym->name), psym
);
}
}
add_global ( symbol );
break;
case DECLARATION:
namelist = global->children[0];
for ( uint64_t d=0; d<namelist->n_children; d++ )
{
symbol = malloc ( sizeof(symbol_t) );
*symbol = (symbol_t) {
.type = SYM_GLOBAL_VAR,
.name = namelist->children[d]->data,
.node = NULL,
.seq = 0,
.nparms = 0,
.locals = NULL
};
add_global(symbol);
}
break;
}
}
}
static void
push_scope ( void )
{
if ( scopes == NULL )
scopes = malloc ( n_scopes * sizeof(tlhash_t *) );
tlhash_t *new_scope = malloc ( sizeof(tlhash_t) );
tlhash_init ( new_scope, 32 );
scopes[scope_depth] = new_scope;
scope_depth += 1;
if ( scope_depth >= n_scopes )
{
n_scopes *= 2;
scopes = realloc ( scopes, n_scopes*sizeof(tlhash_t **) );
}
}
static void
add_local ( symbol_t *local )
{
tlhash_insert (
scopes[scope_depth-1],local->name,strlen(local->name),local
);
}
static symbol_t *
lookup_local ( char *name )
{
symbol_t *result = NULL;
size_t depth = scope_depth;
while ( result == NULL && depth > 0 )
{
depth -= 1;
tlhash_lookup ( scopes[depth], name, strlen(name), (void **)&result );
}
return result;
}
static void
pop_scope ( void )
{
scope_depth -= 1;
tlhash_finalize ( scopes[scope_depth] );
free ( scopes[scope_depth] );
scopes[scope_depth] = NULL;
}
static void
add_string ( node_t *string )
{
string_list[stringc] = string->data;
string->data = malloc ( sizeof(size_t) );
*((size_t *)string->data) = stringc;
stringc++;
if ( stringc >= n_string_list )
{
n_string_list *= 2;
string_list = realloc ( string_list, n_string_list * sizeof(char *) );
}
}
static void
bind_names ( symbol_t *function, node_t *root )
{
if ( root == NULL )
return;
else switch ( root->type )
{
node_t *namelist;
symbol_t *entry;
case BLOCK:
push_scope();
for ( size_t c=0; c<root->n_children; c++ )
bind_names ( function, root->children[c] );
pop_scope();
break;
case DECLARATION:
namelist = root->children[0];
for ( uint64_t d=0; d<namelist->n_children; d++ )
// Go through the variable list and get all the global variables
for (uint64_t var_i = 0; var_i < current_global->n_children; var_i++)
{
node_t *varname = namelist->children[d];
size_t local_num =
tlhash_size(function->locals) - function->nparms;
symbol_t *symbol = malloc ( sizeof(symbol_t) );
*symbol = (symbol_t) {
.type = SYM_LOCAL_VAR,
.name = varname->data,
.node = NULL,
.seq = local_num,
global_symbol = malloc(sizeof(symbol_t));
*global_symbol = (symbol_t){
.type = SYM_GLOBAL_VAR,
.name = current_global->children[var_i]->data,
.node = current_global->children[var_i],
.seq = 0,
.nparms = 0,
.locals = NULL
};
tlhash_insert (
function->locals, &local_num, sizeof(size_t), symbol
);
add_local ( symbol );
insert_symbol(global_names, global_symbol);
no_global_vars++;
}
break;
case FUNCTION:
node_t *function = current_global;
case IDENTIFIER_DATA:
entry = lookup_local ( root->data );
if ( entry == NULL )
tlhash_lookup (
function->locals, root->data,
strlen(root->data), (void**)&entry
);
if ( entry == NULL )
tlhash_lookup (
global_names,root->data,strlen(root->data),(void**)&entry
);
if ( entry == NULL )
// Function nodes always have the same structure:
// [0] is the identifier
// [1] is the variable list, within a parameter_list
// [2] is the actual block
if (!function->children[0])
break;
// Create the function symbol
global_symbol = malloc(sizeof(symbol_t));
*global_symbol = (symbol_t){
.type = SYM_FUNCTION,
.name = current_global->children[0]->data,
.node = current_global->children[2],
.seq = no_functions++,
.nparms = 0,
.locals = malloc(sizeof(tlhash_t))
};
// Initialize the local variable table
tlhash_init(global_symbol->locals, LOCAL_BUCKET_SIZE);
// Insert the pointer to the newly created symbol
insert_symbol(global_names, global_symbol);
// If there are no parameters in function, break.
if (!current_global->children[1]->n_children)
break;
// Find all params and insert into hash table in global_symbol
symbol_t *param_sym;
node_t *param_list = current_global->children[1]->children[0];
global_symbol->nparms = param_list->n_children;
for (uint64_t param_i = 0; param_i < param_list->n_children; param_i++)
{
fprintf ( stderr, "Identifier '%s' does not exist in scope\n",
(char *)root->data
);
exit ( EXIT_FAILURE );
param_sym = malloc(sizeof(symbol_t));
*param_sym = (symbol_t){
.type = SYM_PARAMETER,
.name = param_list->children[param_i]->data,
.node = param_list->children[param_i],
.seq = param_i,
.nparms = 0,
.locals = NULL
};
insert_symbol(global_symbol->locals, param_sym);
}
root->entry = entry;
break;
case STRING_DATA:
add_string ( root );
break;
default:
for ( size_t c=0; c<root->n_children; c++ )
bind_names ( function, root->children[c] );
break;
}
}
void
destroy_symtab ( void )
{
for ( size_t i=0; i<stringc; i++ )
free ( string_list[i] );
free ( string_list );
size_t n_globals = tlhash_size ( global_names );
symbol_t *global_list[n_globals];
tlhash_values ( global_names, (void **)&global_list );
for ( size_t g=0; g<n_globals; g++ )
{
symbol_t *glob = global_list[g];
if ( glob->locals != NULL )
{
size_t n_locals = tlhash_size ( glob->locals );
symbol_t *locals[n_locals];
tlhash_values ( glob->locals, (void **)&locals );
for ( size_t l=0; l<n_locals; l++ )
free ( locals[l] );
tlhash_finalize ( glob->locals );
free ( glob->locals );
}
free ( glob );
}
tlhash_finalize ( global_names );
free ( global_names );
free ( scopes );
return no_functions + no_global_vars;
}
/**
 * Inserts a symbol into a hash table, keyed by the symbol's name field
 * (the key length is the name's strlen, so the terminator is not included).
 *
 * @param hash_table pointer to the hash table the symbol is inserted into
 * @param symbol pointer to the symbol to be inserted; its name must be a
 *        valid NUL-terminated string
 */
static void
insert_symbol(tlhash_t *hash_table, symbol_t* symbol)
{
    // Declared static in the forward declaration; the definition now says so
    // too, keeping the helper visibly file-local.
    tlhash_insert(hash_table, symbol->name, strlen(symbol->name), symbol);
}
/**
 * @brief Walks a subtree top-down, registering local declarations, resolving
 *        identifiers and collecting string literals.
 *
 * Does nothing when either argument is NULL. Terminates the program with
 * EXIT_FAILURE when an identifier cannot be resolved.
 *
 * @param function pointer to the symbol of the function being processed
 * @param root pointer to the root node of the subtree to process
 */
static void
bind_names ( symbol_t *function, node_t *root )
{
    if (!function || !root)
        return;

    switch (root->type)
    {
        // A string literal is moved to the string list.
        case STRING_DATA:
            collect_string(root);
            break;

        // An identifier is resolved via lookup_var and the result stored in
        // the node's entry field; an unresolved name aborts.
        case IDENTIFIER_DATA:
            if (root->data)
            {
                root->entry = lookup_var(function, root->data);
                if (root->entry == NULL)
                {
                    ERRPRT("Could not find %s in scope!\n", (char*)root->data)
                    exit(EXIT_FAILURE);
                }
            }
            break;

        // Every child of the list's first child is registered as a local.
        // NOTE(review): only children[0] of the list is examined; confirm
        // this matches the shape of the simplified tree.
        case DECLARATION_LIST:
        {
            node_t *first = root->children[0];
            if (first)
            {
                uint64_t v = 0;
                while (v < first->n_children)
                {
                    insert_local_var(function, first->children[v]);
                    v++;
                }
            }
            break;
        }

        // A block opens a scope that lives exactly as long as the traversal
        // of its children.
        case BLOCK:
        {
            push_scope();
            uint64_t c = 0;
            while (c < root->n_children)
            {
                bind_names(function, root->children[c]);
                c++;
            }
            pop_scope();
            break;
        }

        // Any other node type: just descend into the children.
        default:
        {
            uint64_t c = 0;
            while (c < root->n_children)
            {
                bind_names(function, root->children[c]);
                c++;
            }
            break;
        }
    }
}
/**
* Pushes a new hash table to the scope stack.
*
* Increases the size of the stack if too small.
*
*/
static void
push_scope(void)
{
// Allocate memory for the hash table and initialize
scopes[cur_scope_depth] = malloc(sizeof(tlhash_t));
tlhash_init(scopes[cur_scope_depth++], LOCAL_BUCKET_SIZE);
// Grow the amount of scopes if not enough
if (cur_scope_depth >= no_scopes)
{
no_scopes *= 2;
tlhash_t **new_scopes = realloc(scopes, no_scopes * sizeof(tlhash_t));
if (!new_scopes)
{
ERRPRT("Could not realloc scopes!\n");
exit(EXIT_FAILURE);
}
scopes = new_scopes;
}
}
/**
* Pops the dynamicy allocated hash table for the current scope depth
*
*/
static void
pop_scope(void)
{
tlhash_finalize(scopes[--cur_scope_depth]);
free(scopes[cur_scope_depth]);
}
/**
 * Allocates a local-variable symbol for an identifier node and inserts it
 * both into the innermost scope and into the function's table of locals.
 *
 * The symbol's sequence number is the size of the function's local table at
 * the time of insertion. Nodes that are NULL or carry no data are ignored.
 * Terminates the program with EXIT_FAILURE if the symbol cannot be allocated.
 *
 * @param function pointer to the current function
 * @param root pointer to the identifier node for the symbol
 */
static void
insert_local_var(symbol_t *function, node_t *root)
{
    if (!root || !root->data)
        return;

    symbol_t *variable = malloc(sizeof *variable);
    if (!variable)
    {
        fprintf(stderr, "[ERROR] Could not allocate local symbol!\n");
        exit(EXIT_FAILURE);
    }

    *variable = (symbol_t){
        .type = SYM_LOCAL_VAR,
        .name = root->data,
        .node = root,
        // Also used as the key in the function's table, see
        // insert_local_to_func.
        .seq = tlhash_size(function->locals),
        .nparms = 0,
        .locals = NULL
    };

    insert_local_to_scope(variable);
    insert_local_to_func(function, variable);
}
/**
 * Adds a local symbol, keyed by its name, to the hash table at the top of
 * the scope stack.
 *
 * @param local pointer to the local to be inserted
 */
static void
insert_local_to_scope(symbol_t *local)
{
    // The innermost scope is the most recently pushed table.
    tlhash_t *innermost = scopes[cur_scope_depth - 1];

    insert_symbol(innermost, local);
}
/**
 * Inserts a local symbol into the function's table of local variables.
 *
 * The key is the symbol's sequence number (the raw bytes of its seq field)
 * rather than its name.
 *
 * @param function pointer to the function whose table receives the symbol
 * @param local pointer to the symbol to be inserted in the table
 */
static void
insert_local_to_func(symbol_t *function, symbol_t *local)
{
    // Declared static in the forward declaration; the definition now says so
    // too, keeping the helper visibly file-local.
    tlhash_insert(
        function->locals,   // table of the function's locals
        &local->seq,        // key: the sequence number
        sizeof(local->seq), // size of the key
        local               // value: the local symbol
    );
}
/**
 * Moves a string node's text into the string list and replaces the node's
 * data with a newly allocated size_t holding the string's index in the list.
 *
 * The list is doubled in capacity as soon as it becomes full, so there is
 * always a free slot for the next string. Nodes without data are ignored.
 * Terminates the program with EXIT_FAILURE if memory cannot be allocated.
 *
 * @param root pointer to the string node
 */
static void
collect_string(node_t *root)
{
    if (!root->data)
        return;

    // Allocate the index cell first so that nothing is modified on failure.
    size_t *index = malloc(sizeof *index);
    if (!index)
    {
        ERRPRT("Could not allocate string index!\n");
        exit(EXIT_FAILURE);
    }

    // The list takes over the text; the node keeps only the index.
    string_list[stringc] = root->data;
    *index = stringc++;
    root->data = index;

    // Grow the list if the slot just used was the last one.
    if (stringc >= n_string_list)
    {
        size_t grown = n_string_list * 2;
        char **new_string_list = realloc(string_list, grown * sizeof *new_string_list);
        if (!new_string_list)
        {
            ERRPRT("Could not realloc string list!\n");
            exit(EXIT_FAILURE);
        }
        string_list = new_string_list;
        n_string_list = grown;
    }
}
/**
 * Resolves an identifier by name.
 *
 * Search order: the scope stack from the innermost scope outwards, then the
 * function's own table (which holds its parameters by name), and finally the
 * table of global names.
 *
 * @param function pointer to the function the identifier is used in
 * @param var name of the identifier
 * @return Pointer to the first matching symbol in that order, NULL if the
 *         name is not found anywhere.
 */
static symbol_t*
lookup_var(symbol_t *function, char* var)
{
    symbol_t *found = NULL;
    size_t key_len = strlen(var);

    // Innermost scope first, so an inner declaration hides outer ones.
    uint64_t depth = cur_scope_depth;
    while (depth > 0)
    {
        depth--;
        if (tlhash_lookup(scopes[depth], var, key_len, (void**)&found) == TLHASH_SUCCESS)
            return found;
    }

    // Then the function's table, then the globals.
    tlhash_t *fallbacks[2] = { function->locals, global_names };
    for (int t = 0; t < 2; t++)
    {
        if (tlhash_lookup(fallbacks[t], var, key_len, (void**)&found) == TLHASH_SUCCESS)
            return found;
    }

    return NULL;
}

View File

@ -1,176 +1,303 @@
%{
#include <vslc.h>
#define N0C(n,t,d) do { \
node_init ( n = malloc(sizeof(node_t)), t, d, 0 ); \
} while ( false )
#define N1C(n,t,d,a) do { \
node_init ( n = malloc(sizeof(node_t)), t, d, 1, a ); \
} while ( false )
#define N2C(n,t,d,a,b) do { \
node_init ( n = malloc(sizeof(node_t)), t, d, 2, a, b ); \
} while ( false )
#define N3C(n,t,d,a,b,c) do { \
node_init ( n = malloc(sizeof(node_t)), t, d, 3, a, b, c ); \
} while ( false )
#define NODE(type, data, n_children, children...) node_init(malloc(sizeof(node_t)), type, data, n_children, ##children)
%}
%left '|'
%left '^'
%left '&'
%define api.value.type {node_t}
%token FUNC PRINT RETURN CONTINUE IF THEN ELSE WHILE DO OPENBLOCK CLOSEBLOCK
%token VAR NUMBER IDENTIFIER STRING
%left '|' '&' '^'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%right '~'
%expect 1
//%expect 1
%token FUNC PRINT RETURN CONTINUE IF THEN ELSE WHILE DO OPENBLOCK CLOSEBLOCK
%token VAR NUMBER IDENTIFIER STRING
%nonassoc IF THEN
%nonassoc ELSE
/* Tried fixing vscode complaining about the type for the non-terminals, didn't work
%union {
node_t* node;
}
%type <node> global_list global
%type <node> statement_list print_list expression_list variable_list argument_list parameter_list declaration_list
%type <node> function statement block
%type <node> assignment_statement return_statement print_statement null_statement if_statement while_statement
%type <node> relation expression declaration print_item identifier number string
*/
%%
program :
global_list { N1C ( root, PROGRAM, NULL, $1 ); }
;
global_list :
global { N1C ( $$, GLOBAL_LIST, NULL, $1 ); }
| global_list global { N2C ( $$, GLOBAL_LIST, NULL, $1, $2 ); }
;
program:
global_list {
root = NODE(PROGRAM, NULL, 1, $1);
}
;
global_list:
global {
$$ = NODE(GLOBAL_LIST, NULL, 1, $1);
}
| global_list global {
$$ = NODE(GLOBAL_LIST, NULL, 2, $1, $2);
}
;
global:
function { N1C ( $$, GLOBAL, NULL, $1 ); }
| declaration { N1C ( $$, GLOBAL, NULL, $1 ); }
;
statement_list :
statement { N1C ( $$, STATEMENT_LIST, NULL, $1 ); }
| statement_list statement { N2C ( $$, STATEMENT_LIST, NULL, $1, $2 ); }
;
print_list :
print_item { N1C ( $$, PRINT_LIST, NULL, $1 ); }
| print_list ',' print_item { N2C ( $$, PRINT_LIST, NULL, $1, $3 ); }
;
expression_list :
expression { N1C ( $$, EXPRESSION_LIST, NULL, $1 ); }
| expression_list ',' expression { N2C($$, EXPRESSION_LIST, NULL, $1, $3); }
;
variable_list :
identifier { N1C ( $$, VARIABLE_LIST, NULL, $1 ); }
| variable_list ',' identifier { N2C ( $$, VARIABLE_LIST, NULL, $1, $3 ); }
;
argument_list :
expression_list { N1C ( $$, ARGUMENT_LIST, NULL, $1 ); }
| /* epsilon */ { $$ = NULL; }
;
parameter_list :
variable_list { N1C ( $$, PARAMETER_LIST, NULL, $1 ); }
| /* epsilon */ { $$ = NULL; }
;
declaration_list :
declaration { N1C ( $$, DECLARATION_LIST, NULL, $1 ); }
| declaration_list declaration { N2C ($$, DECLARATION_LIST, NULL, $1, $2); }
;
function :
FUNC identifier '(' parameter_list ')' statement
{ N3C ( $$, FUNCTION, NULL, $2, $4, $6 ); }
;
statement :
assignment_statement { N1C ( $$, STATEMENT, NULL, $1 ); }
| return_statement { N1C ( $$, STATEMENT, NULL, $1 ); }
| print_statement { N1C ( $$, STATEMENT, NULL, $1 ); }
| if_statement { N1C ( $$, STATEMENT, NULL, $1 ); }
| while_statement { N1C ( $$, STATEMENT, NULL, $1 ); }
| null_statement { N1C ( $$, STATEMENT, NULL, $1 ); }
| block { N1C ( $$, STATEMENT, NULL, $1 ); }
;
block :
OPENBLOCK declaration_list statement_list CLOSEBLOCK
{ N2C ($$, BLOCK, NULL, $2, $3); }
| OPENBLOCK statement_list CLOSEBLOCK { N1C ($$, BLOCK, NULL, $2 ); }
;
assignment_statement :
identifier ':' '=' expression
{ N2C ( $$, ASSIGNMENT_STATEMENT, NULL, $1, $4 ); }
| identifier '+' '=' expression
{ N2C ( $$, ADD_STATEMENT, NULL, $1, $4 ); }
| identifier '-' '=' expression
{ N2C ( $$, SUBTRACT_STATEMENT, NULL, $1, $4 ); }
| identifier '*' '=' expression
{ N2C ( $$, MULTIPLY_STATEMENT, NULL, $1, $4 ); }
| identifier '/' '=' expression
{ N2C ( $$, DIVIDE_STATEMENT, NULL, $1, $4 ); }
;
return_statement :
RETURN expression
{ N1C ( $$, RETURN_STATEMENT, NULL, $2 ); }
;
print_statement :
PRINT print_list
{ N1C ( $$, PRINT_STATEMENT, NULL, $2 ); }
;
null_statement :
CONTINUE
{ N0C ( $$, NULL_STATEMENT, NULL ); }
;
if_statement :
IF relation THEN statement
{ N2C ( $$, IF_STATEMENT, NULL, $2, $4 ); }
| IF relation THEN statement ELSE statement
{ N3C ( $$, IF_STATEMENT, NULL, $2, $4, $6 ); }
;
while_statement :
WHILE relation DO statement
{ N2C ( $$, WHILE_STATEMENT, NULL, $2, $4 ); }
;
function {
$$ = NODE(GLOBAL, NULL, 1, $1);
}
| declaration {
$$ = NODE(GLOBAL, NULL, 1, $1);
}
;
statement_list:
statement {
$$ = NODE(STATEMENT_LIST, NULL, 1, $1);
}
| statement_list statement {
$$ = NODE(STATEMENT_LIST, NULL, 2, $1, $2);
}
;
print_list:
print_item {
$$ = NODE(PRINT_LIST, NULL, 1, $1);
}
| print_list ',' print_item {
$$ = NODE(PRINT_LIST, NULL, 2, $1, $3);
}
;
expression_list:
expression {
$$ = NODE(EXPRESSION_LIST, NULL, 1, $1);
}
| expression_list ',' expression {
$$ = NODE(EXPRESSION_LIST, NULL, 2, $1, $3);
}
;
variable_list:
identifier {
$$ = NODE(VARIABLE_LIST, NULL, 1, $1);
}
| variable_list ',' identifier {
$$ = NODE(VARIABLE_LIST, NULL, 2, $1, $3);
}
;
argument_list:
expression_list {
$$ = NODE(ARGUMENT_LIST, NULL, 1, $1);
}
| /* epsilon */ {
$$ = NODE(ARGUMENT_LIST, NULL, 0);
}
;
parameter_list:
variable_list {
$$ = NODE(PARAMETER_LIST, NULL, 1, $1);
}
| /* epsilon */ {
$$ = NODE(PARAMETER_LIST, NULL, 0);
}
;
declaration_list:
declaration {
$$ = NODE(DECLARATION_LIST, NULL, 1, $1);
}
| declaration_list declaration {
$$ = NODE(DECLARATION_LIST, NULL, 2, $1, $2);
}
;
function:
FUNC identifier '(' parameter_list ')' statement {
$$ = NODE(FUNCTION, NULL, 3, $2, $4, $6);
}
;
statement:
assignment_statement {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
| return_statement {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
| print_statement {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
| if_statement {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
| while_statement {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
| null_statement {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
| block {
$$ = NODE(STATEMENT, NULL, 1, $1);
}
;
block:
OPENBLOCK declaration_list statement_list CLOSEBLOCK {
$$ = NODE(BLOCK, NULL, 2, $2, $3);
}
| OPENBLOCK statement_list CLOSEBLOCK {
$$ = NODE(BLOCK, NULL, 1, $2);
}
;
assignment_statement:
identifier ':' '=' expression {
$$ = NODE(ASSIGNMENT_STATEMENT, NULL, 2, $1, $4);
}
| identifier '+' '=' expression {
$$ = NODE(ADD_STATEMENT, NULL, 2, $1, $4);
}
| identifier '-' '=' expression {
$$ = NODE(SUBTRACT_STATEMENT, NULL, 2, $1, $4);
}
| identifier '*' '=' expression {
$$ = NODE(MULTIPLY_STATEMENT, NULL, 2, $1, $4);
}
| identifier '/' '=' expression {
$$ = NODE(DIVIDE_STATEMENT, NULL, 2, $1, $4);
}
;
return_statement:
RETURN expression {
$$ = NODE(RETURN_STATEMENT, NULL, 1, $2);
}
;
print_statement:
PRINT print_list {
$$ = NODE(PRINT_STATEMENT, NULL, 1, $2);
}
;
null_statement:
CONTINUE {
$$ = NODE(NULL_STATEMENT, NULL, 0);
}
;
if_statement:
IF relation THEN statement {
$$ = NODE(IF_STATEMENT, NULL, 2, $2, $4);
}
| IF relation THEN statement ELSE statement {
$$ = NODE(IF_STATEMENT, NULL, 3, $2, $4, $6);
}
;
while_statement:
WHILE relation DO statement {
$$ = NODE(WHILE_STATEMENT, NULL, 2, $2, $4);
}
;
relation:
expression '=' expression
{ N2C ( $$, RELATION, strdup("="), $1, $3 ); }
| expression '<' expression
{ N2C ( $$, RELATION, strdup("<"), $1, $3 ); }
| expression '>' expression
{ N2C ( $$, RELATION, strdup(">"), $1, $3 ); }
;
expression :
expression '|' expression
{ N2C ( $$, EXPRESSION, strdup("|"), $1, $3 ); }
| expression '^' expression
{ N2C ( $$, EXPRESSION, strdup("^"), $1, $3 ); }
| expression '&' expression
{ N2C ( $$, EXPRESSION, strdup("&"), $1, $3 ); }
| expression '+' expression
{ N2C ( $$, EXPRESSION, strdup("+"), $1, $3 ); }
| expression '-' expression
{ N2C ( $$, EXPRESSION, strdup("-"), $1, $3 ); }
| expression '*' expression
{ N2C ( $$, EXPRESSION, strdup("*"), $1, $3 ); }
| expression '/' expression
{ N2C ( $$, EXPRESSION, strdup("/"), $1, $3 ); }
| '-' expression %prec UMINUS
{ N1C ( $$, EXPRESSION, strdup("-"), $2 ); }
| '~' expression %prec UMINUS
{ N1C ( $$, EXPRESSION, strdup("~"), $2 ); }
| '(' expression ')' { $$ = $2; }
| number { N1C ( $$, EXPRESSION, NULL, $1 ); }
| identifier
{ N1C ( $$, EXPRESSION, NULL, $1 ); }
| identifier '(' argument_list ')'
{ N2C ( $$, EXPRESSION, NULL, $1, $3 ); }
;
declaration :
VAR variable_list { N1C ( $$, DECLARATION, NULL, $2 ); }
;
print_item :
expression
{ N1C ( $$, PRINT_ITEM, NULL, $1 ); }
| string
{ N1C ( $$, PRINT_ITEM, NULL, $1 ); }
;
identifier: IDENTIFIER { N0C($$, IDENTIFIER_DATA, strdup(yytext) ); }
number: NUMBER
{
int64_t *value = malloc ( sizeof(int64_t) );
*value = strtol ( yytext, NULL, 10 );
N0C($$, NUMBER_DATA, value );
}
string: STRING { N0C($$, STRING_DATA, strdup(yytext) ); }
expression '=' expression {
$$ = NODE(RELATION, strdup("="), 2, $1, $3);
}
| expression '<' expression {
$$ = NODE(RELATION, strdup("<"), 2, $1, $3);
}
| expression '>' expression {
$$ = NODE(RELATION, strdup(">"), 2, $1, $3);
}
;
expression:
expression '|' expression {
$$ = NODE(EXPRESSION, strdup("|"), 2, $1, $3);
}
| expression '^' expression {
$$ = NODE(EXPRESSION, strdup("^"), 2, $1, $3);
}
| expression '&' expression {
$$ = NODE(EXPRESSION, strdup("&"), 2, $1, $3);
}
| expression '+' expression {
$$ = NODE(EXPRESSION, strdup("+"), 2, $1, $3);
}
| expression '-' expression {
$$ = NODE(EXPRESSION, strdup("-"), 2, $1, $3);
}
| expression '*' expression {
$$ = NODE(EXPRESSION, strdup("*"), 2, $1, $3);
}
| expression '/' expression {
$$ = NODE(EXPRESSION, strdup("/"), 2, $1, $3);
}
| '-' expression %prec UMINUS {
$$ = NODE(EXPRESSION, strdup("-"), 1, $2);
}
| '~' expression {
$$ = NODE(EXPRESSION, strdup("~"), 1, $2);
}
| '(' expression ')' {
$$ = NODE(EXPRESSION, /*NULL*/ strdup("group"), 1, $2);
}
| number {
$$ = NODE(EXPRESSION, /*NULL*/ strdup("number"), 1, $1);
}
| identifier {
$$ = NODE(EXPRESSION, /*NULL*/ strdup("identifier"), 1, $1);
}
| identifier '(' argument_list ')' {
$$ = NODE(EXPRESSION, /*NULL*/ strdup("function_call"), 2, $1, $3);
}
;
declaration:
VAR variable_list {
$$ = NODE(DECLARATION, NULL, 1, $2);
}
;
print_item:
expression {
$$ = NODE(PRINT_ITEM, NULL, 1, $1);
}
| string {
$$ = NODE(PRINT_ITEM, NULL, 1, $1);
}
;
identifier:
IDENTIFIER {
$$ = NODE(IDENTIFIER_DATA, strdup(yytext), 0); // Zero children
}
;
number:
NUMBER {
uint64_t* p_number = malloc(sizeof(uint64_t));
*p_number = strtol(yytext, NULL, 10);
$$ = NODE(NUMBER_DATA, p_number, 0); // Zero children
}
;
string:
STRING {
$$ = NODE(STRING_DATA, strdup(yytext), 0); // Zero children
}
;
%%
int

View File

@ -12,6 +12,10 @@ tree_print(node_t* root, stem head);
static void destroy_subtree ( node_t *discard );
static void prune_children(node_t **simplified, node_t *root);
static void resolve_constant_expressions(node_t **simplified, node_t *root);
static void flatten(node_t **simplified, node_t *root);
/* External interface */
void
@ -40,7 +44,8 @@ print_syntax_tree ( void )
}
void
// Changed so it returns the pointer to the new node, can be used as before, but makes the parser file cleaner
node_t*
node_init (node_t *nd, node_index_t type, void *data, uint64_t n_children, ...)
{
va_list child_list;
@ -55,6 +60,8 @@ node_init (node_t *nd, node_index_t type, void *data, uint64_t n_children, ...)
for ( uint64_t i=0; i<n_children; i++ )
nd->children[i] = va_arg ( child_list, node_t * );
va_end ( child_list );
return nd;
}
@ -82,9 +89,10 @@ tree_print(node_t* root, stem head)
return;
}
printf("─%s", node_string[root->type]);
if ( root->type == IDENTIFIER_DATA ||
root->type == STRING_DATA ||
root->type == EXPRESSION )
if ( root->type == IDENTIFIER_DATA ||
root->type == STRING_DATA ||
root->type == EXPRESSION ||
root->type == RELATION)
printf("(%s)", (char *) root->data);
else if (root->type == NUMBER_DATA)
printf("(%ld)", *((int64_t *)root->data));
@ -152,82 +160,237 @@ destroy_subtree ( node_t *discard )
/**
 * Flatten left-recursive list nodes.
 *
 * A list node with two children (inner list, new element) is replaced by the
 * inner list with the new element appended to its children. Subtrees are
 * processed first, so by the time a node is examined its left child has
 * already been flattened.
 *
 * @param simplified  slot that receives the (possibly replaced) subtree root
 * @param root        subtree to flatten; NULL is ignored and the slot is
 *                    left untouched
 *
 * If growing the child array fails, the node is left exactly as it was:
 * the tree stays valid, just not flattened at that point.
 */
static void
flatten(node_t **simplified, node_t *root)
{
    if (root == NULL)
        return;

    /* Bottom-up: flatten every subtree before looking at this node */
    for (uint64_t i = 0; i < root->n_children; i++)
        flatten(&root->children[i], root->children[i]);

    node_t *result = root;
    switch (root->type)
    {
        case GLOBAL_LIST:
        case STATEMENT_LIST:
        case PRINT_LIST:
        case EXPRESSION_LIST:
        case VARIABLE_LIST:
        case DECLARATION_LIST:
        {
            /* A list with fewer than two children is already flat */
            if (root->n_children < 2)
                break;

            node_t *list = root->children[0];
            if (list == NULL)
                break;

            /* Grow into a temporary and only commit the new size and
             * pointer once the allocation has succeeded. */
            uint64_t grown = list->n_children + 1;
            node_t **new_children =
                realloc(list->children, grown * sizeof *new_children);
            if (new_children == NULL)
                break;

            /* Append the right child, then substitute the list for root */
            new_children[grown - 1] = root->children[1];
            list->children = new_children;
            list->n_children = grown;

            /* NOTE(review): relies on node_finalize releasing only the node
             * itself, not the children that were just re-parented. */
            node_finalize(root);
            result = list;
            break;
        }
        default:
            break;
    }
    *simplified = result;
}
/**
 * Remove purely syntactic wrapper nodes.
 *
 * PROGRAM, GLOBAL, DECLARATION, STATEMENT, PRINT_ITEM and PRINT_STATEMENT
 * nodes are replaced by their first child. For PRINT_STATEMENT the promoted
 * child is retagged as PRINT_STATEMENT, so the statement kind survives even
 * though the wrapper node itself is removed.
 *
 * @param simplified  slot that receives the (possibly replaced) subtree root
 * @param root        subtree to prune; NULL is ignored and the slot is left
 *                    untouched
 */
static void
prune_children(node_t **simplified, node_t *root)
{
    if (root == NULL)
        return;

    /* Bottom-up: prune every subtree before looking at this node */
    for (uint64_t i = 0; i < root->n_children; i++)
        prune_children(&root->children[i], root->children[i]);

    node_t *result = root;
    switch (root->type)
    {
        case PROGRAM:
        case GLOBAL:
        //case ARGUMENT_LIST: // For this to work, need to change order of operations
        //case PARAMETER_LIST: // For this to work, need to change order of operations
        //case VARIABLE_LIST:
        //case EXPRESSION_LIST:
        case DECLARATION:
        case STATEMENT:
        case PRINT_ITEM:
        case PRINT_STATEMENT:
        {
            /* Nothing to promote: keep the wrapper rather than reading
             * children[0] of an empty child array. */
            if (root->n_children == 0)
                break;

            node_t *promoted = root->children[0];

            /* The print_statement only contains a print_list, but the tree
             * still needs a node marked as a print statement. */
            if (root->type == PRINT_STATEMENT && promoted != NULL)
                promoted->type = PRINT_STATEMENT;

            node_finalize(root);
            result = promoted;
            break;
        }
        default:
            break;
    }
    *simplified = result;
}
/**
 * Evaluate a binary operator on two constants.
 *
 * @param op   operator character ('|', '^', '&', '+', '-', '*', '/')
 * @param lhs  left operand
 * @param rhs  right operand
 * @param out  receives the result when the function returns 1
 * @return 1 if the operation was folded, 0 if the operator is unknown or the
 *         division cannot be evaluated (divisor 0, or INT64_MIN / -1)
 */
static int
fold_binary_constant(char op, int64_t lhs, int64_t rhs, int64_t *out)
{
    switch (op)
    {
        case '|': *out = lhs | rhs; return 1;
        case '^': *out = lhs ^ rhs; return 1;
        case '&': *out = lhs & rhs; return 1;
        case '+': *out = lhs + rhs; return 1;
        case '-': *out = lhs - rhs; return 1;
        case '*': *out = lhs * rhs; return 1;
        case '/':
            /* Both cases are undefined behaviour in C; leave them in the
             * tree instead of crashing the compiler. */
            if (rhs == 0 || (lhs == INT64_MIN && rhs == -1))
                return 0;
            *out = lhs / rhs;
            return 1;
        default:
            return 0;
    }
}

/**
 * Collapse EXPRESSION wrappers and fold constant sub-expressions.
 *
 * One-child expressions:
 *   - "-" / "~" applied to a NUMBER_DATA child are evaluated and replaced by
 *     the (updated) number node;
 *   - "-" / "~" applied to anything else are kept, since the operator still
 *     has to be applied at run time;
 *   - every other one-child expression (group/number/identifier wrappers, or
 *     a node without data) is replaced by its child.
 * Two-child expressions whose children are both NUMBER_DATA are evaluated
 * into the left number node, which then replaces the expression.
 *
 * @param simplified  slot that receives the (possibly replaced) subtree root
 * @param root        subtree to process; NULL is ignored and the slot is left
 *                    untouched
 */
static void
resolve_constant_expressions(node_t **simplified, node_t *root)
{
    if (root == NULL)
        return;

    /* Bottom-up, so operands are already folded when we get here */
    for (uint64_t i = 0; i < root->n_children; i++)
        resolve_constant_expressions(&root->children[i], root->children[i]);

    if (root->type != EXPRESSION)
        return;

    node_t *result = root;
    const char *op = root->data;

    switch (root->n_children)
    {
        case 1:
        {
            node_t *operand = root->children[0];
            if (operand == NULL)
                break;

            /* Operators are identified by the first character of the data
             * string; the wrapper tags start with a letter. */
            int is_operator = (op != NULL) && (op[0] == '-' || op[0] == '~');
            if (is_operator)
            {
                /* Non-constant operand: the operator node must stay */
                if (operand->type != NUMBER_DATA || operand->data == NULL)
                    break;

                int64_t *value = operand->data;
                if (op[0] == '-')
                    *value *= -1;
                else
                    *value = ~*value;
            }

            node_finalize(root);
            result = operand;
            break;
        }
        case 2:
        {
            node_t *left  = root->children[0];
            node_t *right = root->children[1];

            if (op == NULL || left == NULL || right == NULL)
                break;
            /* Both children need to be numbers to resolve constants */
            if (left->type != NUMBER_DATA || right->type != NUMBER_DATA)
                break;
            if (left->data == NULL || right->data == NULL)
                break;

            int64_t *lhs = left->data;
            int64_t *rhs = right->data;
            int64_t folded;
            if (!fold_binary_constant(op[0], *lhs, *rhs, &folded))
                break;

            /* Reuse the left number node as the result */
            *lhs = folded;
            node_finalize(right);
            node_finalize(root);
            result = left;
            break;
        }
        default:
            break;
    }
    *simplified = result;
}
/**
 * Fold RELATION nodes whose two operands are both constants.
 *
 * A relation ('=', '<' or '>') over two NUMBER_DATA children is evaluated;
 * the left number node is overwritten with 1 (true) or 0 (false) and
 * replaces the relation node. Relations with a non-constant operand, missing
 * data, or an unrecognised operator are left unchanged.
 *
 * @param simplified  slot that receives the (possibly replaced) subtree root;
 *                    only written when a relation was actually folded
 * @param root        subtree to process; NULL is ignored
 */
static void
resolve_constant_relations( node_t** simplified, node_t* root)
{
    if (root == NULL)
        return;

    /* Bottom-up, so operands are already folded when we get here */
    for (uint64_t i = 0; i < root->n_children; i++)
        resolve_constant_relations(&root->children[i], root->children[i]);

    if (root->type != RELATION || root->n_children != 2)
        return;

    node_t *left  = root->children[0];
    node_t *right = root->children[1];
    const char *op = root->data;

    if (op == NULL || left == NULL || right == NULL)
        return;
    /* Both children must be constant numbers */
    if (left->type != NUMBER_DATA || right->type != NUMBER_DATA)
        return;
    if (left->data == NULL || right->data == NULL)
        return;

    int64_t *lhs = left->data;
    int64_t *rhs = right->data;
    int64_t truth;
    switch (op[0])
    {
        case '=': truth = (*lhs == *rhs); break;
        case '<': truth = (*lhs <  *rhs); break;
        case '>': truth = (*lhs >  *rhs); break;
        default:
            /* Unknown relation: do not collapse it into its left operand */
            return;
    }

    /* Reuse the left number node as the boolean result */
    *lhs = truth;
    node_finalize(right);
    node_finalize(root);
    *simplified = left;
}
/**
 * Run every simplification pass over the tree, in order.
 *
 * Each pass walks the whole tree recursively on its own. Keeping the passes
 * separate means a later pass can rely on the result of an earlier one, for
 * example pruning around lists that have already been flattened.
 *
 * @param simplified  slot that receives the root of the simplified tree
 * @param root        tree to simplify; NULL is ignored and the slot is left
 *                    untouched
 */
static void
simplify_tree ( node_t **simplified, node_t *root )
{
    /* Order matters: flatten lists, drop wrappers, then fold constants.
     * Folding constant relations is experimental. */
    static void (*const passes[])(node_t **, node_t *) = {
        flatten,
        prune_children,
        resolve_constant_expressions,
        resolve_constant_relations,
    };

    if (root == NULL)
        return;

    node_t *tree = root;
    for (size_t p = 0; p < sizeof passes / sizeof passes[0]; p++)
        passes[p](&tree, tree);

    *simplified = tree;
}

Binary file not shown.

View File

@ -6,20 +6,20 @@
/* Global state */
node_t *root; // Syntax tree
tlhash_t *global_names; // Symbol table
char **string_list; // List of strings in the source
size_t n_string_list = 8; // Initial string list capacity (grow on demand)
size_t stringc = 0; // Initial string count
node_t *root; // Syntax tree
tlhash_t *global_names; // Symbol table
char **string_list; // List of strings in the source
size_t n_string_list = 8; // Initial string list capacity (grow on demand)
size_t stringc = 0; // Initial string count
/* Command line option parsing for the main function */
static void options ( int argc, char **argv );
bool
print_full_tree = false,
print_simplified_tree = false,
print_full_tree = false,
print_simplified_tree = false,
print_symbol_table_contents = false,
print_generated_program = true,
new_print_style = true;
print_generated_program = true,
new_print_style = true;
/* Entry point */
@ -28,7 +28,8 @@ main ( int argc, char **argv )
{
options ( argc, argv );
yyparse(); // Generated from grammar/bison, constructs syntax tree
yyparse(); // Generated from grammar/bison, constructs syntax tree
yylex_destroy(); // Free heap used by flex
if ( print_full_tree )
print_syntax_tree ();
@ -36,7 +37,7 @@ main ( int argc, char **argv )
if ( print_simplified_tree )
print_syntax_tree ();
create_symbol_table (); // In ir.c
create_symbol_table (); // In ir.c
if ( print_symbol_table_contents )
print_symbol_table();