diff --git a/.gitignore b/.gitignore index 685e9c6..5d4f84a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ y.tab.h parser.c scanner.c -vslc \ No newline at end of file +vslc +vgcore* \ No newline at end of file diff --git a/src/ir.c b/src/ir.c index 4f457e6..39fffa9 100644 --- a/src/ir.c +++ b/src/ir.c @@ -6,6 +6,11 @@ extern tlhash_t *global_names; extern char **string_list; extern size_t n_string_list,stringc; +struct scope_stack { /* one entry per open scope, innermost first */ + struct scope_stack *next; /* enclosing scope; NULL after the outermost entry */ + tlhash_t *scope; /* symbols declared in this scope, keyed by name */ +}; + /* External interface */ void create_symbol_table(void); @@ -22,7 +27,10 @@ void create_symbol_table ( void ) { global_names = malloc(sizeof(tlhash_t)); - tlhash_init(global_names, 20); + if (global_names == NULL || tlhash_init(global_names, 1) != TLHASH_SUCCESS) { + printf("Couldn't initialize symbol table\n"); + exit(1); + } find_globals(); size_t n_globals = tlhash_size ( global_names ); symbol_t *global_list[n_globals]; @@ -142,12 +150,31 @@ destroy_symbol_table ( void ) destroy_symtab(); } -// Helper function +// Insert a global symbol into the symbol table +// These symbols are unique, so we use their name as the key void tlhash_insert_symbol(tlhash_t *tab, symbol_t *symbol) { tlhash_insert(tab, symbol->name, strlen(symbol->name), symbol); } +// Insert a local symbol (parameter or local variable) into the symbol table +// These symbols can have the same names as other symbols, +// so we generate a unique key "function_name:symbol_name:seq" for each +void tlhash_insert_local_symbol(tlhash_t *tab, symbol_t *symbol, char *function_name, int seq) +{ + size_t size = strlen(function_name) + strlen(symbol->name) + 6; /* room for two ':' separators, up to three digits of seq, and the NUL */ + char *key = calloc(size, sizeof(char)); + int result = (key != NULL) ? snprintf(key, size, "%s:%s:%d", function_name, symbol->name, seq) : -1; /* a failed allocation is reported through the error path below */ + + if (result < 0 || (size_t) result >= size) { + printf("Couldn't create key for local symbol %s\n", symbol->name); + exit(1); + } + + tlhash_insert(tab, key, size, symbol); /* the key length is the whole zero-filled buffer, not strlen(key) */ + free(key); +} + void find_globals ( void ) { @@ -156,52 +183,270 @@ find_globals ( void ) for (int i = 0; i < global_list->n_children; i++) { node_t 
*child = global_list->children[i]; - symbol_t *symbol = malloc(sizeof(symbol_t)); - symbol->node = child; if (child->type == DECLARATION) { - symbol->name = child->children[0]->children[0]->data; - symbol->type = SYM_GLOBAL_VAR; + for (int j = 0; j < child->children[0]->n_children; j++) { /* one global symbol per name in the declaration's variable list */ + symbol_t *symbol = malloc(sizeof(symbol_t)); + symbol->locals = NULL; /* destroy_symtab skips the local table when this is NULL */ + symbol->node = child->children[0]->children[j]; + symbol->name = symbol->node->data; + symbol->type = SYM_GLOBAL_VAR; + tlhash_insert_symbol(global_names, symbol); + } } else if (child->type == FUNCTION) { + symbol_t *symbol = malloc(sizeof(symbol_t)); + symbol->node = child; symbol->name = child->children[0]->data; symbol->type = SYM_FUNCTION; - symbol->nparms = child->children[1]->n_children; - symbol->seq = function_count; - function_count++; - + symbol->seq = function_count++; /* functions are numbered in the order they are found */ symbol->locals = malloc(sizeof(tlhash_t)); - tlhash_init(symbol->locals, symbol->nparms/2); - - for (int j = 0; j < symbol->nparms; j++) { - node_t *parameter = child->children[1]->children[j]; - symbol_t *parameter_symbol = malloc(sizeof(symbol_t)); - parameter_symbol->name = parameter->data; - parameter_symbol->type = SYM_PARAMETER; - parameter_symbol->node = parameter; - parameter_symbol->seq = j; + if (tlhash_init(symbol->locals, 1) != TLHASH_SUCCESS) { + printf("Couldn't initialize local symbol table for function %s\n", symbol->name); + exit(1); + } - tlhash_insert_symbol(symbol->locals, parameter_symbol); + if (child->children[1] != NULL) { /* a NULL parameter list means the function takes no parameters */ + symbol->nparms = child->children[1]->n_children; + } else { + symbol->nparms = 0; } + + tlhash_insert_symbol(global_names, symbol); } else { - // The node should be a variable or a function. - // Something is wrong + puts("The node should be a variable or a function. 
Something is wrong\n"); + exit(1); + } + } +} + +void +bind_variable(symbol_t *function, node_t *identifier, struct scope_stack *stack) +{ /* Link identifier->entry to the first symbol with its name, searching from the given scope outwards; exits if none is found */ + char *name = identifier->data; + symbol_t *symbol; + + while (stack != NULL) { + int result = tlhash_lookup(stack->scope, name, strlen(name), &symbol); + + if (result == TLHASH_SUCCESS) { + identifier->entry = symbol; + return; + } else if (result == TLHASH_ENOMEM) { + puts("Error in bind_variable: No memory available"); exit(1); } - tlhash_insert_symbol(global_names, symbol); + stack = stack->next; } + + // The variable is not declared + printf("Error in function %s: Variable %s is not defined\n", function->name, identifier->data); + exit(1); } void -bind_names ( symbol_t *function, node_t *root ) +bind_string(node_t *node) +{ /* Append the node's string to string_list and replace node->data with a heap-allocated index into that list */ + if (node->type != STRING_DATA) { + printf("Error in bind_string: Illegal node type %d\n", node->type); + exit(1); + } + + if (string_list == NULL) { + string_list = calloc(stringc + 1, sizeof(char*)); + } else { + string_list = realloc(string_list, (stringc + 1)*sizeof(char*)); + } + if (string_list == NULL) { + printf("Error in bind_string: Couldn't realloc string array with size %zu\n", stringc+1); exit(1); /* never fall through to the store below with a NULL list */ + } + string_list[stringc] = node->data; + size_t *count = malloc(sizeof(size_t)); + *count = stringc; + node->data = (void*) count; + stringc++; +} + +void +bind_expression(symbol_t *function, node_t *expression, struct scope_stack *stack) +{ + if (expression->type == IDENTIFIER_DATA) { + bind_variable(function, expression, stack); + } else if (expression->type == STRING_DATA) { + bind_string(expression); + } else if (expression->type == NUMBER_DATA) { + // Ignore + } else if (expression->type == EXPRESSION || expression->type == RELATION) { + if (expression->data == NULL) { + bind_variable(function, expression->children[0], stack); + + for (int i = 0; i < expression->children[1]->n_children; i++) { + node_t *child = expression->children[1]->children[i]; + + if (child->type == IDENTIFIER_DATA) { + 
bind_variable(function, child, stack); + } else if (child->type == EXPRESSION) { + bind_expression(function, child, stack); + } + } + } else { + for (int i = 0; i < expression->n_children; i++) { + bind_expression(function, expression->children[i], stack); + } + } + } else { + printf("Error in bind_expression: Illegal node type %d\n", expression->type); + exit(1); + } +} + +void +bind_statement(symbol_t *function, node_t *node, struct scope_stack *stack) +{ /* Bind the names used by one statement; statement types not listed below are left untouched */ + if (node->type == ASSIGNMENT_STATEMENT) { + bind_variable(function, node->children[0], stack); + bind_expression(function, node->children[1], stack); + } else if (node->type == PRINT_STATEMENT) { + for (int i = 0; i < node->n_children; i++) { + bind_expression(function, node->children[i], stack); + } + } else if (node->type == IF_STATEMENT || node->type == WHILE_STATEMENT) { + bind_expression(function, node->children[0], stack); + + // The if statement can have two sub-statements, the if and the else. + // The while statement only has one, but the logic is the same + for (int i = 1; i < node->n_children; i++) { + if (node->children[i]->type == BLOCK) { + bind_scope(function, node->children[i], stack); + } else { + bind_statement(function, node->children[i], stack); + } + } + } else if (node->type == RETURN_STATEMENT) { + bind_expression(function, node->children[0], stack); + } +} + +void +bind_scope ( symbol_t *function, node_t *root, struct scope_stack *parent_stack ) { // Go through variable list, add to local scope - // Go through rest of function - // Replace variable references with pointers to the symbol table - // put strings in the symbol table, and increment root->data = stringc + node_t *statement_list; + int seq = tlhash_size(function->locals); /* continue numbering after the symbols already in the function's table */ + + struct scope_stack *stack = malloc(sizeof(struct scope_stack)); + stack->next = parent_stack; /* the new scope becomes the innermost entry of the stack */ + stack->scope = malloc(sizeof(tlhash_t)); + if (tlhash_init(stack->scope, 1) != TLHASH_SUCCESS) { + printf("Couldn't initialize stack\n"); + exit(1); + } + + if 
(root->n_children == 2) { + node_t *variable_list = root->children[0]; + statement_list = root->children[1]; + + for (int i = 0; i < variable_list->n_children; i++) { + for (int j = 0; j < variable_list->children[i]->children[0]->n_children; j++) { + node_t *variable = variable_list->children[i]->children[0]->children[j]; + symbol_t *variable_symbol = malloc(sizeof(symbol_t)); + variable_symbol->name = variable->data; + variable_symbol->type = SYM_LOCAL_VAR; + variable_symbol->node = variable; + variable_symbol->seq = seq++; + + // Insert into the function's locals table with a unique key (seq was already incremented above, so the number in the key is variable_symbol->seq plus one) + tlhash_insert_local_symbol(function->locals, variable_symbol, function->name, seq); + // Insert into the current scope's table with the name as the key + tlhash_insert_symbol(stack->scope, variable_symbol); + } + } + } else { + statement_list = root->children[0]; + } + + for (int i = 0; i < statement_list->n_children; i++) { + bind_statement(function, statement_list->children[i], stack); + } + + tlhash_finalize(stack->scope); + free(stack->scope); + free(stack); +} + +void +bind_names ( symbol_t *function, node_t *root ) +{ + struct scope_stack *global_stack = malloc(sizeof(struct scope_stack)); + struct scope_stack *stack = malloc(sizeof(struct scope_stack)); + + global_stack->next = NULL; + global_stack->scope = global_names; + stack->next = global_stack; + stack->scope = malloc(sizeof(tlhash_t)); + if (tlhash_init(stack->scope, 1) != TLHASH_SUCCESS) { + printf("Couldn't initialize stack\n"); + exit(1); + } + + // Insert the function parameters into the symbol table + for (int i = 0; i < function->nparms; i++) { + node_t *parameter = root->children[1]->children[i]; + symbol_t *parameter_symbol = malloc(sizeof(symbol_t)); + parameter_symbol->name = parameter->data; + parameter_symbol->type = SYM_PARAMETER; + parameter_symbol->node = parameter; + parameter_symbol->seq = i; + + // Insert into the function's locals table with a unique key + tlhash_insert_local_symbol(function->locals, parameter_symbol, 
function->name, i); + // Insert into the parameter scope's table with the name as the key + tlhash_insert_symbol(stack->scope, parameter_symbol); + } + + bind_scope(function, root->children[2], stack); + + tlhash_finalize(stack->scope); + free(stack->scope); + free(stack); + free(global_stack); } void destroy_symtab ( void ) { + for (size_t i = 0; i < stringc; i++) { /* release every string collected by bind_string, then the list itself */ + free(string_list[i]); + } + free(string_list); + + symbol_t **values = calloc(tlhash_size(global_names), sizeof(symbol_t*)); + + if (values != NULL) { + tlhash_values(global_names, values); /* only fill the array once we know it was allocated */ + for (size_t i = 0; i < tlhash_size(global_names); i++) { + symbol_t *symbol = values[i]; + + if (symbol->locals != NULL) { /* find_globals sets locals to NULL for global variables */ + symbol_t **local_values = calloc(tlhash_size(symbol->locals), sizeof(symbol_t*)); + + if (local_values != NULL) { + tlhash_values(symbol->locals, local_values); + for (size_t j = 0; j < tlhash_size(symbol->locals); j++) { + symbol_t *local_symbol = local_values[j]; + free(local_symbol); + } + } + + free(local_values); + tlhash_finalize(symbol->locals); + free(symbol->locals); + } + + free(symbol); + } + } + free(values); + tlhash_finalize(global_names); + free(global_names); } diff --git a/vsl_programs/euclid.tree b/vsl_programs/euclid.tree index b812cea..960eb7d 100644 --- a/vsl_programs/euclid.tree +++ b/vsl_programs/euclid.tree @@ -82,3 +82,53 @@ IDENTIFIER_DATA(a) RETURN_STATEMENT IDENTIFIER_DATA(g) +String table: +0: "Greatest common divisor of" +1: "and" +2: "is" +3: "and" +4: "are relative primes" +-- +Globals: +gcd: function 1: + 3 local variables, 2 are parameters: + g: local var 2 + b: parameter 1 + a: parameter 0 +euclid: function 0: + 2 local variables, 2 are parameters: + b: parameter 1 + a: parameter 0 +-- +Linked parameter 0 ('a') +Linked parameter 0 ('a') +Linked parameter 0 ('a') +Linked parameter 1 ('b') +Linked parameter 1 ('b') +Linked parameter 1 ('b') +Linked function 1 ('gcd') +Linked parameter 0 ('a') +Linked parameter 1 ('b') +Linked string 0 +Linked parameter 0 ('a') +Linked string 1 
+Linked parameter 1 ('b') +Linked string 2 +Linked function 1 ('gcd') +Linked parameter 0 ('a') +Linked parameter 1 ('b') +Linked parameter 0 ('a') +Linked string 3 +Linked parameter 1 ('b') +Linked string 4 +Linked parameter 1 ('b') +Linked local var 2 ('g') +Linked function 1 ('gcd') +Linked parameter 1 ('b') +Linked parameter 0 ('a') +Linked parameter 0 ('a') +Linked parameter 1 ('b') +Linked parameter 1 ('b') +Linked local var 2 ('g') +Linked parameter 0 ('a') +Linked local var 2 ('g') diff --git a/vsl_programs/lexical.tree b/vsl_programs/lexical.tree index 16fa2cf..6597a7c 100644 --- a/vsl_programs/lexical.tree +++ b/vsl_programs/lexical.tree @@ -116,3 +116,81 @@ IDENTIFIER_DATA(a) RETURN_STATEMENT NUMBER_DATA(0) +String table: +0: "Hello, world!" +1: "" +2: "Hello, \"world\"!" +3: "+" +4: ":=" +5: "-" +6: ":=" +7: "+ (-" +8: ") :=" +9: "*" +10: ":=" +11: "/" +12: ":=" +13: "Skip..." +-- +Globals: +main: function 0: + 3 local variables, 0 are parameters: + a_2: local var 2 + _a1: local var 1 + a: local var 0 +-- +Linked string 0 +Linked string 1 +Linked string 2 +Linked local var 1 ('_a1') +Linked local var 2 ('a_2') +Linked local var 0 ('a') +Linked local var 1 ('_a1') +Linked local var 2 ('a_2') +Linked local var 1 ('_a1') +Linked string 3 +Linked local var 2 ('a_2') +Linked string 4 +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 1 ('_a1') +Linked local var 2 ('a_2') +Linked local var 1 ('_a1') +Linked string 5 +Linked local var 2 ('a_2') +Linked string 6 +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 1 ('_a1') +Linked local var 2 ('a_2') +Linked local var 1 ('_a1') +Linked string 7 +Linked local var 2 ('a_2') +Linked string 8 +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 1 ('_a1') +Linked local var 2 ('a_2') +Linked local var 1 ('_a1') +Linked string 9 +Linked local var 2 ('a_2') +Linked string 10 +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 1 ('_a1') +Linked 
local var 2 ('a_2') +Linked local var 1 ('_a1') +Linked string 11 +Linked local var 2 ('a_2') +Linked string 12 +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked local var 0 ('a') +Linked string 13 +Linked local var 0 ('a') diff --git a/vsl_programs/newton.tree b/vsl_programs/newton.tree index 5b3dc7b..aa3ba1e 100644 --- a/vsl_programs/newton.tree +++ b/vsl_programs/newton.tree @@ -57,3 +57,34 @@ EXPRESSION_LIST IDENTIFIER_DATA(n) IDENTIFIER_DATA(next) +String table: +0: "The square root of" +1: "is" +-- +Globals: +improve: function 1: + 3 local variables, 2 are parameters: + next: local var 2 + estimate: parameter 1 + n: parameter 0 +newton: function 0: + 1 local variables, 1 are parameters: + n: parameter 0 +-- +Linked string 0 +Linked parameter 0 ('n') +Linked string 1 +Linked function 1 ('improve') +Linked parameter 0 ('n') +Linked local var 2 ('next') +Linked parameter 1 ('estimate') +Linked parameter 1 ('estimate') +Linked parameter 1 ('estimate') +Linked parameter 0 ('n') +Linked parameter 1 ('estimate') +Linked local var 2 ('next') +Linked parameter 1 ('estimate') +Linked local var 2 ('next') +Linked function 1 ('improve') +Linked parameter 0 ('n') +Linked local var 2 ('next') diff --git a/vsl_programs/prec.tree b/vsl_programs/prec.tree index 90b4862..7633fcc 100644 --- a/vsl_programs/prec.tree +++ b/vsl_programs/prec.tree @@ -18,3 +18,13 @@ NUMBER_DATA(-2) RETURN_STATEMENT NUMBER_DATA(0) +String table: +-- +Globals: +unary_minus_precedence: function 0: + 2 local variables, 0 are parameters: + b: local var 1 + a: local var 0 +-- +Linked local var 0 ('a') +Linked local var 1 ('b') diff --git a/vsl_programs/prime.tree b/vsl_programs/prime.tree index 563de03..91a849a 100644 --- a/vsl_programs/prime.tree +++ b/vsl_programs/prime.tree @@ -69,3 +69,34 @@ STRING_DATA("is a prime factor") 
RETURN_STATEMENT NUMBER_DATA(0) +String table: +0: "is a prime factor" +-- +Globals: +factor: function 1: + 3 local variables, 1 are parameters: + r: local var 2 + f: local var 1 + n: parameter 0 +main: function 0: + 0 local variables, 0 are parameters: +-- +Linked function 1 ('factor') +Linked local var 1 ('f') +Linked parameter 0 ('n') +Linked parameter 0 ('n') +Linked local var 1 ('f') +Linked parameter 0 ('n') +Linked local var 1 ('f') +Linked local var 1 ('f') +Linked local var 1 ('f') +Linked local var 1 ('f') +Linked local var 2 ('r') +Linked function 1 ('factor') +Linked local var 1 ('f') +Linked local var 2 ('r') +Linked function 1 ('factor') +Linked parameter 0 ('n') +Linked local var 1 ('f') +Linked parameter 0 ('n') +Linked string 0 diff --git a/vsl_programs/simplify.tree b/vsl_programs/simplify.tree index c1d9499..d047cdc 100644 --- a/vsl_programs/simplify.tree +++ b/vsl_programs/simplify.tree @@ -34,3 +34,24 @@ IDENTIFIER_DATA(d) RETURN_STATEMENT NUMBER_DATA(0) +String table: +0: "a=" +-- +Globals: +simplify: function 0: + 2 local variables, 2 are parameters: + f: parameter 1 + e: parameter 0 +d: global variable +c: global variable +b: global variable +a: global variable +-- +Linked global var 'a' +Linked string 0 +Linked global var 'a' +Linked global var 'a' +Linked global var 'b' +Linked function 0 ('simplify') +Linked global var 'c' +Linked global var 'd'