#include "bc_compiler.h" #include <assert.h> // DEBUG #include <stdio.h> void string_init ( String * str ) { str -> capacity = INIT_STRING_LENGTH; str -> len = 0; str -> str = (u8*) malloc ( INIT_STRING_LENGTH ); } void string_destroy ( String * str ) { free ( str -> str ); } void string_write_byte ( String * str, const u8 data ) { if ( str -> capacity == str -> len ) { u8 * tmp = (u8*) malloc ( str -> capacity * 2 ); assert ( tmp ); memcpy ( tmp, str -> str, str -> len ); free ( str -> str ); str -> str = tmp; str -> capacity *= 2; } str -> str [ str -> len ++ ] = data; } void string_write_u16 ( String * str, const u16 data ) { string_write_byte ( str, data & 255 ); string_write_byte ( str, data >> 8 ); } void string_write_i32 ( String * str, const i32 data ) { string_write_byte ( str, data & 255 ); string_write_byte ( str, (data >> 8) & 255 ); string_write_byte ( str, (data >> 16) & 255 ); string_write_byte ( str, (data >> 24) & 255 ); } void string_write_constant ( String * str, BCConstant constant ) { switch ( constant . kind ) { case CONSTANT_NULL: // fprintf ( stderr, "null, " ); string_write_byte ( str, 0x01 ); break; case CONSTANT_BOOLEAN: // fprintf ( stderr, "bool: %u, ", constant . boolean ); string_write_byte ( str, 0x04 ); string_write_byte ( str, constant . boolean ? 0x01 : 0x00 ); break; case CONSTANT_INTEGER: // fprintf ( stderr, "int: %d, ", constant . integer ); string_write_byte ( str, 0x00 ); string_write_i32 ( str, constant . integer ); break; case CONSTANT_STRING: // fprintf ( stderr, "\"%.*s\", ", (int) constant . string . len, constant . string . str ); string_write_byte ( str, 0x02 ); string_write_i32 ( str, constant . string . len ); for ( size_t i = 0; i < constant . string . len; ++i ) string_write_byte ( str, constant . string . str [ i ] ); break; case CONSTANT_FUNCTION: { // fprintf ( stderr, "f ( %u, %u, %u ), ", constant . function . parameters, constant . function . locals, constant . function . bc . len ); string_write_byte ( str, 0x03 ); string_write_byte ( str, constant . function . parameters ); string_write_u16 ( str, constant . function . locals ); string_write_i32 ( str, constant . function . bc . len ); for ( size_t i = 0; i < constant . function . bc . len; ++i ) string_write_byte ( str, constant . function . bc . str [ i ] ); break; } default: assert ( false ); break; } } BCConstant create_function ( u8 parameters ) { BCFunction function = (BCFunction) { .parameters = parameters, .locals = 0 }; string_init ( & function . bc ); return (BCConstant) { .kind = CONSTANT_FUNCTION, .function = function }; } LocalScope * create_function_scope ( u8 parameters ) { LocalScope * scope = (LocalScope*) malloc ( sizeof (LocalScope*) + 2 * sizeof (u16) + sizeof (Str) * ( parameters + MAX_SCOPE_VARIABLES ) ); scope -> locals [ 0 ] = STR ( "this" ); scope -> used_locals = 1; return scope; } void add_scope ( BCCompilerState * state ) { LocalScope * scope = (LocalScope*) malloc ( sizeof (LocalScope*) + 2 * sizeof (u16) + sizeof (Str) * MAX_SCOPE_VARIABLES ); scope -> local_count = state -> scope -> local_count; scope -> used_locals = 0; scope -> prev = state -> scope; state -> scope = scope; } void remove_scope ( BCCompilerState * state ) { LocalScope * scope = state -> scope; state -> scope = scope -> prev; free ( scope ); } void convert_function_to_bc ( BCCompilerState * state, u16 index ) { BCConstant * this_constant = & state -> constants . constants [ index ]; AstFunction * func = (AstFunction*) this_constant -> ast; LocalScope * old_scope = state -> scope; *this_constant = create_function ( func -> parameter_cnt + 1 ); u16 old_fp = state -> fp; state -> fp = index; state -> scope = create_function_scope ( func -> parameter_cnt + 1 ); state -> scope -> prev = state -> top; state -> scope -> used_locals = func -> parameter_cnt + 1; state -> scope -> local_count = func -> parameter_cnt + 1; for ( u16 i = 1; i < func -> parameter_cnt + 1; ++i ) state -> scope -> locals [ i ] = func -> parameters [ i - 1 ]; ast_to_bc ( state, func -> body ); free ( state -> scope ); string_write_byte ( & this_constant -> function . bc, 0x0F ); state -> scope = old_scope; state -> fp = old_fp; } u16 get_string_index ( BCCompilerState * state, Str str ) { u16 i; for ( i = 0; i < state -> constants . constant_count; ++i ) if ( state -> constants . constants [ i ] . kind == CONSTANT_STRING && str_eq ( state -> constants . constants [ i ] . string, str ) ) return i; return 0; } u16 make_string_index ( BCCompilerState * state, Str str ) { insert_constant ( state, (BCConstant) { .kind = CONSTANT_STRING, .string = str } ); return state -> constants . constant_count - 1; } u16 get_local_index ( BCCompilerState * state, Str name ) { LocalScope * scope = state -> scope; size_t i; while ( scope ) { for ( i = 0; i < scope -> used_locals; ++i ) if ( str_eq ( scope -> locals [ i ], name ) ) return scope -> local_count - scope -> used_locals + i; // return scope -> prev ? scope -> prev -> local_count + i: i; scope = scope -> prev; } return 256 * 256 - 1; } u16 make_local_index ( BCCompilerState * state, Str name ) { BCFunction * function = & state -> constants . constants [ state -> fp ] . function; u16 * n = & state -> scope -> local_count; assert ( *n != 256 * 256 ); state -> scope -> locals [ state -> scope -> used_locals ] = (Str) { .len = name . len, .str = name . str }; ++(*n); ++(state -> scope -> used_locals); if ( *n - function -> parameters > function -> locals ) { function -> locals = *n - function -> parameters; } return *n - 1; } void gen_bc_constant ( BCCompilerState * state, u16 index ) { BCFunction * function = & state -> constants . constants [ state -> fp ] . function; string_write_byte ( & function -> bc, 0x01 ); string_write_u16 ( & function -> bc, index ); } void insert_constant ( BCCompilerState * state, BCConstant constant ) { state -> constants . constants [ state -> constants . constant_count ++ ] = constant; } void bc_state_init ( BCCompilerState * state ) { state -> constants . constant_count = 0; state -> constants . null_pos = -1; state -> constants . bool_pos [ 0 ] = -1; state -> constants . bool_pos [ 1 ] = -1; state -> globals . global_count = 0; state -> scope = NULL; //state -> fp = 0; } void bc_state_destroy ( BCCompilerState * state ) { // TODO ( free strings ) LocalScope * scope = state -> scope; LocalScope * prev; while ( scope ) { prev = scope -> prev; free ( scope ); scope = prev; } for ( size_t i = 0; i < state -> constants . constant_count; ++i ) { BCConstant * constant = & state -> constants . constants [ i ]; if ( constant -> kind == CONSTANT_FUNCTION ) string_destroy ( & constant -> function . bc ); } } void ast_to_bc ( BCCompilerState * state, Ast * ast ) { switch ( ast -> kind ) { case AST_TOP: { AstTop * top = (AstTop*) ast; state -> ep = state -> fp = state -> constants . constant_count; insert_constant ( state, create_function ( 1 ) ); state -> scope = create_function_scope ( 1 ); state -> scope -> prev = NULL; state -> scope -> used_locals = 1; state -> scope -> local_count = 1; state -> top = state -> scope; BCFunction * function = & state -> constants . constants [ state -> fp ] . function; for ( size_t i = 0; i < top -> expression_cnt; ++i ) { ast_to_bc ( state, top -> expressions [ i ] ); if ( i != top -> expression_cnt - 1 ) string_write_byte ( & function -> bc, 0x00 ); } //free ( state -> scope ); string_write_byte ( & function -> bc, 0x0F ); return; } case AST_NULL: { if ( state -> constants . null_pos < 0 ) { state -> constants . null_pos = state -> constants . constant_count; insert_constant ( state, (BCConstant) { .kind = CONSTANT_NULL } ); } gen_bc_constant ( state, state -> constants . null_pos ); return; } case AST_INTEGER: { AstInteger * integer = (AstInteger*) ast; u16 i; for ( i = 0; i < state -> constants . constant_count; ++i ) if ( state -> constants . constants [ i ] . kind == CONSTANT_INTEGER && state -> constants . constants [ i ] . integer == integer -> value ) break; if ( i == state -> constants . constant_count ) insert_constant ( state, (BCConstant) { .kind = CONSTANT_INTEGER, .integer = integer -> value } ); gen_bc_constant ( state, i ); return; } case AST_BOOLEAN: { AstBoolean * boolean = (AstBoolean*) ast; size_t pos = boolean -> value ? 1 : 0; if ( state -> constants . bool_pos [ pos ] < 0 ) { state -> constants . bool_pos [ pos ] = state -> constants . constant_count; insert_constant ( state, (BCConstant) { .kind = CONSTANT_BOOLEAN, .boolean = boolean -> value } ); } gen_bc_constant ( state, state -> constants . bool_pos [ pos ] ); return; } case AST_BLOCK: { AstBlock * block = (AstBlock*) ast; // make local scope add_scope ( state ); BCFunction * function = & state -> constants . constants [ state -> fp ] . function; for ( size_t i = 0; i < block -> expression_cnt; ++i ) { ast_to_bc ( state, block -> expressions [ i ] ); if ( i != block -> expression_cnt - 1 ) string_write_byte ( & function -> bc, 0x00 ); } // delete local scope remove_scope ( state ); return; } case AST_FUNCTION: { insert_constant ( state, (BCConstant) { .kind = CONSTANT_AST_FUNCTION, .ast = ast } ); gen_bc_constant ( state, state -> constants . constant_count - 1 ); return; } case AST_FUNCTION_CALL: { AstFunctionCall * call = (AstFunctionCall*) ast; ast_to_bc ( state, call -> function ); for ( size_t i = 0; i < call -> argument_cnt; ++i ) ast_to_bc ( state, call -> arguments [ i ] ); BCFunction * function = & state -> constants . constants [ state -> fp ] . function; string_write_byte ( & function -> bc, 0x08 ); string_write_byte ( & function -> bc, call -> argument_cnt & 255 ); return; } case AST_PRINT: { AstPrint * print = (AstPrint*) ast; u16 index = get_string_index ( state, print -> format ); if ( index == 0 ) index = make_string_index ( state, print -> format ); for ( size_t i = 0; i < print -> argument_cnt; ++i ) ast_to_bc ( state, print -> arguments [ i ] ); BCFunction * function = & state -> constants . constants [ state -> fp ] . function; string_write_byte ( & function -> bc, 0x02 ); string_write_u16 ( & function -> bc, index ); string_write_byte ( & function -> bc, print -> argument_cnt & 255 ); return; } case AST_DEFINITION: { AstDefinition * def = (AstDefinition*) ast; ast_to_bc ( state, def -> value ); BCFunction * function = & state -> constants . constants [ state -> fp ] . function; if ( state -> scope == state -> top ) { // GLOBAL u16 index = get_string_index ( state, def -> name ); if ( index == 0 ) index = make_string_index ( state, def -> name ); u16 * count = & state -> globals . global_count; size_t i; for ( i = 0; i < *count; ++i ) if ( state -> globals . names [ i ] == index ) break; if ( i == *count ) { state -> globals . names [ *count ] = index; ++(*count); } string_write_byte ( & function -> bc, 0x0B ); string_write_u16 ( & function -> bc, index ); } else { // LOCAL u16 index = 65535; for ( size_t i = 0; i < state -> scope -> used_locals; ++i ) if ( str_eq ( state -> scope -> locals [ i ], def -> name ) ) index = function -> parameters + ( state -> scope -> prev ? state -> scope -> prev -> local_count + i: i ); if ( index == 65535 ) index = make_local_index ( state, def -> name ); string_write_byte ( & function -> bc, 0x09 ); string_write_u16 ( & function -> bc, index ); } return; } case AST_VARIABLE_ACCESS: { AstVariableAccess * var = (AstVariableAccess*) ast; BCFunction * function = & state -> constants . constants [ state -> fp ] . function; // try local u16 index = get_local_index ( state, var -> name ); if ( index == 65535 ) { // GLOBAL u16 str_index = get_string_index ( state, var -> name ); if ( str_index == 0 ) { // create global / local if ( state -> scope == state -> top ) { str_index = make_string_index ( state, var -> name ); u16 * count = & state -> globals . global_count; state -> globals . names [ *count ] = str_index; ++(*count); string_write_byte ( & function -> bc, 0x0C ); string_write_u16 ( & function -> bc, str_index ); return; } else { index = make_local_index ( state, var -> name ); string_write_byte ( & function -> bc, 0x0A ); string_write_u16 ( & function -> bc, index ); return; } } u16 * count = & state -> globals . global_count; size_t i; for ( i = 0; i < *count; ++i ) if ( state -> globals . names [ i ] == str_index ) break; if ( i == *count ) assert ( false ); string_write_byte ( & function -> bc, 0x0C ); string_write_u16 ( & function -> bc, str_index ); } else { // LOCAL string_write_byte ( & function -> bc, 0x0A ); string_write_u16 ( & function -> bc, index ); } return; } case AST_VARIABLE_ASSIGNMENT: { AstVariableAssignment * assign = (AstVariableAssignment*) ast; ast_to_bc ( state, assign -> value ); BCFunction * function = & state -> constants . constants [ state -> fp ] . function; // try local u16 index = get_local_index ( state, assign -> name ); if ( index == 65535 ) { // GLOBAL u16 str_index = get_string_index ( state, assign -> name ); if ( str_index == 0 ) { // create global / local if ( state -> scope == state -> top ) { str_index = make_string_index ( state, assign -> name ); u16 * count = & state -> globals . global_count; state -> globals . names [ *count ] = str_index; ++(*count); string_write_byte ( & function -> bc, 0x0B ); string_write_u16 ( & function -> bc, str_index ); return; } else { index = make_local_index ( state, assign -> name ); string_write_byte ( & function -> bc, 0x09 ); string_write_u16 ( & function -> bc, index ); return; } } u16 * count = & state -> globals . global_count; size_t i; for ( i = 0; i < *count; ++i ) if ( state -> globals . names [ i ] == str_index ) break; if ( i == *count ) assert ( false ); string_write_byte ( & function -> bc, 0x0B ); string_write_u16 ( & function -> bc, str_index ); } else { // LOCAL string_write_byte ( & function -> bc, 0x09 ); string_write_u16 ( & function -> bc, index ); } return; } case AST_CONDITIONAL: { AstConditional * cond = (AstConditional*) ast; BCFunction * function = & state -> constants . constants [ state -> fp ] . function; ast_to_bc ( state, cond -> condition ); string_write_byte ( & function -> bc, 0x0D ); string_write_u16 ( & function -> bc, 0 ); u16 alt_pos = function -> bc . len; if ( cond -> alternative -> kind != AST_NULL ) { add_scope ( state ); ast_to_bc ( state, cond -> alternative ); remove_scope ( state ); } string_write_byte ( & function -> bc, 0x0E ); string_write_u16 ( & function -> bc, 0 ); u16 cons_pos = function -> bc . len; u16 alt_len = cons_pos - alt_pos; function -> bc . str [ alt_pos - 2 ] = alt_len & 255; function -> bc . str [ alt_pos - 1 ] = (alt_len >> 8) & 255; add_scope ( state ); ast_to_bc ( state, cond -> consequent ); remove_scope ( state ); u16 cons_len = function -> bc . len - cons_pos; function -> bc . str [ cons_pos - 2 ] = cons_len & 255; function -> bc . str [ cons_pos - 1 ] = (cons_len >> 8) & 255; return; } case AST_LOOP: { AstLoop * loop = (AstLoop*) ast; BCFunction * function = & state -> constants . constants [ state -> fp ] . function; ast_to_bc ( state, (Ast*) & (AstNull) { .base = (Ast) { .kind = AST_NULL } } ); size_t start_pos = function -> bc . len; ast_to_bc ( state, loop -> condition ); string_write_byte ( & function -> bc, 0x0D ); string_write_u16 ( & function -> bc, 3 ); string_write_byte ( & function -> bc, 0x0E ); string_write_u16 ( & function -> bc, 0 ); size_t body_pos = function -> bc . len; add_scope ( state ); ast_to_bc ( state, loop -> body ); remove_scope ( state ); string_write_byte ( & function -> bc, 0x0E ); string_write_u16 ( & function -> bc, 0 ); size_t after_pos = function -> bc . len; i16 loop_len = after_pos - start_pos; function -> bc . str [ after_pos - 2 ] = (-loop_len) & 255; function -> bc . str [ after_pos - 1 ] = ((-loop_len) >> 8) & 255; u16 body_len = after_pos - body_pos; function -> bc . str [ body_pos - 2 ] = body_len & 255; function -> bc . str [ body_pos - 1 ] = (body_len >> 8) & 255; return; } default: assert ( false ); } } String generate_bc ( Ast * ast ) { // init String bc; string_init ( & bc ); BCCompilerState bc_state; bc_state_init ( & bc_state ); // generate internals (constants (exp. functions), globals) ast_to_bc ( &bc_state, ast ); // header string_write_byte ( & bc, 'F' ); string_write_byte ( & bc, 'M' ); string_write_byte ( & bc, 'L' ); string_write_byte ( & bc, '\n' ); // fprintf ( stderr, "printed FML\n" ); // constants to bc // first functions to bc internals for ( size_t i = 0; i < bc_state . constants . constant_count; ++i ) if ( bc_state . constants . constants [ i ] . kind == CONSTANT_AST_FUNCTION ) convert_function_to_bc ( & bc_state, i ); // fprintf ( stderr, "%d constants:\n", bc_state . constants . constant_count ); string_write_u16 ( & bc, bc_state . constants . constant_count ); for ( size_t i = 0; i < bc_state . constants . constant_count; ++i ) string_write_constant ( & bc, bc_state . constants . constants [ i ] ); // fprintf ( stderr, "\n" ); // globals to bc // fprintf ( stderr, "%u globals:\n", bc_state . globals . global_count ); string_write_u16 ( & bc, bc_state . globals . global_count ); for ( size_t i = 0; i < bc_state . globals . global_count; ++i ) { // fprintf ( stderr, "%u, ", bc_state . globals . names [ i ] ); string_write_u16 ( & bc, bc_state . globals . names [ i ] ); } // fprintf ( stderr, "\n" ); // EP string_write_u16 ( & bc, bc_state . ep ); // fprintf ( stderr, "EP=%u\n", bc_state . ep ); // free bc_state_destroy ( & bc_state ); // fprintf ( stderr, "len: %u, %.*s\n", bc . len, (int) bc . len, bc . str ); return bc; }