From 7a61f213fb9be8ab7f9bd0fb33940b21fa143b05 Mon Sep 17 00:00:00 2001 From: Wojciech Kosior Date: Sat, 3 Oct 2020 21:45:24 +0200 Subject: fixes, conditional if-not jump and translation of if-else instruction from wasm --- tools/assemble.c | 40 +++++-- tools/parse_module.c | 45 ++----- tools/stack_machine_instruction.h | 69 ++++++----- tools/translate.c | 246 +++++++++++++++++++++++++++++++++++--- tools/translate_xmacro.h | 5 + tools/wasm.h | 30 +++++ tools/wasm_compile.h | 24 +++- 7 files changed, 369 insertions(+), 90 deletions(-) (limited to 'tools') diff --git a/tools/assemble.c b/tools/assemble.c index 11bb7c9..b8bd75e 100644 --- a/tools/assemble.c +++ b/tools/assemble.c @@ -55,7 +55,8 @@ static uint8_t estimate_instruction_size(struct instruction *instruction) case DATA_NONE : return 2; case DATA_KNOWN : - case DATA_UNKNOWN : + case DATA_INSTR_ADDR : + case DATA_ADDR_AFTER : return 6; case DATA_KNOWN_21_BITS : return 4; @@ -81,16 +82,26 @@ static void assign_addresses_and_sizes(struct instruction *expr, uint32_t address) { struct instruction *tmp = expr; + uint32_t target_addr; do { + if ((tmp->data.info == DATA_INSTR_ADDR || + tmp->data.info == DATA_ADDR_AFTER) && + *tmp->data.data.ptr && + (*tmp->data.data.ptr)->address_assigned) { + target_addr = (*tmp->data.data.ptr)->address; + + if (tmp->data.info == DATA_ADDR_AFTER) { + target_addr += estimate_instruction_size + (*tmp->data.data.ptr); + } + + tmp->data = im(target_addr); + } + tmp->address = address; tmp->address_assigned = true; - if (tmp->data.info == DATA_UNKNOWN && - *tmp->data.data.ptr && - (*tmp->data.data.ptr)->address_assigned) - tmp->data = im((*tmp->data.data.ptr)->address); - address += estimate_instruction_size(tmp); tmp = tmp->next; } while (tmp != expr); @@ -108,21 +119,24 @@ static void encode_instruction(struct instruction *instruction, uint16_t encoding = instruction->encoding; uint16_t *dest = memory + instruction->address / 2; - if (instruction->data.info != DATA_NONE) { - if (instruction->data.info == DATA_UNKNOWN) - im = (*instruction->data.data.ptr)->address; - else - im = instruction->data.data.im; + if (instruction->data.info == DATA_INSTR_ADDR) { + im = (*instruction->data.data.ptr)->address; + } else if (instruction->data.info == DATA_ADDR_AFTER) { + im = (*instruction->data.data.ptr)->address + + estimate_instruction_size(*instruction->data.data.ptr); + } else if (instruction->data.info != DATA_NONE) { + im = instruction->data.data.im; } switch (instruction->data.info) { - case DATA_UNKNOWN : + case DATA_INSTR_ADDR : + case DATA_ADDR_AFTER : case DATA_KNOWN : *(dest++) = im_instruction(im >> 22); case DATA_KNOWN_21_BITS : *(dest++) = im_instruction(im >> 7); case DATA_KNOWN_6_BITS : - encoding |= (im & 0x7F); + encoding |= im & 0x7F; } *dest = encoding; diff --git a/tools/parse_module.c b/tools/parse_module.c index 75329f8..3f9895c 100644 --- a/tools/parse_module.c +++ b/tools/parse_module.c @@ -2,39 +2,11 @@ #include "wasm_compile.h" #include "wasm.h" -static inline int is_valid_valtype(char value_type) -{ - if (value_type == VALTYPE_I32) - return 1; - - if (value_type == VALTYPE_I64 || - value_type == VALTYPE_F32 || - value_type == VALTYPE_F64) - PRERR("Only type i32 is recognized for now"); - - return -1; - - /* return */ - /* value_type == VALTYPE_I32 || */ - /* value_type == VALTYPE_I64 || */ - /* value_type == VALTYPE_F32 || */ - /* value_type == VALTYPE_F64; */ -} - -static inline int is_valid_exportdesc(char desc) -{ - return - desc == EXPORT_FUNCIDX || - desc == EXPORT_TABLEIDX || - desc == EXPORT_MEMIDX || - desc == EXPORT_GLOBALIDX; -} - -int leb_u32(FILE *handle, uint32_t *result) +int leb_32(FILE *handle, uint32_t *result, bool with_sign) { int i, j; int encoded[5]; - uint64_t decoded; + int64_t decoded; for (i = 0; i < 5; i++) { encoded[i] = fgetc(handle); @@ -53,12 +25,17 @@ int leb_u32(FILE *handle, uint32_t *result) } } - decoded = 0; + if (with_sign && encoded[i] & (1 << 6)) + decoded = -1; + else + decoded = 0; for (j = i; j >= 0; j--) decoded = (decoded << 7) | (encoded[j] & 0x7F); - if (decoded > UINT32_MAX) { + if ((with_sign && decoded > INT32_MAX) || + (with_sign && decoded < INT32_MIN) || + (!with_sign && decoded > UINT32_MAX)) { PRERR(MSG_BAD_NUM_ENC); return -1; } @@ -105,6 +82,8 @@ void free_module(struct module *module) free(module->exports); + free_targets(module->targets); + free(module); } @@ -602,7 +581,7 @@ struct module *parse_module(FILE *handle) /* check magic number */ if (memcmp(initial, magic, 4)) { - PRERR("Bad magic number"); + PRERR("Bad magic number\n"); goto fail; } diff --git a/tools/stack_machine_instruction.h b/tools/stack_machine_instruction.h index 7b22c83..ea65334 100644 --- a/tools/stack_machine_instruction.h +++ b/tools/stack_machine_instruction.h @@ -1,5 +1,7 @@ #include +#include "wasm_compile.h" + /* * TODO: this enum is to be removed; it it only still here as a cheat sheet * for use when defining missing inline functions for those instructions @@ -43,7 +45,8 @@ enum instruction_code { #define DATA_KNOWN 1 #define DATA_KNOWN_21_BITS 2 #define DATA_KNOWN_6_BITS 3 -#define DATA_UNKNOWN 4 +#define DATA_INSTR_ADDR 4 +#define DATA_ADDR_AFTER 5 struct instruction_data { char info; @@ -65,8 +68,6 @@ struct instruction { .info = DATA_NONE \ }) -#define POW(n) (((int64_t) 1) << (n)) - inline static uint8_t im_instruction_size(int32_t im) { if (im < POW(6) && im >= -POW(6)) @@ -98,7 +99,16 @@ inline static struct instruction_data im(uint32_t im) inline static struct instruction_data ptr(struct instruction **val) { struct instruction_data data; - data.info = DATA_UNKNOWN; + data.info = DATA_INSTR_ADDR; + data.data.ptr = val; + + return data; +} + +inline static struct instruction_data ptr_after(struct instruction **val) { + struct instruction_data data; + + data.info = DATA_ADDR_AFTER; data.data.ptr = val; return data; @@ -122,30 +132,33 @@ int add_instruction(struct instruction **expr, uint16_t encoding, return add_instruction(expr, encoding, NO_DATA); \ } -X(store, 0x7E00) /* 0111_1110_0xxx_xxxx */ -X(store_p, 0x6E00) /* 0110_1110_0xxx_xxxx */ -X(storeb_p, 0x6C00) /* 0110_1100_0xxx_xxxx */ -X(storew_p, 0x6D00) /* 0110_1101_0xxx_xxxx */ -X(load, 0x5E00) /* 0101_1110_0xxx_xxxx */ -X(load_p, 0x4E00) /* 0100_1110_0xxx_xxxx */ -X(loadbzx_p, 0x4C00) /* 0100_1100_0xxx_xxxx */ -X(loadbsx_p, 0x4C80) /* 0100_1100_1xxx_xxxx */ -X(loadwzx_p, 0x4D00) /* 0100_1101_0xxx_xxxx */ -X(loadwsx_p, 0x4D80) /* 0100_1101_1xxx_xxxx */ -Y(swap, 0x0002) /* 0000_0000_0000_0010 */ -X(set_sp, 0x4000) /* 0100_0000_0xxx_xxxx */ -X(jump, 0x4080) /* 0100_0000_1xxx_xxxx */ -Y(tee, 0x1000) /* 0001_0000_0000_0000 */ -Y(get_frame, 0x1001) /* 0001_0000_0000_0001 */ -X(const, 0x5000) /* 0101_0000_0xxx_xxxx */ -X(call, 0x5080) /* 0101_0000_1xxx_xxxx */ -Y(add, 0x3000) /* 0011_0000_0000_0000 */ -Y(sub, 0x3001) /* 0011_0000_0000_0001 */ -Y(div, 0x3002) /* 0011_0000_0000_0010 */ -Y(mul, 0x3003) /* 0011_0000_0000_0011 */ -Y(drop, 0x3004) /* 0011_0000_0000_0100 */ -Y(ret, 0x3080) /* 0011_0000_1000_0000 */ -Y(halt, 0x0000) /* 0000_0000_0000_0000 */ +X(store, 0x7E00) /* 0111_1110_0xxx_xxxx */ +X(store_p, 0x6E00) /* 0110_1110_0xxx_xxxx */ +X(storeb_p, 0x6C00) /* 0110_1100_0xxx_xxxx */ +X(storew_p, 0x6D00) /* 0110_1101_0xxx_xxxx */ +X(load, 0x5E00) /* 0101_1110_0xxx_xxxx */ +X(load_p, 0x4E00) /* 0100_1110_0xxx_xxxx */ +X(loadbzx_p, 0x4C00) /* 0100_1100_0xxx_xxxx */ +X(loadbsx_p, 0x4C80) /* 0100_1100_1xxx_xxxx */ +X(loadwzx_p, 0x4D00) /* 0100_1101_0xxx_xxxx */ +X(loadwsx_p, 0x4D80) /* 0100_1101_1xxx_xxxx */ +Y(nop, 0x0001) /* 0000_0000_0000_0001 */ +Y(swap, 0x0002) /* 0000_0000_0000_0010 */ +X(set_sp, 0x4000) /* 0100_0000_0xxx_xxxx */ +X(jump, 0x4080) /* 0100_0000_1xxx_xxxx */ +Y(tee, 0x1000) /* 0001_0000_0000_0000 */ +Y(get_frame, 0x1001) /* 0001_0000_0000_0001 */ +X(const, 0x5000) /* 0101_0000_0xxx_xxxx */ +X(call, 0x5080) /* 0101_0000_1xxx_xxxx */ +Y(add, 0x3000) /* 0011_0000_0000_0000 */ +Y(sub, 0x3001) /* 0011_0000_0000_0001 */ +Y(div, 0x3002) /* 0011_0000_0000_0010 */ +Y(mul, 0x3003) /* 0011_0000_0000_0011 */ +Y(drop, 0x3004) /* 0011_0000_0000_0100 */ +Y(ret, 0x3080) /* 0011_0000_1000_0000 */ +X(cond_jump, 0x7080) /* 0111_0000_1xxx_xxxx */ +X(cond_jump_n, 0x7100) /* 0111_0001_0xxx_xxxx */ +Y(halt, 0x0000) /* 0000_0000_0000_0000 */ #undef X #undef Y diff --git a/tools/translate.c b/tools/translate.c index b3d2afd..3a4496d 100644 --- a/tools/translate.c +++ b/tools/translate.c @@ -2,9 +2,15 @@ #include "wasm.h" #include "stack_machine_instruction.h" +struct target { + struct instruction *instr; + struct target *prev; +}; + struct types { struct types *prev; char type; /* should be one of VALTYPE_* constants from wasm.h */ + int refs; }; struct translation { @@ -14,7 +20,40 @@ struct translation { struct types *types_stack; }; -void free_types_stack(struct types *top) +struct end_markers { + int count; + const uint8_t *codes; +}; + +void free_targets(struct target *top) +{ + struct target *tmp; + + while (top) { + tmp = top->prev; + free(top); + top = tmp; + } +} + +inline static void get_type(struct types *type) +{ + if (type) + type->refs++; +} + +static void put_type(struct types *type) +{ + struct types *tmp; + + while (type && !--type->refs) { + tmp = type->prev; + free(type); + type = tmp; + } +} + +static void free_types_stack(struct types *top) { struct types *tmp; @@ -25,13 +64,163 @@ void free_types_stack(struct types *top) } } +static int translate_expr(struct translation *data, struct resulttype *args, + struct resulttype *results, + const struct end_markers *end_markers, + char *marker_found); + /* All functions, that go into one of function pointer arrays, start with _ */ -/** DEFINE TRANSLATION FUNCTIONS **/ +/** DEFINE INSTRUCTION TRANSLATION FUNCTIONS **/ /* Translate complex - those routines have to be defined manually */ #define TC(wasm_opcode, name, argtypes, restype) +static int parse_blocktype(FILE *handle, struct resulttype *args, + struct resulttype *results, char *storage, + struct module *module) +{ + int readval; + uint32_t typeidx; + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + return -1; + } + + if (readval == 0x40) { + /* Blocktype is empty (no arguments, no result values) */ + *args = (struct resulttype) {.count = 0, .types = NULL}; + *results = *args; + return 0; + } + + /* + * A nonnegative array index encoded as signed number in LEB + * shall have 0 as the second (most significant) bit of the first byte. + * Otherwise, it can't be array index, but might be a simple value type. + */ + if (readval & (1 << 6)) { + if (!is_valid_valtype(readval)) + goto fail; + + *args = (struct resulttype) {.count = 0, .types = NULL}; + *storage = readval; + *results = (struct resulttype) {.count = 1, .types = storage}; + return 0; + } + + /* + * We know for sure it's a nonnegative number, we can just use leb_u32 + * decoding function (encoding as signed or unsigned is the same in this + * particular case). + */ + ungetc(readval, handle); + + if (leb_u32(handle, &typeidx)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (typeidx <= module->functypes_count) { + PRERR(MSG_BAD_IDX("type index")); + goto fail; + } + + *args = module->functypes[typeidx].args; + *results = module->functypes[typeidx].results; + return 0; + +fail: + PRERR("Couldn't parse blocktype\n"); + return -1; +} + +static struct target *add_target(struct module *module) +{ + struct target *tgt; + + tgt = malloc(sizeof(struct target)); + + if (!tgt) { + PRERR(MSG_ALLOC_FAIL(sizeof(struct target))); + return NULL; + } + + tgt->instr = NULL; + tgt->prev = module->targets; + module->targets = tgt; + return tgt; +} + +static int _translate_if(struct translation *data) +{ + struct types *backed_stack; + struct resulttype block_args, block_results; + char type_storage; + struct target *if_end, *else_end; + struct instruction **expr = &data->function->translated_body; + static const uint8_t if_end_markers_codes[2] = {WASM_ELSE, WASM_END}; + static const struct end_markers if_end_markers = { + .count = 2, + .codes = if_end_markers_codes + }; + static const struct end_markers else_end_markers = { + .count = 1, + .codes = if_end_markers_codes + 1 + }; + char marker_found; + int retval; + + if (parse_blocktype(data->handle, &block_args, &block_results, + &type_storage, data->module)) + goto fail; + + if_end = add_target(data->module); + else_end = add_target(data->module); + + if (!if_end || !else_end) + goto fail; + + if (i_cond_jump_n(ptr_after(&if_end->instr), expr)) + goto fail; + + backed_stack = data->types_stack; + get_type(backed_stack); + + retval = translate_expr(data, &block_args, &block_results, + &if_end_markers, &marker_found); + + put_type(data->types_stack); + data->types_stack = backed_stack; + + if (retval) + goto fail; + + if (i_jump(ptr_after(&else_end->instr), expr)) + goto fail; + + if_end->instr = data->function->translated_body->prev; + + if (marker_found == WASM_END) + ungetc(WASM_END, data->handle); + + if (translate_expr(data, &block_args, &block_results, + &else_end_markers, NULL)) + goto fail; + + else_end->instr = data->function->translated_body->prev; + + return 0; + +fail: + PRERR("Couldn't translate if-else instruction\n"); + + return -1; +} + static int typecheck_call(struct translation *data, struct function *callee); static int _translate_call(struct translation *data) @@ -98,9 +287,9 @@ static int _translate_local_get(struct translation *data) static int _translate_const(struct translation *data) { - uint32_t constant; + int32_t constant; - if (leb_u32(data->handle, &constant)) { + if (leb_s32(data->handle, &constant)) { PRERR(MSG_BAD_NUM); return -1; } @@ -209,7 +398,11 @@ static int argcheck_generic(struct types **types_stack, char expected) top_type = *types_stack; *types_stack = top_type->prev; - free(top_type); + + if (*types_stack) + get_type(*types_stack); + + put_type(top_type); return 0; } @@ -254,7 +447,7 @@ static int rescheck_generic(struct types **types_stack, char returned) return -1; } - *top = (struct types) {.prev = *types_stack, .type = returned}; + *top = (struct types) {.prev = *types_stack, .type = returned, .refs = 1}; *types_stack = top; return 0; @@ -289,7 +482,7 @@ struct typecheck { int (*argcheck) (struct types **), (*rescheck) (struct types **); }; -/*static*/ struct typecheck typecheck_routines[256] = { +static struct typecheck typecheck_routines[256] = { #include "translate_xmacro.h" }; @@ -355,7 +548,9 @@ static int translate_instr(struct translation *data, uint8_t wasm_opcode) } static int translate_expr(struct translation *data, struct resulttype *args, - struct resulttype *results) + struct resulttype *results, + const struct end_markers *end_markers, + char *marker_found) { struct types **tmp, *types_stack_rest; uint32_t i; @@ -382,13 +577,23 @@ static int translate_expr(struct translation *data, struct resulttype *args, goto fail; } - if (wasm_opcode == WASM_END) - break; + i = end_markers->count; + + while (i--) { + if (wasm_opcode == end_markers->codes[i]) { + if (marker_found) + *marker_found = wasm_opcode; + + goto block_end; + } + } if (translate_instr(data, wasm_opcode)) goto fail; } +block_end: + tmp = &data->types_stack; i = results ? results->count : 0; @@ -408,7 +613,7 @@ static int translate_expr(struct translation *data, struct resulttype *args, return 0; fail: - free_types_stack(data->types_stack); + put_type(data->types_stack); data->types_stack = types_stack_rest; return -1; @@ -421,10 +626,16 @@ int translate(FILE *handle, struct function *function, struct module *module) uint32_t locals_count = function->locals_count; uint32_t all_locals_count = args_count + locals_count; size_t i; + static const uint8_t function_end_marker_code = WASM_END; + static const struct end_markers function_end_markers = { + .count = 1, + .codes = &function_end_marker_code + }; struct translation data = {.handle = handle, .function = function, .module = module, .types_stack = NULL}; + int retval = -1; if (locals_count + (uint64_t) args_count > STACK_TOP_ADDR * 4) { PRERR("Too many locals in a function\n"); @@ -445,7 +656,8 @@ int translate(FILE *handle, struct function *function, struct module *module) goto fail; /* actual function body */ - if (translate_expr(&data, NULL, &function->type->results)) + if (translate_expr(&data, NULL, &function->type->results, + &function_end_markers, NULL)) goto fail; /* function epilogue */ @@ -471,7 +683,6 @@ int translate(FILE *handle, struct function *function, struct module *module) i_load_p (im(0), expr) || i_store (im(STACK_FRAME_BACKUP_ADDR), expr)) goto fail; - } i = locals_count + args_count + 2; @@ -487,13 +698,18 @@ int translate(FILE *handle, struct function *function, struct module *module) if (i_ret(expr)) goto fail; - return 0; + retval = 0; fail: + free_types_stack(data.types_stack); + + if (!retval) + return retval; + PRERR("Couldn't translate function to stack machine\n"); free_expr(*expr); function->translated_body = NULL; - return -1; + return retval; } diff --git a/tools/translate_xmacro.h b/tools/translate_xmacro.h index 5e5bd79..b271e75 100644 --- a/tools/translate_xmacro.h +++ b/tools/translate_xmacro.h @@ -15,6 +15,11 @@ TLS(WASM_I32_STORE, store_p, i32_i32, empty) TLS(WASM_I32_STORE8, storeb_p, i32_i32, empty) TLS(WASM_I32_STORE16, storew_p, i32_i32, empty) +/* + * There are more checks to be performed in case of if, but we do them + * another way and only check for the i32 condition value here. + */ +TC (WASM_IF, if, i32, custom) TC (WASM_CALL, call, custom, custom) TC (WASM_LOCAL_GET, local_get, empty, custom) TC (WASM_I32_CONST, const, empty, i32) diff --git a/tools/wasm.h b/tools/wasm.h index 07252fc..bf85490 100644 --- a/tools/wasm.h +++ b/tools/wasm.h @@ -22,6 +22,8 @@ #define EXPORT_GLOBALIDX 0x03 /* WebAssembly opcodes */ +#define WASM_IF 0x04 +#define WASM_ELSE 0x05 #define WASM_END 0x0B #define WASM_CALL 0x10 @@ -43,3 +45,31 @@ #define WASM_I32_SUB 0x6B #define WASM_I32_MUL 0x6C #define WASM_I32_DIV_U 0x6E + +static inline int is_valid_valtype(char value_type) +{ + if (value_type == VALTYPE_I32) + return 1; + + if (value_type == VALTYPE_I64 || + value_type == VALTYPE_F32 || + value_type == VALTYPE_F64) + PRERR("Only type i32 is recognized for now"); + + return -1; + + /* return */ + /* value_type == VALTYPE_I32 || */ + /* value_type == VALTYPE_I64 || */ + /* value_type == VALTYPE_F32 || */ + /* value_type == VALTYPE_F64; */ +} + +static inline int is_valid_exportdesc(char desc) +{ + return + desc == EXPORT_FUNCIDX || + desc == EXPORT_TABLEIDX || + desc == EXPORT_MEMIDX || + desc == EXPORT_GLOBALIDX; +} diff --git a/tools/wasm_compile.h b/tools/wasm_compile.h index e223ec6..4cee1bb 100644 --- a/tools/wasm_compile.h +++ b/tools/wasm_compile.h @@ -1,8 +1,12 @@ +#ifndef WASM_COMPILE_H +#define WASM_COMPILE_H + #include #include #include #include #include +#include #define STACK_FRAME_BACKUP_ADDR 0x0FFFFC #define STACK_TOP_ADDR 0x0FFFFC @@ -24,6 +28,8 @@ #define PRERR(...) fprintf(stderr, __VA_ARGS__) +#define POW(n) (((int64_t) 1) << (n)) + struct resulttype { uint32_t count; char *types; @@ -40,6 +46,7 @@ struct function { char *locals; struct instruction *translated_body; + struct target *targets; uint32_t start_addr; }; @@ -62,12 +69,25 @@ struct module { uint32_t mem_min, mem_max; uint32_t exports_count; struct export *exports; + struct target *targets; }; -int leb_u32(FILE *handle, uint32_t *result); +int leb_32(FILE *handle, uint32_t *result, bool with_sign); + +inline static int leb_u32(FILE *handle, uint32_t *result) +{ + return leb_32(handle, result, false); +} + +inline static int leb_s32(FILE *handle, int32_t *result) +{ + return leb_32(handle, (uint32_t *) result, true); +} void free_expr(struct instruction *expr); +void free_targets(struct target *top); + void free_module(struct module *module); struct module *parse_module(FILE *handle); @@ -76,3 +96,5 @@ int translate(FILE *handle, struct function *function, struct module *module); int assemble(uint32_t memory_size, uint16_t memory[memory_size], struct module *module); + +#endif /* WASM_COMPILE_H */ -- cgit v1.2.3