diff options
Diffstat (limited to 'tools/parse_module.c')
-rw-r--r-- | tools/parse_module.c | 662 |
1 files changed, 662 insertions, 0 deletions
diff --git a/tools/parse_module.c b/tools/parse_module.c new file mode 100644 index 0000000..85540af --- /dev/null +++ b/tools/parse_module.c @@ -0,0 +1,662 @@ +// TODO: count read bytes ourselves instead of relying on ftell() +#include "wasm_compile.h" +#include "wasm.h" + +static inline int is_valid_valtype(char value_type) +{ + return + value_type == VALTYPE_I32 || + value_type == VALTYPE_I64 || + value_type == VALTYPE_F32 || + value_type == VALTYPE_F64; +} + +static inline int is_valid_exportdesc(char desc) +{ + return + desc == EXPORT_FUNCIDX || + desc == EXPORT_TABLEIDX || + desc == EXPORT_MEMIDX || + desc == EXPORT_GLOBALIDX; +} + +int leb_u32(FILE *handle, uint32_t *result) +{ + int i, j; + int encoded[5]; + uint64_t decoded; + + for (i = 0; i < 5; i++) { + encoded[i] = fgetc(handle); + + if (encoded[i] == EOF) { + PRERR(MSG_EOF); + return -1; + } + + if (encoded[i] >= 0) + break; + + if (i == 4) { + PRERR(MSG_BAD_NUM_ENC); + return -1; + } + } + + decoded = 0; + + for (j = i; j >= 0; j--) + decoded = (decoded << 7) | (encoded[i] & 0x7F); + + if (decoded > UINT32_MAX) { + PRERR(MSG_BAD_NUM_ENC); + return -1; + } + + *result = decoded; + + return 0; +} + +void free_module(struct module *module) +{ + size_t i; + + if (!module) + return; + + for (i = 0; i < module->functypes_count; i++) + free(module->functypes[i].arguments); + + free(module->functypes); + + for (i = 0; i < module->functions_count; i++) { + free(module->functions[i].locals); + free_expr(module->functions[i].translated_body); + } + + free(module->functions); + + for (i = 0; i < module->exports_count; i++) + free(module->exports[i].name); + + free(module->exports); + + free(module); +} + +/* Guard against overflows on 32-bit systems */ +static inline int safe_mul(size_t *factor1, uint32_t factor2) +{ + uint64_t product; + + product = *factor1; + product *= factor2; + + if (product > SIZE_MAX) { + PRERR(MSG_SIZE_OVERFLOW); + return -1; + } + + *factor1 = product; + + return 0; +} + +int parse_type_section(FILE *handle, struct module *module) +{ + uint32_t types_count; + int readval; + size_t malloc_size; + struct functype *types = NULL; + uint32_t types_parsed = 0; + uint32_t args_count; + char *args; + uint32_t i; + + if (leb_u32(handle, &types_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + malloc_size = sizeof(struct functype); + + if (safe_mul(&malloc_size, types_count)) { + PRERR(MSG_BAD_SIZE); + goto fail; + } + + types = malloc(malloc_size); + + if (!types) { + PRERR(MSG_ALLOC_FAIL(malloc_size)); + goto fail; + } + + while (types_parsed < types_count) { + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (readval != 0x60) { + PRERR(MSG_BAD("functype starting byte (0x60)", + readval)); + goto fail; + } + + if (leb_u32(handle, &args_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (args_count) { + args = malloc(args_count); + + if (!args) { + PRERR(MSG_ALLOC_FAIL(args_count)); + goto fail; + } + } else { + args = NULL; + } + + types[types_parsed].arguments_count = args_count; + types[types_parsed].arguments = args; + /* Increment here, so that jump to fail: frees the args */ + types_parsed++; + + for (i = 0; i < args_count; i++) { + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (!is_valid_valtype(readval)) { + PRERR(MSG_BAD("value type encoding", readval)); + goto fail; + } + + args[i] = readval; + } + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (readval == 0x00) { + types[types_parsed - 1].result = 0; + } else if (readval == 0x01) { + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (!is_valid_valtype(readval)) { + PRERR(MSG_BAD("value type encoding", readval)); + goto fail; + } + + types[types_parsed - 1].result = readval; + } else { + PRERR(MSG_BAD("return values count", readval)); + goto fail; + } + } + + module->functypes_count = types_count; + module->functypes = types; + + return 0; + +fail: + PRERR("Couldn't parse function types section\n"); + + if (types) { + while (types_parsed) { + free(types[types_parsed - 1].arguments); + types_parsed--; + } + + free(types); + } + + return -1; +} + +int parse_function_section(FILE *handle, struct module *module) +{ + uint32_t funcs_count; + size_t malloc_size; + struct function *funcs = NULL; + uint32_t i; + uint32_t type_idx; + + if (leb_u32(handle, &funcs_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + malloc_size = sizeof(struct function); + + if (safe_mul(&malloc_size, funcs_count)) { + PRERR(MSG_BAD_SIZE); + goto fail; + } + + funcs = malloc(malloc_size); + + if (!funcs) { + PRERR(MSG_ALLOC_FAIL(malloc_size)); + goto fail; + } + + for (i = 0; i < funcs_count; i++) { + if (leb_u32(handle, &type_idx)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (type_idx >= module->functypes_count) { + PRERR("Nonexistent function type index used"); + goto fail; + } + + funcs[i].type = module->functypes + i; + } + + module->functions_count = funcs_count; + module->functions = funcs; + + return 0; + +fail: + PRERR("Couldn't parse functions section"); + + free(funcs); + + return -1; +} + +static int parse_memory_section(FILE *handle, struct module *module) +{ + // TODO: move limits parsing to separate function? + uint32_t memories_count; + int limits_type; + + if (leb_u32(handle, &memories_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (memories_count > 1) { + PRERR("More than one Wasm memory\n"); + goto fail; + } + + limits_type = fgetc(handle); + + if (limits_type == EOF) { + PRERR(MSG_EOF); + return -1; + } + + if (leb_u32(handle, &module->mem_min)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (limits_type == 0x00) { + module->memory_type = MEM_MIN; + } else if (limits_type == 0x01) { + if (leb_u32(handle, &module->mem_max)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + module->memory_type = MEM_MIN_MAX; + } else { + PRERR(MSG_BAD("limit type", limits_type)); + goto fail; + } + + return 0; + +fail: + module->mem_min = 0; + + return -1; +} + +static int parse_export_section(FILE *handle, struct module *module) +{ + int readval; + uint32_t exports_count; + size_t malloc_size; + struct export *exports = NULL; + uint32_t exports_parsed = 0; + uint32_t name_len; + char *name; + + if (leb_u32(handle, &exports_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + malloc_size = sizeof(struct export); + + if (safe_mul(&malloc_size, exports_count)) { + PRERR(MSG_BAD_SIZE); + goto fail; + } + + exports = malloc(malloc_size); + + if (!exports) { + PRERR(MSG_ALLOC_FAIL(malloc_size)); + goto fail; + } + + while (exports_parsed < exports_count) { + if (leb_u32(handle, &name_len)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + name = malloc(name_len + 1); + + if (!name) { + PRERR(MSG_ALLOC_FAIL(name_len + 1)); + goto fail; + } + + exports[exports_parsed].name = name; + /* Increment here, so that jump to fail: frees the name */ + exports_parsed++; + + if (fread(name, name_len, 1, handle) != 1) { + PRERR(MSG_EOF); + goto fail; + } + + name[name_len] = '\0'; + + readval = fgetc(handle); + + if (!is_valid_exportdesc(readval)) { + PRERR(MSG_BAD("exportdesc", readval)); + goto fail; + } + + exports[exports_parsed - 1].desc = readval; + + if (leb_u32(handle, &exports[exports_parsed - 1].idx)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + } + + module->exports_count = exports_count; + module->exports = exports; + + return 0; + +fail: + PRERR("Couldn't parse exports section\n"); + + if (exports) { + while (exports_parsed) { + free(exports[exports_parsed - 1].name); + exports_parsed--; + } + + free(exports); + } + + return -1; +} + +static int parse_function_code(FILE *handle, struct function *function, + struct module *module) +{ + int readval; + uint32_t locals_blocks; + uint32_t locals_count = 0; + char *locals = NULL, *tmp; + char *body = NULL; + uint32_t i; + uint32_t locals_in_block; + + if (leb_u32(handle, &locals_blocks)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + for (i = 0; i < locals_blocks; i++) { + if (leb_u32(handle, &locals_in_block)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (locals_count + (uint64_t) locals_in_block > UINT32_MAX) { + PRERR("Too many locals\n"); + goto fail; + } + + locals_count += locals_in_block; + + if (locals_in_block) { + tmp = realloc(locals, locals_count); + + if (!tmp) { + PRERR(MSG_ALLOC_FAIL(locals_count)); + goto fail; + } + + locals = tmp; + } + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (!is_valid_valtype(readval)) { + PRERR(MSG_BAD("value type encoding", readval)); + goto fail; + } + + while (locals_in_block) + locals[locals_count - locals_in_block--] = readval; + } + + function->translated_body = NULL; + + function->locals_count = locals_count; + function->locals = locals; + + if (translate(handle, function, module)) + goto fail; + + return 0; + +fail: + free(locals); + free(body); + + return -1; +} + +int parse_code_section(FILE *handle, struct module *module) +{ + uint32_t functions_count; + uint32_t functions_parsed = 0; + uint32_t function_size; + long function_start, function_end; + + if (leb_u32(handle, &functions_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (functions_count != module->functions_count) { + PRERR("Number of function bodies doesn't match number of functions\n"); + goto fail; + } + + while (functions_parsed < functions_count) { + if (leb_u32(handle, &function_size)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + function_start = ftell(handle); + + if (parse_function_code(handle, + module->functions + functions_parsed, + module)) { + PRERR("Couldn't parse code of function %lu\n", + (unsigned long) functions_parsed); + goto fail; + } + + function_end = ftell(handle); + + if (function_end - function_size != function_start) { + PRERR("Function %lu started at offset %ld and should end at %ld, but ended at %ld\n", + (unsigned long) functions_parsed, + function_start, function_end, + (long) (function_start + function_size)); + goto fail; + } + + functions_parsed++; + } + + return 0; + +fail: + PRERR("Couldn't parse code section\n"); + + while (functions_parsed) { + free(module->functions[functions_parsed - 1].locals); + free_expr(module->functions[functions_parsed - 1] + .translated_body); + + functions_parsed--; + } + + return -1; +} + +static const char magic[] = {0x00, 0x61, 0x73, 0x6D}; +static const char version[] = {0x01, 0x00, 0x00, 0x00}; + +struct module *parse_module(FILE *handle) +{ + char initial[8]; + struct module *module = NULL; + int section_id; + char highest_section_id = 0; + uint32_t section_size; + long section_start, section_end; + int (*section_parser) (FILE*, struct module*); + + if (fread(initial, 8, 1, handle) != 1) { + PRERR(MSG_EOF); + goto fail; + } + + /* check magic number */ + if (memcmp(initial, magic, 4)) { + PRERR("Bad magic number"); + goto fail; + } + + /* check version */ + if (memcmp(initial + 4, version, 4)) { + PRERR("Unsupported Wasm version: 0x%02hhx 0x%02hhx 0x%02hhx 0x%02hhx\n", + initial[4], initial[5], initial[6], initial[7]); + goto fail; + } + + module = calloc(1, sizeof(struct module)); + + if (!module) { + PRERR(MSG_ALLOC_FAIL(sizeof(struct module))); + goto fail; + } + + while (1) { + section_id = fgetc(handle); + + if (section_id == EOF) + break; + + if (leb_u32(handle, §ion_size)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + section_start = ftell(handle); + + if (section_id == SECTION_CUSTOM) + continue; + + /* Sections are only allowed to appear in order */ + if (section_id <= highest_section_id) { + PRERR("Sections out of order\n"); + goto fail; + } + + highest_section_id = section_id; + + if (section_id == SECTION_TYPE) { + section_parser = parse_type_section; + } else if (section_id == SECTION_FUNCTION) { + section_parser = parse_function_section; + } else if (section_id == SECTION_MEMORY) { + section_parser = parse_memory_section; + } else if (section_id == SECTION_EXPORT) { + section_parser = parse_export_section; + } else if (section_id == SECTION_CODE) { + section_parser = parse_code_section; + } else { + PRERR("Unknown section id: %d\n", section_id); + goto fail; + } + + if (section_parser(handle, module)) + goto fail; + + section_end = ftell(handle); + + if (section_end - section_size != section_start) { + PRERR("Section %d started at offset %ld and should end at %ld, but ended at %ld\n", + section_id, section_start, section_end, + (long) (section_start + section_size)); + goto fail; + } + } + + return module; + +fail: + PRERR("Parsing failed\n"); + + free_module(module); + + return NULL; +} |