aboutsummaryrefslogtreecommitdiff
path: root/tools/parse_module.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/parse_module.c')
-rw-r--r--tools/parse_module.c662
1 files changed, 662 insertions, 0 deletions
diff --git a/tools/parse_module.c b/tools/parse_module.c
new file mode 100644
index 0000000..85540af
--- /dev/null
+++ b/tools/parse_module.c
@@ -0,0 +1,662 @@
+// TODO: count read bytes ourselves instead of relying on ftell()
+#include "wasm_compile.h"
+#include "wasm.h"
+
+/*
+ * Check a byte against the value type encodings this tool accepts
+ * (i32, i64, f32, f64). Returns 1 on a match and 0 otherwise.
+ */
+static inline int is_valid_valtype(char value_type)
+{
+	if (value_type == VALTYPE_I32)
+		return 1;
+
+	if (value_type == VALTYPE_I64)
+		return 1;
+
+	if (value_type == VALTYPE_F32)
+		return 1;
+
+	if (value_type == VALTYPE_F64)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Check a byte against the export descriptor kinds this tool accepts
+ * (function, table, memory, global index). Returns 1 on a match and
+ * 0 otherwise.
+ */
+static inline int is_valid_exportdesc(char desc)
+{
+	if (desc == EXPORT_FUNCIDX)
+		return 1;
+
+	if (desc == EXPORT_TABLEIDX)
+		return 1;
+
+	if (desc == EXPORT_MEMIDX)
+		return 1;
+
+	if (desc == EXPORT_GLOBALIDX)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Read one LEB128-encoded unsigned 32-bit integer from handle.
+ *
+ * At most 5 bytes are consumed. Every byte contributes its low 7 bits,
+ * least significant group first; a set high bit announces another byte.
+ *
+ * On success the value is stored in *result and 0 is returned. -1 is
+ * returned (after reporting through PRERR) when fgetc() returns EOF, when
+ * the fifth byte still has its continuation bit set, or when the decoded
+ * value doesn't fit in 32 bits.
+ */
+int leb_u32(FILE *handle, uint32_t *result)
+{
+	int i, j;
+	int encoded[5];
+	uint64_t decoded;
+
+	for (i = 0; i < 5; i++) {
+		encoded[i] = fgetc(handle);
+
+		if (encoded[i] == EOF) {
+			PRERR(MSG_EOF);
+			return -1;
+		}
+
+		/*
+		 * fgetc() hands the byte back as an unsigned char converted
+		 * to int, so the continuation flag must be tested as bit 7;
+		 * a sign test would never see it.
+		 */
+		if (!(encoded[i] & 0x80))
+			break;
+
+		if (i == 4) {
+			PRERR(MSG_BAD_NUM_ENC);
+			return -1;
+		}
+	}
+
+	decoded = 0;
+
+	/* The last byte read is the most significant: fold from i down to 0 */
+	for (j = i; j >= 0; j--)
+		decoded = (decoded << 7) | (encoded[j] & 0x7F);
+
+	/* 5 groups of 7 bits can carry up to 35 bits */
+	if (decoded > UINT32_MAX) {
+		PRERR(MSG_BAD_NUM_ENC);
+		return -1;
+	}
+
+	*result = decoded;
+
+	return 0;
+}
+
+/*
+ * Release a module together with everything it owns: the argument list of
+ * every function type, the locals and translated body of every function,
+ * the name of every export, the three arrays themselves and finally the
+ * module structure. Passing NULL is allowed and does nothing.
+ */
+void free_module(struct module *module)
+{
+	size_t idx;
+
+	if (module == NULL)
+		return;
+
+	idx = 0;
+	while (idx < module->functypes_count) {
+		free(module->functypes[idx].arguments);
+		idx++;
+	}
+	free(module->functypes);
+
+	idx = 0;
+	while (idx < module->functions_count) {
+		free(module->functions[idx].locals);
+		free_expr(module->functions[idx].translated_body);
+		idx++;
+	}
+	free(module->functions);
+
+	idx = 0;
+	while (idx < module->exports_count) {
+		free(module->exports[idx].name);
+		idx++;
+	}
+	free(module->exports);
+
+	free(module);
+}
+
+/*
+ * Multiply *factor1 by factor2 in place, refusing results that don't fit
+ * in size_t.
+ *
+ * The check divides instead of multiplying in a wider type, so it holds
+ * whatever the width of size_t is (a 64-bit intermediate product could
+ * itself wrap when size_t is 64 bits wide).
+ *
+ * Returns 0 on success; on overflow reports MSG_SIZE_OVERFLOW, leaves
+ * *factor1 untouched and returns -1.
+ */
+static inline int safe_mul(size_t *factor1, uint32_t factor2)
+{
+	/* factor2 == 0 can't overflow and must not be used as a divisor */
+	if (factor2 && *factor1 > SIZE_MAX / factor2) {
+		PRERR(MSG_SIZE_OVERFLOW);
+		return -1;
+	}
+
+	*factor1 *= factor2;
+
+	return 0;
+}
+
+/*
+ * Parse the type section: a count followed by that many function types.
+ *
+ * Each function type is the byte 0x60, an argument count, one value type
+ * byte per argument, then a result count of 0 or 1 followed (for 1) by the
+ * result's value type byte. A type without a result gets result == 0.
+ *
+ * On success the array is stored in module->functypes (with its length in
+ * module->functypes_count) and 0 is returned. On failure everything
+ * allocated here is freed, module is left untouched and -1 is returned.
+ */
+int parse_type_section(FILE *handle, struct module *module)
+{
+	uint32_t types_count;
+	int readval;
+	size_t malloc_size;
+	struct functype *types = NULL;
+	uint32_t types_parsed = 0;
+	uint32_t args_count;
+	char *args;
+	uint32_t i;
+
+	if (leb_u32(handle, &types_count)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	malloc_size = sizeof(struct functype);
+
+	if (safe_mul(&malloc_size, types_count)) {
+		PRERR(MSG_BAD_SIZE);
+		goto fail;
+	}
+
+	types = malloc(malloc_size);
+
+	/*
+	 * malloc(0) is allowed to return NULL, so a NULL result only means
+	 * failure when some memory was actually requested.
+	 */
+	if (!types && malloc_size) {
+		PRERR(MSG_ALLOC_FAIL(malloc_size));
+		goto fail;
+	}
+
+	while (types_parsed < types_count) {
+		readval = fgetc(handle);
+
+		if (readval == EOF) {
+			PRERR(MSG_EOF);
+			goto fail;
+		}
+
+		if (readval != 0x60) {
+			PRERR(MSG_BAD("functype starting byte (0x60)",
+				      readval));
+			goto fail;
+		}
+
+		if (leb_u32(handle, &args_count)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		if (args_count) {
+			args = malloc(args_count);
+
+			if (!args) {
+				PRERR(MSG_ALLOC_FAIL(args_count));
+				goto fail;
+			}
+		} else {
+			args = NULL;
+		}
+
+		types[types_parsed].arguments_count = args_count;
+		types[types_parsed].arguments = args;
+		/* Increment here, so that jump to fail: frees the args */
+		types_parsed++;
+
+		for (i = 0; i < args_count; i++) {
+			readval = fgetc(handle);
+
+			if (readval == EOF) {
+				PRERR(MSG_EOF);
+				goto fail;
+			}
+
+			if (!is_valid_valtype(readval)) {
+				PRERR(MSG_BAD("value type encoding", readval));
+				goto fail;
+			}
+
+			args[i] = readval;
+		}
+
+		/* Number of results: only 0 or 1 is accepted */
+		readval = fgetc(handle);
+
+		if (readval == EOF) {
+			PRERR(MSG_EOF);
+			goto fail;
+		}
+
+		if (readval == 0x00) {
+			types[types_parsed - 1].result = 0;
+		} else if (readval == 0x01) {
+			readval = fgetc(handle);
+
+			if (readval == EOF) {
+				PRERR(MSG_EOF);
+				goto fail;
+			}
+
+			if (!is_valid_valtype(readval)) {
+				PRERR(MSG_BAD("value type encoding", readval));
+				goto fail;
+			}
+
+			types[types_parsed - 1].result = readval;
+		} else {
+			PRERR(MSG_BAD("return values count", readval));
+			goto fail;
+		}
+	}
+
+	module->functypes_count = types_count;
+	module->functypes = types;
+
+	return 0;
+
+fail:
+	PRERR("Couldn't parse function types section\n");
+
+	if (types) {
+		/* Only entries counted in types_parsed own an argument list */
+		while (types_parsed) {
+			free(types[types_parsed - 1].arguments);
+			types_parsed--;
+		}
+
+		free(types);
+	}
+
+	return -1;
+}
+
+/*
+ * Parse the function section: a count followed by one type index per
+ * function.
+ *
+ * Every index must refer to an entry of module->functypes. Each function
+ * gets its type pointer set and starts out with no locals and no translated
+ * body, so that free_module() is safe even if the code section is never
+ * parsed.
+ *
+ * On success the array is stored in module->functions (with its length in
+ * module->functions_count) and 0 is returned. On failure the array is freed,
+ * module is left untouched and -1 is returned.
+ */
+int parse_function_section(FILE *handle, struct module *module)
+{
+	uint32_t funcs_count;
+	size_t malloc_size;
+	struct function *funcs = NULL;
+	uint32_t i;
+	uint32_t type_idx;
+
+	if (leb_u32(handle, &funcs_count)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	malloc_size = sizeof(struct function);
+
+	if (safe_mul(&malloc_size, funcs_count)) {
+		PRERR(MSG_BAD_SIZE);
+		goto fail;
+	}
+
+	funcs = malloc(malloc_size);
+
+	/* malloc(0) may legitimately return NULL for an empty section */
+	if (!funcs && malloc_size) {
+		PRERR(MSG_ALLOC_FAIL(malloc_size));
+		goto fail;
+	}
+
+	for (i = 0; i < funcs_count; i++) {
+		if (leb_u32(handle, &type_idx)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		if (type_idx >= module->functypes_count) {
+			PRERR("Nonexistent function type index used\n");
+			goto fail;
+		}
+
+		/* The type is selected by the index just read, not by i */
+		funcs[i].type = module->functypes + type_idx;
+
+		/* Filled in later, when the function's code gets parsed */
+		funcs[i].locals_count = 0;
+		funcs[i].locals = NULL;
+		funcs[i].translated_body = NULL;
+	}
+
+	module->functions_count = funcs_count;
+	module->functions = funcs;
+
+	return 0;
+
+fail:
+	PRERR("Couldn't parse functions section\n");
+
+	free(funcs);
+
+	return -1;
+}
+
+/*
+ * Parse the memory section: a count (0 or 1 accepted) followed, for one
+ * memory, by its limits: a flag byte, the minimum and - when the flag is
+ * 0x01 - the maximum.
+ *
+ * On success module->mem_min, module->memory_type and (for flag 0x01)
+ * module->mem_max describe the memory and 0 is returned; with zero memories
+ * the module is left as it was. On failure module->mem_min is reset to 0
+ * and -1 is returned.
+ */
+static int parse_memory_section(FILE *handle, struct module *module)
+{
+	// TODO: move limits parsing to separate function?
+	uint32_t memories_count;
+	int limits_type;
+
+	if (leb_u32(handle, &memories_count)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	if (memories_count > 1) {
+		PRERR("More than one Wasm memory\n");
+		goto fail;
+	}
+
+	/* An empty vector is not followed by any limits */
+	if (memories_count == 0)
+		return 0;
+
+	limits_type = fgetc(handle);
+
+	if (limits_type == EOF) {
+		PRERR(MSG_EOF);
+		goto fail;
+	}
+
+	if (leb_u32(handle, &module->mem_min)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	if (limits_type == 0x00) {
+		module->memory_type = MEM_MIN;
+	} else if (limits_type == 0x01) {
+		if (leb_u32(handle, &module->mem_max)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		module->memory_type = MEM_MIN_MAX;
+	} else {
+		PRERR(MSG_BAD("limit type", limits_type));
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	PRERR("Couldn't parse memory section\n");
+
+	module->mem_min = 0;
+
+	return -1;
+}
+
+/*
+ * Parse the export section: a count followed by that many exports.
+ *
+ * Each export is a name (length, then that many bytes), a descriptor kind
+ * byte and an index. The name is stored as a freshly allocated,
+ * NUL-terminated string; an empty name is accepted.
+ *
+ * On success the array is stored in module->exports (with its length in
+ * module->exports_count) and 0 is returned. On failure everything allocated
+ * here is freed, module is left untouched and -1 is returned.
+ */
+static int parse_export_section(FILE *handle, struct module *module)
+{
+	int readval;
+	uint32_t exports_count;
+	size_t malloc_size;
+	struct export *exports = NULL;
+	uint32_t exports_parsed = 0;
+	uint32_t name_len;
+	size_t name_size;
+	char *name;
+
+	if (leb_u32(handle, &exports_count)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	malloc_size = sizeof(struct export);
+
+	if (safe_mul(&malloc_size, exports_count)) {
+		PRERR(MSG_BAD_SIZE);
+		goto fail;
+	}
+
+	exports = malloc(malloc_size);
+
+	/* malloc(0) may legitimately return NULL for an empty section */
+	if (!exports && malloc_size) {
+		PRERR(MSG_ALLOC_FAIL(malloc_size));
+		goto fail;
+	}
+
+	while (exports_parsed < exports_count) {
+		if (leb_u32(handle, &name_len)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		/*
+		 * Room for the terminator is needed on top of name_len;
+		 * refuse the one length for which the addition would wrap
+		 * and leave the buffer too small for the fread() below.
+		 */
+		if (name_len == UINT32_MAX) {
+			PRERR(MSG_SIZE_OVERFLOW);
+			goto fail;
+		}
+
+		name_size = (size_t) name_len + 1;
+
+		name = malloc(name_size);
+
+		if (!name) {
+			PRERR(MSG_ALLOC_FAIL(name_size));
+			goto fail;
+		}
+
+		exports[exports_parsed].name = name;
+		/* Increment here, so that jump to fail: frees the name */
+		exports_parsed++;
+
+		/* fread() returns 0 for a zero-sized item, so skip it then */
+		if (name_len && fread(name, name_len, 1, handle) != 1) {
+			PRERR(MSG_EOF);
+			goto fail;
+		}
+
+		name[name_len] = '\0';
+
+		readval = fgetc(handle);
+
+		if (readval == EOF) {
+			PRERR(MSG_EOF);
+			goto fail;
+		}
+
+		if (!is_valid_exportdesc(readval)) {
+			PRERR(MSG_BAD("exportdesc", readval));
+			goto fail;
+		}
+
+		exports[exports_parsed - 1].desc = readval;
+
+		if (leb_u32(handle, &exports[exports_parsed - 1].idx)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+	}
+
+	module->exports_count = exports_count;
+	module->exports = exports;
+
+	return 0;
+
+fail:
+	PRERR("Couldn't parse exports section\n");
+
+	if (exports) {
+		/* Only entries counted in exports_parsed own a name */
+		while (exports_parsed) {
+			free(exports[exports_parsed - 1].name);
+			exports_parsed--;
+		}
+
+		free(exports);
+	}
+
+	return -1;
+}
+
+/*
+ * Parse the code of one function: its local variable declarations followed
+ * by its body.
+ *
+ * Locals come as blocks of (count, value type); they are expanded into a
+ * flat array with one value type byte per local. The body is then handed to
+ * translate(), which is expected to fill in function->translated_body
+ * (NOTE(review): translate() is defined elsewhere - confirm its contract).
+ *
+ * On success function->locals and function->locals_count describe the
+ * locals and 0 is returned. On failure the locals are freed, function is
+ * left without locals and -1 is returned.
+ */
+static int parse_function_code(FILE *handle, struct function *function,
+			       struct module *module)
+{
+	int readval;
+	uint32_t locals_blocks;
+	uint32_t locals_count = 0;
+	uint32_t block_start;
+	char *locals = NULL, *tmp;
+	uint32_t i;
+	uint32_t locals_in_block;
+
+	/*
+	 * Put the function into a defined state right away, so that a
+	 * failure at any point leaves nothing indeterminate behind for
+	 * later cleanup code to free.
+	 */
+	function->translated_body = NULL;
+	function->locals_count = 0;
+	function->locals = NULL;
+
+	if (leb_u32(handle, &locals_blocks)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	for (i = 0; i < locals_blocks; i++) {
+		if (leb_u32(handle, &locals_in_block)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		/* Widen before adding, so the sum can't wrap around */
+		if (locals_count + (uint64_t) locals_in_block > UINT32_MAX) {
+			PRERR("Too many locals\n");
+			goto fail;
+		}
+
+		block_start = locals_count;
+		locals_count += locals_in_block;
+
+		if (locals_in_block) {
+			/* Keep the old pointer valid if realloc() fails */
+			tmp = realloc(locals, locals_count);
+
+			if (!tmp) {
+				PRERR(MSG_ALLOC_FAIL(locals_count));
+				goto fail;
+			}
+
+			locals = tmp;
+		}
+
+		/* The type byte is present even for an empty block */
+		readval = fgetc(handle);
+
+		if (readval == EOF) {
+			PRERR(MSG_EOF);
+			goto fail;
+		}
+
+		if (!is_valid_valtype(readval)) {
+			PRERR(MSG_BAD("value type encoding", readval));
+			goto fail;
+		}
+
+		/* Every local of this block shares the same type */
+		while (block_start < locals_count)
+			locals[block_start++] = readval;
+	}
+
+	function->locals_count = locals_count;
+	function->locals = locals;
+
+	if (translate(handle, function, module))
+		goto fail;
+
+	return 0;
+
+fail:
+	free(locals);
+
+	/* Don't leave a pointer to the buffer that has just been freed */
+	function->locals_count = 0;
+	function->locals = NULL;
+
+	return -1;
+}
+
+/*
+ * Parse the code section: a count followed by one size-prefixed code entry
+ * per function declared in the function section.
+ *
+ * The count has to equal module->functions_count, and each entry has to
+ * occupy exactly as many bytes as its size prefix says (checked through
+ * ftell()).
+ *
+ * Returns 0 on success. On failure the locals and translated bodies
+ * produced here are freed and their pointers reset to NULL - so a later
+ * free_module() doesn't free them again - and -1 is returned.
+ */
+int parse_code_section(FILE *handle, struct module *module)
+{
+	uint32_t functions_count;
+	uint32_t functions_parsed = 0;
+	uint32_t function_size;
+	long function_start, function_end;
+	struct function *function;
+	size_t i;
+
+	/*
+	 * Start every function off without locals and body, so that cleanup
+	 * (here or in free_module()) never meets an indeterminate pointer
+	 * in a function that wasn't reached.
+	 */
+	for (i = 0; i < module->functions_count; i++) {
+		module->functions[i].locals_count = 0;
+		module->functions[i].locals = NULL;
+		module->functions[i].translated_body = NULL;
+	}
+
+	if (leb_u32(handle, &functions_count)) {
+		PRERR(MSG_BAD_NUM);
+		goto fail;
+	}
+
+	if (functions_count != module->functions_count) {
+		PRERR("Number of function bodies doesn't match number of functions\n");
+		goto fail;
+	}
+
+	while (functions_parsed < functions_count) {
+		function = module->functions + functions_parsed;
+
+		if (leb_u32(handle, &function_size)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		function_start = ftell(handle);
+
+		if (parse_function_code(handle, function, module)) {
+			/*
+			 * The callee frees the locals it allocated before
+			 * reporting failure; make sure no pointer to them
+			 * is left in the function.
+			 */
+			function->locals_count = 0;
+			function->locals = NULL;
+
+			PRERR("Couldn't parse code of function %lu\n",
+			      (unsigned long) functions_parsed);
+			goto fail;
+		}
+
+		function_end = ftell(handle);
+
+		if (function_end - function_size != function_start) {
+			PRERR("Function %lu started at offset %ld and should end at %ld, but ended at %ld\n",
+			      (unsigned long) functions_parsed,
+			      function_start,
+			      (long) (function_start + function_size),
+			      function_end);
+			/* This one was parsed, so have it cleaned up as well */
+			functions_parsed++;
+			goto fail;
+		}
+
+		functions_parsed++;
+	}
+
+	return 0;
+
+fail:
+	PRERR("Couldn't parse code section\n");
+
+	while (functions_parsed) {
+		function = module->functions + (functions_parsed - 1);
+
+		free(function->locals);
+		function->locals = NULL;
+		function->locals_count = 0;
+
+		free_expr(function->translated_body);
+		function->translated_body = NULL;
+
+		functions_parsed--;
+	}
+
+	return -1;
+}
+
+/* The 8 bytes every supported module starts with: "\0asm", then version 1 */
+static const char magic[] = {0x00, 0x61, 0x73, 0x6D};
+static const char version[] = {0x01, 0x00, 0x00, 0x00};
+
+/*
+ * Parse a whole Wasm binary module from handle.
+ *
+ * After the magic number and version, sections are read until fgetc()
+ * returns EOF. Custom sections are skipped. All other sections have to
+ * appear in increasing id order, at most once each, and each has to occupy
+ * exactly as many bytes as its size field says (checked through ftell()).
+ * Only the type, function, memory, export and code sections are
+ * understood; any other id is an error.
+ *
+ * Returns a newly allocated module, to be released with free_module(), or
+ * NULL on failure (after reporting through PRERR).
+ */
+struct module *parse_module(FILE *handle)
+{
+	char initial[8];
+	struct module *module = NULL;
+	int section_id;
+	char highest_section_id = 0;
+	uint32_t section_size;
+	long section_start, section_end;
+	int (*section_parser) (FILE*, struct module*);
+
+	if (fread(initial, 8, 1, handle) != 1) {
+		PRERR(MSG_EOF);
+		goto fail;
+	}
+
+	/* check magic number */
+	if (memcmp(initial, magic, 4)) {
+		PRERR("Bad magic number\n");
+		goto fail;
+	}
+
+	/* check version */
+	if (memcmp(initial + 4, version, 4)) {
+		PRERR("Unsupported Wasm version: 0x%02hhx 0x%02hhx 0x%02hhx 0x%02hhx\n",
+		      initial[4], initial[5], initial[6], initial[7]);
+		goto fail;
+	}
+
+	/* Zeroed, so that every count and pointer starts out empty */
+	module = calloc(1, sizeof(struct module));
+
+	if (!module) {
+		PRERR(MSG_ALLOC_FAIL(sizeof(struct module)));
+		goto fail;
+	}
+
+	while (1) {
+		section_id = fgetc(handle);
+
+		/* Running out of input between sections ends the module */
+		if (section_id == EOF)
+			break;
+
+		if (leb_u32(handle, &section_size)) {
+			PRERR(MSG_BAD_NUM);
+			goto fail;
+		}
+
+		if (section_id == SECTION_CUSTOM) {
+			/*
+			 * Nothing in a custom section is used, but its
+			 * contents must still be consumed - otherwise they
+			 * would get interpreted as further sections.
+			 */
+			while (section_size) {
+				if (fgetc(handle) == EOF) {
+					PRERR(MSG_EOF);
+					goto fail;
+				}
+
+				section_size--;
+			}
+
+			continue;
+		}
+
+		section_start = ftell(handle);
+
+		/* Sections are only allowed to appear in order */
+		if (section_id <= highest_section_id) {
+			PRERR("Sections out of order\n");
+			goto fail;
+		}
+
+		highest_section_id = section_id;
+
+		if (section_id == SECTION_TYPE) {
+			section_parser = parse_type_section;
+		} else if (section_id == SECTION_FUNCTION) {
+			section_parser = parse_function_section;
+		} else if (section_id == SECTION_MEMORY) {
+			section_parser = parse_memory_section;
+		} else if (section_id == SECTION_EXPORT) {
+			section_parser = parse_export_section;
+		} else if (section_id == SECTION_CODE) {
+			section_parser = parse_code_section;
+		} else {
+			PRERR("Unknown section id: %d\n", section_id);
+			goto fail;
+		}
+
+		if (section_parser(handle, module))
+			goto fail;
+
+		section_end = ftell(handle);
+
+		if (section_end - section_size != section_start) {
+			PRERR("Section %d started at offset %ld and should end at %ld, but ended at %ld\n",
+			      section_id, section_start,
+			      (long) (section_start + section_size),
+			      section_end);
+			goto fail;
+		}
+	}
+
+	return module;
+
+fail:
+	PRERR("Parsing failed\n");
+
+	free_module(module);
+
+	return NULL;
+}