diff options
author | Wojciech Kosior <kwojtus@protonmail.com> | 2020-09-19 21:22:41 +0200 |
---|---|---|
committer | Wojciech Kosior <kwojtus@protonmail.com> | 2020-09-19 21:22:41 +0200 |
commit | 605ac7f13be9f597d3cf7ab3786b71f5cda8c9fa (patch) | |
tree | b3c7a166f33d9d1ff35cbc1ede202dd89903e81e | |
parent | 780f056e61323a41abcaf0dd53a44f99bcac197c (diff) | |
download | AGH-engineering-thesis-605ac7f13be9f597d3cf7ab3786b71f5cda8c9fa.tar.gz AGH-engineering-thesis-605ac7f13be9f597d3cf7ab3786b71f5cda8c9fa.zip |
initial work towards translating wasm to stack machine (with a provisional bench)
-rw-r--r-- | Makefile | 15 | ||||
-rw-r--r-- | Makefile.config | 16 | ||||
-rw-r--r-- | Makefile.test | 4 | ||||
-rw-r--r-- | Makefile.util | 12 | ||||
l--------- | tests/stack_machine_wasm_sub/Makefile | 1 | ||||
-rw-r--r-- | tests/stack_machine_wasm_sub/instructions.wat | 9 | ||||
l--------- | tests/stack_machine_wasm_sub/test.v | 1 | ||||
-rw-r--r-- | tests/stack_machine_wasm_sub/words_to_verify.mem | 3 | ||||
-rw-r--r-- | tools/Makefile | 18 | ||||
-rw-r--r-- | tools/Makefile.tools | 4 | ||||
-rw-r--r-- | tools/assemble.c | 217 | ||||
-rw-r--r-- | tools/parse_module.c | 662 | ||||
-rw-r--r-- | tools/stack_machine_instruction.h | 148 | ||||
-rw-r--r-- | tools/translate.c | 187 | ||||
-rw-r--r-- | tools/wasm.h | 22 | ||||
-rw-r--r-- | tools/wasm_compile.c | 72 | ||||
-rw-r--r-- | tools/wasm_compile.h | 76 |
17 files changed, 1448 insertions, 19 deletions
@@ -1,8 +1,6 @@ include Makefile.config include Makefile.util - -# Short C programs -TOOLS := VGAdump2ppm +include tools/Makefile.tools TEST_TARGETS := $(addprefix test_,$(shell ls tests)) @@ -55,14 +53,17 @@ quicktest : $(TEST_TARGETS) : test_% : $(MAKE) -C tests/$* + tools : $(TOOLS_TARGETS) -$(TOOLS_TARGETS) : tools/% : tools/%.c - $(CC) $(CFLAGS) $^ -o $@ +$(TOOLS_TARGETS) : tools/% : + $(MAKE) -C tools/ $* + clean : for TEST in tests/*; do $(MAKE) -C $$TEST clean >/dev/null; done - rm $(GENERATED_MEM_FILES) $(TOOLS_TARGETS) 2>/dev/null || true + rm $(GENERATED_MEM_FILES) 2>/dev/null || true + $(MAKE) -C tools/ clean >/dev/null rm $(addprefix design.,v json asc bin) timing.rpt 2>/dev/null || true -.PHONY : all tools test quicktest $(TEST_TARGETS) +.PHONY : all tools test quicktest $(TEST_TARGETS) $(TOOLS_TARGETS) diff --git a/Makefile.config b/Makefile.config index 5c619c3..a67e9f1 100644 --- a/Makefile.config +++ b/Makefile.config @@ -1,5 +1,14 @@ +# Uncomment this line when needed +#DEBUG=1 + CC = gcc -CFLAGS = -std=c89 -pedantic -Wall -Werror -O2 +CFLAGS = -std=c99 -pedantic -Wall -Werror -O2 + +ifdef DEBUG +CFLAGS += -O0 -g +else +CFLAGS += -O2 +endif IV = iverilog @@ -8,9 +17,8 @@ PNR = git-nextpnr-ice40 ICEPACK = git-icepack ICETIME = git-icetime +WAT2WASM = wat2wasm + TOPMODULE = soc PCF = design/pins.pcf - -# Uncomment this line when needed -#DEBUG=1 diff --git a/Makefile.test b/Makefile.test index fac2c14..46af49a 100644 --- a/Makefile.test +++ b/Makefile.test @@ -67,7 +67,7 @@ GENERATED_MEM_FILES += $(basename $(shell find . -name "*.memv")) GENERATED_MEM_FILES := $(addsuffix .mem,$(GENERATED_MEM_FILES)) clean : - rm $(GENERATED_MEM_FILES) *.vvp report.log VGAdump.mem VGAdump.ppm \ - 2>/dev/null || true + rm $(GENERATED_MEM_FILES) *.vvp *.wasm report.log VGAdump.mem \ + VGAdump.ppm 2>/dev/null || true .PHONY : test clean diff --git a/Makefile.util b/Makefile.util index 7bdc584..02ad5a3 100644 --- a/Makefile.util +++ b/Makefile.util @@ -1,10 +1,10 @@ FILE_LINES = `grep -E '^[[:space:]]*[^[:space:]/]' -c $(1)` -ifdef PROJ_DIR -vpath tclasm.tcl $(PROJ_DIR) -endif +%.mem : $(PROJ_DIR)/tclasm.tcl %.s.tcl + tclsh $^ > $@ -vpath tclasm.tcl . +%.mem : $(PROJ_DIR)/tools/wasm_compile %.wasm + $^ > $@ -%.mem : tclasm.tcl %.s.tcl - tclsh $^ > $@ +%.wasm : %.wat + $(WAT2WASM) $< -o $@ diff --git a/tests/stack_machine_wasm_sub/Makefile b/tests/stack_machine_wasm_sub/Makefile new file mode 120000 index 0000000..4673ba3 --- /dev/null +++ b/tests/stack_machine_wasm_sub/Makefile @@ -0,0 +1 @@ +../stack_machine_function_call/Makefile
\ No newline at end of file diff --git a/tests/stack_machine_wasm_sub/instructions.wat b/tests/stack_machine_wasm_sub/instructions.wat new file mode 100644 index 0000000..7579a46 --- /dev/null +++ b/tests/stack_machine_wasm_sub/instructions.wat @@ -0,0 +1,9 @@ +(module + (memory 0 2) + (func $sub (param $lhs i32) (param $rhs i32);; (result i32) + i32.const 0x17 + local.get $lhs + local.get $rhs + i32.sub + i32.store offset=0x25 align=2) + (export "main" (func $sub))) diff --git a/tests/stack_machine_wasm_sub/test.v b/tests/stack_machine_wasm_sub/test.v new file mode 120000 index 0000000..f5b6a59 --- /dev/null +++ b/tests/stack_machine_wasm_sub/test.v @@ -0,0 +1 @@ +../stack_machine_store/test.v
\ No newline at end of file diff --git a/tests/stack_machine_wasm_sub/words_to_verify.mem b/tests/stack_machine_wasm_sub/words_to_verify.mem new file mode 100644 index 0000000..bd24e95 --- /dev/null +++ b/tests/stack_machine_wasm_sub/words_to_verify.mem @@ -0,0 +1,3 @@ +// address value + 0FFFFC 23 + 23C 1E // Address is 0x200 + 0x25 + 0x17 diff --git a/tools/Makefile b/tools/Makefile new file mode 100644 index 0000000..8634a5c --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,18 @@ +include ../Makefile.config + +include Makefile.tools + +all : $(TOOLS) + +# I know, this can be done better... +%.o : *.h + +wasm_compile : wasm_compile.o assemble.o parse_module.o translate.o + +VGAdump2ppm : VGAdump2ppm.o + +clean : + rm *.o 2>/dev/null || true + find . -executable -type f -delete + +.PHONY : clean all diff --git a/tools/Makefile.tools b/tools/Makefile.tools new file mode 100644 index 0000000..5fc2b6c --- /dev/null +++ b/tools/Makefile.tools @@ -0,0 +1,4 @@ +# This variable definition has been put here, because it is used by +# toplevel Makefile as well as Makefile in tools/ + +TOOLS = wasm_compile VGAdump2ppm diff --git a/tools/assemble.c b/tools/assemble.c new file mode 100644 index 0000000..07416fd --- /dev/null +++ b/tools/assemble.c @@ -0,0 +1,217 @@ +#include "wasm_compile.h" +#include "wasm.h" +#include "stack_machine_instruction.h" + +/* instruction structs are connected in a circular list */ +void free_expr(struct instruction *expr) +{ + struct instruction *tmp; + + if (expr) { + tmp = expr->next; + while (tmp != expr) { + tmp = tmp->next; + free(tmp->prev); + } + + free(expr); + } +} + +/* instructions are stored in a circular list */ +int add_instruction(struct instruction **expr, uint16_t encoding, + struct instruction_data data) +{ + struct instruction *new; + + new = malloc(sizeof(struct instruction)); + + if (!new) { + PRERR(MSG_ALLOC_FAIL(sizeof(struct instruction))); + return -1; + } + + new->address_assigned = false; + new->encoding = encoding; + new->data = data; + + if (!*expr) { + new->next = new; + new->prev = new; + *expr = new; + } else { + new->next = *expr; + new->prev = (*expr)->prev; + (*expr)->prev->next = new; + (*expr)->prev = new; + } + + return 0; +} + +static uint8_t estimate_instruction_size(struct instruction *instruction) +{ + switch (instruction->data.info) { + case DATA_NONE : + return 2; + case DATA_KNOWN : + case DATA_UNKNOWN : + return 6; + case DATA_KNOWN_21_BITS : + return 4; + default /* case DATA_KNOWN_6_BITS */ : + return 2; + } +} + +static uint64_t estimate_expr_size(struct instruction *expr) +{ + struct instruction *tmp = expr; + uint64_t max_expr_size = 0; + + do { + max_expr_size += estimate_instruction_size(tmp); + tmp = tmp->next; + } while (tmp != expr); + + return max_expr_size; +} + +static void assign_addresses_and_sizes(struct instruction *expr, + uint32_t address) +{ + struct instruction *tmp = expr; + + do { + tmp->address = address; + tmp->address_assigned = true; + + if (tmp->data.info == DATA_UNKNOWN && + *tmp->data.data.ptr && + (*tmp->data.data.ptr)->address_assigned) + tmp->data = im((*tmp->data.data.ptr)->address); + + address += estimate_instruction_size(tmp); + tmp = tmp->next; + } while (tmp != expr); +} + +static uint16_t im_instruction(uint16_t payload) +{ + return payload | (((uint16_t) 1) << 15); +} + +static void encode_instruction(struct instruction *instruction, + uint16_t *memory) +{ + uint32_t im = 0; + uint16_t encoding = instruction->encoding; + uint16_t *dest = memory + instruction->address / 2; + + if (instruction->data.info != DATA_NONE) { + if (instruction->data.info == DATA_UNKNOWN) + im = (*instruction->data.data.ptr)->address; + else + im = instruction->data.data.im; + } + + switch (instruction->data.info) { + case DATA_UNKNOWN : + case DATA_KNOWN : + *(dest++) = im_instruction(im >> 22); + case DATA_KNOWN_21_BITS : + *(dest++) = im_instruction(im >> 7); + case DATA_KNOWN_6_BITS : + encoding |= (im & 0x7F); + } + + *dest = encoding; +} + +static void encode_expr(struct instruction *expr, uint16_t *memory) +{ + struct instruction *tmp = expr; + + do { + encode_instruction(tmp, memory); + tmp = tmp->next; + } while (tmp != expr); +} + +int assemble(uint32_t memory_size, uint16_t memory[memory_size], + struct module *module) +{ + uint32_t i; + struct function *main_function = NULL; + uint32_t current_address; + uint64_t function_size; + struct instruction *startup = NULL; + unsigned short startup_size; + int retval = -1; + + for (i = 0; i < module->exports_count; i++) { + if (module->exports[i].desc == EXPORT_FUNCIDX && + !strcmp(module->exports[i].name, "main")) { + main_function = module->functions + + module->exports[i].idx; + break; + } + } + + if (!main_function) { + PRERR("No 'main' function\n"); + goto fail; + } + + // For now we're passing 2 numbers to main(): 0x32 and 0x14. + // This is just a temporary way to check if our Wasm function + // actually does anything. In the end - main() will be + // a function, that doesn't require any arguments. + if (i_const(im(0x23), &startup) || + i_store(im(STACK_FRAME_BACKUP_ADDR), &startup) || + i_const(im(0x32), &startup) || + i_const(im(0x14), &startup) || + i_call (ptr(&main_function->translated_body), &startup) || + i_halt ( &startup)) + goto fail; + + startup_size = estimate_expr_size(startup); + + i = module->functions_count; + current_address = CODE_TOP_ADDR; + + while (i--) { + function_size = estimate_expr_size(module->functions[i] + .translated_body); + if (function_size > current_address - startup_size) { + PRERR("Not enough space for code\n"); + goto fail; + } + + current_address -= function_size; + module->functions[i].start_addr = current_address; + } + + i = module->functions_count; + while (i--) + assign_addresses_and_sizes(module->functions[i].translated_body, + module->functions[i].start_addr); + + assign_addresses_and_sizes(startup, 0x0); + + i = module->functions_count; + while (i--) + encode_expr(module->functions[i].translated_body, memory); + + encode_expr(startup, memory); + + retval = 0; + +fail: + if (retval) + PRERR("Couldn't assemble code for stack machine\n"); + + free_expr(startup); + + return retval; +} diff --git a/tools/parse_module.c b/tools/parse_module.c new file mode 100644 index 0000000..85540af --- /dev/null +++ b/tools/parse_module.c @@ -0,0 +1,662 @@ +// TODO: count read bytes ourselves instead of relying on ftell() +#include "wasm_compile.h" +#include "wasm.h" + +static inline int is_valid_valtype(char value_type) +{ + return + value_type == VALTYPE_I32 || + value_type == VALTYPE_I64 || + value_type == VALTYPE_F32 || + value_type == VALTYPE_F64; +} + +static inline int is_valid_exportdesc(char desc) +{ + return + desc == EXPORT_FUNCIDX || + desc == EXPORT_TABLEIDX || + desc == EXPORT_MEMIDX || + desc == EXPORT_GLOBALIDX; +} + +int leb_u32(FILE *handle, uint32_t *result) +{ + int i, j; + int encoded[5]; + uint64_t decoded; + + for (i = 0; i < 5; i++) { + encoded[i] = fgetc(handle); + + if (encoded[i] == EOF) { + PRERR(MSG_EOF); + return -1; + } + + if (encoded[i] >= 0) + break; + + if (i == 4) { + PRERR(MSG_BAD_NUM_ENC); + return -1; + } + } + + decoded = 0; + + for (j = i; j >= 0; j--) + decoded = (decoded << 7) | (encoded[i] & 0x7F); + + if (decoded > UINT32_MAX) { + PRERR(MSG_BAD_NUM_ENC); + return -1; + } + + *result = decoded; + + return 0; +} + +void free_module(struct module *module) +{ + size_t i; + + if (!module) + return; + + for (i = 0; i < module->functypes_count; i++) + free(module->functypes[i].arguments); + + free(module->functypes); + + for (i = 0; i < module->functions_count; i++) { + free(module->functions[i].locals); + free_expr(module->functions[i].translated_body); + } + + free(module->functions); + + for (i = 0; i < module->exports_count; i++) + free(module->exports[i].name); + + free(module->exports); + + free(module); +} + +/* Guard against overflows on 32-bit systems */ +static inline int safe_mul(size_t *factor1, uint32_t factor2) +{ + uint64_t product; + + product = *factor1; + product *= factor2; + + if (product > SIZE_MAX) { + PRERR(MSG_SIZE_OVERFLOW); + return -1; + } + + *factor1 = product; + + return 0; +} + +int parse_type_section(FILE *handle, struct module *module) +{ + uint32_t types_count; + int readval; + size_t malloc_size; + struct functype *types = NULL; + uint32_t types_parsed = 0; + uint32_t args_count; + char *args; + uint32_t i; + + if (leb_u32(handle, &types_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + malloc_size = sizeof(struct functype); + + if (safe_mul(&malloc_size, types_count)) { + PRERR(MSG_BAD_SIZE); + goto fail; + } + + types = malloc(malloc_size); + + if (!types) { + PRERR(MSG_ALLOC_FAIL(malloc_size)); + goto fail; + } + + while (types_parsed < types_count) { + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (readval != 0x60) { + PRERR(MSG_BAD("functype starting byte (0x60)", + readval)); + goto fail; + } + + if (leb_u32(handle, &args_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (args_count) { + args = malloc(args_count); + + if (!args) { + PRERR(MSG_ALLOC_FAIL(args_count)); + goto fail; + } + } else { + args = NULL; + } + + types[types_parsed].arguments_count = args_count; + types[types_parsed].arguments = args; + /* Increment here, so that jump to fail: frees the args */ + types_parsed++; + + for (i = 0; i < args_count; i++) { + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (!is_valid_valtype(readval)) { + PRERR(MSG_BAD("value type encoding", readval)); + goto fail; + } + + args[i] = readval; + } + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (readval == 0x00) { + types[types_parsed - 1].result = 0; + } else if (readval == 0x01) { + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (!is_valid_valtype(readval)) { + PRERR(MSG_BAD("value type encoding", readval)); + goto fail; + } + + types[types_parsed - 1].result = readval; + } else { + PRERR(MSG_BAD("return values count", readval)); + goto fail; + } + } + + module->functypes_count = types_count; + module->functypes = types; + + return 0; + +fail: + PRERR("Couldn't parse function types section\n"); + + if (types) { + while (types_parsed) { + free(types[types_parsed - 1].arguments); + types_parsed--; + } + + free(types); + } + + return -1; +} + +int parse_function_section(FILE *handle, struct module *module) +{ + uint32_t funcs_count; + size_t malloc_size; + struct function *funcs = NULL; + uint32_t i; + uint32_t type_idx; + + if (leb_u32(handle, &funcs_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + malloc_size = sizeof(struct function); + + if (safe_mul(&malloc_size, funcs_count)) { + PRERR(MSG_BAD_SIZE); + goto fail; + } + + funcs = malloc(malloc_size); + + if (!funcs) { + PRERR(MSG_ALLOC_FAIL(malloc_size)); + goto fail; + } + + for (i = 0; i < funcs_count; i++) { + if (leb_u32(handle, &type_idx)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (type_idx >= module->functypes_count) { + PRERR("Nonexistent function type index used"); + goto fail; + } + + funcs[i].type = module->functypes + i; + } + + module->functions_count = funcs_count; + module->functions = funcs; + + return 0; + +fail: + PRERR("Couldn't parse functions section"); + + free(funcs); + + return -1; +} + +static int parse_memory_section(FILE *handle, struct module *module) +{ + // TODO: move limits parsing to separate function? + uint32_t memories_count; + int limits_type; + + if (leb_u32(handle, &memories_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (memories_count > 1) { + PRERR("More than one Wasm memory\n"); + goto fail; + } + + limits_type = fgetc(handle); + + if (limits_type == EOF) { + PRERR(MSG_EOF); + return -1; + } + + if (leb_u32(handle, &module->mem_min)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (limits_type == 0x00) { + module->memory_type = MEM_MIN; + } else if (limits_type == 0x01) { + if (leb_u32(handle, &module->mem_max)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + module->memory_type = MEM_MIN_MAX; + } else { + PRERR(MSG_BAD("limit type", limits_type)); + goto fail; + } + + return 0; + +fail: + module->mem_min = 0; + + return -1; +} + +static int parse_export_section(FILE *handle, struct module *module) +{ + int readval; + uint32_t exports_count; + size_t malloc_size; + struct export *exports = NULL; + uint32_t exports_parsed = 0; + uint32_t name_len; + char *name; + + if (leb_u32(handle, &exports_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + malloc_size = sizeof(struct export); + + if (safe_mul(&malloc_size, exports_count)) { + PRERR(MSG_BAD_SIZE); + goto fail; + } + + exports = malloc(malloc_size); + + if (!exports) { + PRERR(MSG_ALLOC_FAIL(malloc_size)); + goto fail; + } + + while (exports_parsed < exports_count) { + if (leb_u32(handle, &name_len)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + name = malloc(name_len + 1); + + if (!name) { + PRERR(MSG_ALLOC_FAIL(name_len + 1)); + goto fail; + } + + exports[exports_parsed].name = name; + /* Increment here, so that jump to fail: frees the name */ + exports_parsed++; + + if (fread(name, name_len, 1, handle) != 1) { + PRERR(MSG_EOF); + goto fail; + } + + name[name_len] = '\0'; + + readval = fgetc(handle); + + if (!is_valid_exportdesc(readval)) { + PRERR(MSG_BAD("exportdesc", readval)); + goto fail; + } + + exports[exports_parsed - 1].desc = readval; + + if (leb_u32(handle, &exports[exports_parsed - 1].idx)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + } + + module->exports_count = exports_count; + module->exports = exports; + + return 0; + +fail: + PRERR("Couldn't parse exports section\n"); + + if (exports) { + while (exports_parsed) { + free(exports[exports_parsed - 1].name); + exports_parsed--; + } + + free(exports); + } + + return -1; +} + +static int parse_function_code(FILE *handle, struct function *function, + struct module *module) +{ + int readval; + uint32_t locals_blocks; + uint32_t locals_count = 0; + char *locals = NULL, *tmp; + char *body = NULL; + uint32_t i; + uint32_t locals_in_block; + + if (leb_u32(handle, &locals_blocks)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + for (i = 0; i < locals_blocks; i++) { + if (leb_u32(handle, &locals_in_block)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (locals_count + (uint64_t) locals_in_block > UINT32_MAX) { + PRERR("Too many locals\n"); + goto fail; + } + + locals_count += locals_in_block; + + if (locals_in_block) { + tmp = realloc(locals, locals_count); + + if (!tmp) { + PRERR(MSG_ALLOC_FAIL(locals_count)); + goto fail; + } + + locals = tmp; + } + + readval = fgetc(handle); + + if (readval == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + if (!is_valid_valtype(readval)) { + PRERR(MSG_BAD("value type encoding", readval)); + goto fail; + } + + while (locals_in_block) + locals[locals_count - locals_in_block--] = readval; + } + + function->translated_body = NULL; + + function->locals_count = locals_count; + function->locals = locals; + + if (translate(handle, function, module)) + goto fail; + + return 0; + +fail: + free(locals); + free(body); + + return -1; +} + +int parse_code_section(FILE *handle, struct module *module) +{ + uint32_t functions_count; + uint32_t functions_parsed = 0; + uint32_t function_size; + long function_start, function_end; + + if (leb_u32(handle, &functions_count)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (functions_count != module->functions_count) { + PRERR("Number of function bodies doesn't match number of functions\n"); + goto fail; + } + + while (functions_parsed < functions_count) { + if (leb_u32(handle, &function_size)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + function_start = ftell(handle); + + if (parse_function_code(handle, + module->functions + functions_parsed, + module)) { + PRERR("Couldn't parse code of function %lu\n", + (unsigned long) functions_parsed); + goto fail; + } + + function_end = ftell(handle); + + if (function_end - function_size != function_start) { + PRERR("Function %lu started at offset %ld and should end at %ld, but ended at %ld\n", + (unsigned long) functions_parsed, + function_start, function_end, + (long) (function_start + function_size)); + goto fail; + } + + functions_parsed++; + } + + return 0; + +fail: + PRERR("Couldn't parse code section\n"); + + while (functions_parsed) { + free(module->functions[functions_parsed - 1].locals); + free_expr(module->functions[functions_parsed - 1] + .translated_body); + + functions_parsed--; + } + + return -1; +} + +static const char magic[] = {0x00, 0x61, 0x73, 0x6D}; +static const char version[] = {0x01, 0x00, 0x00, 0x00}; + +struct module *parse_module(FILE *handle) +{ + char initial[8]; + struct module *module = NULL; + int section_id; + char highest_section_id = 0; + uint32_t section_size; + long section_start, section_end; + int (*section_parser) (FILE*, struct module*); + + if (fread(initial, 8, 1, handle) != 1) { + PRERR(MSG_EOF); + goto fail; + } + + /* check magic number */ + if (memcmp(initial, magic, 4)) { + PRERR("Bad magic number"); + goto fail; + } + + /* check version */ + if (memcmp(initial + 4, version, 4)) { + PRERR("Unsupported Wasm version: 0x%02hhx 0x%02hhx 0x%02hhx 0x%02hhx\n", + initial[4], initial[5], initial[6], initial[7]); + goto fail; + } + + module = calloc(1, sizeof(struct module)); + + if (!module) { + PRERR(MSG_ALLOC_FAIL(sizeof(struct module))); + goto fail; + } + + while (1) { + section_id = fgetc(handle); + + if (section_id == EOF) + break; + + if (leb_u32(handle, §ion_size)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + section_start = ftell(handle); + + if (section_id == SECTION_CUSTOM) + continue; + + /* Sections are only allowed to appear in order */ + if (section_id <= highest_section_id) { + PRERR("Sections out of order\n"); + goto fail; + } + + highest_section_id = section_id; + + if (section_id == SECTION_TYPE) { + section_parser = parse_type_section; + } else if (section_id == SECTION_FUNCTION) { + section_parser = parse_function_section; + } else if (section_id == SECTION_MEMORY) { + section_parser = parse_memory_section; + } else if (section_id == SECTION_EXPORT) { + section_parser = parse_export_section; + } else if (section_id == SECTION_CODE) { + section_parser = parse_code_section; + } else { + PRERR("Unknown section id: %d\n", section_id); + goto fail; + } + + if (section_parser(handle, module)) + goto fail; + + section_end = ftell(handle); + + if (section_end - section_size != section_start) { + PRERR("Section %d started at offset %ld and should end at %ld, but ended at %ld\n", + section_id, section_start, section_end, + (long) (section_start + section_size)); + goto fail; + } + } + + return module; + +fail: + PRERR("Parsing failed\n"); + + free_module(module); + + return NULL; +} diff --git a/tools/stack_machine_instruction.h b/tools/stack_machine_instruction.h new file mode 100644 index 0000000..5fd20fc --- /dev/null +++ b/tools/stack_machine_instruction.h @@ -0,0 +1,148 @@ +#include <stdbool.h> + +/* + * TODO: this enum is to be removed; it it only still here as a cheat sheet + * for use when defining missing inline functions for those instructions + */ +enum instruction_code { + STORE, + STORE_P, + STOREB, + STOREB_P, + STOREW, + STOREW_P, + LOAD, + LOAD_P, + LOADBZX, + LOADBZX_P, + LOADBSX, + LOADBSX_P, + LOADWZX, + LOADWZX_P, + LOADWSX, + LOADWSX_P, + HALT, + NOP, + SWAP, + SET_SP, + JUMP, + TEE, + GET_FRAME, + CONST, + CALL, + ADD, + SUB, + DIV, + MUL, + DROP, + RET, + COND_JUMP +}; + +#define DATA_NONE 0 +#define DATA_KNOWN 1 +#define DATA_KNOWN_21_BITS 2 +#define DATA_KNOWN_6_BITS 3 +#define DATA_UNKNOWN 4 + +struct instruction_data { + char info; + union { + uint32_t im; + struct instruction **ptr; + } data; +}; + +struct instruction { + struct instruction *prev, *next; + uint32_t address; + bool address_assigned; + uint16_t encoding; + struct instruction_data data; +}; + +#define NO_DATA ((struct instruction_data) { \ + .info = DATA_NONE \ + }) + +#define POW(n) (((int64_t) 1) << (n)) + +inline static uint8_t im_instruction_size(int32_t im) +{ + if (im < POW(6) && im >= -POW(6)) + return 2; + else if (im < POW(21) && im >= -POW(21)) + return 4; + else + return 6; +} + +inline static struct instruction_data im(uint32_t im) +{ + struct instruction_data data; + + data.data.im = im; + + uint8_t size = im_instruction_size(im); + + if (size == 2) + data.info = DATA_KNOWN_6_BITS; + else if (size == 4) + data.info = DATA_KNOWN_21_BITS; + else + data.info = DATA_KNOWN; + + return data; +} + +inline static struct instruction_data ptr(struct instruction **val) { + struct instruction_data data; + + data.info = DATA_UNKNOWN; + data.data.ptr = val; + + return data; +} + +int add_instruction(struct instruction **expr, uint16_t encoding, + struct instruction_data data); + +/* Define stack machine instructions, that take immediate operands */ +#define X(instr, encoding) \ + inline static int i_##instr(struct instruction_data data, \ + struct instruction **expr) \ + { \ + return add_instruction(expr, encoding, data); \ + } + +/* Define stack machine instructions, that *don't* take immediate operands */ +#define Y(instr, encoding) \ + inline static int i_##instr(struct instruction **expr) \ + { \ + return add_instruction(expr, encoding, NO_DATA); \ + } + +X(store, 0x7E00) /* 0111_1110_0xxx_xxxx */ +X(store_p, 0x6E00) /* 0110_1110_0xxx_xxxx */ +X(storeb_p, 0x6C00) /* 0110_1100_0xxx_xxxx */ +X(storew_p, 0x6D00) /* 0110_1101_0xxx_xxxx */ +X(load, 0x5E00) /* 0101_1110_0xxx_xxxx */ +X(load_p, 0x4E00) /* 0100_1110_0xxx_xxxx */ +X(loadbzx_p, 0x4C00) /* 0100_1100_0xxx_xxxx */ +X(loadbsx_p, 0x4C80) /* 0100_1100_1xxx_xxxx */ +X(loadwzx_p, 0x4D00) /* 0100_1101_0xxx_xxxx */ +X(loadwsx_p, 0x4D80) /* 0100_1101_1xxx_xxxx */ +Y(swap, 0x0002) /* 0000_0000_0000_0010 */ +X(set_sp, 0x4000) /* 0100_0000_0xxx_xxxx */ +X(jump, 0x4080) /* 0100_0000_1xxx_xxxx */ +Y(tee, 0x1000) /* 0001_0000_0000_0000 */ +Y(get_frame, 0x1001) /* 0001_0000_0000_0001 */ +X(const, 0x5000) /* 0101_0000_0xxx_xxxx */ +X(call, 0x5080) /* 0101_0000_1xxx_xxxx */ +Y(sub, 0x3001) /* 0011_0000_0000_0001 */ +Y(drop, 0x3004) /* 0011_0000_0000_0100 */ +Y(ret, 0x3080) /* 0011_0000_1000_0000 */ +Y(halt, 0x0000) /* 0000_0000_0000_0000 */ + +#undef X +#undef Y diff --git a/tools/translate.c b/tools/translate.c new file mode 100644 index 0000000..ee6ea99 --- /dev/null +++ b/tools/translate.c @@ -0,0 +1,187 @@ +#include "wasm_compile.h" +#include "stack_machine_instruction.h" + +/* WebAssembly opcodes */ +#define WASM_END 0x0B +#define WASM_LOCAL_GET 0x20 +#define WASM_I32_LOAD 0x28 +#define WASM_I32_LOAD8_S 0x2C +#define WASM_I32_LOAD8_U 0x2D +#define WASM_I32_LOAD16_S 0x2E +#define WASM_I32_LOAD16_U 0x2F +#define WASM_I32_STORE 0x36 +#define WASM_I32_STORE8 0x3A +#define WASM_I32_STORE16 0x3B +#define WASM_I64_STORE32 0x3E +#define WASM_I32_CONST 0x41 +#define WASM_I32_SUB 0x6B + +int translate(FILE *handle, struct function *function, struct module *module) +{ + struct instruction *expr = NULL; + uint32_t args_count = function->type->arguments_count; + uint32_t locals_count = function->locals_count; + uint32_t all_locals_count = args_count + locals_count; + size_t i; + int wasm_opcode; + int matched; + + if (locals_count + (uint64_t) args_count > STACK_TOP_ADDR * 4) { + PRERR("Too many locals in a function\n"); + goto fail; + } + + for (i = locals_count + 3; i; i--) { + if (i_const(im(0), &expr)) + goto fail; + } + + /* function prologue */ + if (i_get_frame( &expr) || + i_tee ( &expr) || + i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) || + i_store_p (im(0x0), &expr) || + i_store (im(STACK_FRAME_BACKUP_ADDR), &expr)) + goto fail; + + /* actual function body */ + i = 0; + while (1) { + matched = 0; + + wasm_opcode = fgetc(handle); + + if (wasm_opcode == EOF) { + PRERR(MSG_EOF); + goto fail; + } + + // TODO: make a function for each instruction type, + // call them through some table... + if (wasm_opcode == WASM_I32_SUB) { + if (i_sub(&expr)) + goto fail; + + matched = 1; + } else if (wasm_opcode <= WASM_I64_STORE32 && + wasm_opcode >= WASM_I32_LOAD) { + uint32_t align, offset; + + if (leb_u32(handle, &align) || + leb_u32(handle, &offset)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + offset += MEMORY_BOTTOM_ADDR; + + // TODO: rewrite it some cleaner way +#define TRY(opcode, instr) \ + if (wasm_opcode == opcode) { \ + matched = 1; \ + \ + if (i_##instr(im(offset), &expr)) \ + goto fail; \ + } + + TRY(WASM_I32_LOAD, load_p); + TRY(WASM_I32_LOAD8_S, loadbsx_p); + TRY(WASM_I32_LOAD8_U, loadbzx_p); + TRY(WASM_I32_LOAD16_S, loadwsx_p); + TRY(WASM_I32_LOAD16_U, loadwzx_p); + TRY(WASM_I32_STORE, store_p); + TRY(WASM_I32_STORE8, storeb_p); + TRY(WASM_I32_STORE16, storew_p); + } else if (wasm_opcode == WASM_LOCAL_GET) { + uint32_t localidx; + uint64_t offset_on_frame; + + if (leb_u32(handle, &localidx)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (localidx >= all_locals_count) { + PRERR(MSG_BAD_IDX("localidx")); + goto fail; + } + + offset_on_frame = all_locals_count - localidx + 1; + + if (localidx >= args_count) + offset_on_frame -= 1; + + if (i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) || + i_load_p(im(4 * offset_on_frame), &expr)) + goto fail; + + matched = 1; + } else if (wasm_opcode == WASM_I32_CONST) { + uint32_t constant; + + if (leb_u32(handle, &constant)) { + PRERR(MSG_BAD_NUM); + goto fail; + } + + if (i_const(im(constant), &expr)) + goto fail; + + matched = 1; + } else if (wasm_opcode == WASM_END) { + break; + } + + if (!matched) { + PRERR("Unknown Wasm opcode: %02x\n", wasm_opcode); + goto fail; + } + } + + /* function epilogue */ + if (function->type->result) { + if (i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) || + i_swap ( &expr) || + i_store_p(im(4 * (2 + all_locals_count - 1)), &expr) || + i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) || + i_tee ( &expr) || + i_tee ( &expr) || + i_load_p (im(4 * (1 + locals_count)), &expr) || + i_store_p(im(4 * (2 + all_locals_count - 2)), &expr) || + i_load_p (im(0), &expr) || + i_store (im(STACK_FRAME_BACKUP_ADDR), &expr)) + goto fail; + } else { + /* It's a bit shorter if we don't return anything */ + if (i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) || + i_tee ( &expr) || + i_tee ( &expr) || + i_load_p (im(4 * (1 + locals_count)), &expr) || + i_store_p(im(4 * (2 + all_locals_count - 1)), &expr) || + i_load_p (im(0), &expr) || + i_store (im(STACK_FRAME_BACKUP_ADDR), &expr)) + goto fail; + + } + + i = locals_count + args_count + 2 + (function->type->result ? 0 : 1); + + while (i--) { + if (i_drop(&expr)) + goto fail; + } + + if (i_ret(&expr)) + goto fail; + + function->translated_body = expr; + + return 0; + +fail: + PRERR("Couldn't translate function to stack machine\n"); + + free_expr(expr); + + return -1; +} diff --git a/tools/wasm.h b/tools/wasm.h new file mode 100644 index 0000000..fc6a910 --- /dev/null +++ b/tools/wasm.h @@ -0,0 +1,22 @@ +#define SECTION_CUSTOM 0 +#define SECTION_TYPE 1 +#define SECTION_IMPORT 2 +#define SECTION_FUNCTION 3 +#define SECTION_TABLE 4 +#define SECTION_MEMORY 5 +#define SECTION_GLOBAL 6 +#define SECTION_EXPORT 7 +#define SECTION_START 8 +#define SECTION_ELEMENT 9 +#define SECTION_CODE 10 +#define SECTION_DATA 11 + +#define VALTYPE_I32 0x7F +#define VALTYPE_I64 0x7E +#define VALTYPE_F32 0x7D +#define VALTYPE_F64 0x7C + +#define EXPORT_FUNCIDX 0x00 +#define EXPORT_TABLEIDX 0x01 +#define EXPORT_MEMIDX 0x02 +#define EXPORT_GLOBALIDX 0x03 diff --git a/tools/wasm_compile.c b/tools/wasm_compile.c new file mode 100644 index 0000000..64988c8 --- /dev/null +++ b/tools/wasm_compile.c @@ -0,0 +1,72 @@ +#include "wasm_compile.h" + +void print_instructions(uint32_t count, uint16_t instructions[count]) +{ + uint32_t i; + char binary[17]; + uint16_t instruction; + int j; + + binary[16] = '\0'; + + for (i = 0; i < count; i++) { + instruction = instructions[i]; + j = 16; + + while (j--) { + binary[j] = (instruction & 1) ? '1' : '0'; + instruction >>= 1; + } + + puts(binary); + } +} + +int main(int argc, char **argv) +{ + FILE *handle = NULL; + struct module *module = NULL; + uint16_t *translated_instructions = NULL; + char retval = -1; + + if (argc < 2) { + PRERR("Please provide name of the file to parse on the command line\n"); + goto fail; + } + + handle = fopen(argv[1], "r"); + + if (!handle) { + PRERR("Couldn't open '%s'\n", argv[1]); + goto fail; + } + + module = parse_module(handle); + + if (!module) + goto fail; + + translated_instructions = calloc(1, CODE_TOP_ADDR); + + if (!translated_instructions) { + PRERR(MSG_ALLOC_FAIL(CODE_TOP_ADDR)); + goto fail; + } + + if (assemble(CODE_TOP_ADDR / 2, translated_instructions, module)) + goto fail; + + print_instructions(CODE_TOP_ADDR / 2, translated_instructions); + + retval = 0; + +fail: + if (handle) + fclose(handle); + + free_module(module); + + free(translated_instructions); + + return retval; +} diff --git a/tools/wasm_compile.h b/tools/wasm_compile.h new file mode 100644 index 0000000..3412b2d --- /dev/null +++ b/tools/wasm_compile.h @@ -0,0 +1,76 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> + +#define STACK_FRAME_BACKUP_ADDR 0x0FFFFC +#define STACK_TOP_ADDR 0x0FFFFC +#define CODE_BOTTOM_ADDR 0x000000 +#define CODE_TOP_ADDR 0x000200 +#define MEMORY_BOTTOM_ADDR 0x000200 + +/* Error messages */ +#define MSG_SIZE_OVERFLOW "Number overflows size_t\n" +#define MSG_EOF "Unexpected end of bytes\n" +#define MSG_BAD_NUM_ENC "Improper number encoding\n" +#define MSG_BAD_NUM "Couldn't parse number\n" +#define MSG_BAD_SIZE "Couldn't compute size to allocate\n" +#define MSG_ALLOC_FAIL(n) "Failed to allocate %lu bytes\n", (unsigned long) (n) +#define MSG_BAD(nam, val) "Found 0x%02hhx instead of %s\n", (char) (val), nam +#define MSG_BYTES_REMAIN "Didn't use up all bytes when parsing\n" +#define MSG_SEEK_FAIL "Couldn't navigate the file\n" +#define MSG_BAD_IDX(nam) "Got %s out of range\n", nam + +#define PRERR(...) fprintf(stderr, __VA_ARGS__) + +struct functype { + uint32_t arguments_count; + char *arguments; + + char result; +}; + +struct function { + struct functype *type; + + uint32_t locals_count; + char *locals; + + struct instruction *translated_body; + uint32_t start_addr; +}; + +struct export { + char *name; + char desc; + uint32_t idx; +}; + +struct module { + uint32_t functypes_count; + struct functype *functypes; + uint32_t functions_count; + struct function *functions; + enum { + MEM_NONE = 0, + MEM_MIN, + MEM_MIN_MAX + } memory_type; + uint32_t mem_min, mem_max; + uint32_t exports_count; + struct export *exports; +}; + +int leb_u32(FILE *handle, uint32_t *result); + +void free_expr(struct instruction *expr); + +void free_module(struct module *module); + +struct module *parse_module(FILE *handle); + +int translate(FILE *handle, struct function *function, struct module *module); + +int assemble(uint32_t memory_size, uint16_t memory[memory_size], + struct module *module); |