aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojciech Kosior <kwojtus@protonmail.com>2020-09-19 21:22:41 +0200
committerWojciech Kosior <kwojtus@protonmail.com>2020-09-19 21:22:41 +0200
commit605ac7f13be9f597d3cf7ab3786b71f5cda8c9fa (patch)
treeb3c7a166f33d9d1ff35cbc1ede202dd89903e81e
parent780f056e61323a41abcaf0dd53a44f99bcac197c (diff)
downloadAGH-engineering-thesis-605ac7f13be9f597d3cf7ab3786b71f5cda8c9fa.tar.gz
AGH-engineering-thesis-605ac7f13be9f597d3cf7ab3786b71f5cda8c9fa.zip
initial work towards translating wasm to stack machine (with a provisional bench)
-rw-r--r--Makefile15
-rw-r--r--Makefile.config16
-rw-r--r--Makefile.test4
-rw-r--r--Makefile.util12
l---------tests/stack_machine_wasm_sub/Makefile1
-rw-r--r--tests/stack_machine_wasm_sub/instructions.wat9
l---------tests/stack_machine_wasm_sub/test.v1
-rw-r--r--tests/stack_machine_wasm_sub/words_to_verify.mem3
-rw-r--r--tools/Makefile18
-rw-r--r--tools/Makefile.tools4
-rw-r--r--tools/assemble.c217
-rw-r--r--tools/parse_module.c662
-rw-r--r--tools/stack_machine_instruction.h148
-rw-r--r--tools/translate.c187
-rw-r--r--tools/wasm.h22
-rw-r--r--tools/wasm_compile.c72
-rw-r--r--tools/wasm_compile.h76
17 files changed, 1448 insertions, 19 deletions
diff --git a/Makefile b/Makefile
index 677053a..d17a892 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,6 @@
include Makefile.config
include Makefile.util
-
-# Short C programs
-TOOLS := VGAdump2ppm
+include tools/Makefile.tools
TEST_TARGETS := $(addprefix test_,$(shell ls tests))
@@ -55,14 +53,17 @@ quicktest :
$(TEST_TARGETS) : test_% :
$(MAKE) -C tests/$*
+
tools : $(TOOLS_TARGETS)
-$(TOOLS_TARGETS) : tools/% : tools/%.c
- $(CC) $(CFLAGS) $^ -o $@
+$(TOOLS_TARGETS) : tools/% :
+ $(MAKE) -C tools/ $*
+
clean :
for TEST in tests/*; do $(MAKE) -C $$TEST clean >/dev/null; done
- rm $(GENERATED_MEM_FILES) $(TOOLS_TARGETS) 2>/dev/null || true
+ rm $(GENERATED_MEM_FILES) 2>/dev/null || true
+ $(MAKE) -C tools/ clean >/dev/null
rm $(addprefix design.,v json asc bin) timing.rpt 2>/dev/null || true
-.PHONY : all tools test quicktest $(TEST_TARGETS)
+.PHONY : all tools test quicktest $(TEST_TARGETS) $(TOOLS_TARGETS)
diff --git a/Makefile.config b/Makefile.config
index 5c619c3..a67e9f1 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -1,5 +1,14 @@
+# Uncomment this line when needed
+#DEBUG=1
+
CC = gcc
-CFLAGS = -std=c89 -pedantic -Wall -Werror -O2
+CFLAGS = -std=c99 -pedantic -Wall -Werror -O2
+
+ifdef DEBUG
+CFLAGS += -O0 -g
+else
+CFLAGS += -O2
+endif
IV = iverilog
@@ -8,9 +17,8 @@ PNR = git-nextpnr-ice40
ICEPACK = git-icepack
ICETIME = git-icetime
+WAT2WASM = wat2wasm
+
TOPMODULE = soc
PCF = design/pins.pcf
-
-# Uncomment this line when needed
-#DEBUG=1
diff --git a/Makefile.test b/Makefile.test
index fac2c14..46af49a 100644
--- a/Makefile.test
+++ b/Makefile.test
@@ -67,7 +67,7 @@ GENERATED_MEM_FILES += $(basename $(shell find . -name "*.memv"))
GENERATED_MEM_FILES := $(addsuffix .mem,$(GENERATED_MEM_FILES))
clean :
- rm $(GENERATED_MEM_FILES) *.vvp report.log VGAdump.mem VGAdump.ppm \
- 2>/dev/null || true
+ rm $(GENERATED_MEM_FILES) *.vvp *.wasm report.log VGAdump.mem \
+ VGAdump.ppm 2>/dev/null || true
.PHONY : test clean
diff --git a/Makefile.util b/Makefile.util
index 7bdc584..02ad5a3 100644
--- a/Makefile.util
+++ b/Makefile.util
@@ -1,10 +1,10 @@
FILE_LINES = `grep -E '^[[:space:]]*[^[:space:]/]' -c $(1)`
-ifdef PROJ_DIR
-vpath tclasm.tcl $(PROJ_DIR)
-endif
+%.mem : $(PROJ_DIR)/tclasm.tcl %.s.tcl
+ tclsh $^ > $@
-vpath tclasm.tcl .
+%.mem : $(PROJ_DIR)/tools/wasm_compile %.wasm
+ $^ > $@
-%.mem : tclasm.tcl %.s.tcl
- tclsh $^ > $@
+%.wasm : %.wat
+ $(WAT2WASM) $< -o $@
diff --git a/tests/stack_machine_wasm_sub/Makefile b/tests/stack_machine_wasm_sub/Makefile
new file mode 120000
index 0000000..4673ba3
--- /dev/null
+++ b/tests/stack_machine_wasm_sub/Makefile
@@ -0,0 +1 @@
+../stack_machine_function_call/Makefile \ No newline at end of file
diff --git a/tests/stack_machine_wasm_sub/instructions.wat b/tests/stack_machine_wasm_sub/instructions.wat
new file mode 100644
index 0000000..7579a46
--- /dev/null
+++ b/tests/stack_machine_wasm_sub/instructions.wat
@@ -0,0 +1,9 @@
+(module
+ (memory 0 2)
+ (func $sub (param $lhs i32) (param $rhs i32);; (result i32)
+ i32.const 0x17
+ local.get $lhs
+ local.get $rhs
+ i32.sub
+ i32.store offset=0x25 align=2)
+ (export "main" (func $sub)))
diff --git a/tests/stack_machine_wasm_sub/test.v b/tests/stack_machine_wasm_sub/test.v
new file mode 120000
index 0000000..f5b6a59
--- /dev/null
+++ b/tests/stack_machine_wasm_sub/test.v
@@ -0,0 +1 @@
+../stack_machine_store/test.v \ No newline at end of file
diff --git a/tests/stack_machine_wasm_sub/words_to_verify.mem b/tests/stack_machine_wasm_sub/words_to_verify.mem
new file mode 100644
index 0000000..bd24e95
--- /dev/null
+++ b/tests/stack_machine_wasm_sub/words_to_verify.mem
@@ -0,0 +1,3 @@
+// address value
+ 0FFFFC 23
+ 23C 1E // Address is 0x200 + 0x25 + 0x17
diff --git a/tools/Makefile b/tools/Makefile
new file mode 100644
index 0000000..8634a5c
--- /dev/null
+++ b/tools/Makefile
@@ -0,0 +1,18 @@
+include ../Makefile.config
+
+include Makefile.tools
+
+all : $(TOOLS)
+
+# I know, this can be done better...
+%.o : *.h
+
+wasm_compile : wasm_compile.o assemble.o parse_module.o translate.o
+
+VGAdump2ppm : VGAdump2ppm.o
+
+clean :
+ rm *.o 2>/dev/null || true
+ find . -executable -type f -delete
+
+.PHONY : clean all
diff --git a/tools/Makefile.tools b/tools/Makefile.tools
new file mode 100644
index 0000000..5fc2b6c
--- /dev/null
+++ b/tools/Makefile.tools
@@ -0,0 +1,4 @@
+# This variable definition has been put here, because it is used by
+# toplevel Makefile as well as Makefile in tools/
+
+TOOLS = wasm_compile VGAdump2ppm
diff --git a/tools/assemble.c b/tools/assemble.c
new file mode 100644
index 0000000..07416fd
--- /dev/null
+++ b/tools/assemble.c
@@ -0,0 +1,217 @@
+#include "wasm_compile.h"
+#include "wasm.h"
+#include "stack_machine_instruction.h"
+
+/* instruction structs are connected in a circular list */
+void free_expr(struct instruction *expr)
+{
+ struct instruction *tmp;
+
+ if (expr) {
+ tmp = expr->next;
+ while (tmp != expr) {
+ tmp = tmp->next;
+ free(tmp->prev);
+ }
+
+ free(expr);
+ }
+}
+
+/* instructions are stored in a circular list */
+int add_instruction(struct instruction **expr, uint16_t encoding,
+ struct instruction_data data)
+{
+ struct instruction *new;
+
+ new = malloc(sizeof(struct instruction));
+
+ if (!new) {
+ PRERR(MSG_ALLOC_FAIL(sizeof(struct instruction)));
+ return -1;
+ }
+
+ new->address_assigned = false;
+ new->encoding = encoding;
+ new->data = data;
+
+ if (!*expr) {
+ new->next = new;
+ new->prev = new;
+ *expr = new;
+ } else {
+ new->next = *expr;
+ new->prev = (*expr)->prev;
+ (*expr)->prev->next = new;
+ (*expr)->prev = new;
+ }
+
+ return 0;
+}
+
+static uint8_t estimate_instruction_size(struct instruction *instruction)
+{
+ switch (instruction->data.info) {
+ case DATA_NONE :
+ return 2;
+ case DATA_KNOWN :
+ case DATA_UNKNOWN :
+ return 6;
+ case DATA_KNOWN_21_BITS :
+ return 4;
+ default /* case DATA_KNOWN_6_BITS */ :
+ return 2;
+ }
+}
+
+static uint64_t estimate_expr_size(struct instruction *expr)
+{
+ struct instruction *tmp = expr;
+ uint64_t max_expr_size = 0;
+
+ do {
+ max_expr_size += estimate_instruction_size(tmp);
+ tmp = tmp->next;
+ } while (tmp != expr);
+
+ return max_expr_size;
+}
+
+static void assign_addresses_and_sizes(struct instruction *expr,
+ uint32_t address)
+{
+ struct instruction *tmp = expr;
+
+ do {
+ tmp->address = address;
+ tmp->address_assigned = true;
+
+ if (tmp->data.info == DATA_UNKNOWN &&
+ *tmp->data.data.ptr &&
+ (*tmp->data.data.ptr)->address_assigned)
+ tmp->data = im((*tmp->data.data.ptr)->address);
+
+ address += estimate_instruction_size(tmp);
+ tmp = tmp->next;
+ } while (tmp != expr);
+}
+
+static uint16_t im_instruction(uint16_t payload)
+{
+ return payload | (((uint16_t) 1) << 15);
+}
+
+static void encode_instruction(struct instruction *instruction,
+ uint16_t *memory)
+{
+ uint32_t im = 0;
+ uint16_t encoding = instruction->encoding;
+ uint16_t *dest = memory + instruction->address / 2;
+
+ if (instruction->data.info != DATA_NONE) {
+ if (instruction->data.info == DATA_UNKNOWN)
+ im = (*instruction->data.data.ptr)->address;
+ else
+ im = instruction->data.data.im;
+ }
+
+ switch (instruction->data.info) {
+ case DATA_UNKNOWN :
+ case DATA_KNOWN :
+ *(dest++) = im_instruction(im >> 22);
+ case DATA_KNOWN_21_BITS :
+ *(dest++) = im_instruction(im >> 7);
+ case DATA_KNOWN_6_BITS :
+ encoding |= (im & 0x7F);
+ }
+
+ *dest = encoding;
+}
+
+static void encode_expr(struct instruction *expr, uint16_t *memory)
+{
+ struct instruction *tmp = expr;
+
+ do {
+ encode_instruction(tmp, memory);
+ tmp = tmp->next;
+ } while (tmp != expr);
+}
+
+int assemble(uint32_t memory_size, uint16_t memory[memory_size],
+ struct module *module)
+{
+ uint32_t i;
+ struct function *main_function = NULL;
+ uint32_t current_address;
+ uint64_t function_size;
+ struct instruction *startup = NULL;
+ unsigned short startup_size;
+ int retval = -1;
+
+ for (i = 0; i < module->exports_count; i++) {
+ if (module->exports[i].desc == EXPORT_FUNCIDX &&
+ !strcmp(module->exports[i].name, "main")) {
+ main_function = module->functions +
+ module->exports[i].idx;
+ break;
+ }
+ }
+
+ if (!main_function) {
+ PRERR("No 'main' function\n");
+ goto fail;
+ }
+
+ // For now we're passing 2 numbers to main(): 0x32 and 0x14.
+ // This is just a temporary way to check if our Wasm function
+ // actually does anything. In the end - main() will be
+ // a function, that doesn't require any arguments.
+ if (i_const(im(0x23), &startup) ||
+ i_store(im(STACK_FRAME_BACKUP_ADDR), &startup) ||
+ i_const(im(0x32), &startup) ||
+ i_const(im(0x14), &startup) ||
+ i_call (ptr(&main_function->translated_body), &startup) ||
+ i_halt ( &startup))
+ goto fail;
+
+ startup_size = estimate_expr_size(startup);
+
+ i = module->functions_count;
+ current_address = CODE_TOP_ADDR;
+
+ while (i--) {
+ function_size = estimate_expr_size(module->functions[i]
+ .translated_body);
+ if (function_size > current_address - startup_size) {
+ PRERR("Not enough space for code\n");
+ goto fail;
+ }
+
+ current_address -= function_size;
+ module->functions[i].start_addr = current_address;
+ }
+
+ i = module->functions_count;
+ while (i--)
+ assign_addresses_and_sizes(module->functions[i].translated_body,
+ module->functions[i].start_addr);
+
+ assign_addresses_and_sizes(startup, 0x0);
+
+ i = module->functions_count;
+ while (i--)
+ encode_expr(module->functions[i].translated_body, memory);
+
+ encode_expr(startup, memory);
+
+ retval = 0;
+
+fail:
+ if (retval)
+ PRERR("Couldn't assemble code for stack machine\n");
+
+ free_expr(startup);
+
+ return retval;
+}
diff --git a/tools/parse_module.c b/tools/parse_module.c
new file mode 100644
index 0000000..85540af
--- /dev/null
+++ b/tools/parse_module.c
@@ -0,0 +1,662 @@
+// TODO: count read bytes ourselves instead of relying on ftell()
+#include "wasm_compile.h"
+#include "wasm.h"
+
+static inline int is_valid_valtype(char value_type)
+{
+ return
+ value_type == VALTYPE_I32 ||
+ value_type == VALTYPE_I64 ||
+ value_type == VALTYPE_F32 ||
+ value_type == VALTYPE_F64;
+}
+
+static inline int is_valid_exportdesc(char desc)
+{
+ return
+ desc == EXPORT_FUNCIDX ||
+ desc == EXPORT_TABLEIDX ||
+ desc == EXPORT_MEMIDX ||
+ desc == EXPORT_GLOBALIDX;
+}
+
+int leb_u32(FILE *handle, uint32_t *result)
+{
+ int i, j;
+ int encoded[5];
+ uint64_t decoded;
+
+ for (i = 0; i < 5; i++) {
+ encoded[i] = fgetc(handle);
+
+ if (encoded[i] == EOF) {
+ PRERR(MSG_EOF);
+ return -1;
+ }
+
+ if (encoded[i] >= 0)
+ break;
+
+ if (i == 4) {
+ PRERR(MSG_BAD_NUM_ENC);
+ return -1;
+ }
+ }
+
+ decoded = 0;
+
+ for (j = i; j >= 0; j--)
+ decoded = (decoded << 7) | (encoded[i] & 0x7F);
+
+ if (decoded > UINT32_MAX) {
+ PRERR(MSG_BAD_NUM_ENC);
+ return -1;
+ }
+
+ *result = decoded;
+
+ return 0;
+}
+
+void free_module(struct module *module)
+{
+ size_t i;
+
+ if (!module)
+ return;
+
+ for (i = 0; i < module->functypes_count; i++)
+ free(module->functypes[i].arguments);
+
+ free(module->functypes);
+
+ for (i = 0; i < module->functions_count; i++) {
+ free(module->functions[i].locals);
+ free_expr(module->functions[i].translated_body);
+ }
+
+ free(module->functions);
+
+ for (i = 0; i < module->exports_count; i++)
+ free(module->exports[i].name);
+
+ free(module->exports);
+
+ free(module);
+}
+
+/* Guard against overflows on 32-bit systems */
+static inline int safe_mul(size_t *factor1, uint32_t factor2)
+{
+ uint64_t product;
+
+ product = *factor1;
+ product *= factor2;
+
+ if (product > SIZE_MAX) {
+ PRERR(MSG_SIZE_OVERFLOW);
+ return -1;
+ }
+
+ *factor1 = product;
+
+ return 0;
+}
+
+int parse_type_section(FILE *handle, struct module *module)
+{
+ uint32_t types_count;
+ int readval;
+ size_t malloc_size;
+ struct functype *types = NULL;
+ uint32_t types_parsed = 0;
+ uint32_t args_count;
+ char *args;
+ uint32_t i;
+
+ if (leb_u32(handle, &types_count)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ malloc_size = sizeof(struct functype);
+
+ if (safe_mul(&malloc_size, types_count)) {
+ PRERR(MSG_BAD_SIZE);
+ goto fail;
+ }
+
+ types = malloc(malloc_size);
+
+ if (!types) {
+ PRERR(MSG_ALLOC_FAIL(malloc_size));
+ goto fail;
+ }
+
+ while (types_parsed < types_count) {
+ readval = fgetc(handle);
+
+ if (readval == EOF) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ if (readval != 0x60) {
+ PRERR(MSG_BAD("functype starting byte (0x60)",
+ readval));
+ goto fail;
+ }
+
+ if (leb_u32(handle, &args_count)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (args_count) {
+ args = malloc(args_count);
+
+ if (!args) {
+ PRERR(MSG_ALLOC_FAIL(args_count));
+ goto fail;
+ }
+ } else {
+ args = NULL;
+ }
+
+ types[types_parsed].arguments_count = args_count;
+ types[types_parsed].arguments = args;
+ /* Increment here, so that jump to fail: frees the args */
+ types_parsed++;
+
+ for (i = 0; i < args_count; i++) {
+ readval = fgetc(handle);
+
+ if (readval == EOF) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ if (!is_valid_valtype(readval)) {
+ PRERR(MSG_BAD("value type encoding", readval));
+ goto fail;
+ }
+
+ args[i] = readval;
+ }
+
+ readval = fgetc(handle);
+
+ if (readval == EOF) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ if (readval == 0x00) {
+ types[types_parsed - 1].result = 0;
+ } else if (readval == 0x01) {
+
+ readval = fgetc(handle);
+
+ if (readval == EOF) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ if (!is_valid_valtype(readval)) {
+ PRERR(MSG_BAD("value type encoding", readval));
+ goto fail;
+ }
+
+ types[types_parsed - 1].result = readval;
+ } else {
+ PRERR(MSG_BAD("return values count", readval));
+ goto fail;
+ }
+ }
+
+ module->functypes_count = types_count;
+ module->functypes = types;
+
+ return 0;
+
+fail:
+ PRERR("Couldn't parse function types section\n");
+
+ if (types) {
+ while (types_parsed) {
+ free(types[types_parsed - 1].arguments);
+ types_parsed--;
+ }
+
+ free(types);
+ }
+
+ return -1;
+}
+
+int parse_function_section(FILE *handle, struct module *module)
+{
+ uint32_t funcs_count;
+ size_t malloc_size;
+ struct function *funcs = NULL;
+ uint32_t i;
+ uint32_t type_idx;
+
+ if (leb_u32(handle, &funcs_count)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ malloc_size = sizeof(struct function);
+
+ if (safe_mul(&malloc_size, funcs_count)) {
+ PRERR(MSG_BAD_SIZE);
+ goto fail;
+ }
+
+ funcs = malloc(malloc_size);
+
+ if (!funcs) {
+ PRERR(MSG_ALLOC_FAIL(malloc_size));
+ goto fail;
+ }
+
+ for (i = 0; i < funcs_count; i++) {
+ if (leb_u32(handle, &type_idx)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (type_idx >= module->functypes_count) {
+ PRERR("Nonexistent function type index used");
+ goto fail;
+ }
+
+ funcs[i].type = module->functypes + i;
+ }
+
+ module->functions_count = funcs_count;
+ module->functions = funcs;
+
+ return 0;
+
+fail:
+ PRERR("Couldn't parse functions section");
+
+ free(funcs);
+
+ return -1;
+}
+
+static int parse_memory_section(FILE *handle, struct module *module)
+{
+ // TODO: move limits parsing to separate function?
+ uint32_t memories_count;
+ int limits_type;
+
+ if (leb_u32(handle, &memories_count)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (memories_count > 1) {
+ PRERR("More than one Wasm memory\n");
+ goto fail;
+ }
+
+ limits_type = fgetc(handle);
+
+ if (limits_type == EOF) {
+ PRERR(MSG_EOF);
+ return -1;
+ }
+
+ if (leb_u32(handle, &module->mem_min)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (limits_type == 0x00) {
+ module->memory_type = MEM_MIN;
+ } else if (limits_type == 0x01) {
+ if (leb_u32(handle, &module->mem_max)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ module->memory_type = MEM_MIN_MAX;
+ } else {
+ PRERR(MSG_BAD("limit type", limits_type));
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ module->mem_min = 0;
+
+ return -1;
+}
+
+static int parse_export_section(FILE *handle, struct module *module)
+{
+ int readval;
+ uint32_t exports_count;
+ size_t malloc_size;
+ struct export *exports = NULL;
+ uint32_t exports_parsed = 0;
+ uint32_t name_len;
+ char *name;
+
+ if (leb_u32(handle, &exports_count)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ malloc_size = sizeof(struct export);
+
+ if (safe_mul(&malloc_size, exports_count)) {
+ PRERR(MSG_BAD_SIZE);
+ goto fail;
+ }
+
+ exports = malloc(malloc_size);
+
+ if (!exports) {
+ PRERR(MSG_ALLOC_FAIL(malloc_size));
+ goto fail;
+ }
+
+ while (exports_parsed < exports_count) {
+ if (leb_u32(handle, &name_len)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ name = malloc(name_len + 1);
+
+ if (!name) {
+ PRERR(MSG_ALLOC_FAIL(name_len + 1));
+ goto fail;
+ }
+
+ exports[exports_parsed].name = name;
+ /* Increment here, so that jump to fail: frees the name */
+ exports_parsed++;
+
+ if (fread(name, name_len, 1, handle) != 1) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ name[name_len] = '\0';
+
+ readval = fgetc(handle);
+
+ if (!is_valid_exportdesc(readval)) {
+ PRERR(MSG_BAD("exportdesc", readval));
+ goto fail;
+ }
+
+ exports[exports_parsed - 1].desc = readval;
+
+ if (leb_u32(handle, &exports[exports_parsed - 1].idx)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+ }
+
+ module->exports_count = exports_count;
+ module->exports = exports;
+
+ return 0;
+
+fail:
+ PRERR("Couldn't parse exports section\n");
+
+ if (exports) {
+ while (exports_parsed) {
+ free(exports[exports_parsed - 1].name);
+ exports_parsed--;
+ }
+
+ free(exports);
+ }
+
+ return -1;
+}
+
+static int parse_function_code(FILE *handle, struct function *function,
+ struct module *module)
+{
+ int readval;
+ uint32_t locals_blocks;
+ uint32_t locals_count = 0;
+ char *locals = NULL, *tmp;
+ char *body = NULL;
+ uint32_t i;
+ uint32_t locals_in_block;
+
+ if (leb_u32(handle, &locals_blocks)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ for (i = 0; i < locals_blocks; i++) {
+ if (leb_u32(handle, &locals_in_block)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (locals_count + (uint64_t) locals_in_block > UINT32_MAX) {
+ PRERR("Too many locals\n");
+ goto fail;
+ }
+
+ locals_count += locals_in_block;
+
+ if (locals_in_block) {
+ tmp = realloc(locals, locals_count);
+
+ if (!tmp) {
+ PRERR(MSG_ALLOC_FAIL(locals_count));
+ goto fail;
+ }
+
+ locals = tmp;
+ }
+
+ readval = fgetc(handle);
+
+ if (readval == EOF) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ if (!is_valid_valtype(readval)) {
+ PRERR(MSG_BAD("value type encoding", readval));
+ goto fail;
+ }
+
+ while (locals_in_block)
+ locals[locals_count - locals_in_block--] = readval;
+ }
+
+ function->translated_body = NULL;
+
+ function->locals_count = locals_count;
+ function->locals = locals;
+
+ if (translate(handle, function, module))
+ goto fail;
+
+ return 0;
+
+fail:
+ free(locals);
+ free(body);
+
+ return -1;
+}
+
+int parse_code_section(FILE *handle, struct module *module)
+{
+ uint32_t functions_count;
+ uint32_t functions_parsed = 0;
+ uint32_t function_size;
+ long function_start, function_end;
+
+ if (leb_u32(handle, &functions_count)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (functions_count != module->functions_count) {
+ PRERR("Number of function bodies doesn't match number of functions\n");
+ goto fail;
+ }
+
+ while (functions_parsed < functions_count) {
+ if (leb_u32(handle, &function_size)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ function_start = ftell(handle);
+
+ if (parse_function_code(handle,
+ module->functions + functions_parsed,
+ module)) {
+ PRERR("Couldn't parse code of function %lu\n",
+ (unsigned long) functions_parsed);
+ goto fail;
+ }
+
+ function_end = ftell(handle);
+
+ if (function_end - function_size != function_start) {
+ PRERR("Function %lu started at offset %ld and should end at %ld, but ended at %ld\n",
+ (unsigned long) functions_parsed,
+ function_start, function_end,
+ (long) (function_start + function_size));
+ goto fail;
+ }
+
+ functions_parsed++;
+ }
+
+ return 0;
+
+fail:
+ PRERR("Couldn't parse code section\n");
+
+ while (functions_parsed) {
+ free(module->functions[functions_parsed - 1].locals);
+ free_expr(module->functions[functions_parsed - 1]
+ .translated_body);
+
+ functions_parsed--;
+ }
+
+ return -1;
+}
+
+static const char magic[] = {0x00, 0x61, 0x73, 0x6D};
+static const char version[] = {0x01, 0x00, 0x00, 0x00};
+
+struct module *parse_module(FILE *handle)
+{
+ char initial[8];
+ struct module *module = NULL;
+ int section_id;
+ char highest_section_id = 0;
+ uint32_t section_size;
+ long section_start, section_end;
+ int (*section_parser) (FILE*, struct module*);
+
+ if (fread(initial, 8, 1, handle) != 1) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ /* check magic number */
+ if (memcmp(initial, magic, 4)) {
+ PRERR("Bad magic number");
+ goto fail;
+ }
+
+ /* check version */
+ if (memcmp(initial + 4, version, 4)) {
+ PRERR("Unsupported Wasm version: 0x%02hhx 0x%02hhx 0x%02hhx 0x%02hhx\n",
+ initial[4], initial[5], initial[6], initial[7]);
+ goto fail;
+ }
+
+ module = calloc(1, sizeof(struct module));
+
+ if (!module) {
+ PRERR(MSG_ALLOC_FAIL(sizeof(struct module)));
+ goto fail;
+ }
+
+ while (1) {
+ section_id = fgetc(handle);
+
+ if (section_id == EOF)
+ break;
+
+ if (leb_u32(handle, &section_size)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ section_start = ftell(handle);
+
+ if (section_id == SECTION_CUSTOM)
+ continue;
+
+ /* Sections are only allowed to appear in order */
+ if (section_id <= highest_section_id) {
+ PRERR("Sections out of order\n");
+ goto fail;
+ }
+
+ highest_section_id = section_id;
+
+ if (section_id == SECTION_TYPE) {
+ section_parser = parse_type_section;
+ } else if (section_id == SECTION_FUNCTION) {
+ section_parser = parse_function_section;
+ } else if (section_id == SECTION_MEMORY) {
+ section_parser = parse_memory_section;
+ } else if (section_id == SECTION_EXPORT) {
+ section_parser = parse_export_section;
+ } else if (section_id == SECTION_CODE) {
+ section_parser = parse_code_section;
+ } else {
+ PRERR("Unknown section id: %d\n", section_id);
+ goto fail;
+ }
+
+ if (section_parser(handle, module))
+ goto fail;
+
+ section_end = ftell(handle);
+
+ if (section_end - section_size != section_start) {
+ PRERR("Section %d started at offset %ld and should end at %ld, but ended at %ld\n",
+ section_id, section_start, section_end,
+ (long) (section_start + section_size));
+ goto fail;
+ }
+ }
+
+ return module;
+
+fail:
+ PRERR("Parsing failed\n");
+
+ free_module(module);
+
+ return NULL;
+}
diff --git a/tools/stack_machine_instruction.h b/tools/stack_machine_instruction.h
new file mode 100644
index 0000000..5fd20fc
--- /dev/null
+++ b/tools/stack_machine_instruction.h
@@ -0,0 +1,148 @@
+#include <stdbool.h>
+
+/*
+ * TODO: this enum is to be removed; it it only still here as a cheat sheet
+ * for use when defining missing inline functions for those instructions
+ */
+enum instruction_code {
+ STORE,
+ STORE_P,
+ STOREB,
+ STOREB_P,
+ STOREW,
+ STOREW_P,
+ LOAD,
+ LOAD_P,
+ LOADBZX,
+ LOADBZX_P,
+ LOADBSX,
+ LOADBSX_P,
+ LOADWZX,
+ LOADWZX_P,
+ LOADWSX,
+ LOADWSX_P,
+ HALT,
+ NOP,
+ SWAP,
+ SET_SP,
+ JUMP,
+ TEE,
+ GET_FRAME,
+ CONST,
+ CALL,
+ ADD,
+ SUB,
+ DIV,
+ MUL,
+ DROP,
+ RET,
+ COND_JUMP
+};
+
+#define DATA_NONE 0
+#define DATA_KNOWN 1
+#define DATA_KNOWN_21_BITS 2
+#define DATA_KNOWN_6_BITS 3
+#define DATA_UNKNOWN 4
+
+struct instruction_data {
+ char info;
+ union {
+ uint32_t im;
+ struct instruction **ptr;
+ } data;
+};
+
+struct instruction {
+ struct instruction *prev, *next;
+ uint32_t address;
+ bool address_assigned;
+ uint16_t encoding;
+ struct instruction_data data;
+};
+
+#define NO_DATA ((struct instruction_data) { \
+ .info = DATA_NONE \
+ })
+
+#define POW(n) (((int64_t) 1) << (n))
+
+inline static uint8_t im_instruction_size(int32_t im)
+{
+ if (im < POW(6) && im >= -POW(6))
+ return 2;
+ else if (im < POW(21) && im >= -POW(21))
+ return 4;
+ else
+ return 6;
+}
+
+inline static struct instruction_data im(uint32_t im)
+{
+ struct instruction_data data;
+
+ data.data.im = im;
+
+ uint8_t size = im_instruction_size(im);
+
+ if (size == 2)
+ data.info = DATA_KNOWN_6_BITS;
+ else if (size == 4)
+ data.info = DATA_KNOWN_21_BITS;
+ else
+ data.info = DATA_KNOWN;
+
+ return data;
+}
+
+inline static struct instruction_data ptr(struct instruction **val) {
+ struct instruction_data data;
+
+ data.info = DATA_UNKNOWN;
+ data.data.ptr = val;
+
+ return data;
+}
+
+int add_instruction(struct instruction **expr, uint16_t encoding,
+ struct instruction_data data);
+
+/* Define stack machine instructions, that take immediate operands */
+#define X(instr, encoding) \
+ inline static int i_##instr(struct instruction_data data, \
+ struct instruction **expr) \
+ { \
+ return add_instruction(expr, encoding, data); \
+ }
+
+/* Define stack machine instructions, that *don't* take immediate operands */
+#define Y(instr, encoding) \
+ inline static int i_##instr(struct instruction **expr) \
+ { \
+ return add_instruction(expr, encoding, NO_DATA); \
+ }
+
+X(store, 0x7E00) /* 0111_1110_0xxx_xxxx */
+X(store_p, 0x6E00) /* 0110_1110_0xxx_xxxx */
+X(storeb_p, 0x6C00) /* 0110_1100_0xxx_xxxx */
+X(storew_p, 0x6D00) /* 0110_1101_0xxx_xxxx */
+X(load, 0x5E00) /* 0101_1110_0xxx_xxxx */
+X(load_p, 0x4E00) /* 0100_1110_0xxx_xxxx */
+X(loadbzx_p, 0x4C00) /* 0100_1100_0xxx_xxxx */
+X(loadbsx_p, 0x4C80) /* 0100_1100_1xxx_xxxx */
+X(loadwzx_p, 0x4D00) /* 0100_1101_0xxx_xxxx */
+X(loadwsx_p, 0x4D80) /* 0100_1101_1xxx_xxxx */
+Y(swap, 0x0002) /* 0000_0000_0000_0010 */
+X(set_sp, 0x4000) /* 0100_0000_0xxx_xxxx */
+X(jump, 0x4080) /* 0100_0000_1xxx_xxxx */
+Y(tee, 0x1000) /* 0001_0000_0000_0000 */
+Y(get_frame, 0x1001) /* 0001_0000_0000_0001 */
+X(const, 0x5000) /* 0101_0000_0xxx_xxxx */
+X(call, 0x5080) /* 0101_0000_1xxx_xxxx */
+Y(sub, 0x3001) /* 0011_0000_0000_0001 */
+Y(drop, 0x3004) /* 0011_0000_0000_0100 */
+Y(ret, 0x3080) /* 0011_0000_1000_0000 */
+Y(halt, 0x0000) /* 0000_0000_0000_0000 */
+
+#undef X
+#undef Y
diff --git a/tools/translate.c b/tools/translate.c
new file mode 100644
index 0000000..ee6ea99
--- /dev/null
+++ b/tools/translate.c
@@ -0,0 +1,187 @@
+#include "wasm_compile.h"
+#include "stack_machine_instruction.h"
+
+/* WebAssembly opcodes */
+#define WASM_END 0x0B
+#define WASM_LOCAL_GET 0x20
+#define WASM_I32_LOAD 0x28
+#define WASM_I32_LOAD8_S 0x2C
+#define WASM_I32_LOAD8_U 0x2D
+#define WASM_I32_LOAD16_S 0x2E
+#define WASM_I32_LOAD16_U 0x2F
+#define WASM_I32_STORE 0x36
+#define WASM_I32_STORE8 0x3A
+#define WASM_I32_STORE16 0x3B
+#define WASM_I64_STORE32 0x3E
+#define WASM_I32_CONST 0x41
+#define WASM_I32_SUB 0x6B
+
+int translate(FILE *handle, struct function *function, struct module *module)
+{
+ struct instruction *expr = NULL;
+ uint32_t args_count = function->type->arguments_count;
+ uint32_t locals_count = function->locals_count;
+ uint32_t all_locals_count = args_count + locals_count;
+ size_t i;
+ int wasm_opcode;
+ int matched;
+
+ if (locals_count + (uint64_t) args_count > STACK_TOP_ADDR * 4) {
+ PRERR("Too many locals in a function\n");
+ goto fail;
+ }
+
+ for (i = locals_count + 3; i; i--) {
+ if (i_const(im(0), &expr))
+ goto fail;
+ }
+
+ /* function prologue */
+ if (i_get_frame( &expr) ||
+ i_tee ( &expr) ||
+ i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) ||
+ i_store_p (im(0x0), &expr) ||
+ i_store (im(STACK_FRAME_BACKUP_ADDR), &expr))
+ goto fail;
+
+ /* actual function body */
+ i = 0;
+ while (1) {
+ matched = 0;
+
+ wasm_opcode = fgetc(handle);
+
+ if (wasm_opcode == EOF) {
+ PRERR(MSG_EOF);
+ goto fail;
+ }
+
+ // TODO: make a function for each instruction type,
+ // call them through some table...
+ if (wasm_opcode == WASM_I32_SUB) {
+ if (i_sub(&expr))
+ goto fail;
+
+ matched = 1;
+ } else if (wasm_opcode <= WASM_I64_STORE32 &&
+ wasm_opcode >= WASM_I32_LOAD) {
+ uint32_t align, offset;
+
+ if (leb_u32(handle, &align) ||
+ leb_u32(handle, &offset)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ offset += MEMORY_BOTTOM_ADDR;
+
+ // TODO: rewrite it some cleaner way
+#define TRY(opcode, instr) \
+ if (wasm_opcode == opcode) { \
+ matched = 1; \
+ \
+ if (i_##instr(im(offset), &expr)) \
+ goto fail; \
+ }
+
+ TRY(WASM_I32_LOAD, load_p);
+ TRY(WASM_I32_LOAD8_S, loadbsx_p);
+ TRY(WASM_I32_LOAD8_U, loadbzx_p);
+ TRY(WASM_I32_LOAD16_S, loadwsx_p);
+ TRY(WASM_I32_LOAD16_U, loadwzx_p);
+ TRY(WASM_I32_STORE, store_p);
+ TRY(WASM_I32_STORE8, storeb_p);
+ TRY(WASM_I32_STORE16, storew_p);
+ } else if (wasm_opcode == WASM_LOCAL_GET) {
+ uint32_t localidx;
+ uint64_t offset_on_frame;
+
+ if (leb_u32(handle, &localidx)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (localidx >= all_locals_count) {
+ PRERR(MSG_BAD_IDX("localidx"));
+ goto fail;
+ }
+
+ offset_on_frame = all_locals_count - localidx + 1;
+
+ if (localidx >= args_count)
+ offset_on_frame -= 1;
+
+ if (i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) ||
+ i_load_p(im(4 * offset_on_frame), &expr))
+ goto fail;
+
+ matched = 1;
+ } else if (wasm_opcode == WASM_I32_CONST) {
+ uint32_t constant;
+
+ if (leb_u32(handle, &constant)) {
+ PRERR(MSG_BAD_NUM);
+ goto fail;
+ }
+
+ if (i_const(im(constant), &expr))
+ goto fail;
+
+ matched = 1;
+ } else if (wasm_opcode == WASM_END) {
+ break;
+ }
+
+ if (!matched) {
+ PRERR("Unknown Wasm opcode: %02x\n", wasm_opcode);
+ goto fail;
+ }
+ }
+
+ /* function epilogue */
+ if (function->type->result) {
+ if (i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) ||
+ i_swap ( &expr) ||
+ i_store_p(im(4 * (2 + all_locals_count - 1)), &expr) ||
+ i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) ||
+ i_tee ( &expr) ||
+ i_tee ( &expr) ||
+ i_load_p (im(4 * (1 + locals_count)), &expr) ||
+ i_store_p(im(4 * (2 + all_locals_count - 2)), &expr) ||
+ i_load_p (im(0), &expr) ||
+ i_store (im(STACK_FRAME_BACKUP_ADDR), &expr))
+ goto fail;
+ } else {
+ /* It's a bit shorter if we don't return anything */
+ if (i_load (im(STACK_FRAME_BACKUP_ADDR), &expr) ||
+ i_tee ( &expr) ||
+ i_tee ( &expr) ||
+ i_load_p (im(4 * (1 + locals_count)), &expr) ||
+ i_store_p(im(4 * (2 + all_locals_count - 1)), &expr) ||
+ i_load_p (im(0), &expr) ||
+ i_store (im(STACK_FRAME_BACKUP_ADDR), &expr))
+ goto fail;
+
+ }
+
+ i = locals_count + args_count + 2 + (function->type->result ? 0 : 1);
+
+ while (i--) {
+ if (i_drop(&expr))
+ goto fail;
+ }
+
+ if (i_ret(&expr))
+ goto fail;
+
+ function->translated_body = expr;
+
+ return 0;
+
+fail:
+ PRERR("Couldn't translate function to stack machine\n");
+
+ free_expr(expr);
+
+ return -1;
+}
diff --git a/tools/wasm.h b/tools/wasm.h
new file mode 100644
index 0000000..fc6a910
--- /dev/null
+++ b/tools/wasm.h
@@ -0,0 +1,22 @@
+#define SECTION_CUSTOM 0
+#define SECTION_TYPE 1
+#define SECTION_IMPORT 2
+#define SECTION_FUNCTION 3
+#define SECTION_TABLE 4
+#define SECTION_MEMORY 5
+#define SECTION_GLOBAL 6
+#define SECTION_EXPORT 7
+#define SECTION_START 8
+#define SECTION_ELEMENT 9
+#define SECTION_CODE 10
+#define SECTION_DATA 11
+
+#define VALTYPE_I32 0x7F
+#define VALTYPE_I64 0x7E
+#define VALTYPE_F32 0x7D
+#define VALTYPE_F64 0x7C
+
+#define EXPORT_FUNCIDX 0x00
+#define EXPORT_TABLEIDX 0x01
+#define EXPORT_MEMIDX 0x02
+#define EXPORT_GLOBALIDX 0x03
diff --git a/tools/wasm_compile.c b/tools/wasm_compile.c
new file mode 100644
index 0000000..64988c8
--- /dev/null
+++ b/tools/wasm_compile.c
@@ -0,0 +1,72 @@
+#include "wasm_compile.h"
+
+void print_instructions(uint32_t count, uint16_t instructions[count])
+{
+ uint32_t i;
+ char binary[17];
+ uint16_t instruction;
+ int j;
+
+ binary[16] = '\0';
+
+ for (i = 0; i < count; i++) {
+ instruction = instructions[i];
+ j = 16;
+
+ while (j--) {
+ binary[j] = (instruction & 1) ? '1' : '0';
+ instruction >>= 1;
+ }
+
+ puts(binary);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ FILE *handle = NULL;
+ struct module *module = NULL;
+ uint16_t *translated_instructions = NULL;
+ char retval = -1;
+
+ if (argc < 2) {
+ PRERR("Please provide name of the file to parse on the command line\n");
+ goto fail;
+ }
+
+ handle = fopen(argv[1], "r");
+
+ if (!handle) {
+ PRERR("Couldn't open '%s'\n", argv[1]);
+ goto fail;
+ }
+
+ module = parse_module(handle);
+
+ if (!module)
+ goto fail;
+
+ translated_instructions = calloc(1, CODE_TOP_ADDR);
+
+ if (!translated_instructions) {
+ PRERR(MSG_ALLOC_FAIL(CODE_TOP_ADDR));
+ goto fail;
+ }
+
+ if (assemble(CODE_TOP_ADDR / 2, translated_instructions, module))
+ goto fail;
+
+ print_instructions(CODE_TOP_ADDR / 2, translated_instructions);
+
+ retval = 0;
+
+fail:
+ if (handle)
+ fclose(handle);
+
+ free_module(module);
+
+ free(translated_instructions);
+
+ return retval;
+}
diff --git a/tools/wasm_compile.h b/tools/wasm_compile.h
new file mode 100644
index 0000000..3412b2d
--- /dev/null
+++ b/tools/wasm_compile.h
@@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#define STACK_FRAME_BACKUP_ADDR 0x0FFFFC
+#define STACK_TOP_ADDR 0x0FFFFC
+#define CODE_BOTTOM_ADDR 0x000000
+#define CODE_TOP_ADDR 0x000200
+#define MEMORY_BOTTOM_ADDR 0x000200
+
+/* Error messages */
+#define MSG_SIZE_OVERFLOW "Number overflows size_t\n"
+#define MSG_EOF "Unexpected end of bytes\n"
+#define MSG_BAD_NUM_ENC "Improper number encoding\n"
+#define MSG_BAD_NUM "Couldn't parse number\n"
+#define MSG_BAD_SIZE "Couldn't compute size to allocate\n"
+#define MSG_ALLOC_FAIL(n) "Failed to allocate %lu bytes\n", (unsigned long) (n)
+#define MSG_BAD(nam, val) "Found 0x%02hhx instead of %s\n", (char) (val), nam
+#define MSG_BYTES_REMAIN "Didn't use up all bytes when parsing\n"
+#define MSG_SEEK_FAIL "Couldn't navigate the file\n"
+#define MSG_BAD_IDX(nam) "Got %s out of range\n", nam
+
+#define PRERR(...) fprintf(stderr, __VA_ARGS__)
+
+struct functype {
+ uint32_t arguments_count;
+ char *arguments;
+
+ char result;
+};
+
+struct function {
+ struct functype *type;
+
+ uint32_t locals_count;
+ char *locals;
+
+ struct instruction *translated_body;
+ uint32_t start_addr;
+};
+
+struct export {
+ char *name;
+ char desc;
+ uint32_t idx;
+};
+
+struct module {
+ uint32_t functypes_count;
+ struct functype *functypes;
+ uint32_t functions_count;
+ struct function *functions;
+ enum {
+ MEM_NONE = 0,
+ MEM_MIN,
+ MEM_MIN_MAX
+ } memory_type;
+ uint32_t mem_min, mem_max;
+ uint32_t exports_count;
+ struct export *exports;
+};
+
+int leb_u32(FILE *handle, uint32_t *result);
+
+void free_expr(struct instruction *expr);
+
+void free_module(struct module *module);
+
+struct module *parse_module(FILE *handle);
+
+int translate(FILE *handle, struct function *function, struct module *module);
+
+int assemble(uint32_t memory_size, uint16_t memory[memory_size],
+ struct module *module);