diff options
-rw-r--r-- | Makefile.example | 35 | ||||
-rw-r--r-- | Makefile.util | 5 | ||||
-rw-r--r-- | examples/example3a_spi_wasm/Makefile | 4 | ||||
-rw-r--r-- | examples/example3a_spi_wasm/data.txt | 40 | ||||
-rw-r--r-- | examples/example3a_spi_wasm/instructions.wat | 115 | ||||
l--------- | examples/example3b_spi_tclasm/Makefile | 1 | ||||
l--------- | examples/example3b_spi_tclasm/data.txt | 1 | ||||
-rw-r--r-- | examples/example3b_spi_tclasm/instructions.s.tcl | 142 | ||||
-rw-r--r-- | examples/example_toplevel.v | 14 | ||||
-rw-r--r-- | models/soc_with_peripherals.v | 2 |
10 files changed, 349 insertions, 10 deletions
diff --git a/Makefile.example b/Makefile.example index c85dc8f..484c279 100644 --- a/Makefile.example +++ b/Makefile.example @@ -13,28 +13,49 @@ FONT := $(DESIGN_DIR)/font.mem IVFLAGS += -I$(PROJ_DIR)/include/ -ROM_DEFINES = \ +EMBEDDED_ROM_DEFINES = \ -DROM_WORDS_COUNT=$(call FILE_LINES,instructions.mem) \ -DEMBEDDED_ROM_FILE=\"instructions.mem\" \ -DFONT_FILE=\"$(FONT)\" +ifdef FLASH_DATA +INCLUDE_SPI_IMAGE = 1 + +# We put some random data in the image so as not to waste time generating +# bitstream when we're only working in a simulator. +spi.mem : $(PROJ_DIR)/tools/bin2hex $(FLASH_DATA) + (dd if=/dev/urandom bs=135100 count=1; cat $(FLASH_DATA)) \ + 2>/dev/null | $< > $@ +else +FLASH_DATA = /dev/null +endif + +ifdef INCLUDE_SPI_IMAGE +SPI_ROM_DEFINES = \ + -DSPI_ROM_FILE=\"spi.mem\" \ + -DSPI_ROM_WORDS_COUNT=$(call FILE_LINES,spi.mem) + +example.vvp : spi.mem +simulate : spi.mem +endif + example.vvp : $(DESIGN_DIR)/*.v soc_with_peripherals.v flash_memory.v sram.v \ vga_display.v example_toplevel.v $(FONT) messages.vh \ instructions.mem - $(IV) $(IVFLAGS) $(SIMFLAGS) -DSIMULATION $(ROM_DEFINES) \ - -s example $(filter %.v,$^) -o $@ + $(IV) $(IVFLAGS) $(SIMFLAGS) -DSIMULATION $(EMBEDDED_ROM_DEFINES) \ + $(SPI_ROM_DEFINES) -s example $(filter %.v,$^) -o $@ simulate : example.vvp $(VVP) $< if [ -f VGAdump.mem ]; then $(MAKE) VGAdump.ppm; fi design.v : instructions.mem $(DESIGN_DIR)/*.v $(FONT) - $(IV) $(IVFLAGS) $(ROM_DEFINES) -E $(filter %.v,$^) -o $@ + $(IV) $(IVFLAGS) $(EMBEDDED_ROM_DEFINES) -E $(filter %.v,$^) -o $@ clean : find . -name "*.vvp" -delete - rm $(call FIND_GENERATED_FILES,.) $(addprefix design.,v json asc bin) \ - $(addsuffix .log,yosys pnr) VGAdump.mem VGAdump.ppm \ - 2>/dev/null || true + rm $(call FIND_GENERATED_FILES,.) 
$(addsuffix .log,yosys pnr) \ + $(addprefix design.,v json asc bin) spi.mem \ + VGAdump.mem VGAdump.ppm 2>/dev/null || true .PHONY : def simulate clean diff --git a/Makefile.util b/Makefile.util index 88939b1..85f14a9 100644 --- a/Makefile.util +++ b/Makefile.util @@ -21,8 +21,11 @@ design.asc pnr.log: $(PCF) design.json $(PNR) --hx8k --asc design.asc --pcf $< --json design.json \ --package ct256 > pnr.log 2>&1 -design.bin : design.asc +design.bin : design.asc $(FLASH_DATA) $(ICEPACK) $< $@ +ifdef FLASH_DATA + cat $(FLASH_DATA) >> $@ +endif prog : design.bin sudo iceprogduino $< diff --git a/examples/example3a_spi_wasm/Makefile b/examples/example3a_spi_wasm/Makefile new file mode 100644 index 0000000..0eff969 --- /dev/null +++ b/examples/example3a_spi_wasm/Makefile @@ -0,0 +1,4 @@ +SIMFLAGS += -DFINISH_ON_LED1=1 -DFINISH_ON_LED2=1 -DFINISH_ON_IMAGE_WRITES=1 +FLASH_DATA = data.txt + +include ../../Makefile.example diff --git a/examples/example3a_spi_wasm/data.txt b/examples/example3a_spi_wasm/data.txt new file mode 100644 index 0000000..2200f6b --- /dev/null +++ b/examples/example3a_spi_wasm/data.txt @@ -0,0 +1,40 @@ +WebAssembly or Wasm standard developed by W3C Community Group defines a bytecode +format for executable programs, as well as its text representation. The goal of the bytecode is to +enable efficient interpretation and keep the size of the binary small. Although it has been created +mainly for use on web pages, it is suited for other environments as well, including embedded +targets. Programming languages, even those with manual memory management and pointer +arithmetics, can be compiled to Wasm bytecode. Hence, a standard-conforming WebAssembly +interpreter is capable of running code written in any of the languages, for which a compiler +exists. This currently includes C, C++, C#, Rust and others. Wasm bytecode runs on a virtual +stack machine. Most existing environments either interpret it directly or use JIT compilation. 
+The goal of this thesis is to create a laboratory station for execution of WebAssembly on a +programmable logic device. The client is a person responsible for the equipment of a laboratory. +Project’s codename is WMC - WebAssembly Machine in Circuitry. + + +WebAsm interpretation could possibly be made faster with hardware designed specifically for +this task 1. Such hardware would also benefit from already existing tools for generating and +working with the bytecode. The entire range of languages, that can be compiled to the bytecode, +would be immediately available for use on such platform. +Although a WebAssembly processor would be unique in some sense, it could still be used in a +fashion similar to other soft processors for programmable logic, enabling sequential execution. +The processor could be integrated with hardware modules. Such a combination would allow for +implementing peripheral devices operated by Wasm software. If such WebAssembly processor +proves speed-efficient, it can be implemented in application-specific integrated circuit. + + +In terms of this thesis a laboratory station for execution of WebAssembly in a programmable +logic device is going to be created. It will consist of a selected FPGA board with a dedicated +programmer. Means of communication with the device, for example a VGA display or wired +connection to a computer, are also going to be ensured. +Wasm machine is going to be implemented in a hardware description language. Included will +be: the code, tools to generate bitstream and load it to the board, a test program in WebAssembly binary format, means of transfering it to the device and documentation for the product. +The client will be a person responsible for equipment of a research laboratory. 
Aside from +allowing researchers to evaluate Wasm bytecode execution on the FPGA board, the resulting +product should allow modification of the logic design, for example to add a peripheral module +or a custom instruction to the processor and perform experiments with it. +The designed Wasm machine is going to be a stack machine. Its distinguishable parts will be +control unit, arithmetic logic unit and peripheral interfaces. An in-circuit stack (as opposed to +stack fully contained in RAM) and floating-point unit might also be added. The machine shall +make use of memory module residing on the development board. It shall communicate with +devices external to FPGA chip through peripherals (e.g. VGA module, serial interface module). diff --git a/examples/example3a_spi_wasm/instructions.wat b/examples/example3a_spi_wasm/instructions.wat new file mode 100644 index 0000000..f09e080 --- /dev/null +++ b/examples/example3a_spi_wasm/instructions.wat @@ -0,0 +1,115 @@ +;; See instructions.wat of soc_print_number test. A lot has been taken from +;; there. +;; Relevant addresses are VGA text memory (0xFFC00), VGA regs (0x100600), +;; lower half of timer reg (0x1BFC08), SPI memory (0x13FC00), +;; SPI bytes_to_output reg (0x13FE00), SPI bytes_to_receive reg (0x13FE02) +;; and SPI operating reg (0x13FE04). 
+ +(module + (memory 0 2) + (func $main + (local $transfers i32) + (local $offset i32) + (local $address i32) + (local $index i32) + + ;; power up flash chip + ;; set bytes_to_output to 1 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE00) (i32.const 1)) + ;; set bytes_to_receive to 0 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE02) (i32.const 0)) + ;; release power-down SPI command + (i32.store8 offset=0 align=1 + (i32.const 0x13FC00) (i32.const 0xAB)) + ;; start SPI operation + (i32.store16 offset=0 align=2 + (i32.const 0x13FE04) (i32.const 0x1)) + + ;; wait for at least 3000 ns after command gets sent + ;; reset the timer + (i32.store16 offset=0x0 align=2 + (i32.const 0x1BFC08) (i32.const 0x0)) + + ;; loop until 45 ticks pass; 45 ticks is 3600 ns; additional 600ns + ;; is enough for power-up command to be sent + (loop $again + (br_if $again (i32.lt_u + (i32.load16_u offset=0x0 align=2 + (i32.const 0x1BFC08)) + (i32.const 45)))) + + ;; we'll transfer 2400 bytes by making 5 transfers of 480 bytes; + ;; our SPI peripheral can transfer at most 511 bytes in one SPI command + (set_local $transfers (i32.const 0)) + + (loop $outer + ;; set bytes_to_output to 5 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE00) (i32.const 5)) + ;; set bytes_to_receive to 480 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE02) (i32.const 480)) + ;; fast read SPI command + (i32.store8 offset=0 align=1 + (i32.const 0x13FC00) (i32.const 0x0B)) + ;; prepare address - first, compute current offset + (set_local $offset (i32.mul (get_local $transfers) + (i32.const 480))) + ;; then, add computed offset to base address of 135100 + (set_local $address (i32.add (get_local $offset) + (i32.const 135100))) + ;; store the address in big endian + (i32.store16 offset=0 align=2 + (i32.const 0x13FC02) + (i32.div_u (get_local $address) + (i32.const 256))) + (i32.store8 offset=0 align=1 + (i32.const 0x13FC03) (get_local $address)) + (i32.store8 offset=0 align=1 + (i32.const 0x13FC01) + 
(i32.div_u (get_local $address) + (i32.const 65536))) + ;; start SPI operation + (i32.store16 offset=0 align=2 + (i32.const 0x13FE04) (i32.const 0x1)) + ;; force wait for operation completion + (i32.store16 offset=0 align=2 + (i32.const 0x13FE00) (i32.const 0)) + + ;; assume transferred data to be ascii text and print it to screen + ;; initialize index to 0 + (set_local $index (i32.const 0)) + ;; copy characters in a loop + (loop $inner + ;; copy 4 characters to VGA memory + (i32.store offset=0xFFC00 align=4 + (i32.add (get_local $index) + (get_local $offset)) + (i32.load offset=0x13FC00 align=4 + (get_local $index))) + ;; increase index + (set_local $index (i32.add (get_local $index) + (i32.const 4))) + ;; loop condition + (br_if $inner (i32.lt_u + (get_local $index) + (i32.const 480)))) + + ;; increase transfers count + (set_local $transfers (i32.add (get_local $transfers) + (i32.const 1))) + + ;; switch LED2 + (i32.store16 offset=0x0 align=2 + (i32.const 0x1BFC06) + (i32.rem_u (get_local $transfers) (i32.const 2))) + + ;; if less than 5 transfers were done, continue with another one + (br_if $outer (i32.lt_u (get_local $transfers) (i32.const 5)))) + + ;; write a non-zero value to the VGA power-on reg at 0x100600 (0x100A00) + (i32.store16 offset=0x0 align=2 + (i32.const 0x100600) (i32.const 0x1))) + (export "main" (func $main))) diff --git a/examples/example3b_spi_tclasm/Makefile b/examples/example3b_spi_tclasm/Makefile new file mode 120000 index 0000000..00e0a5f --- /dev/null +++ b/examples/example3b_spi_tclasm/Makefile @@ -0,0 +1 @@ +../example3a_spi_wasm/Makefile
\ No newline at end of file diff --git a/examples/example3b_spi_tclasm/data.txt b/examples/example3b_spi_tclasm/data.txt new file mode 120000 index 0000000..b67d943 --- /dev/null +++ b/examples/example3b_spi_tclasm/data.txt @@ -0,0 +1 @@ +../example3a_spi_wasm/data.txt
\ No newline at end of file diff --git a/examples/example3b_spi_tclasm/instructions.s.tcl b/examples/example3b_spi_tclasm/instructions.s.tcl new file mode 100644 index 0000000..294a73e --- /dev/null +++ b/examples/example3b_spi_tclasm/instructions.s.tcl @@ -0,0 +1,142 @@ +### Relevant addresses: +### * h140000 - h1401FF - SPI data transfer memory +### * h140200 - h140201 - SPI bytes_to_output reg +### * h140202 - h140203 - SPI bytes_to_receive reg +### * h140204 - h140207 - SPI operating reg +### * h1C0008 - h1C0009 - low bits of timer reg +### * h100000 - h1009FF - VGA text memory +### * h100A00 - h100A01 - VGA power-on register +### * h1C0006 - h1C0007 - LED2 register + +### The bitstream binary itself is 135100 bytes big. We append our data +### at the end of this binary file, so it can later be found at address +### 135100 on the flash chip. + +set_sp h100000; # 2 16-bit instructions + +### power up flash chip +## set bytes_to_output to 1 +const 1; # 1 16-bit instruction +storew h140200; # 2 16-bit instructions +## set bytes_to_receive to 0 +const 0; # 1 16-bit instruction +storew h140202; # 2 16-bit instructions +## release power-down SPI command +const 0xAB; # 2 16-bit instructions +storeb h140000; # 2 16-bit instructions +## start SPI operation +const 1; # 1 16-bit instruction +storew h140204; # 2 16-bit instructions + +### wait for at least 3000 ns after command gets sent +## reset the timer +const 0; # 1 16-bit instruction +storew h1C0008; # 2 16-bit instructions + +## loop until 45 ticks pass; 45 ticks is 3600 ns; additional 600ns is enough +## for power-up command to be sent +# there were 18 16-bit instructions up to this point, so loop address is 36 +loadwzx h1C0008; # 2 16-bit instructions +const 45; # 1 16-bit instruction +lt; # 1 16-bit instruction +cond_jump 36; # 1 16-bit instruction + +### we'll transfer a total of 2400 bytes by making 5 transfers of 480 bytes; +### our SPI peripheral is capable of transferring at most 511 bytes in one go +## 
initialize transfers counter +const 0; # 1 16-bit instruction +storew h080000; # 2 16-bit instructions + +### transfers loop starts here, it'll execute 5 times +# there were 26 16-bit instructions up to this point, so loop address is 52 +### read 480 bytes from SPI +## set bytes_to_output to 5 +const 5; # 1 16-bit instruction +storew h140200; # 2 16-bit instructions +## set bytes_to_receive to 480 +const 480; # 2 16-bit instructions +storew h140202; # 2 16-bit instructions +## fast read SPI command +const 0x0B; # 1 16-bit instruction +storeb h140000; # 2 16-bit instructions +## prepare address; 135100 is base address of our data inside flash memory +const 135100; # 2 16-bit instructions +## compute the offset of bytes, that have already been read +loadwzx h080000; # 2 16-bit instructions +const 480; # 2 16-bit instructions +mul; # 1 16-bit instruction +## also store the computed offset for later +storew h080002; # 2 16-bit instructions +## add the offset +loadwzx h080002; # 2 16-bit instructions +add; # 1 16-bit instruction +## store the address in big endian +store h080008; # 2 16-bit instructions +load h080008; # 2 16-bit instructions +const 256; # 2 16-bit instructions +div; # 1 16-bit instruction +storew h140002; # 2 16-bit instructions +load h080008; # 2 16-bit instructions +storeb h140003; # 2 16-bit instructions +load h080008; # 2 16-bit instructions +const 65536; # 2 16-bit instructions +div; # 1 16-bit instruction +storeb h140001; # 2 16-bit instructions +## start SPI operation +const 1; # 1 16-bit instruction +storew h140204; # 2 16-bit instructions + +### force wait for operation completion +const 0; # 1 16-bit instruction +storew h140200; # 2 16-bit instructions + +### assume data to be ascii characters and print it to screen +### initialize index to 0 +const 0; # 1 16-bit instruction +storew h080004; # 2 16-bit instructions + +## print characters in a loop +# there were 77 16-bit instructions up to this point, so loop address is 154 +## load current 
index value, get 2 copies of it, add offset to one copy +loadwzx h080004 +loadwzx h080002 +add +loadwzx h080004 +## load 4 bytes from SPI memory +load+ h140000 +## write to VGA memory +store+ h100000 +## increase index +loadwzx h080004 +const 4 +add +storew h080004 +## compare index to 479 +loadwzx h080004 +const 479 +lt +cond_jump 154 + +## increase transfers count +loadwzx h080000 +const 1 +add +storew h080000 + +## switch LED2 +loadwzx h080000 +const 2 +rem +storew h1C0006 + +## if less than 5 transfers were done, continue with another one +loadwzx h080000 +const 5 +lt +cond_jump 52 + +### after loops, start generating VGA output +const -1 +store h100A00 + +halt diff --git a/examples/example_toplevel.v b/examples/example_toplevel.v index f6cdf69..f1501e3 100644 --- a/examples/example_toplevel.v +++ b/examples/example_toplevel.v @@ -23,6 +23,16 @@ ; /* Cause syntax error */ `endif +`ifndef SPI_ROM_FILE + `define SPI_ROM_FILE "/dev/null" + `define SPI_ROM_WORDS_COUNT 0 +`else + `ifndef SPI_ROM_WORDS_COUNT + `error_SPI_ROM_WORDS_COUNT_not_defined +; /* Cause syntax error */ + `endif +`endif + `ifndef FINISH_ON_IMAGE_WRITES `define FINISH_ON_IMAGE_WRITES 0 `else @@ -67,7 +77,9 @@ module example(); #( .FONT_FILE(`FONT_FILE), .EMBEDDED_ROM_WORDS_COUNT(`ROM_WORDS_COUNT), - .EMBEDDED_ROM_FILE(`EMBEDDED_ROM_FILE) + .EMBEDDED_ROM_FILE(`EMBEDDED_ROM_FILE), + .SPI_ROM_WORDS_COUNT(`SPI_ROM_WORDS_COUNT), + .SPI_ROM_FILE(`SPI_ROM_FILE) ) soc ( .clock_100mhz(clock_100mhz), diff --git a/models/soc_with_peripherals.v b/models/soc_with_peripherals.v index 40ee341..ec92238 100644 --- a/models/soc_with_peripherals.v +++ b/models/soc_with_peripherals.v @@ -14,7 +14,7 @@ module soc_with_peripherals parameter EMBEDDED_ROM_WORDS_COUNT = "error, rom words count not given", parameter EMBEDDED_ROM_FILE = {DESIGN_DIR, "rom.mem"}, parameter SPI_ROM_WORDS_COUNT = 0, - parameter SPI_ROM_FILE = "/dev/zero" + parameter SPI_ROM_FILE = "/dev/null" ) ( input wire clock_100mhz, |