From 68c80359ba0983bc21a18c0270025be9b441c0bb Mon Sep 17 00:00:00 2001 From: Wojciech Kosior Date: Tue, 29 Dec 2020 19:56:04 +0100 Subject: add the ability to include additional data at the end of bitstream image and prepare an example, that reads thic data through SPI and displays it --- examples/example3a_spi_wasm/Makefile | 4 + examples/example3a_spi_wasm/data.txt | 40 +++++++ examples/example3a_spi_wasm/instructions.wat | 115 ++++++++++++++++++ examples/example3b_spi_tclasm/Makefile | 1 + examples/example3b_spi_tclasm/data.txt | 1 + examples/example3b_spi_tclasm/instructions.s.tcl | 142 +++++++++++++++++++++++ examples/example_toplevel.v | 14 ++- 7 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 examples/example3a_spi_wasm/Makefile create mode 100644 examples/example3a_spi_wasm/data.txt create mode 100644 examples/example3a_spi_wasm/instructions.wat create mode 120000 examples/example3b_spi_tclasm/Makefile create mode 120000 examples/example3b_spi_tclasm/data.txt create mode 100644 examples/example3b_spi_tclasm/instructions.s.tcl (limited to 'examples') diff --git a/examples/example3a_spi_wasm/Makefile b/examples/example3a_spi_wasm/Makefile new file mode 100644 index 0000000..0eff969 --- /dev/null +++ b/examples/example3a_spi_wasm/Makefile @@ -0,0 +1,4 @@ +SIMFLAGS += -DFINISH_ON_LED1=1 -DFINISH_ON_LED2=1 -DFINISH_ON_IMAGE_WRITES=1 +FLASH_DATA = data.txt + +include ../../Makefile.example diff --git a/examples/example3a_spi_wasm/data.txt b/examples/example3a_spi_wasm/data.txt new file mode 100644 index 0000000..2200f6b --- /dev/null +++ b/examples/example3a_spi_wasm/data.txt @@ -0,0 +1,40 @@ +WebAssembly or Wasm standard developed by W3C Community Group defines a bytecode +format for executable programs, as well as its text representation. The goal of the bytecode is to +enable efficient interpretation and keep the size of the binary small. 
Although it has been created +mainly for use on web pages, it is suited for other environments as well, including embedded +targets. Programming languages, even those with manual memory management and pointer +arithmetics, can be compiled to Wasm bytecode. Hence, a standard-conforming WebAssembly +interpreter is capable of running code written in any of the languages, for which a compiler +exists. This currently includes C, C++, C#, Rust and others. Wasm bytecode runs on a virtual +stack machine. Most existing environments either interpret it directly or use JIT compilation. +The goal of this thesis is to create a laboratory station for execution of WebAssembly on a +programmable logic device. The client is a person responsible for the equipment of a laboratory. +Project’s codename is WMC - WebAssembly Machine in Circuitry. + + +WebAsm interpretation could possibly be made faster with hardware designed specifically for +this task 1. Such hardware would also benefit from already existing tools for generating and +working with the bytecode. The entire range of languages, that can be compiled to the bytecode, +would be immediately available for use on such platform. +Although a WebAssembly processor would be unique in some sense, it could still be used in a +fashion similar to other soft processors for programmable logic, enabling sequential execution. +The processor could be integrated with hardware modules. Such a combination would allow for +implementing peripheral devices operated by Wasm software. If such WebAssembly processor +proves speed-efficient, it can be implemented in application-specific integrated circuit. + + +In terms of this thesis a laboratory station for execution of WebAssembly in a programmable +logic device is going to be created. It will consist of a selected FPGA board with a dedicated +programmer. Means of communication with the device, for example a VGA display or wired +connection to a computer, are also going to be ensured. 
+Wasm machine is going to be implemented in a hardware description language. Included will +be: the code, tools to generate bitstream and load it to the board, a test program in WebAssembly binary format, means of transfering it to the device and documentation for the product. +The client will be a person responsible for equipment of a research laboratory. Aside from +allowing researchers to evaluate Wasm bytecode execution on the FPGA board, the resulting +product should allow modification of the logic design, for example to add a peripheral module +or a custom instruction to the processor and perform experiments with it. +The designed Wasm machine is going to be a stack machine. Its distinguishable parts will be +control unit, arithmetic logic unit and peripheral interfaces. An in-circuit stack (as opposed to +stack fully contained in RAM) and floating-point unit might also be added. The machine shall +make use of memory module residing on the development board. It shall communicate with +devices external to FPGA chip through peripherals (e.g. VGA module, serial interface module). diff --git a/examples/example3a_spi_wasm/instructions.wat b/examples/example3a_spi_wasm/instructions.wat new file mode 100644 index 0000000..f09e080 --- /dev/null +++ b/examples/example3a_spi_wasm/instructions.wat @@ -0,0 +1,115 @@ +;; See instructions.wat of soc_print_number test. A lot has been taken from +;; there. +;; Relevant addresses are VGA text memory (0xFFC00), VGA regs (0x100600), +;; lower half of timer reg (0x1BFC08), SPI memory (0x13FC00), +;; SPI bytes_to_output reg (0x13FE00), SPI bytes_to_receive reg (0x13FE02) +;; and SPI operating reg (0x13FE04). 
+ +(module + (memory 0 2) + (func $main + (local $transfers i32) + (local $offset i32) + (local $address i32) + (local $index i32) + + ;; power up flash chip + ;; set bytes_to_output to 1 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE00) (i32.const 1)) + ;; set bytes_to_receive to 0 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE02) (i32.const 0)) + ;; release power-down SPI command + (i32.store8 offset=0 align=1 + (i32.const 0x13FC00) (i32.const 0xAB)) + ;; start SPI operation + (i32.store16 offset=0 align=2 + (i32.const 0x13FE04) (i32.const 0x1)) + + ;; wait for at least 3000 ns after command gets sent + ;; reset the timer + (i32.store16 offset=0x0 align=2 + (i32.const 0x1BFC08) (i32.const 0x0)) + + ;; loop until 45 ticks pass; 45 ticks is 3600 ns; additional 600ns + ;; is enough for power-up command to be sent + (loop $again + (br_if $again (i32.lt_u + (i32.load16_u offset=0x0 align=2 + (i32.const 0x1BFC08)) + (i32.const 45)))) + + ;; we'll transfer 2400 bytes by making 5 transfers of 480 bytes; + ;; our SPI peripheral can transfer at most 511 bytes in one SPI command + (set_local $transfers (i32.const 0)) + + (loop $outer + ;; set bytes_to_output to 5 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE00) (i32.const 5)) + ;; set bytes_to_receive to 480 + (i32.store16 offset=0 align=2 + (i32.const 0x13FE02) (i32.const 480)) + ;; fast read SPI command + (i32.store8 offset=0 align=1 + (i32.const 0x13FC00) (i32.const 0x0B)) + ;; prepare address - first, compute current offset + (set_local $offset (i32.mul (get_local $transfers) + (i32.const 480))) + ;; then, add computed offset to base address of 135100 + (set_local $address (i32.add (get_local $offset) + (i32.const 135100))) + ;; store the address in big endian; the 16-bit store also writes byte 0x13FC03, which the next 8-bit store then overwrites + (i32.store16 offset=0 align=2 + (i32.const 0x13FC02) + (i32.div_u (get_local $address) + (i32.const 256))) + (i32.store8 offset=0 align=1 + (i32.const 0x13FC03) (get_local $address)) + (i32.store8 offset=0 align=1 + (i32.const 0x13FC01) +
(i32.div_u (get_local $address) + (i32.const 65536))) + ;; start SPI operation + (i32.store16 offset=0 align=2 + (i32.const 0x13FE04) (i32.const 0x1)) + ;; force wait for operation completion + (i32.store16 offset=0 align=2 + (i32.const 0x13FE00) (i32.const 0)) + + ;; assume transferred data to be ascii text and print it to screen + ;; initialize index to 0 + (set_local $index (i32.const 0)) + ;; copy characters in a loop + (loop $inner + ;; copy 4 characters to VGA memory + (i32.store offset=0xFFC00 align=4 + (i32.add (get_local $index) + (get_local $offset)) + (i32.load offset=0x13FC00 align=4 + (get_local $index))) + ;; increase index + (set_local $index (i32.add (get_local $index) + (i32.const 4))) + ;; loop condition: repeat while index is less than 480 + (br_if $inner (i32.lt_u + (get_local $index) + (i32.const 480)))) + + ;; increase transfers count + (set_local $transfers (i32.add (get_local $transfers) + (i32.const 1))) + + ;; switch LED2: store (transfers modulo 2) to 0x1BFC06 + (i32.store16 offset=0x0 align=2 + (i32.const 0x1BFC06) + (i32.rem_u (get_local $transfers) (i32.const 2))) + + ;; if less than 5 transfers were done, continue with another one + (br_if $outer (i32.lt_u (get_local $transfers) (i32.const 5)))) + + ;; write a non-zero value to the VGA power-on reg at 0x100600 (listed as h100A00 in the tclasm example) + (i32.store16 offset=0x0 align=2 + (i32.const 0x100600) (i32.const 0x1))) + (export "main" (func $main))) diff --git a/examples/example3b_spi_tclasm/Makefile b/examples/example3b_spi_tclasm/Makefile new file mode 120000 index 0000000..00e0a5f --- /dev/null +++ b/examples/example3b_spi_tclasm/Makefile @@ -0,0 +1 @@ +../example3a_spi_wasm/Makefile \ No newline at end of file diff --git a/examples/example3b_spi_tclasm/data.txt b/examples/example3b_spi_tclasm/data.txt new file mode 120000 index 0000000..b67d943 --- /dev/null +++ b/examples/example3b_spi_tclasm/data.txt @@ -0,0 +1 @@ +../example3a_spi_wasm/data.txt \ No newline at end of file diff --git a/examples/example3b_spi_tclasm/instructions.s.tcl
b/examples/example3b_spi_tclasm/instructions.s.tcl new file mode 100644 index 0000000..294a73e --- /dev/null +++ b/examples/example3b_spi_tclasm/instructions.s.tcl @@ -0,0 +1,142 @@ +### Relevant addresses: +### * h140000 - h1401FF - SPI data transfer memory +### * h140200 - h140201 - SPI bytes_to_output reg +### * h140202 - h140203 - SPI bytes_to_receive reg +### * h140204 - h140207 - SPI operating reg +### * h1C0008 - h1C0009 - low bits of timer reg +### * h100000 - h1009FF - VGA text memory +### * h100A00 - h100A01 - VGA power-on register +### * h1C0006 - h1C0007 - LED2 register + +### The bitstream binary itself is 135100 bytes big. We append our data +### at the end of this binary file, so it can later be found at address +### 135100 on the flash chip. + +set_sp h100000; # 2 16-bit instructions + +### power up flash chip +## set bytes_to_output to 1 +const 1; # 1 16-bit instruction +storew h140200; # 2 16-bit instructions +## set bytes_to_receive to 0 +const 0; # 1 16-bit instruction +storew h140202; # 2 16-bit instructions +## release power-down SPI command +const 0xAB; # 2 16-bit instructions +storeb h140000; # 2 16-bit instructions +## start SPI operation +const 1; # 1 16-bit instruction +storew h140204; # 2 16-bit instructions + +### wait for at least 3000 ns after command gets sent +## reset the timer +const 0; # 1 16-bit instruction +storew h1C0008; # 2 16-bit instructions + +## loop until 45 ticks pass; 45 ticks is 3600 ns; additional 600ns is enough +## for power-up command to be sent +# there were 18 16-bit instructions up to this point, so loop address is 36 +loadwzx h1C0008; # 2 16-bit instructions +const 45; # 1 16-bit instruction +lt; # 1 16-bit instruction +cond_jump 36; # 1 16-bit instruction + +### we'll transfer a total of 2400 bytes by making 5 transfers of 480 bytes; +### our SPI peripheral is capable of transferring at most 511 bytes in one go +## initialize transfers counter +const 0; # 1 16-bit instruction +storew h080000; # 2 16-bit
instructions + +### transfers loop starts here, it'll execute 5 times +# there were 26 16-bit instructions up to this point, so loop address is 52 +### read 480 bytes from SPI +## set bytes_to_output to 5 +const 5; # 1 16-bit instruction +storew h140200; # 2 16-bit instructions +## set bytes_to_receive to 480 +const 480; # 2 16-bit instructions +storew h140202; # 2 16-bit instructions +## fast read SPI command +const 0x0B; # 1 16-bit instruction +storeb h140000; # 2 16-bit instructions +## prepare address; 135100 is base address of our data inside flash memory +const 135100; # 2 16-bit instructions +## compute the offset of bytes, that have already been read +loadwzx h080000; # 2 16-bit instructions +const 480; # 2 16-bit instructions +mul; # 1 16-bit instruction +## also store the computed offset for later +storew h080002; # 2 16-bit instructions +## add the offset +loadwzx h080002; # 2 16-bit instructions +add; # 1 16-bit instruction +## store the address in big endian +store h080008; # 2 16-bit instructions +load h080008; # 2 16-bit instructions +const 256; # 2 16-bit instructions +div; # 1 16-bit instruction +storew h140002; # 2 16-bit instructions +load h080008; # 2 16-bit instructions +storeb h140003; # 2 16-bit instructions +load h080008; # 2 16-bit instructions +const 65536; # 2 16-bit instructions +div; # 1 16-bit instruction +storeb h140001; # 2 16-bit instructions +## start SPI operation +const 1; # 1 16-bit instruction +storew h140204; # 2 16-bit instructions + +### force wait for operation completion +const 0; # 1 16-bit instruction +storew h140200; # 2 16-bit instructions + +### assume data to be ascii characters and print it to screen +### initialize index to 0 +const 0; # 1 16-bit instruction +storew h080004; # 2 16-bit instructions + +## print characters in a loop +# there were 77 16-bit instructions up to this point, so loop address is 154 +## load current index value, get 2 copies of it, add offset to one copy +loadwzx h080004 +loadwzx h080002
+add +loadwzx h080004 +## load 4 bytes from SPI memory +load+ h140000 +## write to VGA memory +store+ h100000 +## increase index +loadwzx h080004 +const 4 +add +storew h080004 +## continue while index is less than 479 (index is a multiple of 4, so the loop ends at 480) +loadwzx h080004 +const 479 +lt +cond_jump 154 + +## increase transfers count +loadwzx h080000 +const 1 +add +storew h080000 + +## switch LED2 +loadwzx h080000 +const 2 +rem +storew h1C0006 + +## if less than 5 transfers were done, continue with another one +loadwzx h080000 +const 5 +lt +cond_jump 52 + +### after loops, start generating VGA output +const -1 +store h100A00 + +halt diff --git a/examples/example_toplevel.v b/examples/example_toplevel.v index f6cdf69..f1501e3 100644 --- a/examples/example_toplevel.v +++ b/examples/example_toplevel.v @@ -23,6 +23,16 @@ ; /* Cause syntax error */ `endif +`ifndef SPI_ROM_FILE + `define SPI_ROM_FILE "/dev/null" + `define SPI_ROM_WORDS_COUNT 0 +`else + `ifndef SPI_ROM_WORDS_COUNT + `error_SPI_ROM_WORDS_COUNT_not_defined +; /* Cause syntax error */ + `endif +`endif + `ifndef FINISH_ON_IMAGE_WRITES `define FINISH_ON_IMAGE_WRITES 0 `else @@ -67,7 +77,9 @@ module example(); #( .FONT_FILE(`FONT_FILE), .EMBEDDED_ROM_WORDS_COUNT(`ROM_WORDS_COUNT), - .EMBEDDED_ROM_FILE(`EMBEDDED_ROM_FILE) + .EMBEDDED_ROM_FILE(`EMBEDDED_ROM_FILE), + .SPI_ROM_WORDS_COUNT(`SPI_ROM_WORDS_COUNT), + .SPI_ROM_FILE(`SPI_ROM_FILE) ) soc ( .clock_100mhz(clock_100mhz), -- cgit v1.2.3