aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.example35
-rw-r--r--Makefile.util5
-rw-r--r--examples/example3a_spi_wasm/Makefile4
-rw-r--r--examples/example3a_spi_wasm/data.txt40
-rw-r--r--examples/example3a_spi_wasm/instructions.wat115
l---------examples/example3b_spi_tclasm/Makefile1
l---------examples/example3b_spi_tclasm/data.txt1
-rw-r--r--examples/example3b_spi_tclasm/instructions.s.tcl142
-rw-r--r--examples/example_toplevel.v14
-rw-r--r--models/soc_with_peripherals.v2
10 files changed, 349 insertions, 10 deletions
diff --git a/Makefile.example b/Makefile.example
index c85dc8f..484c279 100644
--- a/Makefile.example
+++ b/Makefile.example
@@ -13,28 +13,49 @@ FONT := $(DESIGN_DIR)/font.mem
IVFLAGS += -I$(PROJ_DIR)/include/
-ROM_DEFINES = \
+EMBEDDED_ROM_DEFINES = \
-DROM_WORDS_COUNT=$(call FILE_LINES,instructions.mem) \
-DEMBEDDED_ROM_FILE=\"instructions.mem\" \
-DFONT_FILE=\"$(FONT)\"
+ifdef FLASH_DATA
+INCLUDE_SPI_IMAGE = 1
+
+# For simulation, pad the image with 135100 random bytes in place of a real
+# bitstream (no synthesis needed); head -c avoids a short read truncating it.
+spi.mem : $(PROJ_DIR)/tools/bin2hex $(FLASH_DATA)
+ (head -c 135100 /dev/urandom; cat $(FLASH_DATA)) \
+ 2>/dev/null | $< > $@
+else
+FLASH_DATA = /dev/null
+endif
+
+ifdef INCLUDE_SPI_IMAGE
+SPI_ROM_DEFINES = \
+ -DSPI_ROM_FILE=\"spi.mem\" \
+ -DSPI_ROM_WORDS_COUNT=$(call FILE_LINES,spi.mem)
+
+example.vvp : spi.mem
+simulate : spi.mem
+endif
+
example.vvp : $(DESIGN_DIR)/*.v soc_with_peripherals.v flash_memory.v sram.v \
vga_display.v example_toplevel.v $(FONT) messages.vh \
instructions.mem
- $(IV) $(IVFLAGS) $(SIMFLAGS) -DSIMULATION $(ROM_DEFINES) \
- -s example $(filter %.v,$^) -o $@
+ $(IV) $(IVFLAGS) $(SIMFLAGS) -DSIMULATION $(EMBEDDED_ROM_DEFINES) \
+ $(SPI_ROM_DEFINES) -s example $(filter %.v,$^) -o $@
simulate : example.vvp
$(VVP) $<
if [ -f VGAdump.mem ]; then $(MAKE) VGAdump.ppm; fi
design.v : instructions.mem $(DESIGN_DIR)/*.v $(FONT)
- $(IV) $(IVFLAGS) $(ROM_DEFINES) -E $(filter %.v,$^) -o $@
+ $(IV) $(IVFLAGS) $(EMBEDDED_ROM_DEFINES) -E $(filter %.v,$^) -o $@
clean :
find . -name "*.vvp" -delete
- rm $(call FIND_GENERATED_FILES,.) $(addprefix design.,v json asc bin) \
- $(addsuffix .log,yosys pnr) VGAdump.mem VGAdump.ppm \
- 2>/dev/null || true
+ rm $(call FIND_GENERATED_FILES,.) $(addsuffix .log,yosys pnr) \
+ $(addprefix design.,v json asc bin) spi.mem \
+ VGAdump.mem VGAdump.ppm 2>/dev/null || true
.PHONY : def simulate clean
diff --git a/Makefile.util b/Makefile.util
index 88939b1..85f14a9 100644
--- a/Makefile.util
+++ b/Makefile.util
@@ -21,8 +21,11 @@ design.asc pnr.log: $(PCF) design.json
$(PNR) --hx8k --asc design.asc --pcf $< --json design.json \
--package ct256 > pnr.log 2>&1
-design.bin : design.asc
+design.bin : design.asc $(FLASH_DATA)
$(ICEPACK) $< $@
+ifdef FLASH_DATA
+ cat $(FLASH_DATA) >> $@
+endif
prog : design.bin
sudo iceprogduino $<
diff --git a/examples/example3a_spi_wasm/Makefile b/examples/example3a_spi_wasm/Makefile
new file mode 100644
index 0000000..0eff969
--- /dev/null
+++ b/examples/example3a_spi_wasm/Makefile
@@ -0,0 +1,4 @@
+SIMFLAGS += -DFINISH_ON_LED1=1 -DFINISH_ON_LED2=1 -DFINISH_ON_IMAGE_WRITES=1
+FLASH_DATA = data.txt
+
+include ../../Makefile.example
diff --git a/examples/example3a_spi_wasm/data.txt b/examples/example3a_spi_wasm/data.txt
new file mode 100644
index 0000000..2200f6b
--- /dev/null
+++ b/examples/example3a_spi_wasm/data.txt
@@ -0,0 +1,40 @@
+WebAssembly or Wasm standard developed by W3C Community Group defines a bytecode
+format for executable programs, as well as its text representation. The goal of the bytecode is to
+enable efficient interpretation and keep the size of the binary small. Although it has been created
+mainly for use on web pages, it is suited for other environments as well, including embedded
+targets. Programming languages, even those with manual memory management and pointer
+arithmetics, can be compiled to Wasm bytecode. Hence, a standard-conforming WebAssembly
+interpreter is capable of running code written in any of the languages, for which a compiler
+exists. This currently includes C, C++, C#, Rust and others. Wasm bytecode runs on a virtual
+stack machine. Most existing environments either interpret it directly or use JIT compilation.
+The goal of this thesis is to create a laboratory station for execution of WebAssembly on a
+programmable logic device. The client is a person responsible for the equipment of a laboratory.
+Project’s codename is WMC - WebAssembly Machine in Circuitry.
+
+
+WebAsm interpretation could possibly be made faster with hardware designed specifically for
+this task 1. Such hardware would also benefit from already existing tools for generating and
+working with the bytecode. The entire range of languages, that can be compiled to the bytecode,
+would be immediately available for use on such platform.
+Although a WebAssembly processor would be unique in some sense, it could still be used in a
+fashion similar to other soft processors for programmable logic, enabling sequential execution.
+The processor could be integrated with hardware modules. Such a combination would allow for
+implementing peripheral devices operated by Wasm software. If such WebAssembly processor
+proves speed-efficient, it can be implemented in application-specific integrated circuit.
+
+
+In terms of this thesis a laboratory station for execution of WebAssembly in a programmable
+logic device is going to be created. It will consist of a selected FPGA board with a dedicated
+programmer. Means of communication with the device, for example a VGA display or wired
+connection to a computer, are also going to be ensured.
+Wasm machine is going to be implemented in a hardware description language. Included will
+be: the code, tools to generate bitstream and load it to the board, a test program in WebAssembly binary format, means of transfering it to the device and documentation for the product.
+The client will be a person responsible for equipment of a research laboratory. Aside from
+allowing researchers to evaluate Wasm bytecode execution on the FPGA board, the resulting
+product should allow modification of the logic design, for example to add a peripheral module
+or a custom instruction to the processor and perform experiments with it.
+The designed Wasm machine is going to be a stack machine. Its distinguishable parts will be
+control unit, arithmetic logic unit and peripheral interfaces. An in-circuit stack (as opposed to
+stack fully contained in RAM) and floating-point unit might also be added. The machine shall
+make use of memory module residing on the development board. It shall communicate with
+devices external to FPGA chip through peripherals (e.g. VGA module, serial interface module).
diff --git a/examples/example3a_spi_wasm/instructions.wat b/examples/example3a_spi_wasm/instructions.wat
new file mode 100644
index 0000000..f09e080
--- /dev/null
+++ b/examples/example3a_spi_wasm/instructions.wat
@@ -0,0 +1,115 @@
+;; See instructions.wat of soc_print_number test. A lot has been taken from
+;; there.
+;; Relevant addresses are VGA text memory (0xFFC00), VGA regs (0x100600),
+;; lower half of timer reg (0x1BFC08), SPI memory (0x13FC00),
+;; SPI bytes_to_output reg (0x13FE00), SPI bytes_to_receive reg (0x13FE02)
+;; and SPI operating reg (0x13FE04).
+
+(module
+ (memory 0 2)
+ (func $main
+ (local $transfers i32)
+ (local $offset i32)
+ (local $address i32)
+ (local $index i32)
+
+ ;; power up flash chip
+ ;; set bytes_to_output to 1
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE00) (i32.const 1))
+ ;; set bytes_to_receive to 0
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE02) (i32.const 0))
+ ;; release power-down SPI command
+ (i32.store8 offset=0 align=1
+ (i32.const 0x13FC00) (i32.const 0xAB))
+ ;; start SPI operation
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE04) (i32.const 0x1))
+
+ ;; wait for at least 3000 ns after command gets sent
+ ;; reset the timer
+ (i32.store16 offset=0x0 align=2
+ (i32.const 0x1BFC08) (i32.const 0x0))
+
+ ;; loop until 45 ticks pass; 45 ticks is 3600 ns; additional 600ns
+ ;; is enough for power-up command to be sent
+ (loop $again
+ (br_if $again (i32.lt_u
+ (i32.load16_u offset=0x0 align=2
+ (i32.const 0x1BFC08))
+ (i32.const 45))))
+
+ ;; we'll transfer 2400 bytes by making 5 transfers of 480 bytes;
+ ;; our SPI peripheral can transfer at most 511 bytes in one SPI command
+ (set_local $transfers (i32.const 0))
+
+ (loop $outer
+ ;; set bytes_to_output to 5
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE00) (i32.const 5))
+ ;; set bytes_to_receive to 480
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE02) (i32.const 480))
+ ;; fast read SPI command
+ (i32.store8 offset=0 align=1
+ (i32.const 0x13FC00) (i32.const 0x0B))
+ ;; prepare address - first, compute current offset
+ (set_local $offset (i32.mul (get_local $transfers)
+ (i32.const 480)))
+ ;; then, add computed offset to base address of 135100
+ (set_local $address (i32.add (get_local $offset)
+ (i32.const 135100)))
+ ;; store the address in big endian
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FC02)
+ (i32.div_u (get_local $address)
+ (i32.const 256)))
+ (i32.store8 offset=0 align=1
+ (i32.const 0x13FC03) (get_local $address))
+ (i32.store8 offset=0 align=1
+ (i32.const 0x13FC01)
+ (i32.div_u (get_local $address)
+ (i32.const 65536)))
+ ;; start SPI operation
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE04) (i32.const 0x1))
+ ;; force wait for operation completion
+ (i32.store16 offset=0 align=2
+ (i32.const 0x13FE00) (i32.const 0))
+
+ ;; assume transferred data to be ascii text and print it to screen
+ ;; initialize index to 0
+ (set_local $index (i32.const 0))
+ ;; copy characters in a loop
+ (loop $inner
+ ;; copy 4 characters to VGA memory
+ (i32.store offset=0xFFC00 align=4
+ (i32.add (get_local $index)
+ (get_local $offset))
+ (i32.load offset=0x13FC00 align=4
+ (get_local $index)))
+ ;; increase index
+ (set_local $index (i32.add (get_local $index)
+ (i32.const 4)))
+ ;; loop condition
+ (br_if $inner (i32.lt_u
+ (get_local $index)
+ (i32.const 480))))
+
+ ;; increase transfers count
+ (set_local $transfers (i32.add (get_local $transfers)
+ (i32.const 1)))
+
+ ;; switch LED2
+ (i32.store16 offset=0x0 align=2
+ (i32.const 0x1BFC06)
+ (i32.rem_u (get_local $transfers) (i32.const 2)))
+
+ ;; if less than 5 transfers were done, continue with another one
+ (br_if $outer (i32.lt_u (get_local $transfers) (i32.const 5))))
+
+ ;; write a non-zero value to the VGA power-on reg at 0x100600 (listed as h100A00 in the tclasm variant)
+ (i32.store16 offset=0x0 align=2
+ (i32.const 0x100600) (i32.const 0x1)))
+ (export "main" (func $main)))
diff --git a/examples/example3b_spi_tclasm/Makefile b/examples/example3b_spi_tclasm/Makefile
new file mode 120000
index 0000000..00e0a5f
--- /dev/null
+++ b/examples/example3b_spi_tclasm/Makefile
@@ -0,0 +1 @@
+../example3a_spi_wasm/Makefile \ No newline at end of file
diff --git a/examples/example3b_spi_tclasm/data.txt b/examples/example3b_spi_tclasm/data.txt
new file mode 120000
index 0000000..b67d943
--- /dev/null
+++ b/examples/example3b_spi_tclasm/data.txt
@@ -0,0 +1 @@
+../example3a_spi_wasm/data.txt \ No newline at end of file
diff --git a/examples/example3b_spi_tclasm/instructions.s.tcl b/examples/example3b_spi_tclasm/instructions.s.tcl
new file mode 100644
index 0000000..294a73e
--- /dev/null
+++ b/examples/example3b_spi_tclasm/instructions.s.tcl
@@ -0,0 +1,142 @@
+### Relevant addresses:
+### * h140000 - h1401FF - SPI data transfer memory
+### * h140200 - h140201 - SPI bytes_to_output reg
+### * h140202 - h140203 - SPI bytes_to_receive reg
+### * h140204 - h140207 - SPI operating reg
+### * h1C0008 - h1C0009 - low bits of timer reg
+### * h100000 - h1009FF - VGA text memory
+### * h100A00 - h100A01 - VGA power-on register
+### * h1C0006 - h1C0007 - LED2 register
+
+### The bitstream binary itself is 135100 bytes big. We append our data
+### at the end of this binary file, so it can later be found at address
+### 135100 on the flash chip.
+
+set_sp h100000; # 2 16-bit instructions
+
+### power up flash chip
+## set bytes_to_output to 1
+const 1; # 1 16-bit instruction
+storew h140200; # 2 16-bit instructions
+## set bytes_to_receive to 0
+const 0; # 1 16-bit instruction
+storew h140202; # 2 16-bit instructions
+## release power-down SPI command
+const 0xAB; # 2 16-bit instructions
+storeb h140000; # 2 16-bit instructions
+## start SPI operation
+const 1; # 1 16-bit instruction
+storew h140204; # 2 16-bit instructions
+
+### wait for at least 3000 ns after command gets sent
+## reset the timer
+const 0; # 1 16-bit instruction
+storew h1C0008; # 2 16-bit instructions
+
+## loop until 45 ticks pass; 45 ticks is 3600 ns; additional 600ns is enough
+## for power-up command to be sent
+# there were 18 16-bit instructions up to this point, so loop address is 36
+loadwzx h1C0008; # 2 16-bit instructions
+const 45; # 1 16-bit instruction
+lt; # 1 16-bit instruction
+cond_jump 36; # 1 16-bit instruction
+
+### we'll transfer a total of 2400 bytes by making 5 transfers of 480 bytes;
+### our SPI peripheral is capable of transferring at most 511 bytes in one go
+## initialize transfers counter
+const 0; # 1 16-bit instruction
+storew h080000; # 2 16-bit instructions
+
+### transfers loop starts here, it'll execute 5 times
+# there were 26 16-bit instructions up to this point, so loop address is 52
+### read 480 bytes from SPI
+## set bytes_to_output to 5
+const 5; # 1 16-bit instruction
+storew h140200; # 2 16-bit instructions
+## set bytes_to_receive to 480
+const 480; # 2 16-bit instructions
+storew h140202; # 2 16-bit instructions
+## fast read SPI command
+const 0x0B; # 1 16-bit instruction
+storeb h140000; # 2 16-bit instructions
+## prepare address; 135100 is base address of our data inside flash memory
+const 135100; # 2 16-bit instructions
+## compute the offset of bytes, that have already been read
+loadwzx h080000; # 2 16-bit instructions
+const 480; # 2 16-bit instructions
+mul; # 1 16-bit instruction
+## also store the computed offset for later
+storew h080002; # 2 16-bit instructions
+## add the offset
+loadwzx h080002; # 2 16-bit instructions
+add; # 1 16-bit instruction
+## store the address in big endian
+store h080008; # 2 16-bit instructions
+load h080008; # 2 16-bit instructions
+const 256; # 2 16-bit instructions
+div; # 1 16-bit instruction
+storew h140002; # 2 16-bit instructions
+load h080008; # 2 16-bit instructions
+storeb h140003; # 2 16-bit instructions
+load h080008; # 2 16-bit instructions
+const 65536; # 2 16-bit instructions
+div; # 1 16-bit instruction
+storeb h140001; # 2 16-bit instructions
+## start SPI operation
+const 1; # 1 16-bit instruction
+storew h140204; # 2 16-bit instructions
+
+### force wait for operation completion
+const 0; # 1 16-bit instruction
+storew h140200; # 2 16-bit instructions
+
+### assume data to be ascii characters and print it to screen
+### initialize index to 0
+const 0; # 1 16-bit instruction
+storew h080004; # 2 16-bit instructions
+
+## print characters in a loop
+# there were 77 16-bit instructions up to this point, so loop address is 154
+## load current index value, get 2 copies of it, add offset to one copy
+loadwzx h080004
+loadwzx h080002
+add
+loadwzx h080004
+## load 4 bytes from SPI memory
+load+ h140000
+## write to VGA memory
+store+ h100000
+## increase index
+loadwzx h080004
+const 4
+add
+storew h080004
+## compare index to 479
+loadwzx h080004
+const 479
+lt
+cond_jump 154
+
+## increase transfers count
+loadwzx h080000
+const 1
+add
+storew h080000
+
+## switch LED2
+loadwzx h080000
+const 2
+rem
+storew h1C0006
+
+## if less than 5 transfers were done, continue with another one
+loadwzx h080000
+const 5
+lt
+cond_jump 52
+
+### after loops, start generating VGA output
+const -1
+store h100A00
+
+halt
diff --git a/examples/example_toplevel.v b/examples/example_toplevel.v
index f6cdf69..f1501e3 100644
--- a/examples/example_toplevel.v
+++ b/examples/example_toplevel.v
@@ -23,6 +23,16 @@
; /* Cause syntax error */
`endif
+`ifndef SPI_ROM_FILE
+ `define SPI_ROM_FILE "/dev/null"
+ `define SPI_ROM_WORDS_COUNT 0
+`else
+ `ifndef SPI_ROM_WORDS_COUNT
+ `error_SPI_ROM_WORDS_COUNT_not_defined
+; /* Cause syntax error */
+ `endif
+`endif
+
`ifndef FINISH_ON_IMAGE_WRITES
`define FINISH_ON_IMAGE_WRITES 0
`else
@@ -67,7 +77,9 @@ module example();
#(
.FONT_FILE(`FONT_FILE),
.EMBEDDED_ROM_WORDS_COUNT(`ROM_WORDS_COUNT),
- .EMBEDDED_ROM_FILE(`EMBEDDED_ROM_FILE)
+ .EMBEDDED_ROM_FILE(`EMBEDDED_ROM_FILE),
+ .SPI_ROM_WORDS_COUNT(`SPI_ROM_WORDS_COUNT),
+ .SPI_ROM_FILE(`SPI_ROM_FILE)
) soc
(
.clock_100mhz(clock_100mhz),
diff --git a/models/soc_with_peripherals.v b/models/soc_with_peripherals.v
index 40ee341..ec92238 100644
--- a/models/soc_with_peripherals.v
+++ b/models/soc_with_peripherals.v
@@ -14,7 +14,7 @@ module soc_with_peripherals
parameter EMBEDDED_ROM_WORDS_COUNT = "error, rom words count not given",
parameter EMBEDDED_ROM_FILE = {DESIGN_DIR, "rom.mem"},
parameter SPI_ROM_WORDS_COUNT = 0,
- parameter SPI_ROM_FILE = "/dev/zero"
+ parameter SPI_ROM_FILE = "/dev/null"
)
(
input wire clock_100mhz,