/*
 * design/stack_machine.v
 *
 * Provenance: introduced by commit dc41391380effbe0d16d024e290b87a7f5d39bf2
 * (Wojciech Kosior, 2020-09-05, "start another attempt for good stack
 * machine design").
 */
`default_nettype none

/*
 * stack_machine_new - stack machine core with two wishbone master interfaces:
 * a read-only, 16-bit wide instruction interface (I_*) and a 32-bit wide
 * read/write data interface (D_*).
 *
 * Execution alternates between two steps: STEP_LOADING_INSTRUCTION (fetch one
 * 16-bit instruction at pc, then pc += 2) and STEP_EXECUTING (perform the
 * instruction, including any data-interface transfers it needs).
 *
 * The two topmost stack values are kept in registers r0 and r1; when the
 * stack grows or shrinks, values are transferred between r0 and memory
 * pointed to by sp.
 *
 * Instruction layout, as decoded below:
 *   [15]     set_im - instruction only loads a 15-bit payload into im
 *   [14]     use_im - instruction carries a 7-bit payload in [6:0]
 *   [13:12]  stack effect: 00 - same size, 01 - grows,
 *                          10 - shrinks by 2, 11 - shrinks by 1
 *   [11:10]  2'b11 (together with use_im) selects load/store
 *   [9:8]    operand size: byte/word/dword/qword
 *   [7]      sign-extension flag
 *
 * Output `finished` goes high once a halt instruction has been executed.
 */
module stack_machine_new
  (
   /* Those 2 are supposed to be common for both wishbone interfaces */
   input wire        CLK_I,
   input wire        RST_I,

   /* Instruction reading interface */
   input wire        I_ACK_I,
   output reg [19:0] I_ADR_O,
   input wire [15:0] I_DAT_I,
   output reg [15:0] I_DAT_O, /* Not used, interface read-only */
   output reg        I_STB_O,
   output reg        I_CYC_O,
   output reg        I_WE_O, /* Always 0, interface read-only */
   input wire        I_STALL_I,

   /* Data interface */
   input wire        D_ACK_I,
   input wire        D_ERR_I, /* We'll start using it soon */
   output reg [20:0] D_ADR_O,
   input wire [31:0] D_DAT_I,
   output reg [31:0] D_DAT_O,
   output reg [3:0]  D_SEL_O, /* We'll start using it soon */
   output reg        D_STB_O,
   output reg        D_CYC_O,
   output reg        D_WE_O,
   input wire        D_STALL_I,

   /* non-wishbone */
   output wire       finished
   );

   /* TODO: get back to the good old habit of using wires for all ports */
   /*
    * Constant outputs. This block is the ONLY driver of I_DAT_O, I_WE_O and
    * D_SEL_O; the clocked block below must not assign them.
    */
   always @* begin
      if (CLK_I || !CLK_I) begin /* avoiding "found no sensitivities" warning */
         I_DAT_O = 16'bx;
         I_WE_O = 1'b0;

         D_SEL_O = 4'hF; /* This one is temporary */
      end
   end

   reg [20:0] pc; /* program counter (byte address of next instruction) */
   reg [20:0] sp; /* stack pointer */

   always @* begin /* pc and sp should always be word-aligned */
      if (CLK_I || !CLK_I) begin
         pc[0] = 0;
         sp[0] = 0;
      end
   end

   /*
    * Only bits [20:1] of pc and sp are registered (bit 0 is tied to 0 above).
    * The "if (1) ... else" wrapper lets the macros be used like a statement
    * terminated with a semicolon.
    */
`define SET_PC(address) if (1) begin pc[20:1] <= (address) / 2; end else
`define SET_SP(address) if (1) begin sp[20:1] <= (address) / 2; end else

   reg [31:0] r0; /* second value from the top of the stack */
   reg [31:0] r1; /* value on top of the stack */
   reg [31:0] im; /* immediate value being assembled */

   /* Set when the previous instruction was a set_im one (im partially set) */
   reg        im_initialized;

   parameter STEP_LOADING_INSTRUCTION = 1'b0;
   parameter STEP_EXECUTING = 1'b1;
   reg        step;
   reg        first_execution_tick; /* high in 1st cycle of STEP_EXECUTING */

   reg [15:0] instruction;

   /* Results of instruction parsing */

   /*
    * This flag informs us, that this is the special instruction used solely
    * for setting im (it uses 15-bit payload instead of 7-bit one)
    */
   wire       set_im;
   assign set_im = instruction[15];

   /*
    * This flag informs us whether instruction uses immediate (all instructions
    * that use it must contain a 7-bit payload)
    */
   wire       use_im;
   assign use_im = instruction[14] && !set_im;

   /* Payloads for both kinds of instructions, that modify im */
   wire [6:0] short_payload;
   assign short_payload = instruction[6:0];

   wire [14:0] long_payload;
   assign long_payload = instruction[14:0];

   /* Sign-extending payload when setting im */
   wire        payload_msb;
   assign payload_msb = set_im ? long_payload[14] : short_payload[6];

   wire [31:0] sign_extended_payload;
   assign sign_extended_payload = set_im ? {{17{payload_msb}}, long_payload} :
                                  use_im ? {{25{payload_msb}}, short_payload} :
                                  32'bx;

   /* Shifting payload into im that was already partially initialized */
   wire [31:0] im_shifted_payload;
   assign im_shifted_payload = set_im ? {im[16:0], long_payload} :
                               use_im ? {im[24:0], short_payload} :
                               32'bx;

   /*
    * If im has already been partially initialized, we'll just shift our
    * payload into it. Otherwise, we sign-extend our payload and put it in im.
    */
   wire [31:0] im_effective;
   assign im_effective = im_initialized ?
                         im_shifted_payload :
                         sign_extended_payload;

   /* Upon instruction stack can grow, shrink or remain the same size */
   wire        stack_shrinks;
   assign stack_shrinks = instruction[13] == 1'b1 && !set_im;

   wire        stack_shrinks_by_2;
   assign stack_shrinks_by_2 = stack_shrinks && instruction[12] == 1'b0;

   wire        stack_grows;
   assign stack_grows = instruction[13:12] == 2'b01 && !set_im;

   wire        stack_same_size;
   assign stack_same_size = instruction[13:12] == 2'b00 || set_im;

   /* If instruction[11:10] == 2'b11, we have some load or store */
   wire        store;
   assign store = stack_shrinks && use_im && instruction[11:10] == 2'b11;

   wire        load;
   assign load = (stack_grows || stack_same_size) && use_im &&
                 instruction[11:10] == 2'b11;

   /*
    * Loads and stores can use either im or r1+im (r0+im) as address. Obviously,
    * a variant of load/store that uses r1 (r0), consumes one more operand.
    */
   wire        addressing_with_operand;
   assign addressing_with_operand = (load && stack_same_size) ||
                                    (store && stack_shrinks_by_2);

   wire [20:0] address_operand;
   assign address_operand = load ? r1[20:0] : r0[20:0];

   /* Result is truncated to the 21 address bits */
   wire [20:0] addr_to_use;
   assign addr_to_use = addressing_with_operand ?
                        im_effective + address_operand : im_effective;

   /*
    * Those tell us, how many bytes are load'ed or store'd. We might also later
    * use those flags with instructions (e.g. type promotion).
    */
   wire        byte_operation;
   wire        word_operation;
   wire        dword_operation;
   wire        qword_operation; /* We won't implement these in hw */

   assign byte_operation  = instruction[9:8] == 2'b00;
   assign word_operation  = instruction[9:8] == 2'b01;
   assign dword_operation = instruction[9:8] == 2'b10;
   assign qword_operation = instruction[9:8] == 2'b11;

   /* Flag mainly meant for load instructions, but not exclusively */
   wire        sign_extend;
   assign sign_extend = instruction[7];

   /* Instructions other than load and store go here */

   /* Instructions, that do not change stack size */
   wire        instr_halt;
   assign instr_halt = !set_im && !use_im && stack_same_size &&
                       instruction[11:0] == 12'd0;

   wire        instr_nop;
   assign instr_nop = !set_im && !use_im && stack_same_size &&
                      instruction[11:0] == 12'd1;

   wire        instr_set_sp;
   assign instr_set_sp = use_im && stack_same_size &&
                         instruction[11:7] == 5'd0;

   /* Instructions, that grow the stack */
   wire        instr_const;
   assign instr_const = use_im && stack_grows && instruction[11:7] == 5'd0;

   /* Instructions, that shrink the stack */
   /* none now */

   reg         halt; /* Set once a halt instruction is encountered */
   assign finished = halt;

   /* Instruction interface is addressed in 16-bit units */
   always @*
     I_ADR_O = pc / 2;

   /* Set once the fetch strobe has been accepted (I_STB_O && !I_STALL_I) */
   reg         instruction_requested;

   /* Copy of r0 taken in the first execution tick, for a delayed stack write */
   reg [31:0]  stack_put_value;

   /*
    * Bookkeeping of data-interface commands belonging to the current
    * instruction. "unrequested" flags track commands whose strobe has not
    * been accepted yet; "uncompleted" flags track commands not yet ACKed.
    * The load/store command (if any) always goes before stack transfers;
    * the 2-bit stack transfer vectors are shifted left as transfers advance.
    */
   reg         load_store_unrequested;
   reg [1:0]   stack_transfer_unrequested;

   wire        data_request_happens;
   wire [1:0]  stack_transfer_request_happens;
   assign data_request_happens = D_STB_O && !D_STALL_I;
   assign stack_transfer_request_happens[0] = !load_store_unrequested &&
                                              data_request_happens;
   assign stack_transfer_request_happens[1] = !load_store_unrequested &&
                                              !stack_transfer_unrequested[0] &&
                                              data_request_happens;

   reg         load_store_uncompleted;
   reg [1:0]   stack_transfer_uncompleted;

   wire        data_command_completes;
   wire [1:0]  stack_transfer_completes;
   assign data_command_completes = D_ACK_I && D_CYC_O;
   assign stack_transfer_completes[0] = !load_store_uncompleted &&
                                        data_command_completes;
   assign stack_transfer_completes[1] = !load_store_uncompleted &&
                                        !stack_transfer_uncompleted[0] &&
                                        data_command_completes;

   /*
    * Main state machine. Note that statement order matters here: where
    * several nonblocking assignments target the same register in one tick,
    * the last one executed wins.
    */
   always @ (posedge CLK_I) begin
      if (RST_I) begin
         `SET_PC(0);
         `SET_SP(21'h0FFFFF);

         I_STB_O <= 0;
         I_CYC_O <= 0;

         step <= STEP_LOADING_INSTRUCTION;
         instruction_requested <= 0;

         /*
          * Without this, the first instruction after reset could shift its
          * payload into stale im contents instead of sign-extending it.
          */
         im_initialized <= 0;

         stack_put_value <= 32'bx;

         D_ADR_O <= 21'bx;
         D_DAT_O <= 32'bx;
         /* D_SEL_O is driven by the combinational block above, not here */
         D_STB_O <= 0;
         D_CYC_O <= 0;
         D_WE_O <= 0;

         halt <= 0;
      end else begin // if (RST_I)
         case (step)
           STEP_LOADING_INSTRUCTION : begin
              instruction <= I_DAT_I;

              if (I_STB_O && !I_STALL_I)
                instruction_requested <= 1;

              /* Keep strobing until the request gets accepted */
              I_STB_O <= !instruction_requested && !(I_STB_O && !I_STALL_I);
              I_CYC_O <= 1;

              if (I_CYC_O && I_ACK_I) begin
                 instruction_requested <= 0;

                 `SET_PC(pc + 2);

                 step <= STEP_EXECUTING;
                 I_CYC_O <= 0;
              end

              /* Prepare bookkeeping for the upcoming execution step */
              first_execution_tick <= 1;
              load_store_unrequested <= 0;
              stack_transfer_unrequested <= 2'b0;
              load_store_uncompleted <= 0;
              stack_transfer_uncompleted <= 2'b0;
           end // case: STEP_LOADING_INSTRUCTION
           STEP_EXECUTING : begin
              first_execution_tick <= 0;

              if (instr_halt)
                halt <= 1;

              /*
               * Stay in this step while memory commands are just being
               * started or are still pending, or once halted.
               */
              if (((stack_grows || stack_shrinks || load || store) &&
                   first_execution_tick) ||
                  (load_store_uncompleted &&
                   !data_command_completes) ||
                  (stack_transfer_uncompleted[1] &&
                   !stack_transfer_completes[1]) || halt) begin
                 step <= STEP_EXECUTING; /* Remain where we are */
              end else begin
                 step <= STEP_LOADING_INSTRUCTION;

                 I_STB_O <= 1;
                 I_CYC_O <= 1;

                 D_CYC_O <= 0;
                 D_STB_O <= 0;
                 D_WE_O <= 0;
              end

              /* Note down, which commands this instruction has to perform */
              if (first_execution_tick) begin
                 if (load || store) begin
                    load_store_unrequested <= 1;
                    load_store_uncompleted <= 1;
                 end

                 if (stack_shrinks_by_2) begin
                    stack_transfer_unrequested <= 2'b11;
                    stack_transfer_uncompleted <= 2'b11;
                 end else if (stack_grows || stack_shrinks) begin
                    stack_transfer_unrequested <= 2'b10;
                    stack_transfer_uncompleted <= 2'b10;
                 end
              end

              /* Issue the first data command of this instruction */
              if (first_execution_tick) begin
                 if (load) begin
                    D_ADR_O <= addr_to_use;
                    D_DAT_O <= 32'bx;
                    /* D_SEL_O <= ????; */ /* We'll later set this one */
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 0;
                 end else if (store) begin
                    D_ADR_O <= addr_to_use;
                    D_DAT_O <= r1;
                    /* D_SEL_O <= ????; */ /* We'll later set this one */
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 1;
                 end else if (stack_shrinks) begin
                    `SET_SP(sp + 4);
                    D_ADR_O <= sp;
                    D_DAT_O <= 32'bx;
                    /* D_SEL_O <= 4'hF; */
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 0;
                 end else if (stack_grows) begin
                    `SET_SP(sp - 4);
                    D_ADR_O <= sp - 4;
                    D_DAT_O <= r0;
                    /* D_SEL_O <= 4'hF; */
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 1;
                 end

                 /*
                  * If we want to offload value to memory because of stack
                  * growth, we may need to wait for load or store to complete
                  * first. In such case we need to back up the stack value.
                  */
                 stack_put_value <= r0;
              end // if (first_execution_tick)

              /* A strobe got accepted - advance to the next command, if any */
              if (data_request_happens) begin
                 if (load_store_unrequested) begin
                    load_store_unrequested <= 0;
                 end else begin
                    stack_transfer_unrequested
                      <= {stack_transfer_unrequested[0], 1'b0};
                 end

                 if (load_store_unrequested ||
                     stack_transfer_unrequested[0]) begin
                    if (stack_shrinks) begin
                       `SET_SP(sp + 4);
                       D_ADR_O <= sp;
                       D_DAT_O <= 32'bx;
                       /* D_SEL_O <= 4'hF; */
                       D_STB_O <= 1;
                       D_WE_O <= 0;
                    end else if (stack_grows) begin
                       `SET_SP(sp - 4);
                       D_ADR_O <= sp - 4;
                       D_DAT_O <= stack_put_value;
                       /* D_SEL_O <= 4'hF; */
                       D_STB_O <= 1;
                       D_WE_O <= 1;
                    end
                 end else begin // if (load_store_unrequested ||...
                    /* Nothing more to request */
                    D_ADR_O <= 21'bx;
                    D_DAT_O <= 32'bx;
                    /* D_SEL_O <= 4'bx; */
                    D_STB_O <= 0;
                    D_WE_O <= 0;
                 end // else: !if(load_store_unrequested ||...
              end // if (data_request_happens)

              /* A command got ACKed - update the completion bookkeeping */
              if (data_command_completes) begin
                 if (load_store_uncompleted) begin
                    load_store_uncompleted <= 0;
                 end else begin
                    stack_transfer_uncompleted
                      <= {stack_transfer_uncompleted[0], 1'b0};
                 end

                 if (!(load_store_uncompleted ||
                       stack_transfer_uncompleted[0]))
                   D_CYC_O <= 0;
              end

              /* Any completing stack read refills r0 */
              if (stack_shrinks && (|stack_transfer_completes))
                r0 <= D_DAT_I;

              if (store)
                r1 <= r0;

              if (stack_grows && first_execution_tick)
                r0 <= r1;

              if (load && load_store_uncompleted)
                r1 <= D_DAT_I;

              im_initialized <= set_im;

              /*
               * im only carries a meaningful value between consecutive
               * immediate-modifying instructions.
               */
              if (set_im || use_im)
                im <= im_effective;
              else
                im <= 32'bx;

              if (instr_const && first_execution_tick)
                r1 <= im_effective;

              if (instr_set_sp)
                `SET_SP(im_effective);
           end // case: STEP_EXECUTING
         endcase // case (step)
      end // else: !if(RST_I)
   end // always @ (posedge CLK_I)

`ifdef SIMULATION
   /*
    * RST should still be used when powering up, even in benches;
    * this is just to avoid undefined values
    */
   initial begin
      I_ADR_O <= 0;
      I_STB_O <= 0;
      I_CYC_O <= 0;

      D_ADR_O <= 0;
      D_DAT_O <= 0;
      D_STB_O <= 0;
      D_CYC_O <= 0;
      D_WE_O <= 0;

      `SET_PC(0);
      `SET_SP(0);

      r0 <= 0;
      r1 <= 0;
      im <= 0;

      im_initialized <= 0;

      step <= 0;
      first_execution_tick <= 0;

      instruction <= 0;

      halt <= 0;

      instruction_requested <= 0;

      stack_put_value <= 0;

      load_store_unrequested <= 0;
      stack_transfer_unrequested <= 2'b0;

      load_store_uncompleted <= 0;
      stack_transfer_uncompleted <= 2'b0;
   end // initial begin
`endif
endmodule // stack_machine_new