`default_nettype none

`include "messages.vh"

/*
 * This module provides 2 Wishbone MASTER interfaces described below.
 * CLK_I and RST_I signals are shared between those interfaces.
 * Two interfaces can, but don't have to, be made to access the same memory map.
 * Instructions interface never performs writes (its WE_O is hardwired to low).
 *
 * |                           *WISHBONE DATASHEET*                            |
 * |---------------------------------------------------------------------------|
 * | *Description*                   | *Specification*                         |
 * |---------------------------------+-----------------------------------------|
 * | General description             | stack machine core data interface       |
 * |---------------------------------+-----------------------------------------|
 * | Supported cycles                | MASTER, pipelined READ/WRITE            |
 * |---------------------------------+-----------------------------------------|
 * | Data port, size                 | 32-bit                                  |
 * | Data port, granularity          | 8-bit                                   |
 * | Data port, maximum operand size | 32-bit                                  |
 * | Data transfer ordering          | Little endian                           |
 * | Data transfer sequencing        | Undefined                               |
 * | Address port, size              | 21-bit                                  |
 * |---------------------------------+-----------------------------------------|
 * | Clock frequency constraints     | NONE                                    |
 * |---------------------------------+-----------------------------------------|
 * |                                 | *Signal name*    | *WISHBONE Equiv.*    |
 * |                                 |------------------+----------------------|
 * |                                 | D_ACK_I          | ACK_I                |
 * |                                 | D_ADR_O          | ADR_O()              |
 * | Supported signal list and cross | CLK_I            | CLK_I                |
 * | reference to equivalent         | D_DAT_I          | DAT_I()              |
 * | WISHBONE signals                | D_DAT_O          | DAT_O()              |
 * |                                 | D_SEL_O          | SEL_O                |
 * |                                 | D_STB_O          | STB_O                |
 * |                                 | D_CYC_O          | CYC_O                |
 * |                                 | D_WE_O           | WE_O                 |
 * |                                 | RST_I            | RST_I                |
 * |                                 | D_STALL_I        | STALL_I              |
 * |---------------------------------+-----------------------------------------|
 * | Special requirements            | NONE                                    |
 *
 *
 * |                           *WISHBONE DATASHEET*                            |
 * |---------------------------------------------------------------------------|
 * | *Description*                   | *Specification*                         |
 * |---------------------------------+-----------------------------------------|
 * | General description             | stack machine core instructions         |
 * |                                 | interface                               |
 * |---------------------------------+-----------------------------------------|
 * | Supported cycles                | MASTER, pipelined READ                  |
 * |---------------------------------+-----------------------------------------|
 * | Data port, size                 | 16-bit                                  |
 * | Data port, granularity          | 16-bit                                  |
 * | Data port, maximum operand size | 16-bit                                  |
 * | Data transfer ordering          | Big endian and/or little endian         |
 * | Data transfer sequencing        | Undefined                               |
 * | Address port, size              | 20-bit                                  |
 * |---------------------------------+-----------------------------------------|
 * | Clock frequency constraints     | NONE                                    |
 * |---------------------------------+-----------------------------------------|
 * |                                 | *Signal name*    | *WISHBONE Equiv.*    |
 * |                                 |------------------+----------------------|
 * |                                 | I_ACK_I          | ACK_I                |
 * |                                 | I_ADR_O          | ADR_O()              |
 * | Supported signal list and cross | CLK_I            | CLK_I                |
 * | reference to equivalent         | I_DAT_I          | DAT_I()              |
 * | WISHBONE signals                | I_DAT_O          | DAT_O()              |
 * |                                 | I_STB_O          | STB_O                |
 * |                                 | I_CYC_O          | CYC_O                |
 * |                                 | I_WE_O           | WE_O                 |
 * |                                 | RST_I            | RST_I                |
 * |                                 | I_STALL_I        | STALL_I              |
 * |---------------------------------+-----------------------------------------|
 * | Special requirements            | NONE                                    |
 *
 * NOTE: the instructions interface has no SEL_O output in the port list below
 * (its port size equals its granularity, so there is nothing to select).
 */

module stack_machine_new
  (
   /* Those 2 are supposed to be common for both wishbone interfaces */
   input wire        CLK_I,
   input wire        RST_I,

   /* Instruction reading interface */
   input wire        I_ACK_I,
   output reg [19:0] I_ADR_O,
   input wire [15:0] I_DAT_I,
   output reg [15:0] I_DAT_O, /* Not used, interface read-only */
   output reg        I_STB_O,
   output reg        I_CYC_O,
   output reg        I_WE_O, /* Always 0, interface read-only */
   input wire        I_STALL_I,

   /* Data interface */
   input wire        D_ACK_I,
   output reg [20:0] D_ADR_O,
   input wire [31:0] D_DAT_I,
   output reg [31:0] D_DAT_O,
   output reg [3:0]  D_SEL_O,
   output reg        D_STB_O,
   output reg        D_CYC_O,
   output reg        D_WE_O,
   input wire        D_STALL_I,

   /* non-wishbone */
   output wire       finished /* High once a halt instruction was executed */
   );

   /* TODO: get back to the good old habit of using wires for all ports */
   always @* begin
      if (CLK_I || !CLK_I) begin /* avoiding "found no sensitivities" warning */
         I_DAT_O = 16'bx;
         I_WE_O = 1'b0;
      end
   end

   /*
    * Program counter and stack pointer. Only bits [20:1] are registered;
    * bit 0 of each is tied low by the combinational block below.
    */
   reg [20:0] pc;
   reg [20:0] sp;

   always @* begin /* pc and sp should always be word-aligned */
      if (CLK_I || !CLK_I) begin
         pc[0] = 0;
         sp[0] = 0;
      end
   end

   /*
    * Helpers for writing the registered bits of pc and sp. The
    * "if (1) ... else" wrapping makes the macro consume the semicolon
    * that follows its invocation, so it can be used like a statement.
    */
`define SET_PC(address) if (1) begin pc[20:1] <= (address) / 2; end else
`define SET_SP(address) if (1) begin sp[20:1] <= (address) / 2; end else

   /* Two on-chip operand registers */
   reg [31:0] r0;
   reg [31:0] r1;

   /* Signed views of r0 and r1, used by signed comparisons */
   wire signed [31:0] r0s;
   wire signed [31:0] r1s;
   assign r0s = r0;
   assign r1s = r1;

   /* Immediate value register and flag telling whether it holds a value */
   reg [31:0] im;
   reg        im_initialized;

   parameter STEP_LOADING_INSTRUCTION = 1'b0;
   parameter STEP_EXECUTING = 1'b1;
   reg        step;
   reg        first_execution_tick; /* High in 1st tick of STEP_EXECUTING */

   reg [15:0] instruction;

   /* Results of instruction parsing */

   /*
    * This flag informs us, that this is the special instruction used solely
    * for setting im (it uses 15-bit payload instead of 7-bit one)
    */
   wire       set_im;
   assign set_im = instruction[15];

   /*
    * This flag informs us whether instruction uses immediate (all instructions
    * that use it must contain a 7-bit payload)
    */
   wire       use_im;
   assign use_im = instruction[14] && !set_im;

   /* Payloads for both kinds of instructions, that modify im */
   wire [6:0] short_payload;
   assign short_payload = instruction[6:0];
   wire [14:0] long_payload;
   assign long_payload = instruction[14:0];

   /* Sign-extending payload when setting im */
   wire        payload_msb;
   assign payload_msb = set_im ? long_payload[14] : short_payload[6];

   wire [31:0] sign_extended_payload;
   assign sign_extended_payload = set_im ? {{17{payload_msb}}, long_payload} :
                                  use_im ? {{25{payload_msb}}, short_payload} :
                                  32'bx;

   /* Shifting payload into im that was already partially initialized */
   wire [31:0] im_shifted_payload;
   assign im_shifted_payload = set_im ? {im[16:0], long_payload} :
                               use_im ? {im[24:0], short_payload} :
                               32'bx;

   /*
    * If im has already been partially initialized, we'll just shift our
    * payload into it. Otherwise, we sign-extend our payload and put it in im.
    */
   wire [31:0] im_effective;
   assign im_effective = im_initialized ? im_shifted_payload :
                         sign_extended_payload;

   /* Upon instruction stack can grow, shrink or remain the same size */
   wire        stack_shrinks;
   assign stack_shrinks = instruction[13] == 1'b1 && !set_im;
   wire        stack_shrinks_by_1;
   assign stack_shrinks_by_1 = stack_shrinks && instruction[12] == 1'b1;
   wire        stack_shrinks_by_2;
   assign stack_shrinks_by_2 = stack_shrinks && instruction[12] == 1'b0;
   wire        stack_grows;
   assign stack_grows = instruction[13:12] == 2'b01 && !set_im;
   wire        stack_same_size;
   assign stack_same_size = instruction[13:12] == 2'b00 || set_im;

   /* If instruction[11:10] == 2'b11, we have some load or store */
   wire        store;
   assign store = stack_shrinks && use_im && instruction[11:10] == 2'b11;
   wire        load;
   assign load = (stack_grows || stack_same_size) && use_im &&
                 instruction[11:10] == 2'b11;

   /*
    * Loads and stores can use either im or r1+im (r0+im) as address. Obviously,
    * a variant of load/store that uses r1 (r0), consumes one more operand.
    */
   wire        addressing_with_operand;
   assign addressing_with_operand = (load && stack_same_size) ||
                                    (store && stack_shrinks_by_2);

   wire [20:0] address_operand;
   assign address_operand = load ? r1[20:0] : r0[20:0];

   wire [20:0] addr_to_use;
   assign addr_to_use = addressing_with_operand ?
                        im_effective + address_operand : im_effective;

   /*
    * Those tell us, how many bytes are load'ed or store'd. We might also later
    * use those flags with instructions (e.g. type promotion).
    */
   wire        byte_operation;
   wire        word_operation;
   wire        dword_operation;
   wire        qword_operation; /* We won't implement these in hw */
   wire [3:0]  instruction_select_mask;

   assign byte_operation = instruction[9:8] == 2'b00;
   assign word_operation = instruction[9:8] == 2'b01;
   assign dword_operation = instruction[9:8] == 2'b10;
   assign qword_operation = instruction[9:8] == 2'b11;

   assign instruction_select_mask = byte_operation ? 4'b0001 :
                                    word_operation ? 4'b0011 :
                                    4'b1111;

   /* Flag mainly meant for load instructions, but not exclusively */
   wire        sign_extend;
   assign sign_extend = instruction[7];

   /* Bit replicated into the upper part of r1 on byte and word loads */
   wire        loaded_value_sign;
   assign loaded_value_sign = !sign_extend ? 0 :
                              byte_operation ? D_DAT_I[7] :
                              word_operation ? D_DAT_I[15] :
                              1'bx;

   /* Instructions other than load and store go here */

   /* Instructions, that do not change stack size */
   wire        instr_halt;
   assign instr_halt = !set_im && !use_im && stack_same_size &&
                       instruction[11:0] == 12'd0;

   wire        instr_nop;
   assign instr_nop = !set_im && !use_im && stack_same_size &&
                      instruction[11:0] == 12'd1;

   wire        instr_swap;
   assign instr_swap = !set_im && !use_im && stack_same_size &&
                       instruction[11:0] == 12'd2;

   wire        instr_set_sp;
   assign instr_set_sp = use_im && stack_same_size &&
                         instruction[11:7] == 5'd0;

   wire        instr_jump;
   assign instr_jump = use_im && stack_same_size &&
                       instruction[11:7] == 5'd1;

   wire        instr_add_sp;
   assign instr_add_sp = use_im && stack_same_size &&
                         instruction[11:7] == 5'd2;

   /* Instructions, that grow stack */
   wire        instr_tee;
   assign instr_tee = !set_im && !use_im && stack_grows &&
                      instruction[11:0] == 12'd0;

   wire        instr_get_frame;
   assign instr_get_frame = !set_im && !use_im && stack_grows &&
                            instruction[11:0] == 12'd1;

   wire        instr_const;
   assign instr_const = use_im && stack_grows &&
                        instruction[11:7] == 5'd0;

   wire        instr_call;
   assign instr_call = use_im && stack_grows &&
                       instruction[11:7] == 5'd1;

   /* Instructions, that shrink stack */
   wire        instr_add;
   assign instr_add = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd0;

   wire        instr_sub;
   assign instr_sub = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd1;

   wire        instr_udiv;
   assign instr_udiv = !set_im && !use_im && stack_shrinks_by_1 &&
                       instruction[11:0] == 12'd2;

   wire        instr_mul;
   assign instr_mul = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd3;

   wire        instr_drop;
   assign instr_drop = !set_im && !use_im && stack_shrinks_by_1 &&
                       instruction[11:0] == 12'd4;

   wire        instr_eq;
   assign instr_eq = !set_im && !use_im && stack_shrinks_by_1 &&
                     instruction[11:0] == 12'd7;

   wire        instr_lt;
   assign instr_lt = !set_im && !use_im && stack_shrinks_by_1 &&
                     instruction[11:0] == 12'd8;

   wire        instr_ult;
   assign instr_ult = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd9;

   wire        instr_le;
   assign instr_le = !set_im && !use_im && stack_shrinks_by_1 &&
                     instruction[11:0] == 12'd10;

   wire        instr_ule;
   assign instr_ule = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd11;

   wire        instr_gt;
   assign instr_gt = !set_im && !use_im && stack_shrinks_by_1 &&
                     instruction[11:0] == 12'd12;

   wire        instr_ugt;
   assign instr_ugt = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd13;

   wire        instr_ge;
   assign instr_ge = !set_im && !use_im && stack_shrinks_by_1 &&
                     instruction[11:0] == 12'd14;

   wire        instr_uge;
   assign instr_uge = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'd15;

   wire        instr_urem;
   assign instr_urem = !set_im && !use_im && stack_shrinks_by_1 &&
                       instruction[11:0] == 12'd16;

   wire        instr_ret;
   assign instr_ret = !set_im && !use_im && stack_shrinks_by_1 &&
                      instruction[11:0] == 12'b000010000000;

   wire        instr_cond_jump;
   assign instr_cond_jump = use_im && stack_shrinks_by_1 &&
                            instruction[11:7] == 5'd1;

   wire        instr_cond_jump_n;
   assign instr_cond_jump_n = use_im && stack_shrinks_by_1 &&
                              instruction[11:7] == 5'd2;

   reg         halt; /* Set once a halt instruction is encountered */
   assign finished = halt;

   /* module for division */
   wire [31:0] div_quotient;
   wire [31:0] div_remainder;
   wire        div_done;

   div
     #(
       .WIDTH(32)
       ) div
       (
        .clock(CLK_I),
        .start(step == STEP_EXECUTING && first_execution_tick),
        .dividend(r0),
        .divisor(r1),

        .quotient(div_quotient),
        .remainder(div_remainder),
        .done(div_done)
        );

   /*
    * Division instructions complete when the divider reports done, halt
    * never completes, everything else completes immediately.
    */
   reg         arithmetic_uncompleted;
   wire        arithmetic_completes;
   assign arithmetic_completes = (instr_udiv || instr_urem) ? div_done :
                                 instr_halt ? 0 :
                                 1;

   always @*
     I_ADR_O = pc / 2;

   reg         instruction_requested;

   /* Copy of r0 taken in the first execution tick (see use further below) */
   reg [31:0]  stack_put_value;

   /*
    * Bookkeeping of data interface requests that still have to be issued.
    * stack_transfer_unrequested is shifted left each time a request other
    * than the load/store one gets accepted.
    */
   reg         load_store_unrequested;
   reg [1:0]   stack_transfer_unrequested;
   wire        data_request_happens;
   wire [1:0]  stack_transfer_request_happens;

   assign data_request_happens = D_STB_O && !D_STALL_I;
   assign stack_transfer_request_happens[0] = !load_store_unrequested &&
                                              data_request_happens;
   assign stack_transfer_request_happens[1] = !load_store_unrequested &&
                                              !stack_transfer_unrequested[0] &&
                                              data_request_happens;

   /* Same bookkeeping, but for acknowledgements that are still awaited */
   reg         load_store_uncompleted;
   reg [1:0]   stack_transfer_uncompleted;
   wire        data_command_completes;
   wire [1:0]  stack_transfer_completes;

   assign data_command_completes = D_ACK_I && D_CYC_O;
   assign stack_transfer_completes[0] = !load_store_uncompleted &&
                                        data_command_completes;
   assign stack_transfer_completes[1] = !load_store_uncompleted &&
                                        !stack_transfer_uncompleted[0] &&
                                        data_command_completes;

   /*
    * Main state machine: alternates between fetching one instruction
    * (STEP_LOADING_INSTRUCTION) and executing it (STEP_EXECUTING).
    */
   always @ (posedge CLK_I) begin
      if (RST_I) begin
         `SET_PC(0);
         `SET_SP(21'h0FFFFC);

         I_STB_O <= 0;
         I_CYC_O <= 0;

         step <= STEP_LOADING_INSTRUCTION;
         instruction_requested <= 0;

         /*
          * Forget any partially assembled immediate. Without this, a reset
          * asserted right after an im-setting instruction would make the
          * first immediate after reset get shifted into the stale im
          * instead of being sign-extended.
          */
         im_initialized <= 0;

         stack_put_value <= 32'bx;

         D_ADR_O <= 21'bx;
         D_DAT_O <= 32'bx;
         D_SEL_O <= 4'bx;
         D_STB_O <= 0;
         D_CYC_O <= 0;
         D_WE_O <= 0;

         halt <= 0;
      end else begin // if (RST_I)
         case (step)
           STEP_LOADING_INSTRUCTION : begin
              instruction <= I_DAT_I;

              if (I_STB_O && !I_STALL_I)
                instruction_requested <= 1;

              /* Strobe only until the request has been accepted */
              I_STB_O <= !instruction_requested && !(I_STB_O && !I_STALL_I);
              I_CYC_O <= 1;

              if (I_CYC_O && I_ACK_I) begin
                 instruction_requested <= 0;
                 `SET_PC(pc + 2);
                 step <= STEP_EXECUTING;
                 I_CYC_O <= 0;
              end

              /* Prepare bookkeeping for the execution step */
              arithmetic_uncompleted <= 1;
              first_execution_tick <= 1;
              load_store_unrequested <= 0;
              stack_transfer_unrequested <= 2'b0;
              load_store_uncompleted <= 0;
              stack_transfer_uncompleted <= 2'b0;
           end // case: STEP_LOADING_INSTRUCTION
           STEP_EXECUTING : begin
              first_execution_tick <= 0;

              if (arithmetic_completes)
                arithmetic_uncompleted <= 0;

              if (((stack_grows || stack_shrinks || load || store) &&
                   first_execution_tick) ||
                  (load_store_uncompleted && !data_command_completes) ||
                  (stack_transfer_uncompleted[1] &&
                   !stack_transfer_completes[1]) ||
                  (arithmetic_uncompleted && !arithmetic_completes)) begin
                 step <= STEP_EXECUTING; /* Remain where we are */
              end else begin
                 step <= STEP_LOADING_INSTRUCTION;
                 I_STB_O <= 1;
                 I_CYC_O <= 1;
                 D_CYC_O <= 0;
              end

              /* Record which data transfers this instruction requires */
              if (first_execution_tick) begin
                 if (load || store) begin
                    load_store_unrequested <= 1;
                    load_store_uncompleted <= 1;
                 end

                 if (stack_shrinks_by_2) begin
                    stack_transfer_unrequested <= 2'b11;
                    stack_transfer_uncompleted <= 2'b11;
                 end else if (stack_grows || stack_shrinks) begin
                    stack_transfer_unrequested <= 2'b10;
                    stack_transfer_uncompleted <= 2'b10;
                 end
              end

              /* Issue the first data request of this instruction */
              if (first_execution_tick) begin
                 if (load) begin
                    D_ADR_O <= addr_to_use;
                    D_DAT_O <= 32'bx;
                    D_SEL_O <= instruction_select_mask;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 0;
                 end else if (store) begin
                    D_ADR_O <= addr_to_use;
                    D_DAT_O <= r1;
                    D_SEL_O <= instruction_select_mask;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 1;
                 end else if (stack_shrinks) begin
                    `SET_SP(sp + 4);
                    D_ADR_O <= sp;
                    D_DAT_O <= 32'bx;
                    D_SEL_O <= 4'b1111;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 0;
                 end else if (stack_grows) begin
                    `SET_SP(sp - 4);
                    D_ADR_O <= sp - 4;
                    D_DAT_O <= r0;
                    D_SEL_O <= 4'b1111;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 1;
                 end

                 /*
                  * If we want to offload value to memory because of stack
                  * growth, we may need to wait for load or store to complete
                  * first. In such case we need to back up the stack value.
                  */
                 stack_put_value <= r0;
              end // if (first_execution_tick)

              /* A request got accepted - issue the next one, if any */
              if (data_request_happens) begin
                 if (load_store_unrequested) begin
                    load_store_unrequested <= 0;
                 end else begin
                    stack_transfer_unrequested
                      <= {stack_transfer_unrequested[0], 1'b0};
                 end

                 if (stack_transfer_unrequested[0] ||
                     (load_store_unrequested &&
                      stack_transfer_unrequested[1])) begin
                    if (stack_shrinks) begin
                       `SET_SP(sp + 4);
                       D_ADR_O <= sp;
                       D_DAT_O <= 32'bx;
                       D_SEL_O <= 4'b1111;
                       D_STB_O <= 1;
                       D_WE_O <= 0;
                    end else /* if (stack_grows) */ begin
                       `SET_SP(sp - 4);
                       D_ADR_O <= sp - 4;
                       D_DAT_O <= stack_put_value;
                       D_SEL_O <= 4'b1111;
                       D_STB_O <= 1;
                       D_WE_O <= 1;
                    end
                 end else begin // if (stack_transfer_unrequested[0] ||...
                    D_ADR_O <= 21'bx;
                    D_DAT_O <= 32'bx;
                    D_SEL_O <= 4'bx;
                    D_STB_O <= 0;
                    D_WE_O <= 0;
                 end // else: !if(stack_transfer_unrequested[0] ||...
              end // if (data_request_happens)

              /* An acknowledgement arrived - update completion bookkeeping */
              if (data_command_completes) begin
                 if (load_store_uncompleted) begin
                    load_store_uncompleted <= 0;
                 end else begin
                    stack_transfer_uncompleted
                      <= {stack_transfer_uncompleted[0], 1'b0};
                 end

                 if (!(load_store_uncompleted ||
                       stack_transfer_uncompleted[0]))
                   D_CYC_O <= 0;
              end

              /*
               * Register updates. The order of statements below matters:
               * when several of them assign the same register in one tick,
               * the last assignment wins.
               */

              /* Any completing stack transfer refills r0 when stack shrinks */
              if (stack_shrinks && (|stack_transfer_completes))
                r0 <= D_DAT_I;

              if (store)
                r1 <= r0;

              if (stack_grows && first_execution_tick)
                r0 <= r1;

              if (load && load_store_uncompleted) begin
                 if (byte_operation)
                   r1 <= {{24{loaded_value_sign}}, D_DAT_I[7:0]};
                 else if (word_operation)
                   r1 <= {{16{loaded_value_sign}}, D_DAT_I[15:0]};
                 else
                   r1 <= D_DAT_I;
              end

              /* im is only meaningful right after an im-setting instruction */
              im_initialized <= set_im;

              if (set_im || use_im)
                im <= im_effective;
              else
                im <= 32'bx;

              /* Instructions, that do not change stack size */
              if (instr_halt)
                halt <= 1;

              if (instr_nop)
                r1 <= r1;

              if (instr_swap)
                {r0, r1} <= {r1, r0};

              if (instr_set_sp)
                `SET_SP(im_effective);

              if (instr_add_sp)
                `SET_SP(im_effective + sp);

              if (instr_jump)
                `SET_PC(im_effective);

              /* Instructions, that grow stack */
              if (instr_tee)
                r1 <= r1;

              if (instr_get_frame && first_execution_tick)
                r1 <= sp;

              if (instr_const && first_execution_tick)
                r1 <= im_effective;

              if (instr_call && first_execution_tick) begin
                 r1 <= pc;
                 `SET_PC(im_effective);
              end

              /* Instructions, that shrink stack */
              if (instr_add && arithmetic_uncompleted)
                r1 <= r0 + r1;

              if (instr_sub && arithmetic_uncompleted)
                r1 <= r0 - r1;

              if (instr_udiv && arithmetic_uncompleted)
                r1 <= div_quotient;

              if (instr_urem && arithmetic_uncompleted)
                r1 <= div_remainder;

              if (instr_mul && arithmetic_uncompleted)
                r1 <= r0 * r1;

              if (instr_drop && arithmetic_uncompleted)
                r1 <= r0;

              if ((instr_cond_jump || instr_cond_jump_n) &&
                  arithmetic_uncompleted) begin
                 r1 <= r0;

                 if ((r1 && instr_cond_jump) || (!r1 && instr_cond_jump_n))
                   `SET_PC(im_effective);
              end

              if (instr_eq && arithmetic_uncompleted)
                r1 <= r0 == r1;

              if (instr_lt && arithmetic_uncompleted)
                r1 <= r0s < r1s;

              if (instr_ult && arithmetic_uncompleted)
                r1 <= r0 < r1;

              if (instr_le && arithmetic_uncompleted)
                r1 <= r0s <= r1s;

              if (instr_ule && arithmetic_uncompleted)
                r1 <= r0 <= r1;

              if (instr_gt && arithmetic_uncompleted)
                r1 <= r0s > r1s;

              if (instr_ugt && arithmetic_uncompleted)
                r1 <= r0 > r1;

              if (instr_ge && arithmetic_uncompleted)
                r1 <= r0s >= r1s;

              if (instr_uge && arithmetic_uncompleted)
                r1 <= r0 >= r1;

              if (instr_ret && arithmetic_uncompleted) begin
                 r1 <= r0;
                 `SET_PC(r1);
              end

              if (first_execution_tick) begin
                 `DBG(("r0: %x r1: %x", r0, r1));
                 `DBG(("CPU: Executing %0s instruction",
                       store ? "store (kind?)" :
                       load ? "load (kind?)" :
                       instr_halt ? "halt" :
                       instr_nop ? "nop" :
                       instr_swap ? "swap" :
                       instr_set_sp ? "set_sp" :
                       instr_jump ? "jump" :
                       instr_add_sp ? "add_sp" :
                       instr_tee ? "tee" :
                       instr_get_frame ? "get_frame" :
                       instr_const ? "const" :
                       instr_call ? "call" :
                       instr_add ? "add" :
                       instr_sub ? "sub" :
                       instr_udiv ? "udiv" :
                       instr_mul ? "mul" :
                       instr_drop ? "drop" :
                       instr_eq ? "eq" :
                       instr_lt ? "lt" :
                       instr_ult ? "ult" :
                       instr_le ? "le" :
                       instr_ule ? "ule" :
                       instr_gt ? "gt" :
                       instr_ugt ? "ugt" :
                       instr_ge ? "ge" :
                       instr_uge ? "uge" :
                       instr_urem ? "urem" :
                       instr_ret ? "ret" :
                       instr_cond_jump ? "cond_jump" :
                       instr_cond_jump_n ? "cond_jump_n" :
                       set_im ? "im" :
                       "unknown"));
              end // if (first_execution_tick)
           end // case: STEP_EXECUTING
         endcase // case (step)
      end // else: !if(RST_I)
   end // always @ (posedge CLK_I)

`ifdef SIMULATION
   /*
    * RST should still be used when powering up, even in benches;
    * this is just to avoid undefined values
    */
   initial begin
      I_ADR_O <= 0;
      I_STB_O <= 0;
      I_CYC_O <= 0;

      D_ADR_O <= 0;
      D_DAT_O <= 0;
      D_STB_O <= 0;
      D_CYC_O <= 0;
      D_WE_O <= 0;

      `SET_PC(0);
      `SET_SP(0);

      r0 <= 0;
      r1 <= 0;
      im <= 0;
      im_initialized <= 0;
      step <= 0;
      first_execution_tick <= 0;
      instruction <= 0;
      halt <= 0;
      instruction_requested <= 0;
      stack_put_value <= 0;
      load_store_unrequested <= 0;
      stack_transfer_unrequested <= 2'b0;
      load_store_uncompleted <= 0;
      stack_transfer_uncompleted <= 2'b0;
   end // initial begin
`endif
endmodule // stack_machine_new