/* Stack machine core with separate Wishbone instruction and data master interfaces */
`default_nettype none

`include "messages.vh"

/*
 * This module provides 2 Wishbone MASTER interfaces described below.
 * CLK_I and RST_I signals are shared between those interfaces.
 * Two interfaces can, but don't have to, be made to access the same memory map.
 * Instructions interface never performs writes (its WE_O is hardwired to low).
 *
 * | *WISHBONE DATASHEET*                                                      |
 * |---------------------------------------------------------------------------|
 * | *Description*                   | *Specification*                         |
 * |---------------------------------+-----------------------------------------|
 * | General description             | stack machine core data interface       |
 * |---------------------------------+-----------------------------------------|
 * | Supported cycles                | MASTER, pipelined READ/WRITE            |
 * |---------------------------------+-----------------------------------------|
 * | Data port, size                 | 32-bit                                  |
 * | Data port, granularity          | 8-bit                                   |
 * | Data port, maximum operand size | 32-bit                                  |
 * | Data transfer ordering          | Little endian                           |
 * | Data transfer sequencing        | Undefined                               |
 * | Address port, size              | 21-bit                                  |
 * |---------------------------------+-----------------------------------------|
 * | Clock frequency constraints     | NONE                                    |
 * |---------------------------------+-----------------------------------------|
 * |                                 | *Signal name*    | *WISHBONE Equiv.*    |
 * |                                 |------------------+----------------------|
 * |                                 | D_ACK_I          | ACK_I                |
 * |                                 | D_ADR_O          | ADR_O()              |
 * | Supported signal list and cross | CLK_I            | CLK_I                |
 * |     reference to equivalent     | D_DAT_I          | DAT_I()              |
 * |     WISHBONE signals            | D_DAT_O          | DAT_O()              |
 * |                                 | D_SEL_O          | SEL_O                |
 * |                                 | D_STB_O          | STB_O                |
 * |                                 | D_CYC_O          | CYC_O                |
 * |                                 | D_WE_O           | WE_O                 |
 * |                                 | RST_I            | RST_I                |
 * |                                 | D_STALL_I        | STALL_I              |
 * |---------------------------------+-----------------------------------------|
 * | Special requirements            | NONE                                    |
 *
 *
 * | *WISHBONE DATASHEET*                                                      |
 * |---------------------------------------------------------------------------|
 * | *Description*                   | *Specification*                         |
 * |---------------------------------+-----------------------------------------|
 * | General description             | stack machine core instructions         |
 * |                                 |     interface                           |
 * |---------------------------------+-----------------------------------------|
 * | Supported cycles                | MASTER, pipelined READ                  |
 * |---------------------------------+-----------------------------------------|
 * | Data port, size                 | 16-bit                                  |
 * | Data port, granularity          | 16-bit                                  |
 * | Data port, maximum operand size | 16-bit                                  |
 * | Data transfer ordering          | Big endian and/or little endian         |
 * | Data transfer sequencing        | Undefined                               |
 * | Address port, size              | 20-bit                                  |
 * |---------------------------------+-----------------------------------------|
 * | Clock frequency constraints     | NONE                                    |
 * |---------------------------------+-----------------------------------------|
 * |                                 | *Signal name*    | *WISHBONE Equiv.*    |
 * |                                 |------------------+----------------------|
 * |                                 | I_ACK_I          | ACK_I                |
 * |                                 | I_ADR_O          | ADR_O()              |
 * | Supported signal list and cross | CLK_I            | CLK_I                |
 * |     reference to equivalent     | I_DAT_I          | DAT_I()              |
 * |     WISHBONE signals            | I_DAT_O          | DAT_O()              |
 * |                                 | I_SEL_O          | SEL_O                |
 * |                                 | I_STB_O          | STB_O                |
 * |                                 | I_CYC_O          | CYC_O                |
 * |                                 | I_WE_O           | WE_O                 |
 * |                                 | RST_I            | RST_I                |
 * |                                 | I_STALL_I        | STALL_I              |
 * |---------------------------------+-----------------------------------------|
 * | Special requirements            | NONE                                    |
 */

/*
 * Port list of the stack machine core. Two Wishbone master interfaces are
 * exposed: a 16-bit read-only one for fetching instructions (I_ prefix) and
 * a 32-bit read/write one for data and stack accesses (D_ prefix).
 */
module stack_machine_new
  (
   /* Those 2 are supposed to be common for both wishbone interfaces */
   input wire 	     CLK_I,
   input wire 	     RST_I,

   /* Instruction reading interface */
   input wire 	     I_ACK_I,
   output reg [19:0] I_ADR_O, /* Driven combinationally as pc / 2 */
   input wire [15:0] I_DAT_I,
   output reg [15:0] I_DAT_O, /* Not used, interface read-only */
   output reg 	     I_STB_O,
   output reg 	     I_CYC_O,
   output reg 	     I_WE_O, /* Always 0, interface read-only */
   input wire 	     I_STALL_I,

   /* Data interface */
   input wire 	     D_ACK_I,
   output reg [20:0] D_ADR_O,
   input wire [31:0] D_DAT_I,
   output reg [31:0] D_DAT_O,
   output reg [3:0]  D_SEL_O,
   output reg 	     D_STB_O,
   output reg 	     D_CYC_O,
   output reg 	     D_WE_O,
   input wire 	     D_STALL_I,

   /* non-wishbone */
   output wire 	     finished /* Mirrors the internal halt flag */
   );

   /*
    * Constant outputs of the read-only instruction interface: write enable
    * is held low and the outgoing data bus is left as don't-care.
    *
    * TODO: get back to the good old habit of using wires for all ports
    */
   always @* begin
      /* Tautology on CLK_I, avoiding "found no sensitivities" warning */
      if (!CLK_I || CLK_I) begin
         I_WE_O = 1'b0;
         I_DAT_O = 16'bx;
      end
   end

   /*
    * Program counter and stack pointer, both 21 bits wide. Bit 0 of each is
    * tied to zero by the combinational block below, so only bits [20:1] are
    * ever written by the clocked logic (through the two macros).
    */
   reg [20:0] 	      pc;
   reg [20:0] 	      sp;

   always @* begin /* pc and sp should always be word-aligned */
      if (CLK_I || !CLK_I) begin
	 pc[0] = 0;
	 sp[0] = 0;
      end
   end
   /*
    * Helpers for nonblocking updates of pc and sp: the given address is
    * halved and stored into bits [20:1]. The "if (1) ... else" wrapper makes
    * an invocation followed by a semicolon form one complete statement (the
    * semicolon becomes the empty else branch).
    */
`define SET_PC(address) if (1) begin pc[20:1] <= (address) / 2; end else
`define SET_SP(address) if (1) begin sp[20:1] <= (address) / 2; end else

   /*
    * Working registers. Results of operations land in r1; when the stack
    * grows, r0 is written out to memory and takes over r1's old value.
    */
   reg [31:0]         r0, r1;

   /* Signed views of the working registers, used by signed comparisons */
   wire signed [31:0] r0s = r0;
   wire signed [31:0] r1s = r1;

   /* Immediate value register and the flag telling it holds partial data */
   reg [31:0]         im;
   reg                im_initialized;

   /* The core alternates between fetching an instruction and executing it */
   parameter STEP_LOADING_INSTRUCTION = 1'b0,
             STEP_EXECUTING = 1'b1;
   reg                step;
   /* High during the first clock tick spent in STEP_EXECUTING */
   reg                first_execution_tick;

   /* Instruction word most recently captured from I_DAT_I */
   reg [15:0]         instruction;

   /* Results of instruction parsing */

   /*
    * Top bit set: the special instruction used solely for setting im
    * (it carries a 15-bit payload instead of a 7-bit one).
    */
   wire               set_im = instruction[15];

   /*
    * Instruction makes use of the immediate; every such instruction
    * carries a 7-bit payload.
    */
   wire               use_im = !set_im && instruction[14];

   /* Payload fields of the two kinds of instructions that modify im */
   wire [6:0]         short_payload = instruction[6:0];
   wire [14:0]        long_payload = instruction[14:0];

   /* Top payload bit, replicated when im is set from scratch */
   wire               payload_msb = set_im ? long_payload[14] :
                                    short_payload[6];

   /* Payload sign-extended to the full 32 bits */
   wire [31:0]        sign_extended_payload =
                      set_im ? {{17{payload_msb}}, long_payload} :
                      use_im ? {{25{payload_msb}}, short_payload} :
                      32'bx;

   /* Payload appended below the bits im already holds */
   wire [31:0]        im_shifted_payload =
                      set_im ? {im[16:0], long_payload} :
                      use_im ? {im[24:0], short_payload} :
                      32'bx;

   /*
    * Value of the immediate as seen by the current instruction: a partially
    * initialized im gets the payload shifted in, otherwise the payload is
    * sign-extended and used on its own.
    */
   wire [31:0]        im_effective = im_initialized ? im_shifted_payload :
                                     sign_extended_payload;

   /* An instruction may shrink the stack (by 1 or 2), grow it or keep it */
   wire               stack_shrinks = instruction[13] && !set_im;
   wire               stack_shrinks_by_1 = stack_shrinks && instruction[12];
   wire               stack_shrinks_by_2 = stack_shrinks && !instruction[12];
   wire               stack_grows = instruction[13:12] == 2'b01 && !set_im;
   wire               stack_same_size = instruction[13:12] == 2'b00 || set_im;

   /* instruction[11:10] == 2'b11 together with use_im selects load/store */
   wire               memory_access_opcode = instruction[11:10] == 2'b11;

   wire               store = stack_shrinks && use_im && memory_access_opcode;
   wire               load = (stack_grows || stack_same_size) && use_im &&
                             memory_access_opcode;

   /*
    * The address of a load or store is either im alone or im plus a
    * register (r1 for loads, r0 for stores). The register variant consumes
    * one more operand.
    */
   wire               addressing_with_operand = (load && stack_same_size) ||
                                                (store && stack_shrinks_by_2);

   wire [20:0]        address_operand = load ? r1[20:0] : r0[20:0];

   wire [20:0]        addr_to_use = addressing_with_operand ?
                                    im_effective + address_operand :
                                    im_effective;

   /*
    * Operand size flags, decoded from instruction[9:8]. They determine how
    * many bytes are load'ed or store'd and might later be used by other
    * instructions as well (e.g. type promotion).
    */
   wire [1:0]         operation_size = instruction[9:8];

   wire               byte_operation = operation_size == 2'b00;
   wire               word_operation = operation_size == 2'b01;
   wire               dword_operation = operation_size == 2'b10;
   wire               qword_operation = operation_size == 2'b11;
                                        /* We won't implement these in hw */

   /* Byte select mask presented on D_SEL_O for loads and stores */
   wire [3:0]         instruction_select_mask = byte_operation ? 4'b0001 :
                                                word_operation ? 4'b0011 :
                                                4'b1111;

   /* Flag mainly meant for load instructions, but not exclusively */
   wire               sign_extend = instruction[7];

   /* Bit used to fill the upper part of r1 after a byte or word load */
   wire               loaded_value_sign = sign_extend ?
                                          (byte_operation ? D_DAT_I[7] :
                                           word_operation ? D_DAT_I[15] :
                                           1'bx) :
                                          0;

   /* Instructions other than load and store go here */

   /* Instruction carries no immediate payload at all */
   wire               plain_op = !set_im && !use_im;

   /* Opcode field of payload-less instructions */
   wire [11:0]        op_long = instruction[11:0];

   /* Opcode field of instructions that carry a 7-bit payload */
   wire [4:0]         op_short = instruction[11:7];

   /* Instructions, that do not change stack size */
   wire               instr_halt = plain_op && stack_same_size &&
                                   op_long == 12'd0;
   wire               instr_nop = plain_op && stack_same_size &&
                                  op_long == 12'd1;
   wire               instr_swap = plain_op && stack_same_size &&
                                   op_long == 12'd2;

   wire               instr_set_sp = use_im && stack_same_size &&
                                     op_short == 5'd0;
   wire               instr_jump = use_im && stack_same_size &&
                                   op_short == 5'd1;
   wire               instr_add_sp = use_im && stack_same_size &&
                                     op_short == 5'd2;

   /* Instructions, that grow stack */
   wire               instr_tee = plain_op && stack_grows &&
                                  op_long == 12'd0;
   wire               instr_get_frame = plain_op && stack_grows &&
                                        op_long == 12'd1;

   wire               instr_const = use_im && stack_grows &&
                                    op_short == 5'd0;
   wire               instr_call = use_im && stack_grows &&
                                   op_short == 5'd1;

   /* Instructions, that shrink stack */
   wire               instr_add = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd0;
   wire               instr_sub = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd1;
   wire               instr_udiv = plain_op && stack_shrinks_by_1 &&
                                   op_long == 12'd2;
   wire               instr_mul = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd3;
   wire               instr_drop = plain_op && stack_shrinks_by_1 &&
                                   op_long == 12'd4;

   /* Comparisons; the u-prefixed variants compare unsigned values */
   wire               instr_eq = plain_op && stack_shrinks_by_1 &&
                                 op_long == 12'd7;
   wire               instr_lt = plain_op && stack_shrinks_by_1 &&
                                 op_long == 12'd8;
   wire               instr_ult = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd9;
   wire               instr_le = plain_op && stack_shrinks_by_1 &&
                                 op_long == 12'd10;
   wire               instr_ule = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd11;
   wire               instr_gt = plain_op && stack_shrinks_by_1 &&
                                 op_long == 12'd12;
   wire               instr_ugt = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd13;
   wire               instr_ge = plain_op && stack_shrinks_by_1 &&
                                 op_long == 12'd14;
   wire               instr_uge = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'd15;

   wire               instr_urem = plain_op && stack_shrinks_by_1 &&
                                   op_long == 12'd16;

   wire               instr_ret = plain_op && stack_shrinks_by_1 &&
                                  op_long == 12'h080;

   wire               instr_cond_jump = use_im && stack_shrinks_by_1 &&
                                        op_short == 5'd1;
   wire               instr_cond_jump_n = use_im && stack_shrinks_by_1 &&
                                          op_short == 5'd2;


   /* Raised when a halt instruction is encountered; drives finished */
   reg                halt;
   assign finished = halt;


   /* Divider shared by the udiv and urem instructions */
   wire [31:0]        div_quotient, div_remainder;
   wire               div_done;

   /* Pulsed on the first executing tick of every instruction */
   wire               div_start = step == STEP_EXECUTING &&
                                  first_execution_tick;

   div #(.WIDTH(32)) div
     (
      .clock(CLK_I),
      .start(div_start),
      .dividend(r0),
      .divisor(r1),

      .quotient(div_quotient),
      .remainder(div_remainder),
      .done(div_done)
      );


   /*
    * Tracking of the computational part of an instruction. Division waits
    * for the divider, halt never completes, everything else completes
    * immediately.
    */
   reg                arithmetic_uncompleted;
   wire               arithmetic_completes = (instr_udiv || instr_urem) ?
                                             div_done :
                                             (instr_halt ? 0 : 1);


   /* Instruction address output follows pc, halved */
   always @*
     I_ADR_O = pc / 2;

   /* Set once the instruction fetch strobe has been accepted */
   reg                instruction_requested;

   /* Copy of r0 taken on the first executing tick, for a delayed stack write */
   reg [31:0]         stack_put_value;

   /*
    * Data interface commands that still have to be issued (strobe not yet
    * accepted). A load/store goes first, stack transfers follow.
    */
   reg                load_store_unrequested;
   reg [1:0]          stack_transfer_unrequested;

   /* A data strobe is being accepted in this cycle */
   wire               data_request_happens = D_STB_O && !D_STALL_I;

   wire [1:0]         stack_transfer_request_happens =
                      {!load_store_unrequested &&
                       !stack_transfer_unrequested[0] &&
                       data_request_happens,
                       !load_store_unrequested &&
                       data_request_happens};

   /* Data interface commands that have not been acknowledged yet */
   reg                load_store_uncompleted;
   reg [1:0]          stack_transfer_uncompleted;

   /* An acknowledge arrives during an open data cycle */
   wire               data_command_completes = D_ACK_I && D_CYC_O;

   wire [1:0]         stack_transfer_completes =
                      {!load_store_uncompleted &&
                       !stack_transfer_uncompleted[0] &&
                       data_command_completes,
                       !load_store_uncompleted &&
                       data_command_completes};

   /*
    * Main clocked process of the core. It alternates between two steps:
    *
    *  - STEP_LOADING_INSTRUCTION: one 16-bit word is fetched through the
    *    instruction interface, pc is advanced by 2 and per-instruction
    *    bookkeeping flags are prepared.
    *  - STEP_EXECUTING: the fetched instruction is carried out. Any data
    *    interface commands it needs (a load/store and up to two stack
    *    transfers) are issued one after another and the step only ends once
    *    all of them have been acknowledged and the arithmetic part is done.
    *
    * RST_I is synchronous and has priority over everything else.
    */
   always @ (posedge CLK_I) begin
      if (RST_I) begin
         `SET_PC(0);
         `SET_SP(21'h0FFFFC);

         I_STB_O <= 0;
         I_CYC_O <= 0;

         step <= STEP_LOADING_INSTRUCTION;
         instruction_requested <= 0;

         /*
          * This flag is otherwise only written while executing. Without
          * clearing it here, a reset arriving right after an im-setting
          * instruction would make the first immediate of the restarted
          * program get shifted into stale im contents.
          */
         im_initialized <= 0;

         stack_put_value <= 32'bx;

         D_ADR_O <= 21'bx;
         D_DAT_O <= 32'bx;
         D_SEL_O <= 4'bx;
         D_STB_O <= 0;
         D_CYC_O <= 0;
         D_WE_O <= 0;

         halt <= 0;
      end else begin // if (RST_I)
         case (step)
           STEP_LOADING_INSTRUCTION : begin
              /* Sampled every tick; the value present at ack time stays */
              instruction <= I_DAT_I;

              if (I_STB_O && !I_STALL_I)
                instruction_requested <= 1;

              /* Strobe until the request gets accepted, then drop it */
              I_STB_O <= !instruction_requested && !(I_STB_O && !I_STALL_I);
              I_CYC_O <= 1;

              if (I_CYC_O && I_ACK_I) begin
                 instruction_requested <= 0;

                 `SET_PC(pc + 2);

                 step <= STEP_EXECUTING;
                 I_CYC_O <= 0;
              end

              /* Bookkeeping prepared for the upcoming execution */
              arithmetic_uncompleted <= 1;

              first_execution_tick <= 1;
              load_store_unrequested <= 0;
              stack_transfer_unrequested <= 2'b0;
              load_store_uncompleted <= 0;
              stack_transfer_uncompleted <= 2'b0;
           end // case: STEP_LOADING_INSTRUCTION
           STEP_EXECUTING : begin
              first_execution_tick <= 0;

              if (arithmetic_completes)
                arithmetic_uncompleted <= 0;

              /*
               * Keep executing as long as there is a data command that has
               * only just been scheduled, a data command that awaits its
               * acknowledge, or an arithmetic result that is not ready.
               */
              if (((stack_grows || stack_shrinks || load || store) &&
                   first_execution_tick) ||
                  (load_store_uncompleted &&
                   !data_command_completes) ||
                  (stack_transfer_uncompleted[1] &&
                   !stack_transfer_completes[1]) ||
                  (arithmetic_uncompleted &&
                   !arithmetic_completes)) begin
                 step <= STEP_EXECUTING; /* Remain where we are */
              end else begin
                 step <= STEP_LOADING_INSTRUCTION;

                 I_STB_O <= 1;
                 I_CYC_O <= 1;

                 D_CYC_O <= 0;
              end

              /* Record which data commands this instruction requires */
              if (first_execution_tick) begin
                 if (load || store) begin
                    load_store_unrequested <= 1;
                    load_store_uncompleted <= 1;
                 end

                 if (stack_shrinks_by_2) begin
                    stack_transfer_unrequested <= 2'b11;
                    stack_transfer_uncompleted <= 2'b11;
                 end else if (stack_grows || stack_shrinks) begin
                    stack_transfer_unrequested <= 2'b10;
                    stack_transfer_uncompleted <= 2'b10;
                 end
              end

              /*
               * Issue the first data command. A load or store takes
               * precedence; otherwise the stack transfer goes out at once.
               */
              if (first_execution_tick) begin
                 if (load) begin
                    D_ADR_O <= addr_to_use;
                    D_DAT_O <= 32'bx;
                    D_SEL_O <= instruction_select_mask;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 0;
                 end else if (store) begin
                    D_ADR_O <= addr_to_use;
                    D_DAT_O <= r1;
                    D_SEL_O <= instruction_select_mask;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 1;
                 end else if (stack_shrinks) begin
                    /* Read the value at sp, then move sp up */
                    `SET_SP(sp + 4);
                    D_ADR_O <= sp;
                    D_DAT_O <= 32'bx;
                    D_SEL_O <= 4'b1111;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 0;
                 end else if (stack_grows) begin
                    /* Move sp down and write r0 at the new location */
                    `SET_SP(sp - 4);
                    D_ADR_O <= sp - 4;
                    D_DAT_O <= r0;
                    D_SEL_O <= 4'b1111;
                    D_STB_O <= 1;
                    D_CYC_O <= 1;
                    D_WE_O <= 1;
                 end

                 /*
                  * If we want to offload value to memory because of stack
                  * growth, we may need to wait for load or store to complete
                  * first. In such case we need to back up the stack value.
                  */
                 stack_put_value <= r0;
              end // if (first_execution_tick)

              /* A strobe got accepted: issue the next command, if any */
              if (data_request_happens) begin
                 if (load_store_unrequested) begin
                    load_store_unrequested <= 0;
                 end else begin
                    stack_transfer_unrequested
                      <= {stack_transfer_unrequested[0], 1'b0};
                 end

                 if (stack_transfer_unrequested[0] ||
                     (load_store_unrequested &&
                      stack_transfer_unrequested[1])) begin
                    if (stack_shrinks) begin
                       `SET_SP(sp + 4);
                       D_ADR_O <= sp;
                       D_DAT_O <= 32'bx;
                       D_SEL_O <= 4'b1111;
                       D_STB_O <= 1;
                       D_WE_O <= 0;
                    end else /* if (stack_grows) */ begin
                       `SET_SP(sp - 4);
                       D_ADR_O <= sp - 4;
                       D_DAT_O <= stack_put_value;
                       D_SEL_O <= 4'b1111;
                       D_STB_O <= 1;
                       D_WE_O <= 1;
                    end
                 end else begin // if (stack_transfer_unrequested[0] ||...
                    /* Nothing more to request */
                    D_ADR_O <= 21'bx;
                    D_DAT_O <= 32'bx;
                    D_SEL_O <= 4'bx;
                    D_STB_O <= 0;
                    D_WE_O <= 0;
                 end // else: !if(stack_transfer_unrequested[0] ||...
              end // if (data_request_happens)

              /* An acknowledge arrived: retire the oldest pending command */
              if (data_command_completes) begin
                 if (load_store_uncompleted) begin
                    load_store_uncompleted <= 0;
                 end else begin
                    stack_transfer_uncompleted
                      <= {stack_transfer_uncompleted[0], 1'b0};
                 end

                 /* That was the last outstanding command, close the cycle */
                 if (!(load_store_uncompleted ||
                       stack_transfer_uncompleted[0]))
                   D_CYC_O <= 0;
              end

              /* Either of the (up to two) stack reads refills r0 */
              if (stack_shrinks && (|stack_transfer_completes))
                r0 <= D_DAT_I;

              if (store)
                r1 <= r0;

              if (stack_grows && first_execution_tick)
                r0 <= r1;

              if (load && load_store_uncompleted) begin
                 if (byte_operation)
                   r1 <= {{24{loaded_value_sign}}, D_DAT_I[7:0]};
                 else if (word_operation)
                   r1 <= {{16{loaded_value_sign}}, D_DAT_I[15:0]};
                 else
                   r1 <= D_DAT_I;
              end

              /*
               * im is rewritten on every executing tick: with the effective
               * immediate for instructions carrying a payload and with
               * don't-care for all others. Only an im-setting instruction
               * leaves im marked as partially initialized.
               */
              im_initialized <= set_im;

              if (set_im || use_im)
                im <= im_effective;
              else
                im <= 32'bx;

              /* Instructions, that do not change stack size */
              if (instr_halt)
                halt <= 1;

              if (instr_nop)
                r1 <= r1;

              if (instr_swap)
                {r0, r1} <= {r1, r0};

              if (instr_set_sp)
                `SET_SP(im_effective);

              if (instr_add_sp)
                `SET_SP(im_effective + sp);

              if (instr_jump)
                `SET_PC(im_effective);

              /* Instructions, that grow stack */
              if (instr_tee)
                r1 <= r1;

              if (instr_get_frame && first_execution_tick)
                r1 <= sp;

              if (instr_const && first_execution_tick)
                r1 <= im_effective;

              if (instr_call && first_execution_tick) begin
                 /* pc has already been advanced past the call instruction */
                 r1 <= pc;
                 `SET_PC(im_effective);
              end

              /* Instructions, that shrink stack */
              if (instr_add && arithmetic_uncompleted)
                r1 <= r0 + r1;

              if (instr_sub && arithmetic_uncompleted)
                r1 <= r0 - r1;

              if (instr_udiv && arithmetic_uncompleted)
                r1 <= div_quotient;

              if (instr_urem && arithmetic_uncompleted)
                r1 <= div_remainder;

              if (instr_mul && arithmetic_uncompleted)
                r1 <= r0 * r1;

              if (instr_drop && arithmetic_uncompleted)
                r1 <= r0;

              if ((instr_cond_jump || instr_cond_jump_n) &&
                  arithmetic_uncompleted) begin
                 r1 <= r0;

                 if ((r1 && instr_cond_jump) ||
                     (!r1 && instr_cond_jump_n))
                   `SET_PC(im_effective);
              end

              if (instr_eq && arithmetic_uncompleted)
                r1 <= r0 == r1;

              if (instr_lt && arithmetic_uncompleted)
                r1 <= r0s < r1s;

              if (instr_ult && arithmetic_uncompleted)
                r1 <= r0 < r1;

              if (instr_le && arithmetic_uncompleted)
                r1 <= r0s <= r1s;

              if (instr_ule && arithmetic_uncompleted)
                r1 <= r0 <= r1;

              if (instr_gt && arithmetic_uncompleted)
                r1 <= r0s > r1s;

              if (instr_ugt && arithmetic_uncompleted)
                r1 <= r0 > r1;

              if (instr_ge && arithmetic_uncompleted)
                r1 <= r0s >= r1s;

              if (instr_uge && arithmetic_uncompleted)
                r1 <= r0 >= r1;

              if (instr_ret && arithmetic_uncompleted) begin
                 r1 <= r0;
                 `SET_PC(r1);
              end

              /* Debug trace, printed once per executed instruction */
              if (first_execution_tick) begin
                 `DBG(("r0: %x    r1: %x", r0, r1));
                 `DBG(("CPU: Executing %0s instruction",
                       store             ? "store (kind?)" :
                       load              ? "load (kind?)" :
                       instr_halt        ? "halt" :
                       instr_nop         ? "nop" :
                       instr_swap        ? "swap" :
                       instr_set_sp      ? "set_sp" :
                       instr_jump        ? "jump" :
                       instr_add_sp      ? "add_sp" :
                       instr_tee         ? "tee" :
                       instr_get_frame   ? "get_frame" :
                       instr_const       ? "const" :
                       instr_call        ? "call" :
                       instr_add         ? "add" :
                       instr_sub         ? "sub" :
                       instr_udiv        ? "udiv" :
                       instr_mul         ? "mul" :
                       instr_drop        ? "drop" :
                       instr_eq          ? "eq" :
                       instr_lt          ? "lt" :
                       instr_ult         ? "ult" :
                       instr_le          ? "le" :
                       instr_ule         ? "ule" :
                       instr_gt          ? "gt" :
                       instr_ugt         ? "ugt" :
                       instr_ge          ? "ge" :
                       instr_uge         ? "uge" :
                       instr_urem        ? "urem" :
                       instr_ret         ? "ret" :
                       instr_cond_jump   ? "cond_jump" :
                       instr_cond_jump_n ? "cond_jump_n" :
                       set_im            ? "im" :
                       "unknown"));
              end // if (first_execution_tick)
           end // case: STEP_EXECUTING
         endcase // case (step)
      end // else: !if(RST_I)
   end // always @ (posedge CLK_I)

`ifdef SIMULATION
   /*
    * RST should still be used when powering up, even in benches;
    * this is just to avoid undefined values.
    *
    * Every register written by the clocked process is given a defined
    * starting value here, including D_SEL_O and arithmetic_uncompleted.
    */
   initial begin
      I_ADR_O <= 0;
      I_STB_O <= 0;
      I_CYC_O <= 0;

      D_ADR_O <= 0;
      D_DAT_O <= 0;
      D_SEL_O <= 0;
      D_STB_O <= 0;
      D_CYC_O <= 0;
      D_WE_O <= 0;

      `SET_PC(0);
      `SET_SP(0);

      r0 <= 0;
      r1 <= 0;
      im <= 0;

      im_initialized <= 0;

      step <= 0;
      first_execution_tick <= 0;

      instruction <= 0;

      halt <= 0;

      instruction_requested <= 0;

      stack_put_value <= 0;

      arithmetic_uncompleted <= 0;

      load_store_unrequested <= 0;
      stack_transfer_unrequested <= 2'b0;

      load_store_uncompleted <= 0;
      stack_transfer_uncompleted <= 2'b0;
   end // initial begin
`endif
endmodule // stack_machine_new