From cdee092d6413ef0af53c63ac7eaf1d075be864ea Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Tue, 30 May 2023 13:04:20 +0200 Subject: [PATCH 01/38] Issue #814 correction. Signed-off-by: Pascal Gouedo --- docs/source/instruction_set_extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index 7d4c831ea..030d17052 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -1314,7 +1314,7 @@ SIMD ALU operations +------------------------------------------------------------+------------------------------------------------------------------+ | **cv.avgu[.sc,.sci]{.h,.b} rD, rs1, [rs2, Imm6]** | rD[i] = ((rs1[i] + op2[i]) & {0xFFFF, 0xFF}) >> 1 | | | | - | | Note: Logical shift right. | + | | Note: Immediate is zero-extended, shift is logical. | +------------------------------------------------------------+------------------------------------------------------------------+ | **cv.min[.sc,.sci]{.h,.b} rD, rs1, [rs2, Imm6]** | rD[i] = rs1[i] < op2[i] ? rs1[i] : op2[i] | +------------------------------------------------------------+------------------------------------------------------------------+ From f2a1997cfd2379ca09ed89795ec4eb25cec9c1e7 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 31 May 2023 15:38:38 +0200 Subject: [PATCH 02/38] Issue #813 correction. Signed-off-by: Pascal Gouedo --- docs/source/instruction_set_extensions.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index 030d17052..e0988eb20 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -1331,14 +1331,29 @@ SIMD ALU operations | **cv.srl[.sc,.sci]{.h,.b} rD, rs1, [rs2, Imm6]** | rD[i] = rs1[i] >> op2[i] | | | | | | Note: Immediate is zero-extended, shift is logical. 
| + | | | + | | Only Imm6[3:0] and rs2[3:0] are used for .h instruction and | + | | Imm6[2:0] and rs2[2:0] for .b instruction. | + | | | + | | Other bits are not used and must be set to 0. | +------------------------------------------------------------+------------------------------------------------------------------+ | **cv.sra[.sc,.sci]{.h,.b} rD, rs1, [rs2, Imm6]** | rD[i] = rs1[i] >>> op2[i] | | | | | | Note: Immediate is zero-extended, shift is arithmetic. | + | | | + | | Only Imm6[3:0] and rs2[3:0] are used for .h instruction and | + | | Imm6[2:0] and rs2[2:0] for .b instruction. | + | | | + | | Other bits are not used and must be set to 0. | +------------------------------------------------------------+------------------------------------------------------------------+ | **cv.sll[.sc,.sci]{.h,.b} rD, rs1, [rs2, Imm6]** | rD[i] = rs1[i] << op2[i] | | | | | | Note: Immediate is zero-extended, shift is logical. | + | | | + | | Only Imm6[3:0] and rs2[3:0] are used for .h instruction and | + | | Imm6[2:0] and rs2[2:0] for .b instruction. | + | | | + | | Other bits are not used and must be set to 0. | +------------------------------------------------------------+------------------------------------------------------------------+ | **cv.or[.sc,.sci]{.h,.b} rD, rs1, [rs2, Imm6]** | rD[i] = rs1[i] \| op2[i] | +------------------------------------------------------------+------------------------------------------------------------------+ From 9998d8e22e671c36e1848025e53669ba967b918b Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 31 May 2023 16:57:30 +0200 Subject: [PATCH 03/38] Added pseudo-intructions section. 
Signed-off-by: Pascal Gouedo --- docs/source/instruction_set_extensions.rst | 46 ++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index e0988eb20..e96dea7ae 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -17,10 +17,10 @@ .. _custom-isa-extensions: -CORE-V Instruction Set Custom Extension -======================================= +CORE-V Instruction Set Custom Extensions +======================================== -CV32E40P supports the following CORE-V ISA X Custom Extension, which can be enabled by setting ``COREV_PULP`` == 1. +CV32E40P supports the following CORE-V ISA X Custom Extensions, which can be enabled by setting ``COREV_PULP`` == 1. * Post-Increment load and stores, see :ref:`corev_load_store`, invoked in the tool chain with ``-march=rv32i*_xcvmem``. * Hardware Loop extension, see :ref:`corev_hardware_loop`, invoked in the tool chain with ``-march=rv32i*_xcvhwlp``. @@ -44,6 +44,17 @@ To use such instructions, you need to compile your SW with the CORE-V GCC or Cla Clang/LLVM assembler will be supported by 30 June 2023, with builtin function support by 31 December 2023. +.. _pseudo_instructions: + +Pseudo-instructions +------------------- + +This specification also includes documentation of some CORE-V pseudo-instructions. Pseudo-instructions are implemented in the assembler; +they are similar to a base instruction but provide control information to the assembler as opposed to generating their base instruction. +This makes it easier to program as we gain clarity on the intention of the programmer. + + * 16-Bit x 16-Bit Multiplication pseudo-instructions, see :ref:`corev_16_bit_multiply_pseudo_instructions`. + ..
_corev_load_store: Post-Increment Load & Store Instructions and Register-Register Load & Store Instructions @@ -1077,6 +1088,35 @@ The custom multiply-accumulate extensions are only supported if ``COREV_PULP`` = | | If Is3 is equal to 0, 2^(Is3-1) is equivalent to 0. | +-----------------------------------------------+------------------------------------------------------------------------------+ +.. _corev_16_bit_multiply_pseudo_instructions: + +16-Bit x 16-Bit Multiplication pseudo-instructions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. table:: 16-Bit Multiplication pseudo-instructions + :name: 16-Bit Multiplication pseudo-instructions + :widths: 23 27 50 + :class: no-scrollbar-table + + +-----------------------------------------+--------------------------------------------+--------------------------------------------------------------+ + | **Mnemonic** | **Base Instruction** | **Description** | + +=========================================+============================================+==============================================================+ + | **cv.mulu rD, rs1, rs2** | **cv.muluN rD, rs1, rs2, 0** | rD[31:0] = (Zext(rs1[15:0]) \* Zext(rs2[15:0])) >> 0 | + | | | | + | | | Note: Logical shift right. | + +-----------------------------------------+--------------------------------------------+--------------------------------------------------------------+ + | **cv.mulhhu rD, rs1, rs2** | **cv.mulhhuN rD, rs1, rs2, 0** | rD[31:0] = (Zext(rs1[31:16]) \* Zext(rs2[31:16])) >> 0 | + | | | | + | | | Note: Logical shift right. | + +-----------------------------------------+--------------------------------------------+--------------------------------------------------------------+ + | **cv.muls rD, rs1, rs2** | **cv.mulsN rD, rs1, rs2, 0** | rD[31:0] = (Sext(rs1[15:0]) \* Sext(rs2[15:0])) >> 0 | + | | | | + | | | Note: Arithmetic shift right. 
| + +-----------------------------------------+--------------------------------------------+--------------------------------------------------------------+ + | **cv.mulhhs rD, rs1, rs2** | **cv.mulhhsN rD, rs1, rs2, 0** | rD[31:0] = (Sext(rs1[31:16]) \* Sext(rs2[31:16])) >> 0 | + | | | | + | | | Note: Arithmetic shift right. | + +-----------------------------------------+--------------------------------------------+--------------------------------------------------------------+ + 16-Bit x 16-Bit Multiply-Accumulate operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 618e9ac1663b225d5956425a613f577c890c0b75 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 31 May 2023 16:58:13 +0200 Subject: [PATCH 04/38] Integrated clock gating cell section in Integration one. Signed-off-by: Pascal Gouedo --- docs/source/getting_started.rst | 45 --------------------------------- docs/source/integration.rst | 22 ++++++++++++++++ 2 files changed, 22 insertions(+), 45 deletions(-) delete mode 100644 docs/source/getting_started.rst diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst deleted file mode 100644 index 4774abe62..000000000 --- a/docs/source/getting_started.rst +++ /dev/null @@ -1,45 +0,0 @@ -.. - Copyright (c) 2023 OpenHW Group - - Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - https://solderpad.org/licenses/ - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 - -.. 
_getting-started: - -Getting Started with CV32E40P -============================= - -This page discusses initial steps and requirements to start using CV32E40P in your design. - -.. _clock-gating-cell: - -Clock Gating Cell ------------------ - -CV32E40P requires clock gating cells. -These cells are usually specific to the selected target technology and thus not provided as part of the RTL design. -A simulation-only version of the clock gating cell is provided in ``cv32e40p_sim_clock_gate.sv``. This file contains -a module called ``cv32e40p_clock_gate`` that has the following ports: - -* ``clk_i``: Clock Input -* ``en_i``: Clock Enable Input -* ``scan_cg_en_i``: Scan Clock Gate Enable Input (activates the clock even though ``en_i`` is not set) -* ``clk_o``: Gated Clock Output - -Inside CV32E40P, clock gating cells are used in both ``cv32e40p_sleep_unit.sv`` and ``cv32e40p_top.sv``. - -The ``cv32e40p_sim_clock_gate.sv`` file is not intended for synthesis. For ASIC synthesis and FPGA synthesis the manifest -should be adapted to use a customer specific file that implements the ``cv32e40p_clock_gate`` module using design primitives -that are appropriate for the intended synthesis target technology. - diff --git a/docs/source/integration.rst b/docs/source/integration.rst index 253ae77fd..27b0238c9 100644 --- a/docs/source/integration.rst +++ b/docs/source/integration.rst @@ -203,3 +203,25 @@ Interfaces +-------------------------+--------------------------------------------------------------------------------+ | ``debug_*`` | Debug interface, see :ref:`debug-support` | +-------------------------+-------------------------+---------+--------------------------------------------+ + +.. _clock-gating-cell: + +Clock Gating Cell +----------------- + +CV32E40P requires clock gating cells. +These cells are usually specific to the selected target technology and thus not provided as part of the RTL design. 
+A simulation-only version of the clock gating cell is provided in ``cv32e40p_sim_clock_gate.sv``. This file contains +a module called ``cv32e40p_clock_gate`` that has the following ports: + +* ``clk_i``: Clock Input +* ``en_i``: Clock Enable Input +* ``scan_cg_en_i``: Scan Clock Gate Enable Input (activates the clock even though ``en_i`` is not set) +* ``clk_o``: Gated Clock Output + +Inside CV32E40P, clock gating cells are used in both ``cv32e40p_sleep_unit.sv`` and ``cv32e40p_top.sv``. + +The ``cv32e40p_sim_clock_gate.sv`` file is not intended for synthesis. For ASIC synthesis and FPGA synthesis the manifest +should be adapted to use a customer specific file that implements the ``cv32e40p_clock_gate`` module using design primitives +that are appropriate for the intended synthesis target technology. + From e3af7b117f2a66e9b95abc68efeb1a90f869933c Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 31 May 2023 16:59:06 +0200 Subject: [PATCH 05/38] Re-ordered section with ISA first then design details. Signed-off-by: Pascal Gouedo --- docs/source/corev_hw_loop.rst | 4 ++-- docs/source/index.rst | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/source/corev_hw_loop.rst b/docs/source/corev_hw_loop.rst index 11e422b31..848e63d88 100644 --- a/docs/source/corev_hw_loop.rst +++ b/docs/source/corev_hw_loop.rst @@ -17,8 +17,8 @@ .. _hwloop-specs: -CORE-V Hardware Loop Extensions -=============================== +CORE-V Hardware Loop feature +============================ To increase the efficiency of small loops, CV32E40P supports hardware loops (HWLoop). They can be enabled by setting the ``COREV_PULP`` parameter. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index 5853a12b0..88a1283cb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -24,20 +24,19 @@ OpenHW Group CV32E40P User Manual preface intro - getting_started integration + fpu verification + corev_hw_loop + instruction_set_extensions + perf_counters + control_status_registers + exceptions_interrupts + debug pipeline instruction_fetch load_store_unit register_file - fpu sleep - corev_hw_loop - control_status_registers - perf_counters - exceptions_interrupts - debug - instruction_set_extensions core_versions glossary From b677dfad51f9446d94ce5bea0428eee7ba1902fc Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Wed, 17 May 2023 18:04:04 +0800 Subject: [PATCH 06/38] Correcting fpu instruction managment in rvfi --- bhv/cv32e40p_rvfi.sv | 165 +++++++++++++++++------- bhv/cv32e40p_tb_wrapper.sv | 25 ++-- bhv/insn_trace.sv | 15 ++- bhv/pipe_freeze_trace.sv | 248 +++++++++++++++++++------------------ 4 files changed, 272 insertions(+), 181 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index d003a98e5..1e60eacc8 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -98,6 +98,10 @@ module cv32e40p_rvfi input logic [31:0] ex_reg_wdata_i, input logic apu_en_ex_i, + input logic apu_singlecycle_i, + input logic apu_multicycle_i, + input logic wb_contention_lsu_i, + input logic wb_contention_i, input logic branch_in_ex_i, input logic branch_decision_ex_i, @@ -294,6 +298,7 @@ module cv32e40p_rvfi input logic [4:0] csr_fcsr_fflags_n_i, input logic [4:0] csr_fcsr_fflags_q_i, + input logic csr_fcsr_fflags_we_i, input logic [2:0] csr_fcsr_frm_n_i, input logic [2:0] csr_fcsr_frm_q_i, @@ -929,11 +934,59 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; //those event are for debug purpose event e_dev_send_wb_1, e_dev_send_wb_2; event e_dev_commit_rf_to_ex_1, e_dev_commit_rf_to_ex_2, e_dev_commit_rf_to_ex_3; + event e_ex_to_wb_1, e_ex_to_wb_2; //used to 
match memory response to memory request and corresponding instruction integer cnt_data_req, cnt_data_resp; integer cnt_apu_req, cnt_apu_resp; + insn_trace_t apu_trace_q[$]; + insn_trace_t trace_apu_req, trace_apu_resp; + + function void csr_to_apu_resp(); + `CSR_FROM_PIPE(apu_resp, fcsr) + `CSR_FROM_PIPE(apu_resp, fflags) + endfunction + + function void csr_to_apu_req(); + `CSR_FROM_PIPE(apu_req, misa) + `CSR_FROM_PIPE(apu_req, tdata1) + trace_apu_req.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; + trace_apu_req.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; + trace_apu_req.m_csr.tinfo_rmask = '1; + trace_apu_req.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; + trace_apu_req.m_csr.tinfo_wmask = '0; + + + `CSR_FROM_PIPE(apu_req, frm) + + + endfunction + + bit s_apu_to_alu_port; + bit s_apu_to_lsu_port; + + function void apu_resp(); + if (!r_pipe_freeze_trace.wb_contention_lsu) begin + if (apu_trace_q.size() > 0) begin + trace_apu_resp = apu_trace_q.pop_front(); + if (s_apu_to_alu_port) begin + if (r_pipe_freeze_trace.ex_reg_we) begin + trace_apu_resp.m_rd_addr[0] = r_pipe_freeze_trace.ex_reg_addr; + trace_apu_resp.m_rd_wdata[0] = r_pipe_freeze_trace.ex_reg_wdata; + end + end else if (s_apu_to_lsu_port) begin + if (r_pipe_freeze_trace.rf_we_wb) begin + trace_apu_resp.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + trace_apu_resp.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + end + end + csr_to_apu_resp(); + send_rvfi(trace_apu_resp); + end + end + endfunction + task compute_pipeline(); bit s_new_valid_insn; bit s_ex_valid_adjusted; @@ -942,6 +995,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_id_done; bit s_apu_wb_ok; + bit s_apu_0_cycle_reps; trace_if = new(); trace_id = new(); trace_ex = new(); @@ -951,6 +1005,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_id_done = 1'b0; s_apu_wb_ok = 1'b0; + s_apu_0_cycle_reps = 1'b0; next_send = 1; cnt_data_req = 0; @@ -997,6 +1052,22 @@ 
insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end if (r_pipe_freeze_trace.apu_rvalid) begin cnt_apu_resp = cnt_apu_resp + 1; + if (r_pipe_freeze_trace.apu_singlecycle | r_pipe_freeze_trace.apu_multicycle) begin + s_apu_to_alu_port = 1'b1; + s_apu_to_lsu_port = 1'b0; + end else begin + s_apu_to_lsu_port = 1'b1; + s_apu_to_alu_port = 1'b0; + end + end else begin + s_apu_to_lsu_port = 1'b0; + s_apu_to_alu_port = 1'b0; + end + + if(r_pipe_freeze_trace.apu_req && r_pipe_freeze_trace.apu_gnt && r_pipe_freeze_trace.apu_rvalid && (cnt_apu_resp == cnt_apu_req)) begin + s_apu_0_cycle_reps = 1'b1; + end else begin + s_apu_0_cycle_reps = 1'b0; end if (trace_ex.m_valid & s_wb_valid_adjusted) begin @@ -1011,54 +1082,41 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_apu_wb_ok = 1'b0; end - s_new_valid_insn = r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.is_decoding; + s_new_valid_insn = r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.is_decoding;// && !r_pipe_freeze_trace.apu_rvalid; - s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE); + s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE);// && !r_pipe_freeze_trace.apu_rvalid;; //WB_STAGE - if (trace_wb.m_valid) begin - if (trace_wb.m_is_apu) begin - if (s_apu_wb_ok | trace_wb.m_is_apu_ok) begin - if (s_apu_wb_ok && r_pipe_freeze_trace.apu_rvalid) begin //FPU is returnong valid data - if (r_pipe_freeze_trace.ex_reg_we) begin - trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.ex_reg_addr; - trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.ex_reg_wdata; - end - end - - - send_rvfi(trace_wb); - trace_wb.m_valid = 1'b0; - end - end else begin - if (r_pipe_freeze_trace.rf_we_wb) begin - if((trace_wb.m_rd_addr[0] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin - trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_wb.m_rd_wdata[0] = 
r_pipe_freeze_trace.rf_wdata_wb; - end else if (trace_wb.m_2_rd_insn && (trace_wb.m_rd_addr[1] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin - trace_wb.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; - trace_wb.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - end + if (r_pipe_freeze_trace.apu_rvalid && (apu_trace_q.size() > 0)) begin + apu_resp(); + end else if (trace_wb.m_valid) begin + if (r_pipe_freeze_trace.rf_we_wb) begin + if((trace_wb.m_rd_addr[0] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin + trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + end else if (trace_wb.m_2_rd_insn && (trace_wb.m_rd_addr[1] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin + trace_wb.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + trace_wb.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; end + end - if (!trace_wb.m_data_missaligned) begin - send_rvfi(trace_wb); - ->e_dev_send_wb_1; - trace_wb.m_valid = 1'b0; - end else begin - if (s_wb_valid_adjusted) begin - if (r_pipe_freeze_trace.rf_we_wb) begin - if (!trace_wb.m_ex_fw) begin - trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - end - if (trace_wb.m_data_missaligned && !trace_wb.m_got_first_data) begin - trace_wb.m_got_first_data = 1'b1; - end else begin - send_rvfi(trace_wb); - ->e_dev_send_wb_2; - trace_wb.m_valid = 1'b0; - end + if (!trace_wb.m_data_missaligned) begin + send_rvfi(trace_wb); + ->e_dev_send_wb_1; + trace_wb.m_valid = 1'b0; + end else begin + if (s_wb_valid_adjusted) begin + if (r_pipe_freeze_trace.rf_we_wb) begin + if (!trace_wb.m_ex_fw) begin + trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + end + if (trace_wb.m_data_missaligned && !trace_wb.m_got_first_data) begin + 
trace_wb.m_got_first_data = 1'b1; + end else begin + send_rvfi(trace_wb); + ->e_dev_send_wb_2; + trace_wb.m_valid = 1'b0; end end end @@ -1074,6 +1132,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(ex, fflags) `CSR_FROM_PIPE(ex, frm) `CSR_FROM_PIPE(ex, fcsr) + trace_ex.m_csr.fflags_wmask = '0; + trace_ex.m_csr.frm_wmask = '0; + trace_ex.m_csr.fcsr_wmask = '0; if (s_wb_valid_adjusted) begin if (trace_wb.m_valid) begin @@ -1096,7 +1157,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (!s_ex_valid_adjusted & !trace_ex.m_csr.got_minstret) begin minstret_to_ex(); end - + ->e_ex_to_wb_1; trace_wb.move_down_pipe(trace_ex); trace_ex.m_valid = 1'b0; end @@ -1114,7 +1175,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end - s_ex_valid_adjusted = r_pipe_freeze_trace.ex_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE); + s_ex_valid_adjusted = r_pipe_freeze_trace.ex_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) && !r_pipe_freeze_trace.apu_rvalid; //EX_STAGE if (trace_id.m_valid) begin @@ -1135,8 +1196,15 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (r_pipe_freeze_trace.apu_req) begin trace_id.m_is_apu = 1'b1; trace_id.m_apu_req_id = cnt_apu_req; - if (r_pipe_freeze_trace.apu_rvalid && (cnt_apu_req == cnt_apu_resp)) begin - trace_id.m_is_apu_ok = 1'b1; + trace_apu_req = new(); + trace_apu_req.copy_full(trace_id); + csr_to_apu_req(); + trace_apu_req.set_to_apu(); + apu_trace_q.push_back(trace_apu_req); + trace_id.m_valid = 1'b0; + + if(r_pipe_freeze_trace.apu_rvalid && (cnt_apu_req == cnt_apu_resp)) begin//APU return in the same cycle + apu_resp(); end end @@ -1145,7 +1213,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; send_rvfi(trace_ex); trace_ex.m_valid = 1'b0; end - if (r_pipe_freeze_trace.ex_reg_we) begin + if (r_pipe_freeze_trace.ex_reg_we && !r_pipe_freeze_trace.apu_rvalid) begin trace_id.m_ex_fw = 1'b1; 
trace_id.m_rd_addr[0] = r_pipe_freeze_trace.ex_reg_addr; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.ex_reg_wdata; @@ -1194,6 +1262,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (trace_wb.m_valid) begin send_rvfi(trace_ex); end else begin + ->e_ex_to_wb_2; trace_wb.move_down_pipe(trace_ex); end trace_ex.m_valid = 1'b0; diff --git a/bhv/cv32e40p_tb_wrapper.sv b/bhv/cv32e40p_tb_wrapper.sv index 52aa271db..3ae3c4c5f 100644 --- a/bhv/cv32e40p_tb_wrapper.sv +++ b/bhv/cv32e40p_tb_wrapper.sv @@ -263,12 +263,16 @@ module cv32e40p_tb_wrapper .debug_csr_save_i (cv32e40p_top_i.core_i.debug_csr_save), //// EX probes //// - .ex_valid_i (cv32e40p_top_i.core_i.ex_valid), - .ex_ready_i (cv32e40p_top_i.core_i.ex_ready), - .ex_reg_addr_i (cv32e40p_top_i.core_i.regfile_alu_waddr_fw), - .ex_reg_we_i (cv32e40p_top_i.core_i.regfile_alu_we_fw), - .ex_reg_wdata_i(cv32e40p_top_i.core_i.regfile_alu_wdata_fw), - .apu_en_ex_i (cv32e40p_top_i.core_i.apu_en_ex), + .ex_valid_i (cv32e40p_top_i.core_i.ex_valid), + .ex_ready_i (cv32e40p_top_i.core_i.ex_ready), + .ex_reg_addr_i (cv32e40p_top_i.core_i.regfile_alu_waddr_fw), + .ex_reg_we_i (cv32e40p_top_i.core_i.regfile_alu_we_fw), + .ex_reg_wdata_i (cv32e40p_top_i.core_i.regfile_alu_wdata_fw), + .apu_en_ex_i (cv32e40p_top_i.core_i.apu_en_ex), + .apu_singlecycle_i (cv32e40p_top_i.core_i.ex_stage_i.apu_singlecycle), + .apu_multicycle_i (cv32e40p_top_i.core_i.ex_stage_i.apu_multicycle), + .wb_contention_lsu_i(cv32e40p_top_i.core_i.ex_stage_i.wb_contention_lsu), + .wb_contention_i (cv32e40p_top_i.core_i.ex_stage_i.wb_contention), // .rf_we_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_we_fw_i), // .rf_addr_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_waddr_fw_i), @@ -387,10 +391,11 @@ module cv32e40p_tb_wrapper }), //TODO: get this from the design instead of the pkg .csr_marchid_i(MARCHID), //TODO: get this from the design instead of the pkg - .csr_fcsr_fflags_n_i(cv32e40p_top_i.core_i.cs_registers_i.fflags_n), 
- .csr_fcsr_fflags_q_i(cv32e40p_top_i.core_i.cs_registers_i.fflags_q), - .csr_fcsr_frm_n_i (cv32e40p_top_i.core_i.cs_registers_i.frm_n), - .csr_fcsr_frm_q_i (cv32e40p_top_i.core_i.cs_registers_i.frm_q) + .csr_fcsr_fflags_n_i (cv32e40p_top_i.core_i.cs_registers_i.fflags_n), + .csr_fcsr_fflags_q_i (cv32e40p_top_i.core_i.cs_registers_i.fflags_q), + .csr_fcsr_fflags_we_i(cv32e40p_top_i.core_i.cs_registers_i.fflags_we_i), + .csr_fcsr_frm_n_i (cv32e40p_top_i.core_i.cs_registers_i.frm_n), + .csr_fcsr_frm_q_i (cv32e40p_top_i.core_i.cs_registers_i.frm_q) ); `endif diff --git a/bhv/insn_trace.sv b/bhv/insn_trace.sv index 46815eb09..65a3dc2d6 100644 --- a/bhv/insn_trace.sv +++ b/bhv/insn_trace.sv @@ -108,7 +108,7 @@ } m_csr; enum logic[2:0] { - IF, ID, EX, WB, WB_2 + IF, ID, EX, WB, WB_2, APU } m_stage; @@ -269,10 +269,15 @@ function void move_down_pipe(insn_trace_t m_source); this.copy_full(m_source); case(this.m_stage) - IF: this.m_stage = ID; - ID: this.m_stage = EX; - EX: this.m_stage = WB; - WB: this.m_stage = WB_2; + IF : this.m_stage = ID; + ID : this.m_stage = EX; + EX : this.m_stage = WB; + WB : this.m_stage = WB_2; + APU: this.m_stage = APU; endcase endfunction + + function void set_to_apu(); + this.m_stage = APU; + endfunction endclass diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index ff6b477d9..dcad1677a 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -89,6 +89,10 @@ typedef struct { logic [31:0] ex_reg_wdata; logic apu_en_ex; + logic apu_singlecycle; + logic apu_multicycle; + logic wb_contention_lsu; + logic wb_contention; logic branch_in_ex; logic branch_decision_ex; @@ -352,149 +356,153 @@ task monitor_pipeline(); wait(clk_i_d == 1'b0 & rst_ni == 1'b1); // r_pipe_freeze_trace. 
<= ; - r_pipe_freeze_trace.instr_req = instr_req_i; - r_pipe_freeze_trace.instr_grant = instr_grant_i; - r_pipe_freeze_trace.instr_rvalid = instr_rvalid_i; - r_pipe_freeze_trace.is_decoding = is_decoding_i; - r_pipe_freeze_trace.is_illegal = is_illegal_i; - r_pipe_freeze_trace.trigger_match = trigger_match_i; - r_pipe_freeze_trace.data_misaligned = data_misaligned_i; - r_pipe_freeze_trace.lsu_data_we_ex = lsu_data_we_ex_i; - - r_pipe_freeze_trace.debug_mode = debug_mode_i; - r_pipe_freeze_trace.debug_cause = debug_cause_i; - r_pipe_freeze_trace.prefetch_req = prefetch_req_i; - r_pipe_freeze_trace.pc_set = pc_set_i; + r_pipe_freeze_trace.instr_req = instr_req_i; + r_pipe_freeze_trace.instr_grant = instr_grant_i; + r_pipe_freeze_trace.instr_rvalid = instr_rvalid_i; + r_pipe_freeze_trace.is_decoding = is_decoding_i; + r_pipe_freeze_trace.is_illegal = is_illegal_i; + r_pipe_freeze_trace.trigger_match = trigger_match_i; + r_pipe_freeze_trace.data_misaligned = data_misaligned_i; + r_pipe_freeze_trace.lsu_data_we_ex = lsu_data_we_ex_i; + + r_pipe_freeze_trace.debug_mode = debug_mode_i; + r_pipe_freeze_trace.debug_cause = debug_cause_i; + r_pipe_freeze_trace.prefetch_req = prefetch_req_i; + r_pipe_freeze_trace.pc_set = pc_set_i; //// IF probes //// - r_pipe_freeze_trace.if_valid = if_valid_i; - r_pipe_freeze_trace.if_ready = if_ready_i; - r_pipe_freeze_trace.instr_valid_if = instr_valid_if_i; - r_pipe_freeze_trace.instr_if = instr_if_i; - r_pipe_freeze_trace.pc_if = pc_if_i; - r_pipe_freeze_trace.instr_pmp_err_if = instr_pmp_err_if_i; - - r_pipe_freeze_trace.instr_valid_id = instr_valid_id_i; - r_pipe_freeze_trace.instr_rdata_id = instr_rdata_id_i; - r_pipe_freeze_trace.is_fetch_failed_id = is_fetch_failed_id_i; - r_pipe_freeze_trace.instr_req_int = instr_req_int_i; - r_pipe_freeze_trace.clear_instr_valid = clear_instr_valid_i; + r_pipe_freeze_trace.if_valid = if_valid_i; + r_pipe_freeze_trace.if_ready = if_ready_i; + r_pipe_freeze_trace.instr_valid_if = instr_valid_if_i; 
+ r_pipe_freeze_trace.instr_if = instr_if_i; + r_pipe_freeze_trace.pc_if = pc_if_i; + r_pipe_freeze_trace.instr_pmp_err_if = instr_pmp_err_if_i; + + r_pipe_freeze_trace.instr_valid_id = instr_valid_id_i; + r_pipe_freeze_trace.instr_rdata_id = instr_rdata_id_i; + r_pipe_freeze_trace.is_fetch_failed_id = is_fetch_failed_id_i; + r_pipe_freeze_trace.instr_req_int = instr_req_int_i; + r_pipe_freeze_trace.clear_instr_valid = clear_instr_valid_i; //// ID probes //// - r_pipe_freeze_trace.pc_id = pc_id_i; - r_pipe_freeze_trace.id_valid = id_valid_i; - - r_pipe_freeze_trace.id_ready = id_ready_i; - r_pipe_freeze_trace.rf_re_id = rf_re_id_i; - r_pipe_freeze_trace.sys_en_id = sys_en_id_i; - r_pipe_freeze_trace.sys_mret_insn_id = sys_mret_insn_id_i; - r_pipe_freeze_trace.jump_in_id = jump_in_id_i; - r_pipe_freeze_trace.jump_target_id = jump_target_id_i; - r_pipe_freeze_trace.is_compressed_id = is_compressed_id_i; - r_pipe_freeze_trace.ebrk_insn_dec = ebrk_insn_dec_i; - r_pipe_freeze_trace.csr_cause = csr_cause_i; - r_pipe_freeze_trace.debug_csr_save = debug_csr_save_i; + r_pipe_freeze_trace.pc_id = pc_id_i; + r_pipe_freeze_trace.id_valid = id_valid_i; + + r_pipe_freeze_trace.id_ready = id_ready_i; + r_pipe_freeze_trace.rf_re_id = rf_re_id_i; + r_pipe_freeze_trace.sys_en_id = sys_en_id_i; + r_pipe_freeze_trace.sys_mret_insn_id = sys_mret_insn_id_i; + r_pipe_freeze_trace.jump_in_id = jump_in_id_i; + r_pipe_freeze_trace.jump_target_id = jump_target_id_i; + r_pipe_freeze_trace.is_compressed_id = is_compressed_id_i; + r_pipe_freeze_trace.ebrk_insn_dec = ebrk_insn_dec_i; + r_pipe_freeze_trace.csr_cause = csr_cause_i; + r_pipe_freeze_trace.debug_csr_save = debug_csr_save_i; // LSU - r_pipe_freeze_trace.lsu_en_id = lsu_en_id_i; - r_pipe_freeze_trace.lsu_we_id = lsu_we_id_i; - r_pipe_freeze_trace.lsu_size_id = lsu_size_id_i; + r_pipe_freeze_trace.lsu_en_id = lsu_en_id_i; + r_pipe_freeze_trace.lsu_we_id = lsu_we_id_i; + r_pipe_freeze_trace.lsu_size_id = lsu_size_id_i; // Register reads 
- r_pipe_freeze_trace.rs1_addr_id = rs1_addr_id_i; - r_pipe_freeze_trace.rs2_addr_id = rs2_addr_id_i; - r_pipe_freeze_trace.operand_a_fw_id = operand_a_fw_id_i; - r_pipe_freeze_trace.operand_b_fw_id = operand_b_fw_id_i; + r_pipe_freeze_trace.rs1_addr_id = rs1_addr_id_i; + r_pipe_freeze_trace.rs2_addr_id = rs2_addr_id_i; + r_pipe_freeze_trace.operand_a_fw_id = operand_a_fw_id_i; + r_pipe_freeze_trace.operand_b_fw_id = operand_b_fw_id_i; //// EX probes //// // Register writes in EX - r_pipe_freeze_trace.ex_ready = ex_ready_i; - r_pipe_freeze_trace.ex_valid = ex_valid_i; - - r_pipe_freeze_trace.ex_reg_we = ex_reg_we_i; - r_pipe_freeze_trace.ex_reg_addr = ex_reg_addr_i; - r_pipe_freeze_trace.ex_reg_wdata = ex_reg_wdata_i; - - r_pipe_freeze_trace.apu_en_ex = apu_en_ex_i; - - r_pipe_freeze_trace.branch_in_ex = branch_in_ex_i; - r_pipe_freeze_trace.branch_decision_ex = branch_decision_ex_i; - r_pipe_freeze_trace.dret_in_ex = dret_in_ex_i; + r_pipe_freeze_trace.ex_ready = ex_ready_i; + r_pipe_freeze_trace.ex_valid = ex_valid_i; + + r_pipe_freeze_trace.ex_reg_we = ex_reg_we_i; + r_pipe_freeze_trace.ex_reg_addr = ex_reg_addr_i; + r_pipe_freeze_trace.ex_reg_wdata = ex_reg_wdata_i; + + r_pipe_freeze_trace.apu_en_ex = apu_en_ex_i; + r_pipe_freeze_trace.apu_singlecycle = apu_singlecycle_i; + r_pipe_freeze_trace.apu_multicycle = apu_multicycle_i; + r_pipe_freeze_trace.wb_contention_lsu = wb_contention_lsu_i; + r_pipe_freeze_trace.wb_contention = wb_contention_i; + + r_pipe_freeze_trace.branch_in_ex = branch_in_ex_i; + r_pipe_freeze_trace.branch_decision_ex = branch_decision_ex_i; + r_pipe_freeze_trace.dret_in_ex = dret_in_ex_i; // LSU - r_pipe_freeze_trace.lsu_en_ex = lsu_en_ex_i; - r_pipe_freeze_trace.lsu_pmp_err_ex = lsu_pmp_err_ex_i; + r_pipe_freeze_trace.lsu_en_ex = lsu_en_ex_i; + r_pipe_freeze_trace.lsu_pmp_err_ex = lsu_pmp_err_ex_i; r_pipe_freeze_trace.lsu_pma_err_atomic_ex = lsu_pma_err_atomic_ex_i; - r_pipe_freeze_trace.branch_target_ex = branch_target_ex_i; + 
r_pipe_freeze_trace.branch_target_ex = branch_target_ex_i; - r_pipe_freeze_trace.data_addr_ex = data_addr_ex_i; - r_pipe_freeze_trace.data_wdata_ex = data_wdata_ex_i; - r_pipe_freeze_trace.lsu_split_q_ex = lsu_split_q_ex_i; + r_pipe_freeze_trace.data_addr_ex = data_addr_ex_i; + r_pipe_freeze_trace.data_wdata_ex = data_wdata_ex_i; + r_pipe_freeze_trace.lsu_split_q_ex = lsu_split_q_ex_i; //// WB probes //// - r_pipe_freeze_trace.pc_wb = pc_wb_i; - r_pipe_freeze_trace.wb_ready = wb_ready_i; - r_pipe_freeze_trace.wb_valid = wb_valid_i; - r_pipe_freeze_trace.ebreak_in_wb = ebreak_in_wb_i; - r_pipe_freeze_trace.instr_rdata_wb = instr_rdata_wb_i; - r_pipe_freeze_trace.csr_en_wb = csr_en_wb_i; - r_pipe_freeze_trace.sys_wfi_insn_wb = sys_wfi_insn_wb_i; + r_pipe_freeze_trace.pc_wb = pc_wb_i; + r_pipe_freeze_trace.wb_ready = wb_ready_i; + r_pipe_freeze_trace.wb_valid = wb_valid_i; + r_pipe_freeze_trace.ebreak_in_wb = ebreak_in_wb_i; + r_pipe_freeze_trace.instr_rdata_wb = instr_rdata_wb_i; + r_pipe_freeze_trace.csr_en_wb = csr_en_wb_i; + r_pipe_freeze_trace.sys_wfi_insn_wb = sys_wfi_insn_wb_i; // Register writes - r_pipe_freeze_trace.rf_we_wb = rf_we_wb_i; - r_pipe_freeze_trace.rf_addr_wb = rf_addr_wb_i; - r_pipe_freeze_trace.rf_wdata_wb = rf_wdata_wb_i; + r_pipe_freeze_trace.rf_we_wb = rf_we_wb_i; + r_pipe_freeze_trace.rf_addr_wb = rf_addr_wb_i; + r_pipe_freeze_trace.rf_wdata_wb = rf_wdata_wb_i; // LSU - r_pipe_freeze_trace.lsu_rdata_wb = lsu_rdata_wb_i; - - r_pipe_freeze_trace.data_we_ex = data_we_ex_i; - r_pipe_freeze_trace.data_atop_ex = data_atop_ex_i; - r_pipe_freeze_trace.data_type_ex = data_type_ex_i; - r_pipe_freeze_trace.alu_operand_c_ex = alu_operand_c_ex_i; - r_pipe_freeze_trace.data_reg_offset_ex = data_reg_offset_ex_i; - r_pipe_freeze_trace.data_load_event_ex = data_load_event_ex_i; - r_pipe_freeze_trace.data_sign_ext_ex = data_sign_ext_ex_i; - r_pipe_freeze_trace.lsu_rdata = lsu_rdata_i; - r_pipe_freeze_trace.data_req_ex = data_req_ex_i; - 
r_pipe_freeze_trace.alu_operand_a_ex = alu_operand_a_ex_i; - r_pipe_freeze_trace.alu_operand_b_ex = alu_operand_b_ex_i; - r_pipe_freeze_trace.useincr_addr_ex = useincr_addr_ex_i; - r_pipe_freeze_trace.data_misaligned_ex = data_misaligned_ex_i; - r_pipe_freeze_trace.p_elw_start = p_elw_start_i; - r_pipe_freeze_trace.p_elw_finish = p_elw_finish_i; - r_pipe_freeze_trace.lsu_ready_ex = lsu_ready_ex_i; - r_pipe_freeze_trace.lsu_ready_wb = lsu_ready_wb_i; - - r_pipe_freeze_trace.data_req_pmp = data_req_pmp_i; - r_pipe_freeze_trace.data_gnt_pmp = data_gnt_pmp_i; - r_pipe_freeze_trace.data_rvalid = data_rvalid_i; - r_pipe_freeze_trace.data_err_pmp = data_err_pmp_i; - r_pipe_freeze_trace.data_addr_pmp = data_addr_pmp_i; - r_pipe_freeze_trace.data_we = data_we_i; - r_pipe_freeze_trace.data_atop = data_atop_i; - r_pipe_freeze_trace.data_be = data_be_i; - r_pipe_freeze_trace.data_wdata = data_wdata_i; - r_pipe_freeze_trace.data_rdata = data_rdata_i; + r_pipe_freeze_trace.lsu_rdata_wb = lsu_rdata_wb_i; + + r_pipe_freeze_trace.data_we_ex = data_we_ex_i; + r_pipe_freeze_trace.data_atop_ex = data_atop_ex_i; + r_pipe_freeze_trace.data_type_ex = data_type_ex_i; + r_pipe_freeze_trace.alu_operand_c_ex = alu_operand_c_ex_i; + r_pipe_freeze_trace.data_reg_offset_ex = data_reg_offset_ex_i; + r_pipe_freeze_trace.data_load_event_ex = data_load_event_ex_i; + r_pipe_freeze_trace.data_sign_ext_ex = data_sign_ext_ex_i; + r_pipe_freeze_trace.lsu_rdata = lsu_rdata_i; + r_pipe_freeze_trace.data_req_ex = data_req_ex_i; + r_pipe_freeze_trace.alu_operand_a_ex = alu_operand_a_ex_i; + r_pipe_freeze_trace.alu_operand_b_ex = alu_operand_b_ex_i; + r_pipe_freeze_trace.useincr_addr_ex = useincr_addr_ex_i; + r_pipe_freeze_trace.data_misaligned_ex = data_misaligned_ex_i; + r_pipe_freeze_trace.p_elw_start = p_elw_start_i; + r_pipe_freeze_trace.p_elw_finish = p_elw_finish_i; + r_pipe_freeze_trace.lsu_ready_ex = lsu_ready_ex_i; + r_pipe_freeze_trace.lsu_ready_wb = lsu_ready_wb_i; + + 
r_pipe_freeze_trace.data_req_pmp = data_req_pmp_i; + r_pipe_freeze_trace.data_gnt_pmp = data_gnt_pmp_i; + r_pipe_freeze_trace.data_rvalid = data_rvalid_i; + r_pipe_freeze_trace.data_err_pmp = data_err_pmp_i; + r_pipe_freeze_trace.data_addr_pmp = data_addr_pmp_i; + r_pipe_freeze_trace.data_we = data_we_i; + r_pipe_freeze_trace.data_atop = data_atop_i; + r_pipe_freeze_trace.data_be = data_be_i; + r_pipe_freeze_trace.data_wdata = data_wdata_i; + r_pipe_freeze_trace.data_rdata = data_rdata_i; //// APU //// - r_pipe_freeze_trace.apu_req = apu_req_i; - r_pipe_freeze_trace.apu_gnt = apu_gnt_i; - r_pipe_freeze_trace.apu_rvalid = apu_rvalid_i; + r_pipe_freeze_trace.apu_req = apu_req_i; + r_pipe_freeze_trace.apu_gnt = apu_gnt_i; + r_pipe_freeze_trace.apu_rvalid = apu_rvalid_i; // PC // - r_pipe_freeze_trace.branch_addr_n = branch_addr_n_i; + r_pipe_freeze_trace.branch_addr_n = branch_addr_n_i; // Controller FSM probes - r_pipe_freeze_trace.ctrl_fsm_cs = ctrl_fsm_cs_i; - r_pipe_freeze_trace.pc_mux = pc_mux_i; - r_pipe_freeze_trace.exc_pc_mux = exc_pc_mux_i; + r_pipe_freeze_trace.ctrl_fsm_cs = ctrl_fsm_cs_i; + r_pipe_freeze_trace.pc_mux = pc_mux_i; + r_pipe_freeze_trace.exc_pc_mux = exc_pc_mux_i; // CSR - r_pipe_freeze_trace.csr.addr = csr_addr_i; - r_pipe_freeze_trace.csr.we = csr_we_i; - r_pipe_freeze_trace.csr.wdata_int = csr_wdata_int_i; + r_pipe_freeze_trace.csr.addr = csr_addr_i; + r_pipe_freeze_trace.csr.we = csr_we_i; + r_pipe_freeze_trace.csr.wdata_int = csr_wdata_int_i; - r_pipe_freeze_trace.csr.jvt_we = csr_jvt_we_i; - r_pipe_freeze_trace.csr.mstatus_n = csr_mstatus_n_i; - r_pipe_freeze_trace.csr.mstatus_q = csr_mstatus_q_i; - r_pipe_freeze_trace.csr.mstatus_fs_n = csr_mstatus_fs_n_i; - r_pipe_freeze_trace.csr.mstatus_fs_q = csr_mstatus_fs_q_i; + r_pipe_freeze_trace.csr.jvt_we = csr_jvt_we_i; + r_pipe_freeze_trace.csr.mstatus_n = csr_mstatus_n_i; + r_pipe_freeze_trace.csr.mstatus_q = csr_mstatus_q_i; + r_pipe_freeze_trace.csr.mstatus_fs_n = csr_mstatus_fs_n_i; + 
r_pipe_freeze_trace.csr.mstatus_fs_q = csr_mstatus_fs_q_i; if (FPU == 1 && ZFINX == 0) begin r_pipe_freeze_trace.csr.mstatus_full_q[31] = (r_pipe_freeze_trace.csr.mstatus_fs_q == FS_DIRTY) ? 1'b1 : 1'b0; @@ -648,6 +656,10 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.fcsr_q = {24'b0, csr_fcsr_frm_q_i, csr_fcsr_fflags_q_i}; compute_csr_we(); + if (csr_fcsr_fflags_we_i) begin + r_pipe_freeze_trace.csr.fflags_we = 1'b1; + r_pipe_freeze_trace.csr.fcsr_we = 1'b1; + end // #1; ->e_pipe_monitor_ok; From dcda529feb4381dd3b91e9280d6c33dd0f6396d1 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Thu, 18 May 2023 13:26:39 +0800 Subject: [PATCH 07/38] Correcting issue on gpr reporting when fpu instruction --- bhv/cv32e40p_rvfi.sv | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 1e60eacc8..a325e32bf 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -935,6 +935,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; event e_dev_send_wb_1, e_dev_send_wb_2; event e_dev_commit_rf_to_ex_1, e_dev_commit_rf_to_ex_2, e_dev_commit_rf_to_ex_3; event e_ex_to_wb_1, e_ex_to_wb_2; + event e_id_to_ex_1, e_id_to_ex_2; //used to match memory response to memory request and corresponding instruction integer cnt_data_req, cnt_data_resp; @@ -1129,12 +1130,12 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(ex, mip) `CSR_FROM_PIPE(ex, tdata1) tinfo_to_ex(); - `CSR_FROM_PIPE(ex, fflags) - `CSR_FROM_PIPE(ex, frm) - `CSR_FROM_PIPE(ex, fcsr) - trace_ex.m_csr.fflags_wmask = '0; - trace_ex.m_csr.frm_wmask = '0; - trace_ex.m_csr.fcsr_wmask = '0; + // `CSR_FROM_PIPE(ex, fflags) + // `CSR_FROM_PIPE(ex, frm) + // `CSR_FROM_PIPE(ex, fcsr) + // trace_ex.m_csr.fflags_wmask = '0; + // trace_ex.m_csr.frm_wmask = '0; + // trace_ex.m_csr.fcsr_wmask = '0; if (s_wb_valid_adjusted) begin if (trace_wb.m_valid) begin @@ -1193,6 +1194,13 @@ insn_trace_t
trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, mscratch) `CSR_FROM_PIPE(id, mie) + `CSR_FROM_PIPE(id, fflags) + `CSR_FROM_PIPE(id, frm) + `CSR_FROM_PIPE(id, fcsr) + trace_ex.m_csr.fflags_wmask = '0; + trace_ex.m_csr.frm_wmask = '0; + trace_ex.m_csr.fcsr_wmask = '0; + if (r_pipe_freeze_trace.apu_req) begin trace_id.m_is_apu = 1'b1; trace_id.m_apu_req_id = cnt_apu_req; @@ -1241,7 +1249,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end end - + ->e_id_to_ex_1; trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid = 1'b0; @@ -1284,7 +1292,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_mem_req_id[0] = cnt_data_req; end end + end else if (r_pipe_freeze_trace.rf_we_wb && !r_pipe_freeze_trace.ex_reg_we) begin + trace_id.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; end + ->e_id_to_ex_2; trace_ex.move_down_pipe(trace_id); trace_id.m_valid = 1'b0; end From fb219d7f7c4761b128090b03a6c879cb7ca6a566 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Mon, 15 May 2023 11:59:02 +0800 Subject: [PATCH 08/38] Adding hwloop csr --- bhv/cv32e40p_rvfi.sv | 102 ++++++++++++++++++++++++++++++++++++- bhv/cv32e40p_tb_wrapper.sv | 24 +++++++++ bhv/insn_trace.sv | 15 ++++++ bhv/pipe_freeze_trace.sv | 11 ++++ 4 files changed, 151 insertions(+), 1 deletion(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index a325e32bf..a8e950413 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -77,6 +77,11 @@ module cv32e40p_rvfi input logic [5:0] csr_cause_i, input logic debug_csr_save_i, + // HWLOOP regs + input logic [ 1:0][31:0] hwlp_start_q_i, + input logic [ 1:0][31:0] hwlp_end_q_i, + input logic [ 1:0][31:0] hwlp_counter_q_i, + input logic [ 1:0][31:0] hwlp_counter_n_i, // LSU input logic lsu_en_id_i, input logic lsu_we_id_i, @@ -561,7 +566,32 @@ module cv32e40p_rvfi output logic 
[31:0] rvfi_csr_mconfigptr_rmask, output logic [31:0] rvfi_csr_mconfigptr_wmask, output logic [31:0] rvfi_csr_mconfigptr_rdata, - output logic [31:0] rvfi_csr_mconfigptr_wdata + output logic [31:0] rvfi_csr_mconfigptr_wdata, + + output logic [31:0] rvfi_csr_lpstart0_rmask, + output logic [31:0] rvfi_csr_lpstart0_wmask, + output logic [31:0] rvfi_csr_lpstart0_rdata, + output logic [31:0] rvfi_csr_lpstart0_wdata, + output logic [31:0] rvfi_csr_lpend0_rmask, + output logic [31:0] rvfi_csr_lpend0_wmask, + output logic [31:0] rvfi_csr_lpend0_rdata, + output logic [31:0] rvfi_csr_lpend0_wdata, + output logic [31:0] rvfi_csr_lpcount0_rmask, + output logic [31:0] rvfi_csr_lpcount0_wmask, + output logic [31:0] rvfi_csr_lpcount0_rdata, + output logic [31:0] rvfi_csr_lpcount0_wdata, + output logic [31:0] rvfi_csr_lpstart1_rmask, + output logic [31:0] rvfi_csr_lpstart1_wmask, + output logic [31:0] rvfi_csr_lpstart1_rdata, + output logic [31:0] rvfi_csr_lpstart1_wdata, + output logic [31:0] rvfi_csr_lpend1_rmask, + output logic [31:0] rvfi_csr_lpend1_wmask, + output logic [31:0] rvfi_csr_lpend1_rdata, + output logic [31:0] rvfi_csr_lpend1_wdata, + output logic [31:0] rvfi_csr_lpcount1_rmask, + output logic [31:0] rvfi_csr_lpcount1_wmask, + output logic [31:0] rvfi_csr_lpcount1_rdata, + output logic [31:0] rvfi_csr_lpcount1_wdata ); @@ -791,6 +821,13 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `SET_RVFI_CSR_FROM_INSN(frm) `SET_RVFI_CSR_FROM_INSN(fcsr) + `SET_RVFI_CSR_FROM_INSN(lpstart0) + `SET_RVFI_CSR_FROM_INSN(lpend0) + `SET_RVFI_CSR_FROM_INSN(lpcount0) + `SET_RVFI_CSR_FROM_INSN(lpstart1) + `SET_RVFI_CSR_FROM_INSN(lpend1) + `SET_RVFI_CSR_FROM_INSN(lpcount1) + endfunction function void minstret_to_id(); @@ -868,6 +905,66 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_csr.marchid_wmask = '0; endfunction + function void lpcount1_to_id(); + trace_id.m_csr.lpcount1_we = '0; + trace_id.m_csr.lpcount1_rdata = 
r_pipe_freeze_trace.hwloop.counter_q[1]; + trace_id.m_csr.lpcount1_rmask = '1; + trace_id.m_csr.lpcount1_wdata = '0; + trace_id.m_csr.lpcount1_wmask = '0; + endfunction + + function void lpcount0_to_id(); + trace_id.m_csr.lpcount0_we = '0; + trace_id.m_csr.lpcount0_rdata = r_pipe_freeze_trace.hwloop.counter_q[0]; + trace_id.m_csr.lpcount0_rmask = '1; + trace_id.m_csr.lpcount0_wdata = '0; + trace_id.m_csr.lpcount0_wmask = '0; + endfunction + + function void lpend0_to_id(); + trace_id.m_csr.lpend0_we = '0; + trace_id.m_csr.lpend0_rdata = r_pipe_freeze_trace.hwloop.end_q[0]; + trace_id.m_csr.lpend0_rmask = '1; + trace_id.m_csr.lpend0_wdata = '0; + trace_id.m_csr.lpend0_wmask = '0; + endfunction + + function void lpend1_to_id(); + trace_id.m_csr.lpend1_we = '0; + trace_id.m_csr.lpend1_rdata = r_pipe_freeze_trace.hwloop.end_q[1]; + trace_id.m_csr.lpend1_rmask = '1; + trace_id.m_csr.lpend1_wdata = '0; + trace_id.m_csr.lpend1_wmask = '0; + endfunction + + function void lpstart0_to_id(); + trace_id.m_csr.lpstart0_we = '0; + trace_id.m_csr.lpstart0_rdata = r_pipe_freeze_trace.hwloop.start_q[0]; + trace_id.m_csr.lpstart0_rmask = '1; + trace_id.m_csr.lpstart0_wdata = '0; + trace_id.m_csr.lpstart0_wmask = '0; + endfunction + + function void lpstart1_to_id(); + trace_id.m_csr.lpstart1_we = '0; + trace_id.m_csr.lpstart1_rdata = r_pipe_freeze_trace.hwloop.start_q[1]; + trace_id.m_csr.lpstart1_rmask = '1; + trace_id.m_csr.lpstart1_wdata = '0; + trace_id.m_csr.lpstart1_wmask = '0; + endfunction + + function void hwloop_to_id(); + lpcount0_to_id(); + lpend0_to_id(); + lpstart0_to_id(); + + lpcount1_to_id(); + lpend1_to_id(); + lpstart1_to_id(); + + + endfunction + function void check_trap(); bit s_dbg_exception, s_exception, s_irq; s_dbg_exception = 1'b0; @@ -1250,6 +1347,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end ->e_id_to_ex_1; + hwloop_to_id(); trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid 
= 1'b0; @@ -1297,6 +1395,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; end ->e_id_to_ex_2; + + hwloop_to_id(); trace_ex.move_down_pipe(trace_id); trace_id.m_valid = 1'b0; end diff --git a/bhv/cv32e40p_tb_wrapper.sv b/bhv/cv32e40p_tb_wrapper.sv index 3ae3c4c5f..d4747a4d7 100644 --- a/bhv/cv32e40p_tb_wrapper.sv +++ b/bhv/cv32e40p_tb_wrapper.sv @@ -215,6 +215,24 @@ module cv32e40p_tb_wrapper `endif `ifdef CV32E40P_RVFI + logic [ 1:0][31:0] hwlp_start_q; + logic [ 1:0][31:0] hwlp_end_q; + logic [ 1:0][31:0] hwlp_counter_q; + logic [ 1:0][31:0] hwlp_counter_n; + generate + if(COREV_PULP) begin + assign hwlp_start_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_start_q ; + assign hwlp_end_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_end_q ; + assign hwlp_counter_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_counter_q; + assign hwlp_counter_n = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_counter_n; + end else begin + assign hwlp_start_q = '0; + assign hwlp_end_q = '0; + assign hwlp_counter_q = '0; + assign hwlp_counter_n = '0; + end + endgenerate + cv32e40p_rvfi #( .FPU (FPU), .ZFINX(ZFINX) @@ -262,6 +280,12 @@ module cv32e40p_tb_wrapper .csr_cause_i (cv32e40p_top_i.core_i.csr_cause), .debug_csr_save_i (cv32e40p_top_i.core_i.debug_csr_save), + // HWLOOP regs + .hwlp_start_q_i (hwlp_start_q ), + .hwlp_end_q_i (hwlp_end_q ), + .hwlp_counter_q_i(hwlp_counter_q), + .hwlp_counter_n_i(hwlp_counter_n), + //// EX probes //// .ex_valid_i (cv32e40p_top_i.core_i.ex_valid), .ex_ready_i (cv32e40p_top_i.core_i.ex_ready), diff --git a/bhv/insn_trace.sv b/bhv/insn_trace.sv index 65a3dc2d6..63c57d5cf 100644 --- a/bhv/insn_trace.sv +++ b/bhv/insn_trace.sv @@ -105,6 +105,14 @@ `DEFINE_CSR(fflags) `DEFINE_CSR(frm ) `DEFINE_CSR(fcsr ) + + `DEFINE_CSR(lpstart0 ) + `DEFINE_CSR(lpend0 ) + `DEFINE_CSR(lpcount0 ) + 
`DEFINE_CSR(lpstart1 ) + `DEFINE_CSR(lpend1 ) + `DEFINE_CSR(lpcount1 ) + } m_csr; enum logic[2:0] { @@ -264,6 +272,13 @@ `ASSIGN_CSR(frm ) `ASSIGN_CSR(fcsr ) + `ASSIGN_CSR(lpstart0) + `ASSIGN_CSR(lpend0 ) + `ASSIGN_CSR(lpcount0) + `ASSIGN_CSR(lpstart1) + `ASSIGN_CSR(lpend1 ) + `ASSIGN_CSR(lpcount1) + endfunction function void move_down_pipe(insn_trace_t m_source); diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index dcad1677a..174416a52 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -311,6 +311,12 @@ typedef struct { logic fcsr_we; } csr; + struct { + logic [ 1:0][31:0] start_q; + logic [ 1:0][31:0] end_q; + logic [ 1:0][31:0] counter_q; + logic [ 1:0][31:0] counter_n; + } hwloop; } pipe_trace_t; pipe_trace_t r_pipe_freeze_trace; @@ -661,6 +667,11 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.fcsr_we = 1'b1; end + r_pipe_freeze_trace.hwloop.start_q = hwlp_start_q_i ; + r_pipe_freeze_trace.hwloop.end_q = hwlp_end_q_i ; + r_pipe_freeze_trace.hwloop.counter_q = hwlp_counter_q_i; + r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; + // #1; ->e_pipe_monitor_ok; wait(clk_i_d == 1'b1); From a05d37f8df8c957c7c981b61834eb2434278339b Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Wed, 24 May 2023 13:19:34 +0800 Subject: [PATCH 09/38] Handling floating point csr inside rvfi for non fpu instruction Running verible running verible 2 Davide verible fix --- bhv/cv32e40p_apu_tracer.sv | 2 +- bhv/cv32e40p_rvfi.sv | 133 +++++++++++++++++----------- bhv/cv32e40p_rvfi_trace.sv | 2 +- bhv/cv32e40p_tb_wrapper.sv | 22 ++--- bhv/cv32e40p_tracer.sv | 15 ++-- bhv/pipe_freeze_trace.sv | 16 ++-- rtl/cv32e40p_aligner.sv | 4 +- rtl/cv32e40p_alu.sv | 38 ++++---- rtl/cv32e40p_alu_div.sv | 10 +-- rtl/cv32e40p_core.sv | 86 +++++++++--------- rtl/cv32e40p_ex_stage.sv | 2 +- rtl/cv32e40p_fp_wrapper.sv | 43 +++++---- rtl/cv32e40p_hwloop_regs.sv | 2 +- rtl/cv32e40p_id_stage.sv | 8 +- rtl/cv32e40p_if_stage.sv | 6 +- 
rtl/cv32e40p_int_controller.sv | 12 +-- rtl/cv32e40p_load_store_unit.sv | 30 +++---- rtl/cv32e40p_mult.sv | 4 +- rtl/cv32e40p_prefetch_buffer.sv | 14 +-- rtl/cv32e40p_prefetch_controller.sv | 6 +- rtl/cv32e40p_sleep_unit.sv | 4 +- rtl/cv32e40p_top.sv | 2 +- 22 files changed, 239 insertions(+), 222 deletions(-) diff --git a/bhv/cv32e40p_apu_tracer.sv b/bhv/cv32e40p_apu_tracer.sv index 6183b00d8..2c84aa329 100644 --- a/bhv/cv32e40p_apu_tracer.sv +++ b/bhv/cv32e40p_apu_tracer.sv @@ -52,7 +52,7 @@ module cv32e40p_apu_tracer ( // open/close output file for writing initial begin - wait(rst_n == 1'b1); + wait (rst_n == 1'b1); $sformat(fn, "apu_trace_core_%h.log", hart_id_i); $display("[APU_TRACER %2d] Output filename is: %s", hart_id_i, fn); apu_trace = $fopen(fn, "w"); diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index a8e950413..2c2f91640 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -76,21 +76,21 @@ module cv32e40p_rvfi input logic [5:0] csr_cause_i, - input logic debug_csr_save_i, + input logic debug_csr_save_i, // HWLOOP regs input logic [ 1:0][31:0] hwlp_start_q_i, input logic [ 1:0][31:0] hwlp_end_q_i, input logic [ 1:0][31:0] hwlp_counter_q_i, input logic [ 1:0][31:0] hwlp_counter_n_i, // LSU - input logic lsu_en_id_i, - input logic lsu_we_id_i, - input logic [ 1:0] lsu_size_id_i, + input logic lsu_en_id_i, + input logic lsu_we_id_i, + input logic [ 1:0] lsu_size_id_i, // Register reads - input logic [ 5:0] rs1_addr_id_i, - input logic [ 5:0] rs2_addr_id_i, - input logic [31:0] operand_a_fw_id_i, - input logic [31:0] operand_b_fw_id_i, + input logic [ 5:0] rs1_addr_id_i, + input logic [ 5:0] rs2_addr_id_i, + input logic [31:0] operand_a_fw_id_i, + input logic [31:0] operand_b_fw_id_i, //// EX probes //// @@ -291,7 +291,7 @@ module cv32e40p_rvfi input logic [ 7:0] csr_pmpcfg_n_i [16], input logic [ 7:0] csr_pmpcfg_q_i [16], input logic [15:0] csr_pmpcfg_we_i, - input logic [31:0] csr_pmpaddr_n_i, // PMP address input shared for all 
pmpaddr registers + input logic [31:0] csr_pmpaddr_n_i, // PMP address input shared for all pmpaddr registers input logic [31:0] csr_pmpaddr_q_i [16], input logic [15:0] csr_pmpaddr_we_i, input logic [31:0] csr_mseccfg_n_i, @@ -396,7 +396,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_mcountinhibit_wmask, output logic [31:0] rvfi_csr_mcountinhibit_rdata, output logic [31:0] rvfi_csr_mcountinhibit_wdata, - output logic [31:0][31:0] rvfi_csr_mhpmevent_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_mhpmevent_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_mhpmevent_wmask, output logic [31:0][31:0] rvfi_csr_mhpmevent_rdata, output logic [31:0][31:0] rvfi_csr_mhpmevent_wdata, @@ -448,7 +448,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_tselect_wmask, output logic [31:0] rvfi_csr_tselect_rdata, output logic [31:0] rvfi_csr_tselect_wdata, - output logic [ 3:0][31:0] rvfi_csr_tdata_rmask, // 1-3 implemented + output logic [ 3:0][31:0] rvfi_csr_tdata_rmask, // 1-3 implemented output logic [ 3:0][31:0] rvfi_csr_tdata_wmask, output logic [ 3:0][31:0] rvfi_csr_tdata_rdata, output logic [ 3:0][31:0] rvfi_csr_tdata_wdata, @@ -472,7 +472,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_dpc_wmask, output logic [31:0] rvfi_csr_dpc_rdata, output logic [31:0] rvfi_csr_dpc_wdata, - output logic [ 1:0][31:0] rvfi_csr_dscratch_rmask, // 0-1 implemented + output logic [ 1:0][31:0] rvfi_csr_dscratch_rmask, // 0-1 implemented output logic [ 1:0][31:0] rvfi_csr_dscratch_wmask, output logic [ 1:0][31:0] rvfi_csr_dscratch_rdata, output logic [ 1:0][31:0] rvfi_csr_dscratch_wdata, @@ -484,7 +484,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_minstret_wmask, output logic [31:0] rvfi_csr_minstret_rdata, output logic [31:0] rvfi_csr_minstret_wdata, - output logic [31:0][31:0] rvfi_csr_mhpmcounter_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_mhpmcounter_rmask, // 3-31 implemented output logic [31:0][31:0] 
rvfi_csr_mhpmcounter_wmask, output logic [31:0][31:0] rvfi_csr_mhpmcounter_rdata, output logic [31:0][31:0] rvfi_csr_mhpmcounter_wdata, @@ -496,7 +496,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_minstreth_wmask, output logic [31:0] rvfi_csr_minstreth_rdata, output logic [31:0] rvfi_csr_minstreth_wdata, - output logic [31:0][31:0] rvfi_csr_mhpmcounterh_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_mhpmcounterh_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_mhpmcounterh_wmask, output logic [31:0][31:0] rvfi_csr_mhpmcounterh_rdata, output logic [31:0][31:0] rvfi_csr_mhpmcounterh_wdata, @@ -508,7 +508,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_instret_wmask, output logic [31:0] rvfi_csr_instret_rdata, output logic [31:0] rvfi_csr_instret_wdata, - output logic [31:0][31:0] rvfi_csr_hpmcounter_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_hpmcounter_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_hpmcounter_wmask, output logic [31:0][31:0] rvfi_csr_hpmcounter_rdata, output logic [31:0][31:0] rvfi_csr_hpmcounter_wdata, @@ -520,7 +520,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_instreth_wmask, output logic [31:0] rvfi_csr_instreth_rdata, output logic [31:0] rvfi_csr_instreth_wdata, - output logic [31:0][31:0] rvfi_csr_hpmcounterh_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_hpmcounterh_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_hpmcounterh_wmask, output logic [31:0][31:0] rvfi_csr_hpmcounterh_rdata, output logic [31:0][31:0] rvfi_csr_hpmcounterh_wdata, @@ -686,9 +686,31 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; rvfi_csr_``CSR_NAME``_wdata = new_rvfi_trace.m_csr.``CSR_NAME``_wdata; \ rvfi_csr_``CSR_NAME``_wmask = new_rvfi_trace.m_csr.``CSR_NAME``_wmask; + logic [31:0] s_fflags_mirror; + logic [31:0] s_frm_mirror; + logic [31:0] s_fcsr_mirror; function void set_rvfi(); insn_trace_t new_rvfi_trace; - 
new_rvfi_trace = rvfi_trace_q.pop_front(); + new_rvfi_trace = rvfi_trace_q.pop_front(); + + if (new_rvfi_trace.m_is_apu) begin + if (new_rvfi_trace.m_csr.fflags_we) begin + s_fflags_mirror = new_rvfi_trace.m_csr.fflags_wdata; + end + if (new_rvfi_trace.m_csr.frm_we) begin + s_frm_mirror = new_rvfi_trace.m_csr.frm_wdata; + end + if (new_rvfi_trace.m_csr.fcsr_we) begin + s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_wdata; + end + + end else begin + new_rvfi_trace.m_csr.fflags_rdata = s_fflags_mirror; + new_rvfi_trace.m_csr.frm_rdata = s_frm_mirror; + new_rvfi_trace.m_csr.fcsr_rdata = s_fcsr_mirror; + end + + rvfi_order = new_rvfi_trace.m_order; rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; rvfi_insn = new_rvfi_trace.m_insn; @@ -906,51 +928,51 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction function void lpcount1_to_id(); - trace_id.m_csr.lpcount1_we = '0; - trace_id.m_csr.lpcount1_rdata = r_pipe_freeze_trace.hwloop.counter_q[1]; - trace_id.m_csr.lpcount1_rmask = '1; - trace_id.m_csr.lpcount1_wdata = '0; - trace_id.m_csr.lpcount1_wmask = '0; + trace_id.m_csr.lpcount1_we = '0; + trace_id.m_csr.lpcount1_rdata = r_pipe_freeze_trace.hwloop.counter_q[1]; + trace_id.m_csr.lpcount1_rmask = '1; + trace_id.m_csr.lpcount1_wdata = '0; + trace_id.m_csr.lpcount1_wmask = '0; endfunction function void lpcount0_to_id(); - trace_id.m_csr.lpcount0_we = '0; - trace_id.m_csr.lpcount0_rdata = r_pipe_freeze_trace.hwloop.counter_q[0]; - trace_id.m_csr.lpcount0_rmask = '1; - trace_id.m_csr.lpcount0_wdata = '0; - trace_id.m_csr.lpcount0_wmask = '0; + trace_id.m_csr.lpcount0_we = '0; + trace_id.m_csr.lpcount0_rdata = r_pipe_freeze_trace.hwloop.counter_q[0]; + trace_id.m_csr.lpcount0_rmask = '1; + trace_id.m_csr.lpcount0_wdata = '0; + trace_id.m_csr.lpcount0_wmask = '0; endfunction function void lpend0_to_id(); - trace_id.m_csr.lpend0_we = '0; - trace_id.m_csr.lpend0_rdata = r_pipe_freeze_trace.hwloop.end_q[0]; - trace_id.m_csr.lpend0_rmask = '1; - 
trace_id.m_csr.lpend0_wdata = '0; - trace_id.m_csr.lpend0_wmask = '0; + trace_id.m_csr.lpend0_we = '0; + trace_id.m_csr.lpend0_rdata = r_pipe_freeze_trace.hwloop.end_q[0]; + trace_id.m_csr.lpend0_rmask = '1; + trace_id.m_csr.lpend0_wdata = '0; + trace_id.m_csr.lpend0_wmask = '0; endfunction function void lpend1_to_id(); - trace_id.m_csr.lpend1_we = '0; - trace_id.m_csr.lpend1_rdata = r_pipe_freeze_trace.hwloop.end_q[1]; - trace_id.m_csr.lpend1_rmask = '1; - trace_id.m_csr.lpend1_wdata = '0; - trace_id.m_csr.lpend1_wmask = '0; + trace_id.m_csr.lpend1_we = '0; + trace_id.m_csr.lpend1_rdata = r_pipe_freeze_trace.hwloop.end_q[1]; + trace_id.m_csr.lpend1_rmask = '1; + trace_id.m_csr.lpend1_wdata = '0; + trace_id.m_csr.lpend1_wmask = '0; endfunction function void lpstart0_to_id(); - trace_id.m_csr.lpstart0_we = '0; - trace_id.m_csr.lpstart0_rdata = r_pipe_freeze_trace.hwloop.start_q[0]; - trace_id.m_csr.lpstart0_rmask = '1; - trace_id.m_csr.lpstart0_wdata = '0; - trace_id.m_csr.lpstart0_wmask = '0; + trace_id.m_csr.lpstart0_we = '0; + trace_id.m_csr.lpstart0_rdata = r_pipe_freeze_trace.hwloop.start_q[0]; + trace_id.m_csr.lpstart0_rmask = '1; + trace_id.m_csr.lpstart0_wdata = '0; + trace_id.m_csr.lpstart0_wmask = '0; endfunction function void lpstart1_to_id(); - trace_id.m_csr.lpstart1_we = '0; - trace_id.m_csr.lpstart1_rdata = r_pipe_freeze_trace.hwloop.start_q[1]; - trace_id.m_csr.lpstart1_rmask = '1; - trace_id.m_csr.lpstart1_wdata = '0; - trace_id.m_csr.lpstart1_wmask = '0; + trace_id.m_csr.lpstart1_we = '0; + trace_id.m_csr.lpstart1_rdata = r_pipe_freeze_trace.hwloop.start_q[1]; + trace_id.m_csr.lpstart1_rmask = '1; + trace_id.m_csr.lpstart1_wdata = '0; + trace_id.m_csr.lpstart1_wmask = '0; endfunction function void hwloop_to_id(); @@ -1061,6 +1083,15 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction + function void fcsr_to_wb(); + `CSR_FROM_PIPE(wb, fflags) + `CSR_FROM_PIPE(wb, frm) + `CSR_FROM_PIPE(wb, fcsr) + 
trace_wb.m_csr.fflags_wmask = '0; + trace_wb.m_csr.frm_wmask = '0; + trace_wb.m_csr.fcsr_wmask = '0; + endfunction + bit s_apu_to_alu_port; bit s_apu_to_lsu_port; @@ -1115,7 +1146,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; $display("*****Starting pipeline computing*****\n"); forever begin - wait(e_pipe_monitor_ok.triggered); + wait (e_pipe_monitor_ok.triggered); #1; check_trap(); @@ -1498,16 +1529,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; rvfi_mode = 2'b11; //priv_lvl_i; //TODO: correct this if needed $display("*****Starting update rvfi task*****\n"); - wait(clk_i_d == 1'b1); + wait (clk_i_d == 1'b1); forever begin - wait(clk_i_d == 1'b1); + wait (clk_i_d == 1'b1); if (rvfi_trace_q.size() != 0) begin set_rvfi(); rvfi_valid = 1'b1; end else begin rvfi_valid = 1'b0; end - wait(clk_i_d == 1'b0); + wait (clk_i_d == 1'b0); end endtask diff --git a/bhv/cv32e40p_rvfi_trace.sv b/bhv/cv32e40p_rvfi_trace.sv index 6ed4b773e..6bd798577 100644 --- a/bhv/cv32e40p_rvfi_trace.sv +++ b/bhv/cv32e40p_rvfi_trace.sv @@ -181,7 +181,7 @@ instr_trace_t trace_retire; end initial begin - wait(rst_n == 1'b1); + wait (rst_n == 1'b1); $sformat(fn, "trace_core.log"); $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); diff --git a/bhv/cv32e40p_tb_wrapper.sv b/bhv/cv32e40p_tb_wrapper.sv index d4747a4d7..a77db5614 100644 --- a/bhv/cv32e40p_tb_wrapper.sv +++ b/bhv/cv32e40p_tb_wrapper.sv @@ -86,7 +86,7 @@ module cv32e40p_tb_wrapper input logic [31:0] data_rdata_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, @@ -215,22 +215,22 @@ module cv32e40p_tb_wrapper `endif `ifdef CV32E40P_RVFI - logic [ 1:0][31:0] hwlp_start_q; - logic [ 1:0][31:0] hwlp_end_q; - logic [ 1:0][31:0] hwlp_counter_q; - logic [ 
1:0][31:0] hwlp_counter_n; + logic [1:0][31:0] hwlp_start_q; + logic [1:0][31:0] hwlp_end_q; + logic [1:0][31:0] hwlp_counter_q; + logic [1:0][31:0] hwlp_counter_n; generate - if(COREV_PULP) begin + if (COREV_PULP) begin assign hwlp_start_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_start_q ; - assign hwlp_end_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_end_q ; + assign hwlp_end_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_end_q; assign hwlp_counter_q = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_counter_q; assign hwlp_counter_n = cv32e40p_top_i.core_i.id_stage_i.gen_hwloop_regs.hwloop_regs_i.hwlp_counter_n; - end else begin + end else begin assign hwlp_start_q = '0; assign hwlp_end_q = '0; assign hwlp_counter_q = '0; assign hwlp_counter_n = '0; - end + end endgenerate cv32e40p_rvfi #( @@ -281,8 +281,8 @@ module cv32e40p_tb_wrapper .debug_csr_save_i (cv32e40p_top_i.core_i.debug_csr_save), // HWLOOP regs - .hwlp_start_q_i (hwlp_start_q ), - .hwlp_end_q_i (hwlp_end_q ), + .hwlp_start_q_i (hwlp_start_q), + .hwlp_end_q_i (hwlp_end_q), .hwlp_counter_q_i(hwlp_counter_q), .hwlp_counter_n_i(hwlp_counter_n), diff --git a/bhv/cv32e40p_tracer.sv b/bhv/cv32e40p_tracer.sv index 8208f2e61..5a31c24d8 100644 --- a/bhv/cv32e40p_tracer.sv +++ b/bhv/cv32e40p_tracer.sv @@ -178,7 +178,7 @@ module cv32e40p_tracer end initial begin - wait(rst_n == 1'b1); + wait (rst_n == 1'b1); $sformat(fn, "trace_core_%h.log", hart_id_i); $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); @@ -201,7 +201,7 @@ module cv32e40p_tracer always @(trace_wb) trace_wb_is_delay_instr = (trace_wb != null && is_wb_delay_instr( - trace_wb + trace_wb )) ? 
1 : 0; assign rd = {rd_is_fp, instr[11:07]}; @@ -214,9 +214,8 @@ module cv32e40p_tracer foreach (trace.regs_write[i]) if (trace.regs_write[i].addr == reg_addr) begin trace.regs_write[i].value = wdata; - `uvm_info(info_tag, $sformatf( - "Write mapped %0d, %0d:0x%08x pc:0x%08x", i, reg_addr, wdata, trace.pc), - UVM_DEBUG) + `uvm_info(info_tag, $sformatf("Write mapped %0d, %0d:0x%08x pc:0x%08x", i, reg_addr, wdata, + trace.pc), UVM_DEBUG) end else begin `uvm_info(info_tag, $sformatf( "Unmapped write to %0d:0x%08x, expected write to %0d", @@ -252,11 +251,11 @@ module cv32e40p_tracer // Funnel all handoffs to the ISS here, note that this must be automatic // as multiple retire events may occur at a time (wb_bypass) always begin - wait(trace_q.size() != 0); + wait (trace_q.size() != 0); trace_retire = trace_q.pop_front(); - wait(trace_retire.retire != 0); + wait (trace_retire.retire != 0); - if (trace_retire.ebreak) wait(debug_mode == 1); + if (trace_retire.ebreak) wait (debug_mode == 1); // Write signals and data structures used by step-and-compare insn_regs_write = trace_retire.regs_write; diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index 174416a52..e7669f545 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -312,10 +312,10 @@ typedef struct { } csr; struct { - logic [ 1:0][31:0] start_q; - logic [ 1:0][31:0] end_q; - logic [ 1:0][31:0] counter_q; - logic [ 1:0][31:0] counter_n; + logic [1:0][31:0] start_q; + logic [1:0][31:0] end_q; + logic [1:0][31:0] counter_q; + logic [1:0][31:0] counter_n; } hwloop; } pipe_trace_t; @@ -359,7 +359,7 @@ endfunction task monitor_pipeline(); $display("*****Starting pipeline monitoring*****\n"); forever begin - wait(clk_i_d == 1'b0 & rst_ni == 1'b1); + wait (clk_i_d == 1'b0 & rst_ni == 1'b1); // r_pipe_freeze_trace. 
<= ; r_pipe_freeze_trace.instr_req = instr_req_i; @@ -667,13 +667,13 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.fcsr_we = 1'b1; end - r_pipe_freeze_trace.hwloop.start_q = hwlp_start_q_i ; - r_pipe_freeze_trace.hwloop.end_q = hwlp_end_q_i ; + r_pipe_freeze_trace.hwloop.start_q = hwlp_start_q_i; + r_pipe_freeze_trace.hwloop.end_q = hwlp_end_q_i; r_pipe_freeze_trace.hwloop.counter_q = hwlp_counter_q_i; r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; // #1; ->e_pipe_monitor_ok; - wait(clk_i_d == 1'b1); + wait (clk_i_d == 1'b1); end endtask diff --git a/rtl/cv32e40p_aligner.sv b/rtl/cv32e40p_aligner.sv index a61f7e928..9bb341e06 100644 --- a/rtl/cv32e40p_aligner.sv +++ b/rtl/cv32e40p_aligner.sv @@ -25,7 +25,7 @@ module cv32e40p_aligner ( input logic rst_n, input logic fetch_valid_i, - output logic aligner_ready_o, //prevents overwriting the fethced instruction + output logic aligner_ready_o, //prevents overwriting the fethced instruction input logic if_valid_i, @@ -34,7 +34,7 @@ module cv32e40p_aligner ( output logic instr_valid_o, input logic [31:0] branch_addr_i, - input logic branch_i, // Asserted if we are branching/jumping now + input logic branch_i, // Asserted if we are branching/jumping now input logic [31:0] hwlp_addr_i, input logic hwlp_update_pc_i, diff --git a/rtl/cv32e40p_alu.sv b/rtl/cv32e40p_alu.sv index aa900a787..e7be80cb3 100644 --- a/rtl/cv32e40p_alu.sv +++ b/rtl/cv32e40p_alu.sv @@ -263,36 +263,28 @@ module cv32e40p_alu // right shifts, we let the synthesizer optimize this logic [63:0] shift_op_a_32; - assign shift_op_a_32 = (operator_i == ALU_ROR) ? { - shift_op_a, shift_op_a - } : $signed( - {{32{shift_arithmetic & shift_op_a[31]}}, shift_op_a} - ); + assign shift_op_a_32 = (operator_i == ALU_ROR) ? 
{shift_op_a, shift_op_a} : $signed( + {{32{shift_arithmetic & shift_op_a[31]}}, shift_op_a} + ); always_comb begin case (vector_mode_i) VEC_MODE16: begin - shift_right_result[31:16] = $signed( - {shift_arithmetic & shift_op_a[31], shift_op_a[31:16]} - ) >>> shift_amt_int[19:16]; - shift_right_result[15:0] = $signed( - {shift_arithmetic & shift_op_a[15], shift_op_a[15:0]} - ) >>> shift_amt_int[3:0]; + shift_right_result[31:16] = $signed({shift_arithmetic & shift_op_a[31], + shift_op_a[31:16]}) >>> shift_amt_int[19:16]; + shift_right_result[15:0] = + $signed({shift_arithmetic & shift_op_a[15], shift_op_a[15:0]}) >>> shift_amt_int[3:0]; end VEC_MODE8: begin - shift_right_result[31:24] = $signed( - {shift_arithmetic & shift_op_a[31], shift_op_a[31:24]} - ) >>> shift_amt_int[26:24]; - shift_right_result[23:16] = $signed( - {shift_arithmetic & shift_op_a[23], shift_op_a[23:16]} - ) >>> shift_amt_int[18:16]; - shift_right_result[15:8] = $signed( - {shift_arithmetic & shift_op_a[15], shift_op_a[15:8]} - ) >>> shift_amt_int[10:8]; - shift_right_result[7:0] = $signed( - {shift_arithmetic & shift_op_a[7], shift_op_a[7:0]} - ) >>> shift_amt_int[2:0]; + shift_right_result[31:24] = $signed({shift_arithmetic & shift_op_a[31], + shift_op_a[31:24]}) >>> shift_amt_int[26:24]; + shift_right_result[23:16] = $signed({shift_arithmetic & shift_op_a[23], + shift_op_a[23:16]}) >>> shift_amt_int[18:16]; + shift_right_result[15:8] = + $signed({shift_arithmetic & shift_op_a[15], shift_op_a[15:8]}) >>> shift_amt_int[10:8]; + shift_right_result[7:0] = $signed({shift_arithmetic & shift_op_a[7], shift_op_a[7:0]}) >>> + shift_amt_int[2:0]; end default: // VEC_MODE32 diff --git a/rtl/cv32e40p_alu_div.sv b/rtl/cv32e40p_alu_div.sv index a7449e01c..446f446fd 100644 --- a/rtl/cv32e40p_alu_div.sv +++ b/rtl/cv32e40p_alu_div.sv @@ -35,8 +35,8 @@ module cv32e40p_alu_div #( input logic [C_LOG_WIDTH-1:0] OpBShift_DI, input logic OpBIsZero_SI, // - input logic OpBSign_SI, // gate this to 0 in case of unsigned 
ops - input logic [ 1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem + input logic OpBSign_SI, // gate this to 0 in case of unsigned ops + input logic [ 1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem // handshake input logic InVld_SI, // output IF @@ -186,9 +186,9 @@ module cv32e40p_alu_div #( assign AReg_DN = (ARegEn_S) ? AddOut_D : AReg_DP; assign BReg_DN = (BRegEn_S) ? BMux_D : BReg_DP; - assign ResReg_DN = (LoadEn_S) ? '0 : (ResRegEn_S) ? { - ABComp_S, ResReg_DP[$high(ResReg_DP):1] - } : ResReg_DP; + assign ResReg_DN = (LoadEn_S) ? '0 : (ResRegEn_S) ? {ABComp_S, ResReg_DP[$high( + ResReg_DP + ):1]} : ResReg_DP; always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs if (~Rst_RBI) begin diff --git a/rtl/cv32e40p_core.sv b/rtl/cv32e40p_core.sv index 25179aea8..4a5c32774 100644 --- a/rtl/cv32e40p_core.sv +++ b/rtl/cv32e40p_core.sv @@ -84,7 +84,7 @@ module cv32e40p_core input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, @@ -379,9 +379,9 @@ module cv32e40p_core .COREV_CLUSTER(COREV_CLUSTER) ) sleep_unit_i ( // Clock, reset interface - .clk_ungated_i(clk_i), // Ungated clock + .clk_ungated_i(clk_i), // Ungated clock .rst_n (rst_ni), - .clk_gated_o (clk), // Gated clock + .clk_gated_o (clk), // Gated clock .scan_cg_en_i (scan_cg_en_i), // Core sleep @@ -447,8 +447,8 @@ module cv32e40p_core .instr_gnt_i (instr_gnt_pmp), .instr_rvalid_i (instr_rvalid_i), .instr_rdata_i (instr_rdata_i), - .instr_err_i (1'b0), // Bus error (not used yet) - .instr_err_pmp_i(instr_err_pmp), // PMP error + .instr_err_i (1'b0), // Bus error (not used yet) + .instr_err_pmp_i(instr_err_pmp), // PMP error // outputs to ID stage .instr_valid_id_o (instr_valid_id), @@ -464,7 +464,7 @@ module cv32e40p_core .depc_i(depc), // debug return address - .pc_mux_i 
(pc_mux_id), // sel for pc multiplexer + .pc_mux_i (pc_mux_id), // sel for pc multiplexer .exc_pc_mux_i(exc_pc_mux_id), @@ -523,7 +523,7 @@ module cv32e40p_core .APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU), .DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN) ) id_stage_i ( - .clk (clk), // Gated clock + .clk (clk), // Gated clock .clk_ungated_i(clk_i), // Ungated clock .rst_n (rst_ni), @@ -592,14 +592,14 @@ module cv32e40p_core .regfile_alu_waddr_ex_o(regfile_alu_waddr_ex), // MUL - .mult_operator_ex_o (mult_operator_ex), // from ID to EX stage - .mult_en_ex_o (mult_en_ex), // from ID to EX stage + .mult_operator_ex_o (mult_operator_ex), // from ID to EX stage + .mult_en_ex_o (mult_en_ex), // from ID to EX stage .mult_sel_subword_ex_o(mult_sel_subword_ex), // from ID to EX stage .mult_signed_mode_ex_o(mult_signed_mode_ex), // from ID to EX stage - .mult_operand_a_ex_o (mult_operand_a_ex), // from ID to EX stage - .mult_operand_b_ex_o (mult_operand_b_ex), // from ID to EX stage - .mult_operand_c_ex_o (mult_operand_c_ex), // from ID to EX stage - .mult_imm_ex_o (mult_imm_ex), // from ID to EX stage + .mult_operand_a_ex_o (mult_operand_a_ex), // from ID to EX stage + .mult_operand_b_ex_o (mult_operand_b_ex), // from ID to EX stage + .mult_operand_c_ex_o (mult_operand_c_ex), // from ID to EX stage + .mult_imm_ex_o (mult_imm_ex), // from ID to EX stage .mult_dot_op_a_ex_o (mult_dot_op_a_ex), // from ID to EX stage .mult_dot_op_b_ex_o (mult_dot_op_b_ex), // from ID to EX stage @@ -636,9 +636,9 @@ module cv32e40p_core .current_priv_lvl_i (current_priv_lvl), .csr_irq_sec_o (csr_irq_sec), .csr_cause_o (csr_cause), - .csr_save_if_o (csr_save_if), // control signal to save pc - .csr_save_id_o (csr_save_id), // control signal to save pc - .csr_save_ex_o (csr_save_ex), // control signal to save pc + .csr_save_if_o (csr_save_if), // control signal to save pc + .csr_save_id_o (csr_save_id), // control signal to save pc + .csr_save_ex_o (csr_save_ex), // control signal to save pc 
.csr_restore_mret_id_o(csr_restore_mret_id), // control signal to restore pc .csr_restore_uret_id_o(csr_restore_uret_id), // control signal to restore pc @@ -655,11 +655,11 @@ module cv32e40p_core .hwlp_target_o(hwlp_target), // LSU - .data_req_ex_o (data_req_ex), // to load store unit - .data_we_ex_o (data_we_ex), // to load store unit + .data_req_ex_o (data_req_ex), // to load store unit + .data_we_ex_o (data_we_ex), // to load store unit .atop_ex_o (data_atop_ex), - .data_type_ex_o (data_type_ex), // to load store unit - .data_sign_ext_ex_o (data_sign_ext_ex), // to load store unit + .data_type_ex_o (data_type_ex), // to load store unit + .data_sign_ext_ex_o (data_sign_ext_ex), // to load store unit .data_reg_offset_ex_o(data_reg_offset_ex), // to load store unit .data_load_event_ex_o(data_load_event_ex), // to load store unit @@ -748,34 +748,34 @@ module cv32e40p_core // Alu signals from ID stage .alu_en_i (alu_en_ex), - .alu_operator_i (alu_operator_ex), // from ID/EX pipe registers + .alu_operator_i (alu_operator_ex), // from ID/EX pipe registers .alu_operand_a_i (alu_operand_a_ex), // from ID/EX pipe registers .alu_operand_b_i (alu_operand_b_ex), // from ID/EX pipe registers .alu_operand_c_i (alu_operand_c_ex), // from ID/EX pipe registers - .bmask_a_i (bmask_a_ex), // from ID/EX pipe registers - .bmask_b_i (bmask_b_ex), // from ID/EX pipe registers - .imm_vec_ext_i (imm_vec_ext_ex), // from ID/EX pipe registers - .alu_vec_mode_i (alu_vec_mode_ex), // from ID/EX pipe registers - .alu_is_clpx_i (alu_is_clpx_ex), // from ID/EX pipe registers + .bmask_a_i (bmask_a_ex), // from ID/EX pipe registers + .bmask_b_i (bmask_b_ex), // from ID/EX pipe registers + .imm_vec_ext_i (imm_vec_ext_ex), // from ID/EX pipe registers + .alu_vec_mode_i (alu_vec_mode_ex), // from ID/EX pipe registers + .alu_is_clpx_i (alu_is_clpx_ex), // from ID/EX pipe registers .alu_is_subrot_i (alu_is_subrot_ex), // from ID/Ex pipe registers - .alu_clpx_shift_i(alu_clpx_shift_ex), // from ID/EX 
pipe registers + .alu_clpx_shift_i(alu_clpx_shift_ex), // from ID/EX pipe registers // Multipler - .mult_operator_i (mult_operator_ex), // from ID/EX pipe registers - .mult_operand_a_i (mult_operand_a_ex), // from ID/EX pipe registers - .mult_operand_b_i (mult_operand_b_ex), // from ID/EX pipe registers - .mult_operand_c_i (mult_operand_c_ex), // from ID/EX pipe registers - .mult_en_i (mult_en_ex), // from ID/EX pipe registers + .mult_operator_i (mult_operator_ex), // from ID/EX pipe registers + .mult_operand_a_i (mult_operand_a_ex), // from ID/EX pipe registers + .mult_operand_b_i (mult_operand_b_ex), // from ID/EX pipe registers + .mult_operand_c_i (mult_operand_c_ex), // from ID/EX pipe registers + .mult_en_i (mult_en_ex), // from ID/EX pipe registers .mult_sel_subword_i(mult_sel_subword_ex), // from ID/EX pipe registers .mult_signed_mode_i(mult_signed_mode_ex), // from ID/EX pipe registers - .mult_imm_i (mult_imm_ex), // from ID/EX pipe registers - .mult_dot_op_a_i (mult_dot_op_a_ex), // from ID/EX pipe registers - .mult_dot_op_b_i (mult_dot_op_b_ex), // from ID/EX pipe registers - .mult_dot_op_c_i (mult_dot_op_c_ex), // from ID/EX pipe registers - .mult_dot_signed_i (mult_dot_signed_ex), // from ID/EX pipe registers - .mult_is_clpx_i (mult_is_clpx_ex), // from ID/EX pipe registers - .mult_clpx_shift_i (mult_clpx_shift_ex), // from ID/EX pipe registers - .mult_clpx_img_i (mult_clpx_img_ex), // from ID/EX pipe registers + .mult_imm_i (mult_imm_ex), // from ID/EX pipe registers + .mult_dot_op_a_i (mult_dot_op_a_ex), // from ID/EX pipe registers + .mult_dot_op_b_i (mult_dot_op_b_ex), // from ID/EX pipe registers + .mult_dot_op_c_i (mult_dot_op_c_ex), // from ID/EX pipe registers + .mult_dot_signed_i (mult_dot_signed_ex), // from ID/EX pipe registers + .mult_is_clpx_i (mult_is_clpx_ex), // from ID/EX pipe registers + .mult_clpx_shift_i (mult_clpx_shift_ex), // from ID/EX pipe registers + .mult_clpx_img_i (mult_clpx_img_ex), // from ID/EX pipe registers 
.mult_multicycle_o(mult_multicycle), // to ID/EX pipe registers @@ -873,8 +873,8 @@ module cv32e40p_core .data_req_o (data_req_pmp), .data_gnt_i (data_gnt_pmp), .data_rvalid_i (data_rvalid_i), - .data_err_i (1'b0), // Bus error (not used yet) - .data_err_pmp_i(data_err_pmp), // PMP error + .data_err_i (1'b0), // Bus error (not used yet) + .data_err_pmp_i(data_err_pmp), // PMP error .data_addr_o (data_addr_pmp), .data_we_o (data_we_o), @@ -890,7 +890,7 @@ module cv32e40p_core .data_wdata_ex_i (alu_operand_c_ex), .data_reg_offset_ex_i(data_reg_offset_ex), .data_load_event_ex_i(data_load_event_ex), - .data_sign_ext_ex_i (data_sign_ext_ex), // sign extension + .data_sign_ext_ex_i (data_sign_ext_ex), // sign extension .data_rdata_ex_o (lsu_rdata), .data_req_ex_i (data_req_ex), diff --git a/rtl/cv32e40p_ex_stage.sv b/rtl/cv32e40p_ex_stage.sv index 6b58a8425..4b6e30578 100644 --- a/rtl/cv32e40p_ex_stage.sv +++ b/rtl/cv32e40p_ex_stage.sv @@ -149,7 +149,7 @@ module cv32e40p_ex_stage output logic ex_ready_o, // EX stage ready for new data output logic ex_valid_o, // EX stage gets new data - input logic wb_ready_i // WB stage ready for new data + input logic wb_ready_i // WB stage ready for new data ); logic [31:0] alu_result; diff --git a/rtl/cv32e40p_fp_wrapper.sv b/rtl/cv32e40p_fp_wrapper.sv index c385a2900..16b39c10c 100644 --- a/rtl/cv32e40p_fp_wrapper.sv +++ b/rtl/cv32e40p_fp_wrapper.sv @@ -63,32 +63,29 @@ module cv32e40p_fp_wrapper // ----------- // Features (enabled formats, vectors etc.) 
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{ - Width: C_FLEN, - EnableVectors: C_XFVEC, - EnableNanBox: 1'b0, - FpFmtMask: { - C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT - }, IntFmtMask: { - C_XFVEC && C_XF8, C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0 - }}; + Width: C_FLEN, + EnableVectors: C_XFVEC, + EnableNanBox: 1'b0, + FpFmtMask: {C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT}, + IntFmtMask: {C_XFVEC && C_XF8, C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0} + }; // Implementation (number of registers etc) localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{ - PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt - '{ - FPU_ADDMUL_LAT, C_LAT_FP64, C_LAT_FP16, C_LAT_FP8, C_LAT_FP16ALT - }, // ADDMUL - '{default: C_LAT_DIVSQRT}, // DIVSQRT - '{default: FPU_OTHERS_LAT}, // NONCOMP - '{default: FPU_OTHERS_LAT} - }, // CONV - UnitTypes: '{ - '{default: fpnew_pkg::MERGED}, // ADDMUL - '{default: fpnew_pkg::MERGED}, // DIVSQRT - '{default: fpnew_pkg::PARALLEL}, // NONCOMP - '{default: fpnew_pkg::MERGED} - }, // CONV - PipeConfig: fpnew_pkg::AFTER}; + PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt + '{FPU_ADDMUL_LAT, C_LAT_FP64, C_LAT_FP16, C_LAT_FP8, C_LAT_FP16ALT}, // ADDMUL + '{default: C_LAT_DIVSQRT}, // DIVSQRT + '{default: FPU_OTHERS_LAT}, // NONCOMP + '{default: FPU_OTHERS_LAT} + }, // CONV + UnitTypes: '{ + '{default: fpnew_pkg::MERGED}, // ADDMUL + '{default: fpnew_pkg::MERGED}, // DIVSQRT + '{default: fpnew_pkg::PARALLEL}, // NONCOMP + '{default: fpnew_pkg::MERGED} + }, // CONV + PipeConfig: fpnew_pkg::AFTER + }; //--------------- // FPU instance diff --git a/rtl/cv32e40p_hwloop_regs.sv b/rtl/cv32e40p_hwloop_regs.sv index fc835fbe4..7f6260f0d 100644 --- a/rtl/cv32e40p_hwloop_regs.sv +++ b/rtl/cv32e40p_hwloop_regs.sv @@ -34,7 +34,7 @@ module cv32e40p_hwloop_regs #( input logic [ 31:0] hwlp_end_data_i, input logic [ 31:0] hwlp_cnt_data_i, input logic [ 2:0] hwlp_we_i, - input logic [N_REG_BITS-1:0] hwlp_regid_i, // selects the register set + input logic 
[N_REG_BITS-1:0] hwlp_regid_i, // selects the register set // from controller input logic valid_i, diff --git a/rtl/cv32e40p_id_stage.sv b/rtl/cv32e40p_id_stage.sv index 0f54e31eb..74e8446d5 100644 --- a/rtl/cv32e40p_id_stage.sv +++ b/rtl/cv32e40p_id_stage.sv @@ -61,7 +61,7 @@ module cv32e40p_id_stage // Interface to IF stage input logic instr_valid_i, - input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage + input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage output logic instr_req_o, input logic is_compressed_i, input logic illegal_c_insn_i, @@ -196,8 +196,8 @@ module cv32e40p_id_stage // Interrupt signals input logic [31:0] irq_i, input logic irq_sec_i, - input logic [31:0] mie_bypass_i, // MIE CSR (bypass) - output logic [31:0] mip_o, // MIP CSR + input logic [31:0] mie_bypass_i, // MIE CSR (bypass) + output logic [31:0] mip_o, // MIP CSR input logic m_irq_enable_i, input logic u_irq_enable_i, output logic irq_ack_o, @@ -1086,7 +1086,7 @@ module cv32e40p_id_stage .COREV_CLUSTER(COREV_CLUSTER), .COREV_PULP (COREV_PULP) ) controller_i ( - .clk (clk), // Gated clock + .clk (clk), // Gated clock .clk_ungated_i(clk_ungated_i), // Ungated clock .rst_n (rst_n), diff --git a/rtl/cv32e40p_if_stage.sv b/rtl/cv32e40p_if_stage.sv index 23fd14f5f..ede05613b 100644 --- a/rtl/cv32e40p_if_stage.sv +++ b/rtl/cv32e40p_if_stage.sv @@ -80,7 +80,7 @@ module cv32e40p_if_stage #( input logic [4:0] m_exc_vec_pc_mux_i, // selects ISR address for vectorized interrupt lines input logic [4:0] u_exc_vec_pc_mux_i, // selects ISR address for vectorized interrupt lines - output logic csr_mtvec_init_o, // tell CS regfile to init mtvec + output logic csr_mtvec_init_o, // tell CS regfile to init mtvec // jump and branch target and decision input logic [31:0] jump_target_id_i, // jump target address @@ -95,7 +95,7 @@ module cv32e40p_if_stage #( input logic id_ready_i, // misc signals - output logic if_busy_o, // is the IF stage busy fetching instructions? 
+ output logic if_busy_o, // is the IF stage busy fetching instructions? output logic perf_imiss_o // Instruction Fetch Miss ); @@ -200,7 +200,7 @@ module cv32e40p_if_stage #( .instr_addr_o (instr_addr_o), .instr_gnt_i (instr_gnt_i), .instr_rvalid_i (instr_rvalid_i), - .instr_err_i (instr_err_i), // Not supported (yet) + .instr_err_i (instr_err_i), // Not supported (yet) .instr_err_pmp_i(instr_err_pmp_i), // Not supported (yet) .instr_rdata_i (instr_rdata_i), diff --git a/rtl/cv32e40p_int_controller.sv b/rtl/cv32e40p_int_controller.sv index af8489e0f..b2044c7b6 100644 --- a/rtl/cv32e40p_int_controller.sv +++ b/rtl/cv32e40p_int_controller.sv @@ -30,8 +30,8 @@ module cv32e40p_int_controller input logic rst_n, // External interrupt lines - input logic [31:0] irq_i, // Level-triggered interrupt inputs - input logic irq_sec_i, // Interrupt secure bit from EU + input logic [31:0] irq_i, // Level-triggered interrupt inputs + input logic irq_sec_i, // Interrupt secure bit from EU // To cv32e40p_controller output logic irq_req_ctrl_o, @@ -40,10 +40,10 @@ module cv32e40p_int_controller output logic irq_wu_ctrl_o, // To/from cv32e40p_cs_registers - input logic [31:0] mie_bypass_i, // MIE CSR (bypass) - output logic [31:0] mip_o, // MIP CSR - input logic m_ie_i, // Interrupt enable bit from CSR (M mode) - input logic u_ie_i, // Interrupt enable bit from CSR (U mode) + input logic [31:0] mie_bypass_i, // MIE CSR (bypass) + output logic [31:0] mip_o, // MIP CSR + input logic m_ie_i, // Interrupt enable bit from CSR (M mode) + input logic u_ie_i, // Interrupt enable bit from CSR (U mode) input PrivLvl_t current_priv_lvl_i ); diff --git a/rtl/cv32e40p_load_store_unit.sv b/rtl/cv32e40p_load_store_unit.sv index 024d43699..c9773ff37 100644 --- a/rtl/cv32e40p_load_store_unit.sv +++ b/rtl/cv32e40p_load_store_unit.sv @@ -43,27 +43,27 @@ module cv32e40p_load_store_unit #( input logic [31:0] data_rdata_i, // signals from ex stage - input logic data_we_ex_i, // write enable -> from ex 
stage - input logic [ 1:0] data_type_ex_i, // Data type word, halfword, byte -> from ex stage - input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage + input logic data_we_ex_i, // write enable -> from ex stage + input logic [ 1:0] data_type_ex_i, // Data type word, halfword, byte -> from ex stage + input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage input logic [ 1:0] data_reg_offset_ex_i, // offset inside register for stores -> from ex stage input logic data_load_event_ex_i, // load event -> from ex stage - input logic [ 1:0] data_sign_ext_ex_i, // sign extension -> from ex stage + input logic [ 1:0] data_sign_ext_ex_i, // sign extension -> from ex stage - output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage - input logic data_req_ex_i, // data request -> from ex stage - input logic [31:0] operand_a_ex_i, // operand a from RF for address -> from ex stage - input logic [31:0] operand_b_ex_i, // operand b from RF for address -> from ex stage - input logic addr_useincr_ex_i, // use a + b or just a for address -> from ex stage + output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage + input logic data_req_ex_i, // data request -> from ex stage + input logic [31:0] operand_a_ex_i, // operand a from RF for address -> from ex stage + input logic [31:0] operand_b_ex_i, // operand b from RF for address -> from ex stage + input logic addr_useincr_ex_i, // use a + b or just a for address -> from ex stage - input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline + input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline output logic data_misaligned_o, // misaligned access was detected -> to controller - input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage + input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage output logic [5:0] data_atop_o, // atomic instruction 
signal -> core output output logic p_elw_start_o, // load event starts - output logic p_elw_finish_o, // load event finishes + output logic p_elw_finish_o, // load event finishes // stall signal output logic lsu_ready_ex_o, // LSU ready for new data in EX stage @@ -468,7 +468,7 @@ module cv32e40p_load_store_unit #( .resp_valid_o(resp_valid), .resp_rdata_o(resp_rdata), - .resp_err_o (resp_err), // Unused for now + .resp_err_o (resp_err), // Unused for now .obi_req_o (data_req_o), .obi_gnt_i (data_gnt_i), @@ -476,10 +476,10 @@ module cv32e40p_load_store_unit #( .obi_we_o (data_we_o), .obi_be_o (data_be_o), .obi_wdata_o (data_wdata_o), - .obi_atop_o (data_atop_o), // Not (yet) defined in OBI 1.0 spec + .obi_atop_o (data_atop_o), // Not (yet) defined in OBI 1.0 spec .obi_rdata_i (data_rdata_i), .obi_rvalid_i(data_rvalid_i), - .obi_err_i (data_err_i) // External bus error (validity defined by obi_rvalid_i) + .obi_err_i (data_err_i) // External bus error (validity defined by obi_rvalid_i) ); diff --git a/rtl/cv32e40p_mult.sv b/rtl/cv32e40p_mult.sv index ea0da1937..804f24e1c 100644 --- a/rtl/cv32e40p_mult.sv +++ b/rtl/cv32e40p_mult.sv @@ -370,9 +370,7 @@ module cv32e40p_mult |-> (result_o == (($signed( {{32{op_a_i[31]}}, op_a_i} - ) * { - 32'b0, op_b_i - }) >> 32))); + ) * {32'b0, op_b_i}) >> 32))); // check multiplication result for mulhu assert property ( diff --git a/rtl/cv32e40p_prefetch_buffer.sv b/rtl/cv32e40p_prefetch_buffer.sv index 596bdfe94..80a5ecb88 100644 --- a/rtl/cv32e40p_prefetch_buffer.sv +++ b/rtl/cv32e40p_prefetch_buffer.sv @@ -48,8 +48,8 @@ module cv32e40p_prefetch_buffer #( output logic [31:0] instr_addr_o, input logic [31:0] instr_rdata_i, input logic instr_rvalid_i, - input logic instr_err_i, // Not used yet (future addition) - input logic instr_err_pmp_i, // Not used yet (future addition) + input logic instr_err_i, // Not used yet (future addition) + input logic instr_err_pmp_i, // Not used yet (future addition) // Prefetch Buffer Status output 
logic busy_o @@ -167,15 +167,15 @@ module cv32e40p_prefetch_buffer #( .resp_valid_o(resp_valid), .resp_rdata_o(resp_rdata), - .resp_err_o (resp_err), // Unused for now + .resp_err_o (resp_err), // Unused for now .obi_req_o (instr_req_o), .obi_gnt_i (instr_gnt_i), .obi_addr_o (instr_addr_o), - .obi_we_o (), // Left unconnected on purpose - .obi_be_o (), // Left unconnected on purpose - .obi_wdata_o (), // Left unconnected on purpose - .obi_atop_o (), // Left unconnected on purpose + .obi_we_o (), // Left unconnected on purpose + .obi_be_o (), // Left unconnected on purpose + .obi_wdata_o (), // Left unconnected on purpose + .obi_atop_o (), // Left unconnected on purpose .obi_rdata_i (instr_rdata_i), .obi_rvalid_i(instr_rvalid_i), .obi_err_i (instr_err_i) diff --git a/rtl/cv32e40p_prefetch_controller.sv b/rtl/cv32e40p_prefetch_controller.sv index d00406e54..2f7bea754 100644 --- a/rtl/cv32e40p_prefetch_controller.sv +++ b/rtl/cv32e40p_prefetch_controller.sv @@ -47,10 +47,10 @@ module cv32e40p_prefetch_controller #( input logic rst_n, // Fetch stage interface - input logic req_i, // Fetch stage requests instructions - input logic branch_i, // Taken branch + input logic req_i, // Fetch stage requests instructions + input logic branch_i, // Taken branch input logic [31:0] branch_addr_i, // Taken branch address (only valid when branch_i = 1) - output logic busy_o, // Prefetcher busy + output logic busy_o, // Prefetcher busy // HW loop signals input logic hwlp_jump_i, diff --git a/rtl/cv32e40p_sleep_unit.sv b/rtl/cv32e40p_sleep_unit.sv index 03e519487..39c5537f2 100644 --- a/rtl/cv32e40p_sleep_unit.sv +++ b/rtl/cv32e40p_sleep_unit.sv @@ -59,8 +59,8 @@ module cv32e40p_sleep_unit #( // Clock, reset interface input logic clk_ungated_i, // Free running clock input logic rst_n, - output logic clk_gated_o, // Gated clock - input logic scan_cg_en_i, // Enable all clock gates for testing + output logic clk_gated_o, // Gated clock + input logic scan_cg_en_i, // Enable all clock 
gates for testing // Core sleep output logic core_sleep_o, diff --git a/rtl/cv32e40p_top.sv b/rtl/cv32e40p_top.sv index 7ddd2d5a2..ef656da97 100644 --- a/rtl/cv32e40p_top.sv +++ b/rtl/cv32e40p_top.sv @@ -52,7 +52,7 @@ module cv32e40p_top #( input logic [31:0] data_rdata_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, From d2c72860b5c95e1ef81c772c22c07d5d5c1303f9 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Mon, 29 May 2023 16:47:11 +0800 Subject: [PATCH 10/38] Enabling fflags frm and fcsr write by non apu insn running verible --- bhv/cv32e40p_rvfi.sv | 67 +++++++++++++++++++++----- bhv/insn_trace.sv | 101 +++++++++++++++++++++------------------ bhv/pipe_freeze_trace.sv | 9 ++-- 3 files changed, 114 insertions(+), 63 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 2c2f91640..3f4d501d4 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -291,7 +291,7 @@ module cv32e40p_rvfi input logic [ 7:0] csr_pmpcfg_n_i [16], input logic [ 7:0] csr_pmpcfg_q_i [16], input logic [15:0] csr_pmpcfg_we_i, - input logic [31:0] csr_pmpaddr_n_i, // PMP address input shared for all pmpaddr registers + input logic [31:0] csr_pmpaddr_n_i, // PMP address input shared for all pmpaddr registers input logic [31:0] csr_pmpaddr_q_i [16], input logic [15:0] csr_pmpaddr_we_i, input logic [31:0] csr_mseccfg_n_i, @@ -396,7 +396,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_mcountinhibit_wmask, output logic [31:0] rvfi_csr_mcountinhibit_rdata, output logic [31:0] rvfi_csr_mcountinhibit_wdata, - output logic [31:0][31:0] rvfi_csr_mhpmevent_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_mhpmevent_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_mhpmevent_wmask, output logic [31:0][31:0] rvfi_csr_mhpmevent_rdata, output logic [31:0][31:0] 
rvfi_csr_mhpmevent_wdata, @@ -448,7 +448,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_tselect_wmask, output logic [31:0] rvfi_csr_tselect_rdata, output logic [31:0] rvfi_csr_tselect_wdata, - output logic [ 3:0][31:0] rvfi_csr_tdata_rmask, // 1-3 implemented + output logic [ 3:0][31:0] rvfi_csr_tdata_rmask, // 1-3 implemented output logic [ 3:0][31:0] rvfi_csr_tdata_wmask, output logic [ 3:0][31:0] rvfi_csr_tdata_rdata, output logic [ 3:0][31:0] rvfi_csr_tdata_wdata, @@ -472,7 +472,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_dpc_wmask, output logic [31:0] rvfi_csr_dpc_rdata, output logic [31:0] rvfi_csr_dpc_wdata, - output logic [ 1:0][31:0] rvfi_csr_dscratch_rmask, // 0-1 implemented + output logic [ 1:0][31:0] rvfi_csr_dscratch_rmask, // 0-1 implemented output logic [ 1:0][31:0] rvfi_csr_dscratch_wmask, output logic [ 1:0][31:0] rvfi_csr_dscratch_rdata, output logic [ 1:0][31:0] rvfi_csr_dscratch_wdata, @@ -484,7 +484,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_minstret_wmask, output logic [31:0] rvfi_csr_minstret_rdata, output logic [31:0] rvfi_csr_minstret_wdata, - output logic [31:0][31:0] rvfi_csr_mhpmcounter_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_mhpmcounter_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_mhpmcounter_wmask, output logic [31:0][31:0] rvfi_csr_mhpmcounter_rdata, output logic [31:0][31:0] rvfi_csr_mhpmcounter_wdata, @@ -496,7 +496,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_minstreth_wmask, output logic [31:0] rvfi_csr_minstreth_rdata, output logic [31:0] rvfi_csr_minstreth_wdata, - output logic [31:0][31:0] rvfi_csr_mhpmcounterh_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_mhpmcounterh_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_mhpmcounterh_wmask, output logic [31:0][31:0] rvfi_csr_mhpmcounterh_rdata, output logic [31:0][31:0] rvfi_csr_mhpmcounterh_wdata, @@ -508,7 +508,7 @@ module cv32e40p_rvfi output logic [31:0] 
rvfi_csr_instret_wmask, output logic [31:0] rvfi_csr_instret_rdata, output logic [31:0] rvfi_csr_instret_wdata, - output logic [31:0][31:0] rvfi_csr_hpmcounter_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_hpmcounter_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_hpmcounter_wmask, output logic [31:0][31:0] rvfi_csr_hpmcounter_rdata, output logic [31:0][31:0] rvfi_csr_hpmcounter_wdata, @@ -520,7 +520,7 @@ module cv32e40p_rvfi output logic [31:0] rvfi_csr_instreth_wmask, output logic [31:0] rvfi_csr_instreth_rdata, output logic [31:0] rvfi_csr_instreth_wdata, - output logic [31:0][31:0] rvfi_csr_hpmcounterh_rmask, // 3-31 implemented + output logic [31:0][31:0] rvfi_csr_hpmcounterh_rmask, // 3-31 implemented output logic [31:0][31:0] rvfi_csr_hpmcounterh_wmask, output logic [31:0][31:0] rvfi_csr_hpmcounterh_rdata, output logic [31:0][31:0] rvfi_csr_hpmcounterh_wdata, @@ -696,18 +696,36 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (new_rvfi_trace.m_is_apu) begin if (new_rvfi_trace.m_csr.fflags_we) begin s_fflags_mirror = new_rvfi_trace.m_csr.fflags_wdata; + end else begin + s_fflags_mirror = new_rvfi_trace.m_csr.fflags_rdata; end if (new_rvfi_trace.m_csr.frm_we) begin s_frm_mirror = new_rvfi_trace.m_csr.frm_wdata; + end else begin + s_frm_mirror = new_rvfi_trace.m_csr.frm_rdata; end if (new_rvfi_trace.m_csr.fcsr_we) begin s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_wdata; + end else begin + s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_rdata; end end else begin new_rvfi_trace.m_csr.fflags_rdata = s_fflags_mirror; new_rvfi_trace.m_csr.frm_rdata = s_frm_mirror; new_rvfi_trace.m_csr.fcsr_rdata = s_fcsr_mirror; + if (new_rvfi_trace.m_fflags_we_non_apu) begin + s_fflags_mirror = new_rvfi_trace.m_csr.fflags_wdata; + s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_wdata; + new_rvfi_trace.m_csr.fflags_wmask = 32'hFFFF_FFFF; + new_rvfi_trace.m_csr.fcsr_wmask = 32'hFFFF_FFFF; + end + if (new_rvfi_trace.m_frm_we_non_apu) begin + 
s_frm_mirror = new_rvfi_trace.m_csr.frm_wdata; + s_fcsr_mirror = new_rvfi_trace.m_csr.fcsr_wdata; + new_rvfi_trace.m_csr.frm_wmask = 32'hFFFF_FFFF; + new_rvfi_trace.m_csr.fcsr_wmask = 32'hFFFF_FFFF; + end end @@ -1125,6 +1143,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_apu_wb_ok; bit s_apu_0_cycle_reps; + + bit s_fflags_we_non_apu; + bit s_frm_we_non_apu; + trace_if = new(); trace_id = new(); trace_ex = new(); @@ -1146,7 +1168,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; $display("*****Starting pipeline computing*****\n"); forever begin - wait (e_pipe_monitor_ok.triggered); + wait(e_pipe_monitor_ok.triggered); #1; check_trap(); @@ -1215,6 +1237,20 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE);// && !r_pipe_freeze_trace.apu_rvalid;; + s_fflags_we_non_apu = 1'b0; + if (r_pipe_freeze_trace.csr.fflags_we) begin + if (cnt_apu_resp == cnt_apu_req) begin //No ongoing apu instruction + s_fflags_we_non_apu = 1'b1; + end + end + + s_frm_we_non_apu = 1'b0; + if (r_pipe_freeze_trace.csr.frm_we) begin + if (cnt_apu_resp == cnt_apu_req) begin //No ongoing apu instruction + s_frm_we_non_apu = 1'b1; + end + end + //WB_STAGE if (r_pipe_freeze_trace.apu_rvalid && (apu_trace_q.size() > 0)) begin apu_resp(); @@ -1325,6 +1361,13 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, fflags) `CSR_FROM_PIPE(id, frm) `CSR_FROM_PIPE(id, fcsr) + if (s_fflags_we_non_apu) begin + trace_id.m_fflags_we_non_apu = 1'b1; + end + + if (s_frm_we_non_apu) begin + trace_id.m_frm_we_non_apu = 1'b1; + end trace_ex.m_csr.fflags_wmask = '0; trace_ex.m_csr.frm_wmask = '0; trace_ex.m_csr.fcsr_wmask = '0; @@ -1529,16 +1572,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; rvfi_mode = 2'b11; //priv_lvl_i; //TODO: correct this if needed $display("*****Starting update 
rvfi task*****\n"); - wait (clk_i_d == 1'b1); + wait(clk_i_d == 1'b1); forever begin - wait (clk_i_d == 1'b1); + wait(clk_i_d == 1'b1); if (rvfi_trace_q.size() != 0) begin set_rvfi(); rvfi_valid = 1'b1; end else begin rvfi_valid = 1'b0; end - wait (clk_i_d == 1'b0); + wait(clk_i_d == 1'b0); end endtask diff --git a/bhv/insn_trace.sv b/bhv/insn_trace.sv index 63c57d5cf..06ae75d75 100644 --- a/bhv/insn_trace.sv +++ b/bhv/insn_trace.sv @@ -36,6 +36,8 @@ logic m_dbg_taken; logic [2:0] m_dbg_cause; + logic m_fflags_we_non_apu; + logic m_frm_we_non_apu; logic [5:0] m_rs1_addr; logic [5:0] m_rs2_addr; logic [31:0] m_rs1_rdata; @@ -121,27 +123,29 @@ function new(); - this.m_order = 0; - this.m_skip_order = 1'b0; - this.m_valid = 1'b0; - this.m_move_down_pipe = 1'b0; - this.m_data_missaligned = 1'b0; - this.m_got_first_data = 1'b0; - this.m_got_ex_reg = 1'b0; - this.m_intr = '0; - this.m_dbg_taken = 1'b0; - this.m_dbg_cause = '0; - this.m_is_ebreak = '0; - this.m_is_illegal = '0; - this.m_is_irq = '0; - this.m_is_memory = 1'b0; - this.m_is_load = 1'b0; - this.m_is_apu = 1'b0; - this.m_is_apu_ok = 1'b0; - this.m_apu_req_id = 0; - this.m_mem_req_id[0] = 0; - this.m_mem_req_id[1] = 0; - this.m_trap = 1'b0; + this.m_order = 0; + this.m_skip_order = 1'b0; + this.m_valid = 1'b0; + this.m_move_down_pipe = 1'b0; + this.m_data_missaligned = 1'b0; + this.m_got_first_data = 1'b0; + this.m_got_ex_reg = 1'b0; + this.m_intr = '0; + this.m_dbg_taken = 1'b0; + this.m_dbg_cause = '0; + this.m_is_ebreak = '0; + this.m_is_illegal = '0; + this.m_is_irq = '0; + this.m_is_memory = 1'b0; + this.m_is_load = 1'b0; + this.m_is_apu = 1'b0; + this.m_is_apu_ok = 1'b0; + this.m_apu_req_id = 0; + this.m_mem_req_id[0] = 0; + this.m_mem_req_id[1] = 0; + this.m_trap = 1'b0; + this.m_fflags_we_non_apu = 1'b0; + this.m_frm_we_non_apu = 1'b0; endfunction /* @@ -154,32 +158,33 @@ if(this.m_skip_order) begin this.m_order = this.m_order + 64'h1; end - this.m_skip_order = 1'b0; - this.m_pc_rdata = 
r_pipe_freeze_trace.pc_id; - this.m_is_illegal = 1'b0; - this.m_is_irq = 1'b0; - this.m_is_memory = 1'b0; - this.m_is_load = 1'b0; - this.m_is_apu = 1'b0; - this.m_is_apu_ok = 1'b0; - this.m_apu_req_id = 0; - this.m_mem_req_id[0] = 0; - this.m_mem_req_id[1] = 0; - this.m_data_missaligned = 1'b0; - this.m_got_first_data = 1'b0; - this.m_got_ex_reg = 1'b0; - this.m_got_regs_write = 1'b0; - this.m_move_down_pipe = 1'b0; - this.m_rd_addr[0] = '0; - this.m_rd_addr[1] = '0; - this.m_2_rd_insn = 1'b0; - this.m_rs1_addr = '0; - this.m_rs2_addr = '0; - this.m_ex_fw = '0; - this.m_csr.got_minstret = '0; - this.m_dbg_taken = '0; - this.m_trap = 1'b0; - + this.m_skip_order = 1'b0; + this.m_pc_rdata = r_pipe_freeze_trace.pc_id; + this.m_is_illegal = 1'b0; + this.m_is_irq = 1'b0; + this.m_is_memory = 1'b0; + this.m_is_load = 1'b0; + this.m_is_apu = 1'b0; + this.m_is_apu_ok = 1'b0; + this.m_apu_req_id = 0; + this.m_mem_req_id[0] = 0; + this.m_mem_req_id[1] = 0; + this.m_data_missaligned = 1'b0; + this.m_got_first_data = 1'b0; + this.m_got_ex_reg = 1'b0; + this.m_got_regs_write = 1'b0; + this.m_move_down_pipe = 1'b0; + this.m_rd_addr[0] = '0; + this.m_rd_addr[1] = '0; + this.m_2_rd_insn = 1'b0; + this.m_rs1_addr = '0; + this.m_rs2_addr = '0; + this.m_ex_fw = '0; + this.m_csr.got_minstret = '0; + this.m_dbg_taken = '0; + this.m_trap = 1'b0; + this.m_fflags_we_non_apu = 1'b0; + this.m_frm_we_non_apu = 1'b0; this.m_csr.mcause_we = '0; if (is_compressed_id_i) begin this.m_insn[31:16] = '0; @@ -244,6 +249,8 @@ this.m_intr = m_source.m_intr; this.m_trap = m_source.m_trap; + this.m_fflags_we_non_apu = m_source.m_fflags_we_non_apu; + this.m_frm_we_non_apu = m_source.m_frm_we_non_apu ; this.m_mem = m_source.m_mem; //CRS diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index e7669f545..e8ee5e48e 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -656,15 +656,16 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.fflags_n = {27'b0, csr_fcsr_fflags_n_i}; 
r_pipe_freeze_trace.csr.fflags_q = {27'b0, csr_fcsr_fflags_q_i}; - r_pipe_freeze_trace.csr.frm_n = {29'b0, csr_fcsr_frm_n_i}; - r_pipe_freeze_trace.csr.frm_q = {29'b0, csr_fcsr_frm_q_i}; + r_pipe_freeze_trace.csr.frm_n = {28'b0, csr_fcsr_frm_n_i}; + r_pipe_freeze_trace.csr.frm_q = {28'b0, csr_fcsr_frm_q_i}; r_pipe_freeze_trace.csr.fcsr_n = {24'b0, csr_fcsr_frm_n_i, csr_fcsr_fflags_n_i}; r_pipe_freeze_trace.csr.fcsr_q = {24'b0, csr_fcsr_frm_q_i, csr_fcsr_fflags_q_i}; compute_csr_we(); if (csr_fcsr_fflags_we_i) begin - r_pipe_freeze_trace.csr.fflags_we = 1'b1; - r_pipe_freeze_trace.csr.fcsr_we = 1'b1; + r_pipe_freeze_trace.csr.fflags_we = 1'b1; + r_pipe_freeze_trace.csr.fcsr_we = 1'b1; + r_pipe_freeze_trace.csr.mstatus_we = 1'b1; end r_pipe_freeze_trace.hwloop.start_q = hwlp_start_q_i; From 944255ba6e50931b806c61efbf15f9e3be4acbaf Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Wed, 7 Jun 2023 17:15:40 +0800 Subject: [PATCH 11/38] Correcting csr update reporting on interrupt --- bhv/cv32e40p_rvfi.sv | 46 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 3f4d501d4..c0f329f9c 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -729,6 +729,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end + //FOR DEBUG!!!!!!!!!!!!!!!!!!!!!! 
+ // if(new_rvfi_trace.m_order == 64'h0000_0000_0000_4423) begin + // new_rvfi_trace.m_csr.mcause_rdata = 32'h8000_0010; + // new_rvfi_trace.m_csr.mcause_wdata = 32'h8000_0010; + // new_rvfi_trace.m_csr.mstatus_rdata = 32'h0000_1888; + // new_rvfi_trace.m_csr.mstatus_wdata = 32'h0000_1888; + // new_rvfi_trace.m_csr.mepc_rdata = 32'h0000_554E; + // new_rvfi_trace.m_csr.mepc_wdata = 32'h0000_554E; + // end + rvfi_order = new_rvfi_trace.m_order; rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; rvfi_insn = new_rvfi_trace.m_insn; @@ -1146,6 +1156,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_fflags_we_non_apu; bit s_frm_we_non_apu; + bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes + bit s_is_irq_start; trace_if = new(); trace_id = new(); @@ -1166,6 +1178,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; csr_is_irq = '0; is_dbg_taken = '0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; + $display("*****Starting pipeline computing*****\n"); forever begin wait(e_pipe_monitor_ok.triggered); @@ -1173,6 +1188,14 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; check_trap(); + pc_mux_interrupt = 1'b0; + if(r_pipe_freeze_trace.pc_mux == 4'b0100) begin + if(r_pipe_freeze_trace.exc_pc_mux == 3'b001) begin + pc_mux_interrupt = 1'b1; + s_is_irq_start = 1'b1; + end + end + if (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID && r_pipe_freeze_trace.ebrk_insn_dec) begin if (trace_wb.m_valid) begin send_rvfi(trace_wb); @@ -1291,7 +1314,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (trace_ex.m_valid) begin `CSR_FROM_PIPE(ex, misa) - `CSR_FROM_PIPE(ex, mip) + // `CSR_FROM_PIPE(ex, mip) `CSR_FROM_PIPE(ex, tdata1) tinfo_to_ex(); // `CSR_FROM_PIPE(ex, fflags) @@ -1346,7 +1369,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (trace_id.m_valid) begin mtvec_to_id(); - if (!csr_is_irq) begin + // if(s_is_pc_set) begin + `CSR_FROM_PIPE(id, 
mip) + // end + + if (!csr_is_irq && !s_is_irq_start) begin mstatus_to_id(); `CSR_FROM_PIPE(id, mepc) if (trace_id.m_csr.mcause_we == '0) begin //for debug purpose @@ -1424,6 +1451,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid = 1'b0; + ->e_id_to_ex_1; end else if (r_pipe_freeze_trace.ex_reg_we) begin trace_id.m_ex_fw = 1'b1; @@ -1473,11 +1501,13 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); trace_id.m_valid = 1'b0; + ->e_id_to_ex_2; end trace_id.init(trace_if); trace_id.m_is_ebreak = trace_if.m_is_ebreak; trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; - + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; @@ -1501,7 +1531,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.init(trace_if); trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; - + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; end @@ -1531,7 +1562,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end - if (csr_is_irq) begin + if (csr_is_irq && !s_is_pc_set) begin mstatus_to_id(); `CSR_FROM_PIPE(id, mepc) `CSR_FROM_PIPE(id, mcause) @@ -1542,6 +1573,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; dcsr_to_id(); end + if(r_pipe_freeze_trace.pc_set) begin + s_is_pc_set = 1'b1; + end + + csr_is_irq = r_pipe_freeze_trace.csr_cause[5]; is_dbg_taken = ((r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID) | (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF)) ? 
1'b1 : 1'b0; saved_debug_cause = r_pipe_freeze_trace.debug_cause; From e70a3ce7c68f2d1ace50a0d04dd116efc5d6cc8e Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Tue, 13 Jun 2023 13:22:39 +0800 Subject: [PATCH 12/38] Reporting trap by looking at minstret --- bhv/cv32e40p_rvfi.sv | 42 ++++++++++++++++++++++++++++++++------ bhv/cv32e40p_tb_wrapper.sv | 1 + bhv/pipe_freeze_trace.sv | 2 ++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index c0f329f9c..b014e352a 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -82,6 +82,7 @@ module cv32e40p_rvfi input logic [ 1:0][31:0] hwlp_end_q_i, input logic [ 1:0][31:0] hwlp_counter_q_i, input logic [ 1:0][31:0] hwlp_counter_n_i, + input logic minstret_i, // LSU input logic lsu_en_id_i, input logic lsu_we_id_i, @@ -689,6 +690,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; logic [31:0] s_fflags_mirror; logic [31:0] s_frm_mirror; logic [31:0] s_fcsr_mirror; + logic [31:0] r_previous_minstret; function void set_rvfi(); insn_trace_t new_rvfi_trace; new_rvfi_trace = rvfi_trace_q.pop_front(); @@ -743,6 +745,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; rvfi_insn = new_rvfi_trace.m_insn; + //Trying something here + //Flag as trap everytime minstret is not incremented + + if(new_rvfi_trace.m_csr.minstret_rdata == r_previous_minstret) begin + new_rvfi_trace.m_trap = 1'b1; + end else begin + r_previous_minstret = new_rvfi_trace.m_csr.minstret_rdata; + new_rvfi_trace.m_trap = 1'b1; + end + rvfi_rs1_addr = '0; rvfi_rs1_rdata = '0; rvfi_rs2_addr = '0; @@ -1015,6 +1027,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction + bit s_was_flush; //debug exception is flagged as trap only if preceed by a flush + //Work arround until I find the coreect way to distinguish trap function void check_trap(); bit s_dbg_exception, s_exception, s_irq; 
s_dbg_exception = 1'b0; @@ -1037,18 +1051,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end + if(s_was_flush == 1'b0) begin + s_dbg_exception = 1'b0; + end + if (r_pipe_freeze_trace.pc_id == trace_if.m_pc_rdata) begin if (trace_if.m_valid && (s_dbg_exception || s_exception)) begin trace_if.m_trap = 1'b1; end end - if (r_pipe_freeze_trace.pc_id == trace_id.m_pc_rdata) begin - if (trace_id.m_valid && (s_dbg_exception || s_exception)) begin - trace_id.m_trap = 1'b1; - end - end - if (r_pipe_freeze_trace.pc_id == trace_ex.m_pc_rdata) begin if (trace_ex.m_valid && (s_dbg_exception || s_exception)) begin trace_ex.m_trap = 1'b1; @@ -1060,6 +1072,13 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_wb.m_trap = 1'b1; end end + + if (r_pipe_freeze_trace.pc_id == trace_id.m_pc_rdata) begin + if (trace_id.m_valid && (s_dbg_exception || s_exception)) begin + trace_id.m_trap = 1'b1; + end + end + endfunction /* * This tracer works with three process, @@ -1177,6 +1196,12 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; cnt_apu_resp = 0; csr_is_irq = '0; is_dbg_taken = '0; + s_was_flush = 1'b0; + + r_previous_minstret = '0; + + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; s_is_pc_set = 1'b0; s_is_irq_start = 1'b0; @@ -1582,6 +1607,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; is_dbg_taken = ((r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID) | (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF)) ? 
1'b1 : 1'b0; saved_debug_cause = r_pipe_freeze_trace.debug_cause; s_id_done = r_pipe_freeze_trace.id_valid; + if((r_pipe_freeze_trace.ctrl_fsm_cs == DBG_FLUSH) || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX) || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_WB)) begin + s_was_flush = 1'b1; + end else begin + s_was_flush = 1'b0; + end #1; end endtask diff --git a/bhv/cv32e40p_tb_wrapper.sv b/bhv/cv32e40p_tb_wrapper.sv index a77db5614..6e7c60adb 100644 --- a/bhv/cv32e40p_tb_wrapper.sv +++ b/bhv/cv32e40p_tb_wrapper.sv @@ -286,6 +286,7 @@ module cv32e40p_tb_wrapper .hwlp_counter_q_i(hwlp_counter_q), .hwlp_counter_n_i(hwlp_counter_n), + .minstret_i (cv32e40p_top_i.core_i.id_stage_i.minstret), //// EX probes //// .ex_valid_i (cv32e40p_top_i.core_i.ex_valid), .ex_ready_i (cv32e40p_top_i.core_i.ex_ready), diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index e8ee5e48e..aad6fc5c5 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -68,6 +68,7 @@ typedef struct { logic [5:0] csr_cause; logic debug_csr_save; + logic minstret; // LSU logic lsu_en_id; logic lsu_we_id; @@ -402,6 +403,7 @@ task monitor_pipeline(); r_pipe_freeze_trace.ebrk_insn_dec = ebrk_insn_dec_i; r_pipe_freeze_trace.csr_cause = csr_cause_i; r_pipe_freeze_trace.debug_csr_save = debug_csr_save_i; + r_pipe_freeze_trace.minstret = minstret_i; // LSU r_pipe_freeze_trace.lsu_en_id = lsu_en_id_i; r_pipe_freeze_trace.lsu_we_id = lsu_we_id_i; From 79dd556cfa37b3a6f3d3094539681950578fcd3d Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Tue, 13 Jun 2023 18:19:51 +0200 Subject: [PATCH 13/38] Changed FPU top level name for implementation retiming command. 
Signed-off-by: Pascal Gouedo --- docs/source/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 372dfdef5..059fd43fb 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -188,7 +188,7 @@ This is done with FPU_*_LAT CV32E40P parameters setting to perfectly fit target It should be noted that any additional pipeline register is impacting FPU instructions latency and could cause performances degradation depending of applications using Floating-Point operations. Those pipeline registers are all added at the end of the FPU pipeline with all operators before them. Optimal frequency is only achievable using automatic retiming commands in implementation tools. This can be achieved with the following command for Synopsys Design Compiler: -“set_optimize_registers true -designs [get_object_name [get_designs "*fpnew_wrapper*"]]”. +“set_optimize_registers true -designs [get_object_name [get_designs "\*cv32e40p_fp_wrapper\*"]]”. Contents -------- From 81d09c639902574bcc2d0f00cd88e9a5a3cca1cd Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Tue, 13 Jun 2023 18:20:42 +0200 Subject: [PATCH 14/38] Added a note about register write priority for post-incremented loads. Signed-off-by: Pascal Gouedo --- docs/source/instruction_set_extensions.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index 7d4c831ea..9160e8899 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -63,6 +63,11 @@ load & store instructions are only supported if ``COREV_PULP`` == 1. Load operations ^^^^^^^^^^^^^^^ +.. note:: + + When same register is used as address and destination (rD == rs1) for post-incremented loads (rs1!), + loaded data has highest priority over incremented address when writing to this same register. + .. 
table:: Load operations :name: Load operations :widths: 30 70 From e767d297b21007e62db29d50db4611f7764a734f Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Wed, 14 Jun 2023 15:56:32 +0800 Subject: [PATCH 15/38] running verible --- bhv/pipe_freeze_trace.sv | 5 +- rtl/cv32e40p_aligner.sv | 4 +- rtl/cv32e40p_alu.sv | 38 ++++++++----- rtl/cv32e40p_alu_div.sv | 10 ++-- rtl/cv32e40p_core.sv | 86 ++++++++++++++--------------- rtl/cv32e40p_ex_stage.sv | 2 +- rtl/cv32e40p_fp_wrapper.sv | 35 ++++++------ rtl/cv32e40p_hwloop_regs.sv | 2 +- rtl/cv32e40p_id_stage.sv | 8 +-- rtl/cv32e40p_if_stage.sv | 6 +- rtl/cv32e40p_int_controller.sv | 12 ++-- rtl/cv32e40p_load_store_unit.sv | 30 +++++----- rtl/cv32e40p_mult.sv | 4 +- rtl/cv32e40p_prefetch_buffer.sv | 14 ++--- rtl/cv32e40p_prefetch_controller.sv | 6 +- rtl/cv32e40p_sleep_unit.sv | 4 +- rtl/cv32e40p_top.sv | 2 +- 17 files changed, 140 insertions(+), 128 deletions(-) diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index aad6fc5c5..55a89b1bd 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -360,7 +360,7 @@ endfunction task monitor_pipeline(); $display("*****Starting pipeline monitoring*****\n"); forever begin - wait (clk_i_d == 1'b0 & rst_ni == 1'b1); + wait(clk_i_d == 1'b0 & rst_ni == 1'b1); // r_pipe_freeze_trace. 
<= ; r_pipe_freeze_trace.instr_req = instr_req_i; @@ -675,8 +675,7 @@ task monitor_pipeline(); r_pipe_freeze_trace.hwloop.counter_q = hwlp_counter_q_i; r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; - // #1; ->e_pipe_monitor_ok; - wait (clk_i_d == 1'b1); + wait(clk_i_d == 1'b1); end endtask diff --git a/rtl/cv32e40p_aligner.sv b/rtl/cv32e40p_aligner.sv index 9bb341e06..a61f7e928 100644 --- a/rtl/cv32e40p_aligner.sv +++ b/rtl/cv32e40p_aligner.sv @@ -25,7 +25,7 @@ module cv32e40p_aligner ( input logic rst_n, input logic fetch_valid_i, - output logic aligner_ready_o, //prevents overwriting the fethced instruction + output logic aligner_ready_o, //prevents overwriting the fethced instruction input logic if_valid_i, @@ -34,7 +34,7 @@ module cv32e40p_aligner ( output logic instr_valid_o, input logic [31:0] branch_addr_i, - input logic branch_i, // Asserted if we are branching/jumping now + input logic branch_i, // Asserted if we are branching/jumping now input logic [31:0] hwlp_addr_i, input logic hwlp_update_pc_i, diff --git a/rtl/cv32e40p_alu.sv b/rtl/cv32e40p_alu.sv index e7be80cb3..aa900a787 100644 --- a/rtl/cv32e40p_alu.sv +++ b/rtl/cv32e40p_alu.sv @@ -263,28 +263,36 @@ module cv32e40p_alu // right shifts, we let the synthesizer optimize this logic [63:0] shift_op_a_32; - assign shift_op_a_32 = (operator_i == ALU_ROR) ? {shift_op_a, shift_op_a} : $signed( - {{32{shift_arithmetic & shift_op_a[31]}}, shift_op_a} - ); + assign shift_op_a_32 = (operator_i == ALU_ROR) ? 
{ + shift_op_a, shift_op_a + } : $signed( + {{32{shift_arithmetic & shift_op_a[31]}}, shift_op_a} + ); always_comb begin case (vector_mode_i) VEC_MODE16: begin - shift_right_result[31:16] = $signed({shift_arithmetic & shift_op_a[31], - shift_op_a[31:16]}) >>> shift_amt_int[19:16]; - shift_right_result[15:0] = - $signed({shift_arithmetic & shift_op_a[15], shift_op_a[15:0]}) >>> shift_amt_int[3:0]; + shift_right_result[31:16] = $signed( + {shift_arithmetic & shift_op_a[31], shift_op_a[31:16]} + ) >>> shift_amt_int[19:16]; + shift_right_result[15:0] = $signed( + {shift_arithmetic & shift_op_a[15], shift_op_a[15:0]} + ) >>> shift_amt_int[3:0]; end VEC_MODE8: begin - shift_right_result[31:24] = $signed({shift_arithmetic & shift_op_a[31], - shift_op_a[31:24]}) >>> shift_amt_int[26:24]; - shift_right_result[23:16] = $signed({shift_arithmetic & shift_op_a[23], - shift_op_a[23:16]}) >>> shift_amt_int[18:16]; - shift_right_result[15:8] = - $signed({shift_arithmetic & shift_op_a[15], shift_op_a[15:8]}) >>> shift_amt_int[10:8]; - shift_right_result[7:0] = $signed({shift_arithmetic & shift_op_a[7], shift_op_a[7:0]}) >>> - shift_amt_int[2:0]; + shift_right_result[31:24] = $signed( + {shift_arithmetic & shift_op_a[31], shift_op_a[31:24]} + ) >>> shift_amt_int[26:24]; + shift_right_result[23:16] = $signed( + {shift_arithmetic & shift_op_a[23], shift_op_a[23:16]} + ) >>> shift_amt_int[18:16]; + shift_right_result[15:8] = $signed( + {shift_arithmetic & shift_op_a[15], shift_op_a[15:8]} + ) >>> shift_amt_int[10:8]; + shift_right_result[7:0] = $signed( + {shift_arithmetic & shift_op_a[7], shift_op_a[7:0]} + ) >>> shift_amt_int[2:0]; end default: // VEC_MODE32 diff --git a/rtl/cv32e40p_alu_div.sv b/rtl/cv32e40p_alu_div.sv index 446f446fd..a7449e01c 100644 --- a/rtl/cv32e40p_alu_div.sv +++ b/rtl/cv32e40p_alu_div.sv @@ -35,8 +35,8 @@ module cv32e40p_alu_div #( input logic [C_LOG_WIDTH-1:0] OpBShift_DI, input logic OpBIsZero_SI, // - input logic OpBSign_SI, // gate this to 0 in case of 
unsigned ops - input logic [ 1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem + input logic OpBSign_SI, // gate this to 0 in case of unsigned ops + input logic [ 1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem // handshake input logic InVld_SI, // output IF @@ -186,9 +186,9 @@ module cv32e40p_alu_div #( assign AReg_DN = (ARegEn_S) ? AddOut_D : AReg_DP; assign BReg_DN = (BRegEn_S) ? BMux_D : BReg_DP; - assign ResReg_DN = (LoadEn_S) ? '0 : (ResRegEn_S) ? {ABComp_S, ResReg_DP[$high( - ResReg_DP - ):1]} : ResReg_DP; + assign ResReg_DN = (LoadEn_S) ? '0 : (ResRegEn_S) ? { + ABComp_S, ResReg_DP[$high(ResReg_DP):1] + } : ResReg_DP; always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs if (~Rst_RBI) begin diff --git a/rtl/cv32e40p_core.sv b/rtl/cv32e40p_core.sv index 4a5c32774..25179aea8 100644 --- a/rtl/cv32e40p_core.sv +++ b/rtl/cv32e40p_core.sv @@ -84,7 +84,7 @@ module cv32e40p_core input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, @@ -379,9 +379,9 @@ module cv32e40p_core .COREV_CLUSTER(COREV_CLUSTER) ) sleep_unit_i ( // Clock, reset interface - .clk_ungated_i(clk_i), // Ungated clock + .clk_ungated_i(clk_i), // Ungated clock .rst_n (rst_ni), - .clk_gated_o (clk), // Gated clock + .clk_gated_o (clk), // Gated clock .scan_cg_en_i (scan_cg_en_i), // Core sleep @@ -447,8 +447,8 @@ module cv32e40p_core .instr_gnt_i (instr_gnt_pmp), .instr_rvalid_i (instr_rvalid_i), .instr_rdata_i (instr_rdata_i), - .instr_err_i (1'b0), // Bus error (not used yet) - .instr_err_pmp_i(instr_err_pmp), // PMP error + .instr_err_i (1'b0), // Bus error (not used yet) + .instr_err_pmp_i(instr_err_pmp), // PMP error // outputs to ID stage .instr_valid_id_o (instr_valid_id), @@ -464,7 +464,7 @@ module cv32e40p_core .depc_i(depc), // debug return address - 
.pc_mux_i (pc_mux_id), // sel for pc multiplexer + .pc_mux_i (pc_mux_id), // sel for pc multiplexer .exc_pc_mux_i(exc_pc_mux_id), @@ -523,7 +523,7 @@ module cv32e40p_core .APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU), .DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN) ) id_stage_i ( - .clk (clk), // Gated clock + .clk (clk), // Gated clock .clk_ungated_i(clk_i), // Ungated clock .rst_n (rst_ni), @@ -592,14 +592,14 @@ module cv32e40p_core .regfile_alu_waddr_ex_o(regfile_alu_waddr_ex), // MUL - .mult_operator_ex_o (mult_operator_ex), // from ID to EX stage - .mult_en_ex_o (mult_en_ex), // from ID to EX stage + .mult_operator_ex_o (mult_operator_ex), // from ID to EX stage + .mult_en_ex_o (mult_en_ex), // from ID to EX stage .mult_sel_subword_ex_o(mult_sel_subword_ex), // from ID to EX stage .mult_signed_mode_ex_o(mult_signed_mode_ex), // from ID to EX stage - .mult_operand_a_ex_o (mult_operand_a_ex), // from ID to EX stage - .mult_operand_b_ex_o (mult_operand_b_ex), // from ID to EX stage - .mult_operand_c_ex_o (mult_operand_c_ex), // from ID to EX stage - .mult_imm_ex_o (mult_imm_ex), // from ID to EX stage + .mult_operand_a_ex_o (mult_operand_a_ex), // from ID to EX stage + .mult_operand_b_ex_o (mult_operand_b_ex), // from ID to EX stage + .mult_operand_c_ex_o (mult_operand_c_ex), // from ID to EX stage + .mult_imm_ex_o (mult_imm_ex), // from ID to EX stage .mult_dot_op_a_ex_o (mult_dot_op_a_ex), // from ID to EX stage .mult_dot_op_b_ex_o (mult_dot_op_b_ex), // from ID to EX stage @@ -636,9 +636,9 @@ module cv32e40p_core .current_priv_lvl_i (current_priv_lvl), .csr_irq_sec_o (csr_irq_sec), .csr_cause_o (csr_cause), - .csr_save_if_o (csr_save_if), // control signal to save pc - .csr_save_id_o (csr_save_id), // control signal to save pc - .csr_save_ex_o (csr_save_ex), // control signal to save pc + .csr_save_if_o (csr_save_if), // control signal to save pc + .csr_save_id_o (csr_save_id), // control signal to save pc + .csr_save_ex_o (csr_save_ex), // control signal to save pc 
.csr_restore_mret_id_o(csr_restore_mret_id), // control signal to restore pc .csr_restore_uret_id_o(csr_restore_uret_id), // control signal to restore pc @@ -655,11 +655,11 @@ module cv32e40p_core .hwlp_target_o(hwlp_target), // LSU - .data_req_ex_o (data_req_ex), // to load store unit - .data_we_ex_o (data_we_ex), // to load store unit + .data_req_ex_o (data_req_ex), // to load store unit + .data_we_ex_o (data_we_ex), // to load store unit .atop_ex_o (data_atop_ex), - .data_type_ex_o (data_type_ex), // to load store unit - .data_sign_ext_ex_o (data_sign_ext_ex), // to load store unit + .data_type_ex_o (data_type_ex), // to load store unit + .data_sign_ext_ex_o (data_sign_ext_ex), // to load store unit .data_reg_offset_ex_o(data_reg_offset_ex), // to load store unit .data_load_event_ex_o(data_load_event_ex), // to load store unit @@ -748,34 +748,34 @@ module cv32e40p_core // Alu signals from ID stage .alu_en_i (alu_en_ex), - .alu_operator_i (alu_operator_ex), // from ID/EX pipe registers + .alu_operator_i (alu_operator_ex), // from ID/EX pipe registers .alu_operand_a_i (alu_operand_a_ex), // from ID/EX pipe registers .alu_operand_b_i (alu_operand_b_ex), // from ID/EX pipe registers .alu_operand_c_i (alu_operand_c_ex), // from ID/EX pipe registers - .bmask_a_i (bmask_a_ex), // from ID/EX pipe registers - .bmask_b_i (bmask_b_ex), // from ID/EX pipe registers - .imm_vec_ext_i (imm_vec_ext_ex), // from ID/EX pipe registers - .alu_vec_mode_i (alu_vec_mode_ex), // from ID/EX pipe registers - .alu_is_clpx_i (alu_is_clpx_ex), // from ID/EX pipe registers + .bmask_a_i (bmask_a_ex), // from ID/EX pipe registers + .bmask_b_i (bmask_b_ex), // from ID/EX pipe registers + .imm_vec_ext_i (imm_vec_ext_ex), // from ID/EX pipe registers + .alu_vec_mode_i (alu_vec_mode_ex), // from ID/EX pipe registers + .alu_is_clpx_i (alu_is_clpx_ex), // from ID/EX pipe registers .alu_is_subrot_i (alu_is_subrot_ex), // from ID/Ex pipe registers - .alu_clpx_shift_i(alu_clpx_shift_ex), // from ID/EX 
pipe registers + .alu_clpx_shift_i(alu_clpx_shift_ex), // from ID/EX pipe registers // Multipler - .mult_operator_i (mult_operator_ex), // from ID/EX pipe registers - .mult_operand_a_i (mult_operand_a_ex), // from ID/EX pipe registers - .mult_operand_b_i (mult_operand_b_ex), // from ID/EX pipe registers - .mult_operand_c_i (mult_operand_c_ex), // from ID/EX pipe registers - .mult_en_i (mult_en_ex), // from ID/EX pipe registers + .mult_operator_i (mult_operator_ex), // from ID/EX pipe registers + .mult_operand_a_i (mult_operand_a_ex), // from ID/EX pipe registers + .mult_operand_b_i (mult_operand_b_ex), // from ID/EX pipe registers + .mult_operand_c_i (mult_operand_c_ex), // from ID/EX pipe registers + .mult_en_i (mult_en_ex), // from ID/EX pipe registers .mult_sel_subword_i(mult_sel_subword_ex), // from ID/EX pipe registers .mult_signed_mode_i(mult_signed_mode_ex), // from ID/EX pipe registers - .mult_imm_i (mult_imm_ex), // from ID/EX pipe registers - .mult_dot_op_a_i (mult_dot_op_a_ex), // from ID/EX pipe registers - .mult_dot_op_b_i (mult_dot_op_b_ex), // from ID/EX pipe registers - .mult_dot_op_c_i (mult_dot_op_c_ex), // from ID/EX pipe registers - .mult_dot_signed_i (mult_dot_signed_ex), // from ID/EX pipe registers - .mult_is_clpx_i (mult_is_clpx_ex), // from ID/EX pipe registers - .mult_clpx_shift_i (mult_clpx_shift_ex), // from ID/EX pipe registers - .mult_clpx_img_i (mult_clpx_img_ex), // from ID/EX pipe registers + .mult_imm_i (mult_imm_ex), // from ID/EX pipe registers + .mult_dot_op_a_i (mult_dot_op_a_ex), // from ID/EX pipe registers + .mult_dot_op_b_i (mult_dot_op_b_ex), // from ID/EX pipe registers + .mult_dot_op_c_i (mult_dot_op_c_ex), // from ID/EX pipe registers + .mult_dot_signed_i (mult_dot_signed_ex), // from ID/EX pipe registers + .mult_is_clpx_i (mult_is_clpx_ex), // from ID/EX pipe registers + .mult_clpx_shift_i (mult_clpx_shift_ex), // from ID/EX pipe registers + .mult_clpx_img_i (mult_clpx_img_ex), // from ID/EX pipe registers 
.mult_multicycle_o(mult_multicycle), // to ID/EX pipe registers @@ -873,8 +873,8 @@ module cv32e40p_core .data_req_o (data_req_pmp), .data_gnt_i (data_gnt_pmp), .data_rvalid_i (data_rvalid_i), - .data_err_i (1'b0), // Bus error (not used yet) - .data_err_pmp_i(data_err_pmp), // PMP error + .data_err_i (1'b0), // Bus error (not used yet) + .data_err_pmp_i(data_err_pmp), // PMP error .data_addr_o (data_addr_pmp), .data_we_o (data_we_o), @@ -890,7 +890,7 @@ module cv32e40p_core .data_wdata_ex_i (alu_operand_c_ex), .data_reg_offset_ex_i(data_reg_offset_ex), .data_load_event_ex_i(data_load_event_ex), - .data_sign_ext_ex_i (data_sign_ext_ex), // sign extension + .data_sign_ext_ex_i (data_sign_ext_ex), // sign extension .data_rdata_ex_o (lsu_rdata), .data_req_ex_i (data_req_ex), diff --git a/rtl/cv32e40p_ex_stage.sv b/rtl/cv32e40p_ex_stage.sv index 4b6e30578..6b58a8425 100644 --- a/rtl/cv32e40p_ex_stage.sv +++ b/rtl/cv32e40p_ex_stage.sv @@ -149,7 +149,7 @@ module cv32e40p_ex_stage output logic ex_ready_o, // EX stage ready for new data output logic ex_valid_o, // EX stage gets new data - input logic wb_ready_i // WB stage ready for new data + input logic wb_ready_i // WB stage ready for new data ); logic [31:0] alu_result; diff --git a/rtl/cv32e40p_fp_wrapper.sv b/rtl/cv32e40p_fp_wrapper.sv index 16b39c10c..15be96e5e 100644 --- a/rtl/cv32e40p_fp_wrapper.sv +++ b/rtl/cv32e40p_fp_wrapper.sv @@ -66,26 +66,29 @@ module cv32e40p_fp_wrapper Width: C_FLEN, EnableVectors: C_XFVEC, EnableNanBox: 1'b0, - FpFmtMask: {C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT}, - IntFmtMask: {C_XFVEC && C_XF8, C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0} - }; + FpFmtMask: { + C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT + }, IntFmtMask: { + C_XFVEC && C_XF8, C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0 + }}; // Implementation (number of registers etc) localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{ PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt - '{FPU_ADDMUL_LAT, C_LAT_FP64, C_LAT_FP16, 
C_LAT_FP8, C_LAT_FP16ALT}, // ADDMUL - '{default: C_LAT_DIVSQRT}, // DIVSQRT - '{default: FPU_OTHERS_LAT}, // NONCOMP - '{default: FPU_OTHERS_LAT} - }, // CONV - UnitTypes: '{ - '{default: fpnew_pkg::MERGED}, // ADDMUL - '{default: fpnew_pkg::MERGED}, // DIVSQRT - '{default: fpnew_pkg::PARALLEL}, // NONCOMP - '{default: fpnew_pkg::MERGED} - }, // CONV - PipeConfig: fpnew_pkg::AFTER - }; + '{ + FPU_ADDMUL_LAT, C_LAT_FP64, C_LAT_FP16, C_LAT_FP8, C_LAT_FP16ALT + }, // ADDMUL + '{default: C_LAT_DIVSQRT}, // DIVSQRT + '{default: FPU_OTHERS_LAT}, // NONCOMP + '{default: FPU_OTHERS_LAT} + }, // CONV + UnitTypes: '{ + '{default: fpnew_pkg::MERGED}, // ADDMUL + '{default: fpnew_pkg::MERGED}, // DIVSQRT + '{default: fpnew_pkg::PARALLEL}, // NONCOMP + '{default: fpnew_pkg::MERGED} + }, // CONV + PipeConfig: fpnew_pkg::AFTER}; //--------------- // FPU instance diff --git a/rtl/cv32e40p_hwloop_regs.sv b/rtl/cv32e40p_hwloop_regs.sv index 7f6260f0d..fc835fbe4 100644 --- a/rtl/cv32e40p_hwloop_regs.sv +++ b/rtl/cv32e40p_hwloop_regs.sv @@ -34,7 +34,7 @@ module cv32e40p_hwloop_regs #( input logic [ 31:0] hwlp_end_data_i, input logic [ 31:0] hwlp_cnt_data_i, input logic [ 2:0] hwlp_we_i, - input logic [N_REG_BITS-1:0] hwlp_regid_i, // selects the register set + input logic [N_REG_BITS-1:0] hwlp_regid_i, // selects the register set // from controller input logic valid_i, diff --git a/rtl/cv32e40p_id_stage.sv b/rtl/cv32e40p_id_stage.sv index 74e8446d5..0f54e31eb 100644 --- a/rtl/cv32e40p_id_stage.sv +++ b/rtl/cv32e40p_id_stage.sv @@ -61,7 +61,7 @@ module cv32e40p_id_stage // Interface to IF stage input logic instr_valid_i, - input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage + input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage output logic instr_req_o, input logic is_compressed_i, input logic illegal_c_insn_i, @@ -196,8 +196,8 @@ module cv32e40p_id_stage // Interrupt signals input logic [31:0] irq_i, input logic irq_sec_i, - input logic [31:0] 
mie_bypass_i, // MIE CSR (bypass) - output logic [31:0] mip_o, // MIP CSR + input logic [31:0] mie_bypass_i, // MIE CSR (bypass) + output logic [31:0] mip_o, // MIP CSR input logic m_irq_enable_i, input logic u_irq_enable_i, output logic irq_ack_o, @@ -1086,7 +1086,7 @@ module cv32e40p_id_stage .COREV_CLUSTER(COREV_CLUSTER), .COREV_PULP (COREV_PULP) ) controller_i ( - .clk (clk), // Gated clock + .clk (clk), // Gated clock .clk_ungated_i(clk_ungated_i), // Ungated clock .rst_n (rst_n), diff --git a/rtl/cv32e40p_if_stage.sv b/rtl/cv32e40p_if_stage.sv index ede05613b..23fd14f5f 100644 --- a/rtl/cv32e40p_if_stage.sv +++ b/rtl/cv32e40p_if_stage.sv @@ -80,7 +80,7 @@ module cv32e40p_if_stage #( input logic [4:0] m_exc_vec_pc_mux_i, // selects ISR address for vectorized interrupt lines input logic [4:0] u_exc_vec_pc_mux_i, // selects ISR address for vectorized interrupt lines - output logic csr_mtvec_init_o, // tell CS regfile to init mtvec + output logic csr_mtvec_init_o, // tell CS regfile to init mtvec // jump and branch target and decision input logic [31:0] jump_target_id_i, // jump target address @@ -95,7 +95,7 @@ module cv32e40p_if_stage #( input logic id_ready_i, // misc signals - output logic if_busy_o, // is the IF stage busy fetching instructions? + output logic if_busy_o, // is the IF stage busy fetching instructions? 
output logic perf_imiss_o // Instruction Fetch Miss ); @@ -200,7 +200,7 @@ module cv32e40p_if_stage #( .instr_addr_o (instr_addr_o), .instr_gnt_i (instr_gnt_i), .instr_rvalid_i (instr_rvalid_i), - .instr_err_i (instr_err_i), // Not supported (yet) + .instr_err_i (instr_err_i), // Not supported (yet) .instr_err_pmp_i(instr_err_pmp_i), // Not supported (yet) .instr_rdata_i (instr_rdata_i), diff --git a/rtl/cv32e40p_int_controller.sv b/rtl/cv32e40p_int_controller.sv index b2044c7b6..af8489e0f 100644 --- a/rtl/cv32e40p_int_controller.sv +++ b/rtl/cv32e40p_int_controller.sv @@ -30,8 +30,8 @@ module cv32e40p_int_controller input logic rst_n, // External interrupt lines - input logic [31:0] irq_i, // Level-triggered interrupt inputs - input logic irq_sec_i, // Interrupt secure bit from EU + input logic [31:0] irq_i, // Level-triggered interrupt inputs + input logic irq_sec_i, // Interrupt secure bit from EU // To cv32e40p_controller output logic irq_req_ctrl_o, @@ -40,10 +40,10 @@ module cv32e40p_int_controller output logic irq_wu_ctrl_o, // To/from cv32e40p_cs_registers - input logic [31:0] mie_bypass_i, // MIE CSR (bypass) - output logic [31:0] mip_o, // MIP CSR - input logic m_ie_i, // Interrupt enable bit from CSR (M mode) - input logic u_ie_i, // Interrupt enable bit from CSR (U mode) + input logic [31:0] mie_bypass_i, // MIE CSR (bypass) + output logic [31:0] mip_o, // MIP CSR + input logic m_ie_i, // Interrupt enable bit from CSR (M mode) + input logic u_ie_i, // Interrupt enable bit from CSR (U mode) input PrivLvl_t current_priv_lvl_i ); diff --git a/rtl/cv32e40p_load_store_unit.sv b/rtl/cv32e40p_load_store_unit.sv index c9773ff37..024d43699 100644 --- a/rtl/cv32e40p_load_store_unit.sv +++ b/rtl/cv32e40p_load_store_unit.sv @@ -43,27 +43,27 @@ module cv32e40p_load_store_unit #( input logic [31:0] data_rdata_i, // signals from ex stage - input logic data_we_ex_i, // write enable -> from ex stage - input logic [ 1:0] data_type_ex_i, // Data type word, halfword, byte 
-> from ex stage - input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage + input logic data_we_ex_i, // write enable -> from ex stage + input logic [ 1:0] data_type_ex_i, // Data type word, halfword, byte -> from ex stage + input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage input logic [ 1:0] data_reg_offset_ex_i, // offset inside register for stores -> from ex stage input logic data_load_event_ex_i, // load event -> from ex stage - input logic [ 1:0] data_sign_ext_ex_i, // sign extension -> from ex stage + input logic [ 1:0] data_sign_ext_ex_i, // sign extension -> from ex stage - output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage - input logic data_req_ex_i, // data request -> from ex stage - input logic [31:0] operand_a_ex_i, // operand a from RF for address -> from ex stage - input logic [31:0] operand_b_ex_i, // operand b from RF for address -> from ex stage - input logic addr_useincr_ex_i, // use a + b or just a for address -> from ex stage + output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage + input logic data_req_ex_i, // data request -> from ex stage + input logic [31:0] operand_a_ex_i, // operand a from RF for address -> from ex stage + input logic [31:0] operand_b_ex_i, // operand b from RF for address -> from ex stage + input logic addr_useincr_ex_i, // use a + b or just a for address -> from ex stage - input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline + input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline output logic data_misaligned_o, // misaligned access was detected -> to controller - input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage + input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage output logic [5:0] data_atop_o, // atomic instruction signal -> core output output logic p_elw_start_o, // load event starts - output 
logic p_elw_finish_o, // load event finishes + output logic p_elw_finish_o, // load event finishes // stall signal output logic lsu_ready_ex_o, // LSU ready for new data in EX stage @@ -468,7 +468,7 @@ module cv32e40p_load_store_unit #( .resp_valid_o(resp_valid), .resp_rdata_o(resp_rdata), - .resp_err_o (resp_err), // Unused for now + .resp_err_o (resp_err), // Unused for now .obi_req_o (data_req_o), .obi_gnt_i (data_gnt_i), @@ -476,10 +476,10 @@ module cv32e40p_load_store_unit #( .obi_we_o (data_we_o), .obi_be_o (data_be_o), .obi_wdata_o (data_wdata_o), - .obi_atop_o (data_atop_o), // Not (yet) defined in OBI 1.0 spec + .obi_atop_o (data_atop_o), // Not (yet) defined in OBI 1.0 spec .obi_rdata_i (data_rdata_i), .obi_rvalid_i(data_rvalid_i), - .obi_err_i (data_err_i) // External bus error (validity defined by obi_rvalid_i) + .obi_err_i (data_err_i) // External bus error (validity defined by obi_rvalid_i) ); diff --git a/rtl/cv32e40p_mult.sv b/rtl/cv32e40p_mult.sv index 804f24e1c..ea0da1937 100644 --- a/rtl/cv32e40p_mult.sv +++ b/rtl/cv32e40p_mult.sv @@ -370,7 +370,9 @@ module cv32e40p_mult |-> (result_o == (($signed( {{32{op_a_i[31]}}, op_a_i} - ) * {32'b0, op_b_i}) >> 32))); + ) * { + 32'b0, op_b_i + }) >> 32))); // check multiplication result for mulhu assert property ( diff --git a/rtl/cv32e40p_prefetch_buffer.sv b/rtl/cv32e40p_prefetch_buffer.sv index 80a5ecb88..596bdfe94 100644 --- a/rtl/cv32e40p_prefetch_buffer.sv +++ b/rtl/cv32e40p_prefetch_buffer.sv @@ -48,8 +48,8 @@ module cv32e40p_prefetch_buffer #( output logic [31:0] instr_addr_o, input logic [31:0] instr_rdata_i, input logic instr_rvalid_i, - input logic instr_err_i, // Not used yet (future addition) - input logic instr_err_pmp_i, // Not used yet (future addition) + input logic instr_err_i, // Not used yet (future addition) + input logic instr_err_pmp_i, // Not used yet (future addition) // Prefetch Buffer Status output logic busy_o @@ -167,15 +167,15 @@ module cv32e40p_prefetch_buffer #( 
.resp_valid_o(resp_valid), .resp_rdata_o(resp_rdata), - .resp_err_o (resp_err), // Unused for now + .resp_err_o (resp_err), // Unused for now .obi_req_o (instr_req_o), .obi_gnt_i (instr_gnt_i), .obi_addr_o (instr_addr_o), - .obi_we_o (), // Left unconnected on purpose - .obi_be_o (), // Left unconnected on purpose - .obi_wdata_o (), // Left unconnected on purpose - .obi_atop_o (), // Left unconnected on purpose + .obi_we_o (), // Left unconnected on purpose + .obi_be_o (), // Left unconnected on purpose + .obi_wdata_o (), // Left unconnected on purpose + .obi_atop_o (), // Left unconnected on purpose .obi_rdata_i (instr_rdata_i), .obi_rvalid_i(instr_rvalid_i), .obi_err_i (instr_err_i) diff --git a/rtl/cv32e40p_prefetch_controller.sv b/rtl/cv32e40p_prefetch_controller.sv index 2f7bea754..d00406e54 100644 --- a/rtl/cv32e40p_prefetch_controller.sv +++ b/rtl/cv32e40p_prefetch_controller.sv @@ -47,10 +47,10 @@ module cv32e40p_prefetch_controller #( input logic rst_n, // Fetch stage interface - input logic req_i, // Fetch stage requests instructions - input logic branch_i, // Taken branch + input logic req_i, // Fetch stage requests instructions + input logic branch_i, // Taken branch input logic [31:0] branch_addr_i, // Taken branch address (only valid when branch_i = 1) - output logic busy_o, // Prefetcher busy + output logic busy_o, // Prefetcher busy // HW loop signals input logic hwlp_jump_i, diff --git a/rtl/cv32e40p_sleep_unit.sv b/rtl/cv32e40p_sleep_unit.sv index 39c5537f2..03e519487 100644 --- a/rtl/cv32e40p_sleep_unit.sv +++ b/rtl/cv32e40p_sleep_unit.sv @@ -59,8 +59,8 @@ module cv32e40p_sleep_unit #( // Clock, reset interface input logic clk_ungated_i, // Free running clock input logic rst_n, - output logic clk_gated_o, // Gated clock - input logic scan_cg_en_i, // Enable all clock gates for testing + output logic clk_gated_o, // Gated clock + input logic scan_cg_en_i, // Enable all clock gates for testing // Core sleep output logic core_sleep_o, diff --git 
a/rtl/cv32e40p_top.sv b/rtl/cv32e40p_top.sv index ef656da97..7ddd2d5a2 100644 --- a/rtl/cv32e40p_top.sv +++ b/rtl/cv32e40p_top.sv @@ -52,7 +52,7 @@ module cv32e40p_top #( input logic [31:0] data_rdata_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, From efa1d165b446503273785b3495714dc4b64a014b Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Wed, 14 Jun 2023 16:08:25 +0800 Subject: [PATCH 16/38] running latest verible on pipe_freeze_trace --- bhv/pipe_freeze_trace.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index 55a89b1bd..c73c8b923 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -360,7 +360,7 @@ endfunction task monitor_pipeline(); $display("*****Starting pipeline monitoring*****\n"); forever begin - wait(clk_i_d == 1'b0 & rst_ni == 1'b1); + wait (clk_i_d == 1'b0 & rst_ni == 1'b1); // r_pipe_freeze_trace. <= ; r_pipe_freeze_trace.instr_req = instr_req_i; @@ -676,6 +676,6 @@ task monitor_pipeline(); r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; ->e_pipe_monitor_ok; - wait(clk_i_d == 1'b1); + wait (clk_i_d == 1'b1); end endtask From 7ad3e635a36433270bf87ce3d9db6dcc19ac17b9 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 14 Jun 2023 17:57:42 +0200 Subject: [PATCH 17/38] Removed redundant table command for 2 tables (and restored correct github rendering). Signed-off-by: Pascal Gouedo --- docs/source/instruction_set_extensions.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index 8623dac01..ae7155b27 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -666,7 +666,6 @@ Bit Manipulation Encoding .. 
table:: Immediate Bit Manipulation operations encoding :name: Immediate Bit Manipulation operations encoding - :width: 50 :widths: 5 14 13 5 8 6 16 33 :class: no-scrollbar-table @@ -690,7 +689,6 @@ Bit Manipulation Encoding .. table:: Register Bit Manipulation operations encoding :name: Register Bit Manipulation operations encoding - :width: 50 :widths: 19 13 5 8 6 16 33 :class: no-scrollbar-table From 53d2dd646e9a8ad82fa6e3b8ea9eb185467ed684 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Fri, 16 Jun 2023 10:16:24 +0200 Subject: [PATCH 18/38] Corrected wrong value to emulate cv.avg/avgu with cv.addN/adduN Signed-off-by: Pascal Gouedo --- docs/source/instruction_set_extensions.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/instruction_set_extensions.rst b/docs/source/instruction_set_extensions.rst index ae7155b27..a0bb95090 100644 --- a/docs/source/instruction_set_extensions.rst +++ b/docs/source/instruction_set_extensions.rst @@ -799,13 +799,13 @@ General ALU operations | | | | | Note: Arithmetic shift right. | | | | - | | Setting Is3 to 2 replaces former cv.avg. | + | | Setting Is3 to 1 replaces former cv.avg. | +-------------------------------------------+------------------------------------------------------------------------+ | **cv.adduN rD, rs1, rs2, Is3** | rD = (rs1 + rs2) >> Is3 | | | | | | Note: Logical shift right. | | | | - | | Setting Is3 to 2 replaces former cv.avg. | + | | Setting Is3 to 1 replaces former cv.avgu. 
| +-------------------------------------------+------------------------------------------------------------------------+ | **cv.addRN rD, rs1, rs2, Is3** | rD = (rs1 + rs2 + 2^(Is3-1)) >>> Is3 | | | | From a932b9c8cf965aafc4e60bb3275e43f38574ee2c Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Mon, 19 Jun 2023 04:37:46 +0200 Subject: [PATCH 19/38] running verible --- bhv/cv32e40p_apu_tracer.sv | 2 +- bhv/cv32e40p_rvfi.sv | 34 +++++++++++++++++----------------- bhv/cv32e40p_rvfi_trace.sv | 2 +- bhv/cv32e40p_tb_wrapper.sv | 4 ++-- bhv/cv32e40p_tracer.sv | 15 ++++++++------- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/bhv/cv32e40p_apu_tracer.sv b/bhv/cv32e40p_apu_tracer.sv index 2c84aa329..6183b00d8 100644 --- a/bhv/cv32e40p_apu_tracer.sv +++ b/bhv/cv32e40p_apu_tracer.sv @@ -52,7 +52,7 @@ module cv32e40p_apu_tracer ( // open/close output file for writing initial begin - wait (rst_n == 1'b1); + wait(rst_n == 1'b1); $sformat(fn, "apu_trace_core_%h.log", hart_id_i); $display("[APU_TRACER %2d] Output filename is: %s", hart_id_i, fn); apu_trace = $fopen(fn, "w"); diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index b014e352a..457fbe672 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -82,7 +82,7 @@ module cv32e40p_rvfi input logic [ 1:0][31:0] hwlp_end_q_i, input logic [ 1:0][31:0] hwlp_counter_q_i, input logic [ 1:0][31:0] hwlp_counter_n_i, - input logic minstret_i, + input logic minstret_i, // LSU input logic lsu_en_id_i, input logic lsu_we_id_i, @@ -741,17 +741,17 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; // new_rvfi_trace.m_csr.mepc_wdata = 32'h0000_554E; // end - rvfi_order = new_rvfi_trace.m_order; - rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; - rvfi_insn = new_rvfi_trace.m_insn; + rvfi_order = new_rvfi_trace.m_order; + rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; + rvfi_insn = new_rvfi_trace.m_insn; //Trying something here //Flag as trap everytime minstret is not incremented - 
if(new_rvfi_trace.m_csr.minstret_rdata == r_previous_minstret) begin + if (new_rvfi_trace.m_csr.minstret_rdata == r_previous_minstret) begin new_rvfi_trace.m_trap = 1'b1; end else begin - r_previous_minstret = new_rvfi_trace.m_csr.minstret_rdata; + r_previous_minstret = new_rvfi_trace.m_csr.minstret_rdata; new_rvfi_trace.m_trap = 1'b1; end @@ -1027,7 +1027,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction - bit s_was_flush; //debug exception is flagged as trap only if preceed by a flush + bit s_was_flush; //debug exception is flagged as trap only if preceed by a flush //Work arround until I find the coreect way to distinguish trap function void check_trap(); bit s_dbg_exception, s_exception, s_irq; @@ -1051,7 +1051,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end - if(s_was_flush == 1'b0) begin + if (s_was_flush == 1'b0) begin s_dbg_exception = 1'b0; end @@ -1175,7 +1175,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_fflags_we_non_apu; bit s_frm_we_non_apu; - bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes + bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes bit s_is_irq_start; trace_if = new(); @@ -1214,10 +1214,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; check_trap(); pc_mux_interrupt = 1'b0; - if(r_pipe_freeze_trace.pc_mux == 4'b0100) begin - if(r_pipe_freeze_trace.exc_pc_mux == 3'b001) begin + if (r_pipe_freeze_trace.pc_mux == 4'b0100) begin + if (r_pipe_freeze_trace.exc_pc_mux == 3'b001) begin pc_mux_interrupt = 1'b1; - s_is_irq_start = 1'b1; + s_is_irq_start = 1'b1; end end @@ -1531,8 +1531,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.init(trace_if); trace_id.m_is_ebreak = trace_if.m_is_ebreak; trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; 
trace_if.m_valid = 1'b0; s_id_done = 1'b0; @@ -1556,8 +1556,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.init(trace_if); trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; end @@ -1598,7 +1598,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; dcsr_to_id(); end - if(r_pipe_freeze_trace.pc_set) begin + if (r_pipe_freeze_trace.pc_set) begin s_is_pc_set = 1'b1; end diff --git a/bhv/cv32e40p_rvfi_trace.sv b/bhv/cv32e40p_rvfi_trace.sv index 6bd798577..6ed4b773e 100644 --- a/bhv/cv32e40p_rvfi_trace.sv +++ b/bhv/cv32e40p_rvfi_trace.sv @@ -181,7 +181,7 @@ instr_trace_t trace_retire; end initial begin - wait (rst_n == 1'b1); + wait(rst_n == 1'b1); $sformat(fn, "trace_core.log"); $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); diff --git a/bhv/cv32e40p_tb_wrapper.sv b/bhv/cv32e40p_tb_wrapper.sv index 6e7c60adb..14de3b607 100644 --- a/bhv/cv32e40p_tb_wrapper.sv +++ b/bhv/cv32e40p_tb_wrapper.sv @@ -86,7 +86,7 @@ module cv32e40p_tb_wrapper input logic [31:0] data_rdata_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, @@ -286,7 +286,7 @@ module cv32e40p_tb_wrapper .hwlp_counter_q_i(hwlp_counter_q), .hwlp_counter_n_i(hwlp_counter_n), - .minstret_i (cv32e40p_top_i.core_i.id_stage_i.minstret), + .minstret_i (cv32e40p_top_i.core_i.id_stage_i.minstret), //// EX probes //// .ex_valid_i (cv32e40p_top_i.core_i.ex_valid), .ex_ready_i (cv32e40p_top_i.core_i.ex_ready), diff --git a/bhv/cv32e40p_tracer.sv b/bhv/cv32e40p_tracer.sv index 5a31c24d8..8208f2e61 100644 --- a/bhv/cv32e40p_tracer.sv +++ 
b/bhv/cv32e40p_tracer.sv @@ -178,7 +178,7 @@ module cv32e40p_tracer end initial begin - wait (rst_n == 1'b1); + wait(rst_n == 1'b1); $sformat(fn, "trace_core_%h.log", hart_id_i); $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); @@ -201,7 +201,7 @@ module cv32e40p_tracer always @(trace_wb) trace_wb_is_delay_instr = (trace_wb != null && is_wb_delay_instr( - trace_wb + trace_wb )) ? 1 : 0; assign rd = {rd_is_fp, instr[11:07]}; @@ -214,8 +214,9 @@ module cv32e40p_tracer foreach (trace.regs_write[i]) if (trace.regs_write[i].addr == reg_addr) begin trace.regs_write[i].value = wdata; - `uvm_info(info_tag, $sformatf("Write mapped %0d, %0d:0x%08x pc:0x%08x", i, reg_addr, wdata, - trace.pc), UVM_DEBUG) + `uvm_info(info_tag, $sformatf( + "Write mapped %0d, %0d:0x%08x pc:0x%08x", i, reg_addr, wdata, trace.pc), + UVM_DEBUG) end else begin `uvm_info(info_tag, $sformatf( "Unmapped write to %0d:0x%08x, expected write to %0d", @@ -251,11 +252,11 @@ module cv32e40p_tracer // Funnel all handoffs to the ISS here, note that this must be automatic // as multiple retire events may occur at a time (wb_bypass) always begin - wait (trace_q.size() != 0); + wait(trace_q.size() != 0); trace_retire = trace_q.pop_front(); - wait (trace_retire.retire != 0); + wait(trace_retire.retire != 0); - if (trace_retire.ebreak) wait (debug_mode == 1); + if (trace_retire.ebreak) wait(debug_mode == 1); // Write signals and data structures used by step-and-compare insn_regs_write = trace_retire.regs_write; From 719fcca7b2fb45d02d7290fd417fb3919ec0f450 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Mon, 19 Jun 2023 11:14:03 +0800 Subject: [PATCH 20/38] Verible crash workaround --- bhv/pipe_freeze_trace.sv | 79 ++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index c73c8b923..220f80261 100644 --- a/bhv/pipe_freeze_trace.sv +++ 
b/bhv/pipe_freeze_trace.sv @@ -176,6 +176,8 @@ typedef struct { logic irq_wu_ctrl; logic [4:0] irq_id_ctrl; + logic dummy; + struct { //// CSR Probes //// csr_num_e addr; @@ -322,8 +324,6 @@ typedef struct { pipe_trace_t r_pipe_freeze_trace; -event e_pipe_monitor_ok; - // Compute each CSR write enable function compute_csr_we(); r_pipe_freeze_trace.csr.mstatus_we = 1'b0; @@ -357,38 +357,44 @@ endfunction * At negedge we buffer all signals form rtl * The rest of the tracer will work from those buffered signals */ + +function void if_probes(); + r_pipe_freeze_trace.if_valid = if_valid_i; + r_pipe_freeze_trace.if_ready = if_ready_i; + r_pipe_freeze_trace.instr_valid_if = instr_valid_if_i; + r_pipe_freeze_trace.instr_if = instr_if_i; + r_pipe_freeze_trace.pc_if = pc_if_i; + r_pipe_freeze_trace.instr_pmp_err_if = instr_pmp_err_if_i; + + r_pipe_freeze_trace.instr_valid_id = instr_valid_id_i; + r_pipe_freeze_trace.instr_rdata_id = instr_rdata_id_i; + r_pipe_freeze_trace.is_fetch_failed_id = is_fetch_failed_id_i; + r_pipe_freeze_trace.instr_req_int = instr_req_int_i; + r_pipe_freeze_trace.clear_instr_valid = clear_instr_valid_i; +endfunction + +event e_pipe_monitor_ok; + task monitor_pipeline(); $display("*****Starting pipeline monitoring*****\n"); forever begin - wait (clk_i_d == 1'b0 & rst_ni == 1'b1); - // r_pipe_freeze_trace. 
<= ; - - r_pipe_freeze_trace.instr_req = instr_req_i; - r_pipe_freeze_trace.instr_grant = instr_grant_i; - r_pipe_freeze_trace.instr_rvalid = instr_rvalid_i; - r_pipe_freeze_trace.is_decoding = is_decoding_i; - r_pipe_freeze_trace.is_illegal = is_illegal_i; - r_pipe_freeze_trace.trigger_match = trigger_match_i; - r_pipe_freeze_trace.data_misaligned = data_misaligned_i; - r_pipe_freeze_trace.lsu_data_we_ex = lsu_data_we_ex_i; - - r_pipe_freeze_trace.debug_mode = debug_mode_i; - r_pipe_freeze_trace.debug_cause = debug_cause_i; - r_pipe_freeze_trace.prefetch_req = prefetch_req_i; - r_pipe_freeze_trace.pc_set = pc_set_i; + wait(clk_i_d == 1'b0 & rst_ni == 1'b1); + + r_pipe_freeze_trace.instr_req = instr_req_i; + r_pipe_freeze_trace.instr_grant = instr_grant_i; + r_pipe_freeze_trace.instr_rvalid = instr_rvalid_i; + r_pipe_freeze_trace.is_decoding = is_decoding_i; + r_pipe_freeze_trace.is_illegal = is_illegal_i; + r_pipe_freeze_trace.trigger_match = trigger_match_i; + r_pipe_freeze_trace.data_misaligned = data_misaligned_i; + r_pipe_freeze_trace.lsu_data_we_ex = lsu_data_we_ex_i; + + r_pipe_freeze_trace.debug_mode = debug_mode_i; + r_pipe_freeze_trace.debug_cause = debug_cause_i; + r_pipe_freeze_trace.prefetch_req = prefetch_req_i; + r_pipe_freeze_trace.pc_set = pc_set_i; //// IF probes //// - r_pipe_freeze_trace.if_valid = if_valid_i; - r_pipe_freeze_trace.if_ready = if_ready_i; - r_pipe_freeze_trace.instr_valid_if = instr_valid_if_i; - r_pipe_freeze_trace.instr_if = instr_if_i; - r_pipe_freeze_trace.pc_if = pc_if_i; - r_pipe_freeze_trace.instr_pmp_err_if = instr_pmp_err_if_i; - - r_pipe_freeze_trace.instr_valid_id = instr_valid_id_i; - r_pipe_freeze_trace.instr_rdata_id = instr_rdata_id_i; - r_pipe_freeze_trace.is_fetch_failed_id = is_fetch_failed_id_i; - r_pipe_freeze_trace.instr_req_int = instr_req_int_i; - r_pipe_freeze_trace.clear_instr_valid = clear_instr_valid_i; + if_probes(); //// ID probes //// r_pipe_freeze_trace.pc_id = pc_id_i; r_pipe_freeze_trace.id_valid 
= id_valid_i; @@ -663,6 +669,11 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.fcsr_n = {24'b0, csr_fcsr_frm_n_i, csr_fcsr_fflags_n_i}; r_pipe_freeze_trace.csr.fcsr_q = {24'b0, csr_fcsr_frm_q_i, csr_fcsr_fflags_q_i}; + r_pipe_freeze_trace.hwloop.start_q = hwlp_start_q_i; + r_pipe_freeze_trace.hwloop.end_q = hwlp_end_q_i; + r_pipe_freeze_trace.hwloop.counter_q = hwlp_counter_q_i; + r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; + compute_csr_we(); if (csr_fcsr_fflags_we_i) begin r_pipe_freeze_trace.csr.fflags_we = 1'b1; @@ -670,12 +681,8 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.mstatus_we = 1'b1; end - r_pipe_freeze_trace.hwloop.start_q = hwlp_start_q_i; - r_pipe_freeze_trace.hwloop.end_q = hwlp_end_q_i; - r_pipe_freeze_trace.hwloop.counter_q = hwlp_counter_q_i; - r_pipe_freeze_trace.hwloop.counter_n = hwlp_counter_n_i; - ->e_pipe_monitor_ok; - wait (clk_i_d == 1'b1); + + wait(clk_i_d == 1'b1); end endtask From 8ae9c1238a95df19c45de170b9dca8f19df042db Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Tue, 20 Jun 2023 16:25:34 +0800 Subject: [PATCH 21/38] Correcting rvfi issue when lsu and apu resp at the same time --- bhv/cv32e40p_rvfi.sv | 51 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 457fbe672..bab3f2f50 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -1163,6 +1163,18 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end endfunction + insn_trace_t lsu_trace_q[$]; + insn_trace_t trace_lsu_req, trace_lsu_resp; + bit s_is_misaligned_resp; + + // function void lsu_resp(); + // if(s_is_misaligned_resp) begin + + // end else if(trace_lsu_resp.size() > 0) begin + // trace_lsu_resp = lsu_trace_q.pop_front(); + // end + // endfunction + task compute_pipeline(); bit s_new_valid_insn; bit s_ex_valid_adjusted; @@ -1178,6 +1190,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; 
bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes bit s_is_irq_start; + bit s_skip_wb; // used to skip wb monitoring when apu resp and not lsu trace_if = new(); trace_id = new(); trace_ex = new(); @@ -1205,6 +1218,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_is_pc_set = 1'b0; s_is_irq_start = 1'b0; + s_skip_wb = 1'b0; $display("*****Starting pipeline computing*****\n"); forever begin @@ -1300,9 +1314,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end //WB_STAGE + s_skip_wb = 1'b0; if (r_pipe_freeze_trace.apu_rvalid && (apu_trace_q.size() > 0)) begin apu_resp(); - end else if (trace_wb.m_valid) begin + if(!r_pipe_freeze_trace.data_rvalid) begin + s_skip_wb = 1'b1; + end + // end else if (r_pipe_freeze_trace.data_rvalid && (lsu_trace_q.size() > 0)) begin + // lsu_resp(); + end + if (trace_wb.m_valid && !s_skip_wb) begin if (r_pipe_freeze_trace.rf_we_wb) begin if((trace_wb.m_rd_addr[0] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0])) begin trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; @@ -1339,15 +1360,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (trace_ex.m_valid) begin `CSR_FROM_PIPE(ex, misa) - // `CSR_FROM_PIPE(ex, mip) `CSR_FROM_PIPE(ex, tdata1) tinfo_to_ex(); - // `CSR_FROM_PIPE(ex, fflags) - // `CSR_FROM_PIPE(ex, frm) - // `CSR_FROM_PIPE(ex, fcsr) - // trace_ex.m_csr.fflags_wmask = '0; - // trace_ex.m_csr.frm_wmask = '0; - // trace_ex.m_csr.fcsr_wmask = '0; if (s_wb_valid_adjusted) begin if (trace_wb.m_valid) begin @@ -1388,15 +1402,13 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end - s_ex_valid_adjusted = r_pipe_freeze_trace.ex_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) && !r_pipe_freeze_trace.apu_rvalid; + s_ex_valid_adjusted = r_pipe_freeze_trace.ex_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) && (!r_pipe_freeze_trace.apu_rvalid || 
r_pipe_freeze_trace.data_req_ex); //EX_STAGE if (trace_id.m_valid) begin mtvec_to_id(); - // if(s_is_pc_set) begin `CSR_FROM_PIPE(id, mip) - // end if (!csr_is_irq && !s_is_irq_start) begin mstatus_to_id(); @@ -1413,6 +1425,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, fflags) `CSR_FROM_PIPE(id, frm) `CSR_FROM_PIPE(id, fcsr) + + if (s_fflags_we_non_apu) begin trace_id.m_fflags_we_non_apu = 1'b1; end @@ -1569,22 +1583,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_if.m_is_ebreak = '0; trace_if.m_trap = 1'b0; - - //check if interrupt - if (pc_mux_interrupt || pc_mux_nmi || pc_mux_exception) begin - trace_if.m_intr.intr = 1'b1; - trace_if.m_intr.interrupt = pc_mux_interrupt || pc_mux_nmi; - trace_if.m_intr.exception = pc_mux_exception; - // trace_if.m_intr.cause = r_pipe_freeze_trace.ctrl_fsm_cs.csr_cause.exception_code; - end else if (pc_mux_debug) begin - - end else begin - trace_if.m_intr.intr = '0; - trace_if.m_intr.interrupt = '0; - trace_if.m_intr.exception = '0; - end trace_if.m_valid = 1'b1; - end if (csr_is_irq && !s_is_pc_set) begin From 1cfb2e1d09996c7bd818cdc6965ed7b26b6f4ab3 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Thu, 22 Jun 2023 18:16:38 +0800 Subject: [PATCH 22/38] RVFI - recreating instret counter internally for trap --- bhv/cv32e40p_rvfi.sv | 28 ++++++++++++++++++++++++---- bhv/insn_trace.sv | 5 +++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index bab3f2f50..910759792 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -748,11 +748,12 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; //Trying something here //Flag as trap everytime minstret is not incremented - if (new_rvfi_trace.m_csr.minstret_rdata == r_previous_minstret) begin + if (new_rvfi_trace.m_instret_cnt == r_previous_minstret) begin + // new_rvfi_trace.m_trap = 1'b0; new_rvfi_trace.m_trap = 1'b1; end else 
begin - r_previous_minstret = new_rvfi_trace.m_csr.minstret_rdata; - new_rvfi_trace.m_trap = 1'b1; + r_previous_minstret = new_rvfi_trace.m_instret_cnt; + new_rvfi_trace.m_trap = 1'b0; end rvfi_rs1_addr = '0; @@ -892,12 +893,14 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction + int r_instret_cnt; function void minstret_to_id(); trace_id.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; trace_id.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; trace_id.m_csr.minstret_rmask = '1; trace_id.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; trace_id.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; + trace_id.m_instret_cnt = r_instret_cnt; endfunction function void minstret_to_ex(); @@ -906,6 +909,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_ex.m_csr.minstret_rmask = '1; trace_ex.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; trace_ex.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? 
'1 : '0; + trace_ex.m_instret_cnt = r_instret_cnt; endfunction function void tinfo_to_id(); @@ -1191,6 +1195,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_is_irq_start; bit s_skip_wb; // used to skip wb monitoring when apu resp and not lsu + bit s_increase_instret_1; + bit s_increase_instret_2; + trace_if = new(); trace_id = new(); trace_ex = new(); @@ -1211,7 +1218,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; is_dbg_taken = '0; s_was_flush = 1'b0; - r_previous_minstret = '0; + r_previous_minstret = -1; s_is_pc_set = 1'b0; s_is_irq_start = 1'b0; @@ -1220,6 +1227,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_is_irq_start = 1'b0; s_skip_wb = 1'b0; + r_instret_cnt = 0; + s_increase_instret_1 = 1'b0; + s_increase_instret_2 = 1'b0; + $display("*****Starting pipeline computing*****\n"); forever begin wait(e_pipe_monitor_ok.triggered); @@ -1227,6 +1238,12 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; check_trap(); + if(s_increase_instret_2) begin + r_instret_cnt = r_instret_cnt + 1; + end + s_increase_instret_2 = s_increase_instret_1; + s_increase_instret_1 = r_pipe_freeze_trace.minstret; + pc_mux_interrupt = 1'b0; if (r_pipe_freeze_trace.pc_mux == 4'b0100) begin if (r_pipe_freeze_trace.exc_pc_mux == 3'b001) begin @@ -1556,6 +1573,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, dscratch1) mstatus_to_id(); + // trace_id.m_instret_cnt = r_instret_cnt; + end else begin if (trace_id.m_valid) begin `CSR_FROM_PIPE(id, dscratch0) @@ -1574,6 +1593,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; + // trace_id.m_instret_cnt = r_instret_cnt; end trace_if.m_insn = r_pipe_freeze_trace.instr_if; //Instr comes from if, buffer for one cycle diff --git a/bhv/insn_trace.sv b/bhv/insn_trace.sv index 06ae75d75..cb766fc7e 100644 --- 
a/bhv/insn_trace.sv +++ b/bhv/insn_trace.sv @@ -54,6 +54,8 @@ bit m_move_down_pipe; + int m_instret_cnt; + struct { logic [31:0] addr ; logic [ 3:0] rmask; @@ -146,6 +148,7 @@ this.m_trap = 1'b0; this.m_fflags_we_non_apu = 1'b0; this.m_frm_we_non_apu = 1'b0; + this.m_instret_cnt = 0; endfunction /* @@ -174,6 +177,7 @@ this.m_got_ex_reg = 1'b0; this.m_got_regs_write = 1'b0; this.m_move_down_pipe = 1'b0; + this.m_instret_cnt = 0; this.m_rd_addr[0] = '0; this.m_rd_addr[1] = '0; this.m_2_rd_insn = 1'b0; @@ -237,6 +241,7 @@ this.m_is_ebreak = m_source.m_is_ebreak; this.m_is_illegal = m_source.m_is_illegal; this.m_is_irq = m_source.m_is_irq; + this.m_instret_cnt = m_source.m_instret_cnt; this.m_rs1_addr = m_source.m_rs1_addr; this.m_rs2_addr = m_source.m_rs2_addr; this.m_rs1_rdata = m_source.m_rs1_rdata; From 7ad07c66aa60f4aead6cc8c12d3cc3658d1e2fdd Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Thu, 22 Jun 2023 18:17:21 +0800 Subject: [PATCH 23/38] Running verible --- bhv/cv32e40p_rvfi.sv | 58 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 910759792..63d48bde0 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -1194,40 +1194,40 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_is_pc_set; //If pc_set, wait until next trace_id to commit csr changes bit s_is_irq_start; - bit s_skip_wb; // used to skip wb monitoring when apu resp and not lsu + bit s_skip_wb; // used to skip wb monitoring when apu resp and not lsu bit s_increase_instret_1; bit s_increase_instret_2; - trace_if = new(); - trace_id = new(); - trace_ex = new(); - trace_wb = new(); - s_new_valid_insn = 1'b0; - s_ex_valid_adjusted = 1'b0; + trace_if = new(); + trace_id = new(); + trace_ex = new(); + trace_wb = new(); + s_new_valid_insn = 1'b0; + s_ex_valid_adjusted = 1'b0; - s_id_done = 1'b0; - s_apu_wb_ok = 1'b0; - s_apu_0_cycle_reps = 1'b0; + s_id_done = 1'b0; 
+ s_apu_wb_ok = 1'b0; + s_apu_0_cycle_reps = 1'b0; - next_send = 1; - cnt_data_req = 0; - cnt_data_resp = 0; - cnt_apu_req = 0; - cnt_apu_resp = 0; - csr_is_irq = '0; - is_dbg_taken = '0; - s_was_flush = 1'b0; + next_send = 1; + cnt_data_req = 0; + cnt_data_resp = 0; + cnt_apu_req = 0; + cnt_apu_resp = 0; + csr_is_irq = '0; + is_dbg_taken = '0; + s_was_flush = 1'b0; - r_previous_minstret = -1; + r_previous_minstret = -1; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; - s_skip_wb = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; + s_skip_wb = 1'b0; - r_instret_cnt = 0; + r_instret_cnt = 0; s_increase_instret_1 = 1'b0; s_increase_instret_2 = 1'b0; @@ -1238,7 +1238,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; check_trap(); - if(s_increase_instret_2) begin + if (s_increase_instret_2) begin r_instret_cnt = r_instret_cnt + 1; end s_increase_instret_2 = s_increase_instret_1; @@ -1334,11 +1334,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_skip_wb = 1'b0; if (r_pipe_freeze_trace.apu_rvalid && (apu_trace_q.size() > 0)) begin apu_resp(); - if(!r_pipe_freeze_trace.data_rvalid) begin + if (!r_pipe_freeze_trace.data_rvalid) begin s_skip_wb = 1'b1; end - // end else if (r_pipe_freeze_trace.data_rvalid && (lsu_trace_q.size() > 0)) begin - // lsu_resp(); + // end else if (r_pipe_freeze_trace.data_rvalid && (lsu_trace_q.size() > 0)) begin + // lsu_resp(); end if (trace_wb.m_valid && !s_skip_wb) begin if (r_pipe_freeze_trace.rf_we_wb) begin From 7f3285b110507d193ea780d402ddee6c97adf5de Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Mon, 19 Jun 2023 16:46:18 +0200 Subject: [PATCH 24/38] CVFPU 0.8.0 vendorized Signed-off-by: Pascal Gouedo --- rtl/vendor/pulp_platform_fpnew.lock.hjson | 2 +- rtl/vendor/pulp_platform_fpnew.vendor.hjson | 7 +- rtl/vendor/pulp_platform_fpnew/LICENSE.apache | 201 +++++ .../{LICENSE => 
LICENSE.solderpad} | 0 .../pulp_platform_fpnew/README.license.md | 5 + .../src/fpnew_cast_multi.sv | 55 +- .../src/fpnew_divsqrt_multi.sv | 101 ++- .../src/fpnew_divsqrt_th_32.sv | 479 ++++++++++ .../pulp_platform_fpnew/src/fpnew_fma.sv | 40 +- .../src/fpnew_fma_multi.sv | 38 +- .../pulp_platform_fpnew/src/fpnew_noncomp.sv | 22 +- .../src/fpnew_opgroup_block.sv | 24 +- .../src/fpnew_opgroup_fmt_slice.sv | 47 +- .../src/fpnew_opgroup_multifmt_slice.sv | 182 +++- .../pulp_platform_fpnew/src/fpnew_pkg.sv | 1 + .../pulp_platform_fpnew/src/fpnew_rounding.sv | 2 + .../pulp_platform_fpnew/src/fpnew_top.sv | 16 +- .../gen_rtl/clk/rtl/gated_clk_cell.v | 50 ++ .../gen_rtl/fdsu/rtl/pa_fdsu_ctrl.v | 783 +++++++++++++++++ .../gen_rtl/fdsu/rtl/pa_fdsu_ff1.v | 163 ++++ .../gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v | 275 ++++++ .../gen_rtl/fdsu/rtl/pa_fdsu_prepare.v | 286 ++++++ .../gen_rtl/fdsu/rtl/pa_fdsu_round_single.v | 540 ++++++++++++ .../gen_rtl/fdsu/rtl/pa_fdsu_special.v | 345 ++++++++ .../gen_rtl/fdsu/rtl/pa_fdsu_srt_single.v | 824 ++++++++++++++++++ .../gen_rtl/fdsu/rtl/pa_fdsu_top.v | 461 ++++++++++ .../gen_rtl/fpu/rtl/pa_fpu_dp.v | 299 +++++++ .../gen_rtl/fpu/rtl/pa_fpu_frbus.v | 90 ++ .../gen_rtl/fpu/rtl/pa_fpu_src_type.v | 92 ++ .../vendor/opene906/LICENSE | 201 +++++ 30 files changed, 5499 insertions(+), 132 deletions(-) create mode 100644 rtl/vendor/pulp_platform_fpnew/LICENSE.apache rename rtl/vendor/pulp_platform_fpnew/{LICENSE => LICENSE.solderpad} (100%) create mode 100644 rtl/vendor/pulp_platform_fpnew/README.license.md create mode 100644 rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_th_32.sv create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ctrl.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ff1.v create mode 100644 
rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_prepare.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_round_single.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_special.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_srt_single.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_top.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v create mode 100644 rtl/vendor/pulp_platform_fpnew/vendor/opene906/LICENSE diff --git a/rtl/vendor/pulp_platform_fpnew.lock.hjson b/rtl/vendor/pulp_platform_fpnew.lock.hjson index b02a284b6..e150bcb2f 100644 --- a/rtl/vendor/pulp_platform_fpnew.lock.hjson +++ b/rtl/vendor/pulp_platform_fpnew.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/pulp-platform/fpnew.git - rev: 0fc3620978a500303ce94811eec7839e427dc995 + rev: 11659d7ff3580ac3226c6d56a90ef717cdc530e3 } } diff --git a/rtl/vendor/pulp_platform_fpnew.vendor.hjson b/rtl/vendor/pulp_platform_fpnew.vendor.hjson index 2b0576cd2..e76745d51 100644 --- a/rtl/vendor/pulp_platform_fpnew.vendor.hjson +++ b/rtl/vendor/pulp_platform_fpnew.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/pulp-platform/fpnew.git", - rev: "0fc3620978a500303ce94811eec7839e427dc995", + rev: "11659d7ff3580ac3226c6d56a90ef717cdc530e3", }, exclude_from_upstream: [ @@ -22,5 +22,10 @@ "ips_list.yml", 
"src_files.yml" "docs" + "util" + "vendor/opene906.lock.hjson" + "vendor/opene906.vendor.hjson" + "vendor/opene906/README.md" + "vendor/patches" ] } diff --git a/rtl/vendor/pulp_platform_fpnew/LICENSE.apache b/rtl/vendor/pulp_platform_fpnew/LICENSE.apache new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/rtl/vendor/pulp_platform_fpnew/LICENSE.apache @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/rtl/vendor/pulp_platform_fpnew/LICENSE b/rtl/vendor/pulp_platform_fpnew/LICENSE.solderpad similarity index 100% rename from rtl/vendor/pulp_platform_fpnew/LICENSE rename to rtl/vendor/pulp_platform_fpnew/LICENSE.solderpad diff --git a/rtl/vendor/pulp_platform_fpnew/README.license.md b/rtl/vendor/pulp_platform_fpnew/README.license.md new file mode 100644 index 000000000..ebbb64d33 --- /dev/null +++ b/rtl/vendor/pulp_platform_fpnew/README.license.md @@ -0,0 +1,5 @@ +# Licensing + +FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. Please refer to the [SolderPad license file](LICENSE.solderpad) for further information. + +The T-Head E906 DivSqrt unit, integrated into FPnew in [`vendor/opene906`](vendor/opene906), is released under the *Apache License, Version 2.0*. Please refer to the [Apache 2.0 license file](LICENSE.apache) for further information.
diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv index f6b748add..964ef7429 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv @@ -26,7 +26,8 @@ module fpnew_cast_multi #( // Do not change localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), fpnew_pkg::max_int_width(IntFmtConfig)), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -40,6 +41,7 @@ module fpnew_cast_multi #( input fpnew_pkg::fp_format_e dst_fmt_i, input fpnew_pkg::int_format_e int_fmt_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -50,12 +52,15 @@ module fpnew_cast_multi #( output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); // ---------- @@ -116,6 +121,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -131,6 +137,7 @@ module fpnew_cast_multi #( assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_int_fmt_q[0] = int_fmt_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign 
inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -146,7 +153,7 @@ module fpnew_cast_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -157,6 +164,7 @@ module fpnew_cast_multi #( `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -330,6 +338,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; logic [0:NUM_MID_REGS] mid_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -350,6 +359,7 @@ module fpnew_cast_multi #( assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; assign mid_pipe_int_fmt_q[0] = int_fmt_q; assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign mid_pipe_valid_q[0] = 
inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to input pipe @@ -366,7 +376,7 @@ module fpnew_cast_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + assign reg_ena = (mid_pipe_ready[i] & mid_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) @@ -382,6 +392,7 @@ module fpnew_cast_multi #( `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -491,6 +502,7 @@ module fpnew_cast_multi #( logic [NUM_FORMATS-1:0] fmt_uf_after_round; logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format + logic [NUM_INT_FORMATS-1:0] ifmt_of_after_round; logic rounded_sign; logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding @@ -575,14 +587,33 @@ module fpnew_cast_multi #( end end - // Classification after rounding select by destination format - assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; - assign of_after_round = fmt_of_after_round[dst_fmt_q2]; - // Negative integer result needs to be brought into two's complement assign rounded_int_res = rounded_sign ? 
unsigned'(-rounded_abs) : rounded_abs; assign rounded_int_res_zero = (rounded_int_res == '0); + // Detect integer overflows after rounding (only positives) + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : detect_overflow + ifmt_of_after_round[ifmt] = 1'b0; + // Int result can overflow if we're at the max exponent + if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin + // Check whether the rounded MSB differs from unrounded MSB + ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2]; + end + end + end else begin : inactive_format + assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE; + end + end + + // Classification after rounding select by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = dst_is_int_q ? 
ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2]; + // ------------------------- // FP Special case handling // ------------------------- @@ -666,7 +697,7 @@ module fpnew_cast_multi #( // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) assign int_result_is_special = info_q.is_nan | info_q.is_inf | - of_before_round | ~info_q.is_boxed | + of_before_round | of_after_round | ~info_q.is_boxed | (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); // All integer special cases are invalid @@ -716,6 +747,7 @@ module fpnew_cast_multi #( fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -726,6 +758,7 @@ module fpnew_cast_multi #( assign out_pipe_status_q[0] = status_d; assign out_pipe_ext_bit_q[0] = extension_bit; assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -741,12 +774,13 @@ module fpnew_cast_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], 
reg_ena, '0) `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -756,6 +790,7 @@ module fpnew_cast_multi #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv index 0db085a8e..a8b004952 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_multi.sv @@ -24,7 +24,8 @@ module fpnew_divsqrt_multi #( parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -35,22 +36,31 @@ module fpnew_divsqrt_multi #( input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, + input logic vectorial_op_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, + output logic divsqrt_done_o, + input logic simd_synch_done_i, + output logic divsqrt_ready_o, + input logic simd_synch_rdy_i, input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); // ---------- @@ -84,7 +94,9 @@ module fpnew_divsqrt_multi #( fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages logic [0:NUM_INP_REGS] inp_pipe_ready; @@ -95,9 +107,11 @@ module fpnew_divsqrt_multi #( assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_vec_op_q[0] = vectorial_op_i; assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry + // Input stage: Propagate pipeline ready signal to upstream circuitry assign in_ready_o = inp_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline 
@@ -110,14 +124,16 @@ module fpnew_divsqrt_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -154,33 +170,58 @@ module fpnew_divsqrt_multi #( // ------------ // Control FSM // ------------ + logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done; // status signals from unit instance + logic unit_ready, unit_done, unit_done_q; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts logic out_valid, out_ready; // output handshake with downstream - logic hold_result; // whether to put result into hold register - logic data_is_held; // data in hold register is valid logic unit_busy; // valid data in flight + logic simd_synch_done; // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; - 
// Upstream ready comes from sanitization FSM - assign inp_pipe_ready[NUM_INP_REGS] = in_ready; - // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; assign op_starting = div_valid | sqrt_valid; + // Hold additional information while the operation is in progress + logic result_is_fp8_q; + TagType result_tag_q; + logic result_mask_q; + AuxType result_aux_q; + logic result_vec_op_q; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) + `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) + + // Wait for other lanes only if the operation is vectorial + assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; + + // Valid synch with other lanes + // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes + // As soon as all the lanes are over, we can clear this FF and start with a new operation + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); + // Tell the other units that this unit has finished now or in the past + assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; + + // Ready synch with other lanes + // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes + assign divsqrt_ready_o = in_ready; + // Upstream ready comes from sanitization FSM, and it is synched among all the lanes + assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? 
simd_synch_rdy_i : in_ready; + // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm // Default assignments in_ready = 1'b0; out_valid = 1'b0; - hold_result = 1'b0; - data_is_held = 1'b0; unit_busy = 1'b0; state_d = state_q; @@ -195,19 +236,18 @@ module fpnew_divsqrt_multi #( // Operation in progress BUSY: begin unit_busy = 1'b1; // data in flight - // If the unit is done with processing - if (unit_done) begin + // If all the lanes are done with processing + if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin out_valid = 1'b1; // try to commit result downstream // If downstream accepts our result if (out_ready) begin state_d = IDLE; // we anticipate going back to idling.. + in_ready = 1'b1; // we acknowledge the instruction if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // we acknowledge the instruction state_d = BUSY; // and stay busy with it end // Otherwise if downstream is not ready for the result end else begin - hold_result = 1'b1; // activate the hold register state_d = HOLD; // wait for the pipeline to take the data end end @@ -215,7 +255,6 @@ module fpnew_divsqrt_multi #( // Waiting with valid result for downstream HOLD: begin unit_busy = 1'b1; // data in flight - data_is_held = 1'b1; // data in hold register is valid out_valid = 1'b1; // try to commit result downstream // If the result is accepted by downstream if (out_ready) begin @@ -241,22 +280,13 @@ module fpnew_divsqrt_multi #( // FSM status register (asynch active low reset) `FF(state_q, state_d, IDLE) - // Hold additional information while the operation is in progress - logic result_is_fp8_q; - TagType result_tag_q; - AuxType result_aux_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, 
'0) - // ----------------- // DIVSQRT instance // ----------------- logic [63:0] unit_result; logic [WIDTH-1:0] adjusted_result, held_result_q; fpnew_pkg::status_t unit_status, held_status_q; + logic hold_en; div_sqrt_top_mvp i_divsqrt_lei ( .Clk_CI ( clk_i ), @@ -278,9 +308,12 @@ module fpnew_divsqrt_multi #( // Adjust result width and fix FP8 assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result; + // Hold the result when one lane has finished execution, except when all the lanes finish together, + // or the operation is not vectorial, and the result can be accepted downstream + assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); // The Hold register (load, no reset) - `FFLNR(held_result_q, adjusted_result, hold_result, clk_i) - `FFLNR(held_status_q, unit_status, hold_result, clk_i) + `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) + `FFLNR(held_status_q, unit_status, hold_en, clk_i) // -------------- // Output Select @@ -288,8 +321,8 @@ module fpnew_divsqrt_multi #( logic [WIDTH-1:0] result_d; fpnew_pkg::status_t status_d; // Prioritize hold register data - assign result_d = data_is_held ? held_result_q : adjusted_result; - assign status_d = data_is_held ? held_status_q : unit_status; + assign result_d = unit_done_q ? held_result_q : adjusted_result; + assign status_d = unit_done_q ? 
held_status_q : unit_status; // ---------------- // Output Pipeline @@ -298,6 +331,7 @@ module fpnew_divsqrt_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -307,6 +341,7 @@ module fpnew_divsqrt_multi #( assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_mask_q[0] = result_mask_q; assign out_pipe_aux_q[0] = result_aux_q; assign out_pipe_valid_q[0] = out_valid; // Input stage: Propagate pipeline ready signal to inside pipe @@ -322,11 +357,12 @@ module fpnew_divsqrt_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -336,6 +372,7 @@ module fpnew_divsqrt_multi #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; 
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_th_32.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_th_32.sv new file mode 100644 index 000000000..8ddb80e9e --- /dev/null +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_th_32.sv @@ -0,0 +1,479 @@ +// Copyright 2019-2022 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Authors: Stefan Mach +// Luca Bertaccini +// Jiang Lannan +// Kexin Li + +`include "common_cells/registers.svh" + +module fpnew_divsqrt_th_32 #( + // FP32-only DivSqrt + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = 32, + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [1:0][WIDTH-1:0] operands_i, // 2 operands + input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i +); + + // ---------- + // Constants + // ---------- + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [1:0][WIDTH-1:0] operands_q; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::operation_e op_q; + logic in_valid_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign op_q = inp_pipe_op_q[NUM_INP_REGS]; + assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; + + // ------------ + // Control FSM + // ------------ + logic in_ready; // input handshake with upstream + logic div_op, sqrt_op; // input signalling with unit + logic unit_ready_q, unit_done; // status signals from unit instance + logic op_starting; // high in the cycle a new operation starts + logic out_valid, out_ready; // output handshake with downstream + logic hold_result; // whether to put result into hold register + logic data_is_held; // data in hold register is valid + logic unit_busy; // valid data in flight + // FSM states + typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; + fsm_state_e state_q, state_d; + + // Operations are gated by the FSM ready. 
Only DIV and SQRT start the unit; NOTE(review): other op_q values assert neither div_op nor sqrt_op, so invalid ops do not fall back to a sqrt here - confirm this is intended. + assign div_op = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; //in_ready delete, valid independent of ready + assign sqrt_op = in_valid_q & (op_q == fpnew_pkg::SQRT) & in_ready & ~flush_i; + assign op_starting = div_op | sqrt_op; //start computing or handshake, modify tb handshake right + + logic fdsu_fpu_ex1_stall, fdsu_fpu_ex1_stall_q; + logic div_op_d, div_op_q; + logic sqrt_op_d, sqrt_op_q; + + assign div_op_d = (fdsu_fpu_ex1_stall) ? div_op : 1'b0; + assign sqrt_op_d = (fdsu_fpu_ex1_stall) ? sqrt_op : 1'b0; + + `FFL(fdsu_fpu_ex1_stall_q, fdsu_fpu_ex1_stall, 1'b1, '0) + `FFL(div_op_q, div_op_d, 1'b1, '0) + `FFL(sqrt_op_q, sqrt_op_d, 1'b1, '0) + + // FSM to safely apply and receive data from DIVSQRT unit + always_comb begin : flag_fsm + // Default assignments + in_ready = 1'b0; + out_valid = 1'b0; + hold_result = 1'b0; + data_is_held = 1'b0; + unit_busy = 1'b0; + state_d = state_q; + inp_pipe_ready[NUM_INP_REGS] = unit_ready_q; + + unique case (state_q) + // Waiting for work + IDLE: begin + // in_ready = 1'b1; // we're ready + in_ready = unit_ready_q; //*** + if (in_valid_q && unit_ready_q) begin // New work arrives + inp_pipe_ready[NUM_INP_REGS] = unit_ready_q && !fdsu_fpu_ex1_stall; + state_d = BUSY; // go into processing state + end + end + // Operation in progress + BUSY: begin + inp_pipe_ready[NUM_INP_REGS] = fdsu_fpu_ex1_stall_q; + unit_busy = 1'b1; // data in flight + // If the unit is done with processing + if (unit_done) begin + out_valid = 1'b1; // try to commit result downstream + // If downstream accepts our result + if (out_ready) begin + state_d = IDLE; // we anticipate going back to idling.. 
+ if (in_valid_q && unit_ready_q) begin // ..unless new work comes in + in_ready = 1'b1; // we acknowledge the instruction + state_d = BUSY; // and stay busy with it + end + // Otherwise if downstream is not ready for the result + end else begin + hold_result = 1'b1; // activate the hold register + state_d = HOLD; // wait for the pipeline to take the data + end + end + end + // Waiting with valid result for downstream + HOLD: begin + unit_busy = 1'b1; // data in flight + data_is_held = 1'b1; // data in hold register is valid + out_valid = 1'b1; // try to commit result downstream + // If the result is accepted by downstream + if (out_ready) begin + state_d = IDLE; // go back to idle.. + if (in_valid_q && unit_ready_q) begin // ..unless new work comes in + in_ready = 1'b1; // acknowledge the new transaction + state_d = BUSY; // will be busy with the next instruction + end + end + end + // fall into idle state otherwise + default: state_d = IDLE; + endcase + + // Flushing overrides the other actions + if (flush_i) begin + unit_busy = 1'b0; // data is invalidated + out_valid = 1'b0; // cancel any valid data + state_d = IDLE; // go to default state + end + end + + // FSM status register (asynch active low reset) + `FF(state_q, state_d, IDLE) + + // Hold additional information while the operation is in progress + TagType result_tag_q; + AuxType result_aux_q; + logic result_mask_q; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) + + // ----------------- + // DIVSQRT instance + // ----------------- + logic [WIDTH-1:0] unit_result, held_result_q; + fpnew_pkg::status_t unit_status, held_status_q; + + // thead define fdsu module's input and output + logic ctrl_fdsu_ex1_sel; + logic fdsu_fpu_ex1_cmplt; + logic [4:0] 
fdsu_fpu_ex1_fflags; + logic [7:0] fdsu_fpu_ex1_special_sel; + logic [3:0] fdsu_fpu_ex1_special_sign; + logic fdsu_fpu_no_op; + logic [2:0] idu_fpu_ex1_eu_sel; + logic [31:0] fdsu_frbus_data; + logic [4:0] fdsu_frbus_fflags; + logic fdsu_frbus_wb_vld; + + // dp + logic [31:0] dp_frbus_ex2_data; + logic [4:0] dp_frbus_ex2_fflags; + logic [2:0] dp_xx_ex1_cnan; + logic [2:0] dp_xx_ex1_id; + logic [2:0] dp_xx_ex1_inf; + logic [2:0] dp_xx_ex1_norm; + logic [2:0] dp_xx_ex1_qnan; + logic [2:0] dp_xx_ex1_snan; + logic [2:0] dp_xx_ex1_zero; + logic ex2_inst_wb; + logic ex2_inst_wb_vld_d, ex2_inst_wb_vld_q; + + // frbus + logic [31:0] fpu_idu_fwd_data; + logic [4:0] fpu_idu_fwd_fflags; + logic fpu_idu_fwd_vld; + + logic unit_ready_d; + + // unit_ready_q related to state machine, different under special and normal cases. + always_comb begin + if(op_starting && unit_ready_q) begin + if(ex2_inst_wb && ex2_inst_wb_vld_q) begin + unit_ready_d = 1'b1; + end else begin + unit_ready_d = 1'b0; + end + end else if(fpu_idu_fwd_vld | flush_i) begin + unit_ready_d = 1'b1; + end else begin + unit_ready_d = unit_ready_q; + end + end + + `FFL(unit_ready_q, unit_ready_d, 1'b1, 1'b1) + + // determine input of time to select operands + always_comb begin + ctrl_fdsu_ex1_sel = 1'b0; + idu_fpu_ex1_eu_sel = 3'h0; + if (op_starting) begin // time to start calculation + ctrl_fdsu_ex1_sel = 1'b1; // time to select operands + idu_fpu_ex1_eu_sel = 3'h4; // time to select operands, only idu_fpu_ex1_eu_sel_i[2] works in fdsu module + end else if (fdsu_fpu_ex1_stall_q) begin + ctrl_fdsu_ex1_sel = 1'b1; // time to select operands + idu_fpu_ex1_eu_sel = 3'h4; // time to select operands, only idu_fpu_ex1_eu_sel_i[2] works in fdsu module + end else begin + ctrl_fdsu_ex1_sel = 1'b0; + idu_fpu_ex1_eu_sel = 3'h0; + end + end + + pa_fdsu_top i_divsqrt_thead ( + .cp0_fpu_icg_en ( 1'b0 ), // input clock gate enable in gated_clk_cell, active 0. + .cp0_fpu_xx_dqnan ( 1'b0 ), // When dqnan = 0, QNAN (0x7fc00000). 
+ .cp0_yy_clk_en ( 1'b1 ), // clock enable in gated_clk_cell, active 1. + .cpurst_b ( rst_ni ), // If negedge cpu reset, all state machines reset to IDLE. + .ctrl_fdsu_ex1_sel ( ctrl_fdsu_ex1_sel ), // select operands + .ctrl_xx_ex1_cmplt_dp ( ctrl_fdsu_ex1_sel ), // complete datapath + .ctrl_xx_ex1_inst_vld ( ctrl_fdsu_ex1_sel ), // instance valid + .ctrl_xx_ex1_stall ( fdsu_fpu_ex1_stall ), + .ctrl_xx_ex1_warm_up ( 1'b0 ), + .ctrl_xx_ex2_warm_up ( 1'b0 ), + .ctrl_xx_ex3_warm_up ( 1'b0 ), + .dp_xx_ex1_cnan ( dp_xx_ex1_cnan ), // Special input type determination + .dp_xx_ex1_id ( dp_xx_ex1_id ), + .dp_xx_ex1_inf ( dp_xx_ex1_inf ), + .dp_xx_ex1_qnan ( dp_xx_ex1_qnan ), + .dp_xx_ex1_rm ( rnd_mode_q ), // rounding mode + .dp_xx_ex1_snan ( dp_xx_ex1_snan ), + .dp_xx_ex1_zero ( dp_xx_ex1_zero ), + .fdsu_fpu_debug_info ( ), // output, not used + .fdsu_fpu_ex1_cmplt ( fdsu_fpu_ex1_cmplt ), // output, ctrl_xx_ex1_cmplt_dp && idu_fpu_ex1_eu_sel_i[2] + .fdsu_fpu_ex1_cmplt_dp ( ), // output, not used + .fdsu_fpu_ex1_fflags ( fdsu_fpu_ex1_fflags ), // output, special case fflags + .fdsu_fpu_ex1_special_sel ( fdsu_fpu_ex1_special_sel ), // output, special case type selection + .fdsu_fpu_ex1_special_sign ( fdsu_fpu_ex1_special_sign ), // output, special case sign determination + .fdsu_fpu_ex1_stall ( fdsu_fpu_ex1_stall ), // output, determine whether stall in ex1 + .fdsu_fpu_no_op ( fdsu_fpu_no_op ), // output, if Write Back SM and fdsu SM no operation, fdsu_fpu_no_op = 1; Otherwise if busy, fdsu_fpu_no_op = 0. 
(not used) + .fdsu_frbus_data ( fdsu_frbus_data ), // output, normal case result + .fdsu_frbus_fflags ( fdsu_frbus_fflags ), // output, normal case fflags + .fdsu_frbus_freg ( ), // output, determined by input idu_fpu_ex1_dst_freg + .fdsu_frbus_wb_vld ( fdsu_frbus_wb_vld ), // output, determine whether write back valid + .forever_cpuclk ( clk_i ), + .frbus_fdsu_wb_grant ( fdsu_frbus_wb_vld ), // input is fdsu_frbus_wb_vld + .idu_fpu_ex1_dst_freg ( 5'h0f ), // register index to write back (not used) + .idu_fpu_ex1_eu_sel ( idu_fpu_ex1_eu_sel ), // time to select operands + .idu_fpu_ex1_func ( {8'b0, div_op | div_op_q, sqrt_op | sqrt_op_q} ), + .idu_fpu_ex1_srcf0 ( operands_q[0][31:0] ), // the first operand + .idu_fpu_ex1_srcf1 ( operands_q[1][31:0] ), // the second operand + .pad_yy_icg_scan_en ( 1'b0 ), // input of core_top, set to 1'b0 from the beginning to end + .rtu_xx_ex1_cancel ( 1'b0 ), + .rtu_xx_ex2_cancel ( 1'b0 ), + .rtu_yy_xx_async_flush ( flush_i ), + .rtu_yy_xx_flush ( 1'b0 ) + ); + + pa_fpu_dp x_pa_fpu_dp ( + .cp0_fpu_icg_en ( 1'b0 ), + .cp0_fpu_xx_rm ( rnd_mode_q ), + .cp0_yy_clk_en ( 1'b1 ), + .ctrl_xx_ex1_inst_vld ( ctrl_fdsu_ex1_sel ), + .ctrl_xx_ex1_stall ( 1'b0 ), + .ctrl_xx_ex1_warm_up ( 1'b0 ), + .dp_frbus_ex2_data ( dp_frbus_ex2_data ), // output + .dp_frbus_ex2_fflags ( dp_frbus_ex2_fflags ), // output + .dp_xx_ex1_cnan ( dp_xx_ex1_cnan ), // output + .dp_xx_ex1_id ( dp_xx_ex1_id ), // output + .dp_xx_ex1_inf ( dp_xx_ex1_inf ), // output + .dp_xx_ex1_norm ( dp_xx_ex1_norm ), // output + .dp_xx_ex1_qnan ( dp_xx_ex1_qnan ), // output + .dp_xx_ex1_snan ( dp_xx_ex1_snan ), // output + .dp_xx_ex1_zero ( dp_xx_ex1_zero ), // output + .ex2_inst_wb ( ex2_inst_wb ), // output + .fdsu_fpu_ex1_fflags ( fdsu_fpu_ex1_fflags ), + .fdsu_fpu_ex1_special_sel ( fdsu_fpu_ex1_special_sel ), + .fdsu_fpu_ex1_special_sign ( fdsu_fpu_ex1_special_sign ), + .forever_cpuclk ( clk_i ), + .idu_fpu_ex1_eu_sel ( idu_fpu_ex1_eu_sel ), + .idu_fpu_ex1_func ( {8'b0, div_op, 
sqrt_op} ), + .idu_fpu_ex1_gateclk_vld ( fdsu_fpu_ex1_cmplt ), + .idu_fpu_ex1_rm ( rnd_mode_q ), + .idu_fpu_ex1_srcf0 ( operands_q[0][31:0] ), + .idu_fpu_ex1_srcf1 ( operands_q[1][31:0] ), + .idu_fpu_ex1_srcf2 ( '0 ), + .pad_yy_icg_scan_en ( 1'b0 ) + ); + + assign ex2_inst_wb_vld_d = ctrl_fdsu_ex1_sel; + `FF(ex2_inst_wb_vld_q, ex2_inst_wb_vld_d, '0) + + pa_fpu_frbus x_pa_fpu_frbus ( + .ctrl_frbus_ex2_wb_req ( ex2_inst_wb & ex2_inst_wb_vld_q ), + .dp_frbus_ex2_data ( dp_frbus_ex2_data ), + .dp_frbus_ex2_fflags ( dp_frbus_ex2_fflags ), + .fdsu_frbus_data ( fdsu_frbus_data ), + .fdsu_frbus_fflags ( fdsu_frbus_fflags ), + .fdsu_frbus_wb_vld ( fdsu_frbus_wb_vld ), + .fpu_idu_fwd_data ( fpu_idu_fwd_data ), // output + .fpu_idu_fwd_fflags ( fpu_idu_fwd_fflags ), // output + .fpu_idu_fwd_vld ( fpu_idu_fwd_vld ) // output + ); + + always_comb begin + unit_result[31:0] = fpu_idu_fwd_data[31:0]; + unit_status[4:0] = fpu_idu_fwd_fflags[4:0]; + unit_done = fpu_idu_fwd_vld; + end + + // The Hold register (load, no reset) + `FFLNR(held_result_q, unit_result, hold_result, clk_i) + `FFLNR(held_status_q, unit_status, hold_result, clk_i) + + // -------------- + // Output Select + // -------------- + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + // Prioritize hold register data + assign result_d = data_is_held ? held_result_q : unit_result; + assign status_d = data_is_held ? 
held_status_q : unit_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_mask_q[0] = result_mask_q; + assign out_pipe_aux_q[0] = result_aux_q; + assign out_pipe_valid_q[0] = out_valid; + // Input stage: Propagate pipeline ready signal to inside pipe + assign out_ready = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); +endmodule diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv index 3a581c423..051e6a698 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma.sv @@ -21,8 +21,9 @@ module fpnew_fma #( parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, parameter type AuxType = logic, - - localparam int unsigned WIDTH = 
fpnew_pkg::fp_width(FpFormat) // do not change + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat), + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -33,6 +34,7 @@ module fpnew_fma #( input fpnew_pkg::operation_e op_i, input logic op_mod_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -43,12 +45,15 @@ module fpnew_fma #( output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); // ---------- @@ -66,8 +71,8 @@ module fpnew_fma #( // datapath leakage. This is either given by the exponent bits or the width of the LZC result. // In most reasonable FP formats the internal exponent will be wider than the LZC result. localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); - // Shift amount width: maximum internal mantissa size is 3p+3 bits - localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); // Pipelines localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE ? 
NumPipeRegs @@ -104,6 +109,7 @@ module fpnew_fma #( fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -116,6 +122,7 @@ module fpnew_fma #( assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -131,7 +138,7 @@ module fpnew_fma #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -139,6 +146,7 @@ module fpnew_fma #( `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end @@ -169,7 +177,7 @@ module fpnew_fma #( // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C // | ADD | \c 0 | ADD: Set operand A to +1.0 // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C - // | MUL | \c 0 | MUL: Set operand C to +0.0 + // | MUL | \c 0 | MUL: Set 
operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. always_comb begin : op_select @@ -192,8 +200,11 @@ module fpnew_fma #( operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. end - fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) - operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. end default: begin // propagate don't cares @@ -405,6 +416,7 @@ module fpnew_fma #( fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; logic [0:NUM_MID_REGS] mid_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -424,6 +436,7 @@ module fpnew_fma #( assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to input pipe @@ -440,7 +453,7 @@ module fpnew_fma #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = 
mid_pipe_ready[i] & mid_pipe_valid_q[i]; + assign reg_ena = (mid_pipe_ready[i] & mid_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -455,6 +468,7 @@ module fpnew_fma #( `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -631,6 +645,7 @@ module fpnew_fma #( fp_t [0:NUM_OUT_REGS] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -640,6 +655,7 @@ module fpnew_fma #( assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -655,11 +671,12 @@ module fpnew_fma #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + 
NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -669,6 +686,7 @@ module fpnew_fma #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv index 8233019d7..e691f6777 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_fma_multi.sv @@ -23,7 +23,8 @@ module fpnew_fma_multi #( parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -36,6 +37,7 @@ module fpnew_fma_multi #( input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -46,12 +48,15 @@ module fpnew_fma_multi #( output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); // ---------- @@ -72,8 +77,8 @@ module fpnew_fma_multi #( // datapath leakage. This is either given by the exponent bits or the width of the LZC result. // In most reasonable FP formats the internal exponent will be wider than the LZC result. localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); - // Shift amount width: maximum internal mantissa size is 3p+3 bits - localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); // Pipelines localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE ? 
NumPipeRegs @@ -117,6 +122,7 @@ module fpnew_fma_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -131,6 +137,7 @@ module fpnew_fma_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -146,7 +153,7 @@ module fpnew_fma_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -156,6 +163,7 @@ module fpnew_fma_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -218,7 +226,7 @@ module fpnew_fma_multi #( // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C // | ADD | \c 
0 | ADD: Set operand A to +1.0 // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C - // | MUL | \c 0 | MUL: Set operand C to +0.0 + // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. always_comb begin : op_select @@ -241,8 +249,11 @@ module fpnew_fma_multi #( operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. end - fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) - operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. 
end default: begin // propagate don't cares @@ -490,6 +501,7 @@ module fpnew_fma_multi #( fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; logic [0:NUM_MID_REGS] mid_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -510,6 +522,7 @@ module fpnew_fma_multi #( assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to input pipe @@ -526,7 +539,7 @@ module fpnew_fma_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + assign reg_ena = (mid_pipe_ready[i] & mid_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -542,6 +555,7 @@ module fpnew_fma_multi #( `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -780,6 +794,7 @@ module fpnew_fma_multi 
#( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -789,6 +804,7 @@ module fpnew_fma_multi #( assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -804,11 +820,12 @@ module fpnew_fma_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -818,6 +835,7 @@ module fpnew_fma_multi #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; 
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_noncomp.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_noncomp.sv index 295e53300..4dabfaae1 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_noncomp.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_noncomp.sv @@ -21,8 +21,9 @@ module fpnew_noncomp #( parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, parameter type AuxType = logic, - - localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat), + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -33,6 +34,7 @@ module fpnew_noncomp #( input fpnew_pkg::operation_e op_i, input logic op_mod_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -45,12 +47,15 @@ module fpnew_noncomp #( output fpnew_pkg::classmask_e class_mask_o, output logic is_class_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); // ---------- @@ -89,6 +94,7 @@ module fpnew_noncomp #( fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -101,6 +107,7 @@ module fpnew_noncomp #( assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_tag_q[0] = 
tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -116,7 +123,7 @@ module fpnew_noncomp #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -124,6 +131,7 @@ module fpnew_noncomp #( `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end @@ -354,6 +362,7 @@ module fpnew_noncomp #( fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; logic [0:NUM_OUT_REGS] out_pipe_is_class_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -366,6 +375,7 @@ module fpnew_noncomp #( assign out_pipe_class_mask_q[0] = class_mask_d; assign out_pipe_is_class_q[0] = is_class_d; assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to 
inside pipe @@ -381,7 +391,7 @@ module fpnew_noncomp #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) @@ -389,6 +399,7 @@ module fpnew_noncomp #( `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -400,6 +411,7 @@ module fpnew_noncomp #( assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_block.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_block.sv index 6588c7270..d8611d9c1 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_block.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_block.sv @@ -18,15 +18,19 @@ module fpnew_opgroup_block #( // FPU configuration parameter int unsigned Width = 32, parameter logic EnableVectors = 1'b1, + 
parameter logic PulpDivsqrt = 1'b1, parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1, parameter fpnew_pkg::ifmt_logic_t IntFmtMask = '1, parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0}, parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL}, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, // Do not change localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup) + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors), + localparam type MaskType = logic [NUM_LANES-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -41,6 +45,7 @@ module fpnew_opgroup_block #( input fpnew_pkg::int_format_e int_fmt_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -92,6 +97,11 @@ module fpnew_opgroup_block #( assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format + // Forward masks related to the right SIMD lane + localparam int unsigned INTERNAL_LANES = fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors); + logic [INTERNAL_LANES-1:0] mask_slice; + always_comb for (int b = 0; b < INTERNAL_LANES; b++) mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b]; + fpnew_opgroup_fmt_slice #( .OpGroup ( OpGroup ), .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), @@ -99,7 +109,8 @@ module fpnew_opgroup_block #( .EnableVectors ( EnableVectors ), .NumPipeRegs ( FmtPipeRegs[fmt] ), .PipeConfig ( PipeConfig ), - .TagType ( TagType ) + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) ) i_fmt_slice ( .clk_i, .rst_ni, @@ -110,6 +121,7 @@ module fpnew_opgroup_block #( .op_mod_i, .vectorial_op_i, .tag_i, + .simd_mask_i ( 
mask_slice ), .in_valid_i ( in_valid ), .in_ready_o ( fmt_in_ready[fmt] ), .flush_i, @@ -119,7 +131,8 @@ module fpnew_opgroup_block #( .tag_o ( fmt_outputs[fmt].tag ), .out_valid_o ( fmt_out_valid[fmt] ), .out_ready_i ( fmt_out_ready[fmt] ), - .busy_o ( fmt_busy[fmt] ) + .busy_o ( fmt_busy[fmt] ), + .reg_ena_i ( '0 ) ); // If the format wants to use merged ops, tie off the dangling ones not used here end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused @@ -167,6 +180,7 @@ module fpnew_opgroup_block #( .FpFmtConfig ( FpFmtMask ), .IntFmtConfig ( IntFmtMask ), .EnableVectors ( EnableVectors ), + .PulpDivsqrt ( PulpDivsqrt ), .NumPipeRegs ( REG ), .PipeConfig ( PipeConfig ), .TagType ( TagType ) @@ -183,6 +197,7 @@ module fpnew_opgroup_block #( .int_fmt_i, .vectorial_op_i, .tag_i, + .simd_mask_i ( simd_mask_i ), .in_valid_i ( in_valid ), .in_ready_o ( fmt_in_ready[FMT] ), .flush_i, @@ -192,7 +207,8 @@ module fpnew_opgroup_block #( .tag_o ( fmt_outputs[FMT].tag ), .out_valid_o ( fmt_out_valid[FMT] ), .out_ready_i ( fmt_out_ready[FMT] ), - .busy_o ( fmt_busy[FMT] ) + .busy_o ( fmt_busy[FMT] ), + .reg_ena_i ( '0 ) ); end diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_fmt_slice.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_fmt_slice.sv index ece62fb7a..9aeb469bd 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_fmt_slice.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_fmt_slice.sv @@ -21,9 +21,14 @@ module fpnew_opgroup_fmt_slice #( parameter logic EnableVectors = 1'b1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter logic ExtRegEna = 1'b0, parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, // Do not change - localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup) + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_LANES = 
fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors), + localparam type MaskType = logic [NUM_LANES-1:0], + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -35,6 +40,7 @@ module fpnew_opgroup_fmt_slice #( input logic op_mod_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -48,11 +54,13 @@ module fpnew_opgroup_fmt_slice #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); - localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors); + localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes @@ -65,6 +73,7 @@ module fpnew_opgroup_fmt_slice #( logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // dito logic result_is_vector, result_is_class; @@ -115,6 +124,7 @@ module fpnew_opgroup_fmt_slice #( .op_i, .op_mod_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( vectorial_op ), // Remember whether operation was vectorial .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -123,10 +133,12 @@ module fpnew_opgroup_fmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_vectorial[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .busy_o ( 
lane_busy[lane] ), + .reg_ena_i ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; @@ -157,7 +169,8 @@ module fpnew_opgroup_fmt_slice #( // .aux_o ( lane_vectorial[lane] ), // .out_valid_o ( out_valid ), // .out_ready_i ( out_ready ), - // .busy_o ( lane_busy[lane] ) + // .busy_o ( lane_busy[lane] ), + // .reg_ena_i // ); // assign lane_is_class[lane] = 1'b0; end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance @@ -176,6 +189,7 @@ module fpnew_opgroup_fmt_slice #( .op_i, .op_mod_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( vectorial_op ), // Remember whether operation was vectorial .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -186,10 +200,12 @@ module fpnew_opgroup_fmt_slice #( .class_mask_o ( lane_class_mask[lane] ), .is_class_o ( lane_is_class[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_vectorial[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .busy_o ( lane_busy[lane] ), + .reg_ena_i ); end // ADD OTHER OPTIONS HERE @@ -198,8 +214,8 @@ module fpnew_opgroup_fmt_slice #( assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = (lane_out_valid[lane] | ExtRegEna) ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = (lane_out_valid[lane] | ExtRegEna) ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin @@ -215,7 +231,10 @@ module fpnew_opgroup_fmt_slice #( assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result; // Create Classification results - if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size + if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size + assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane]; + assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0; + end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF || lane_class_mask[lane] == fpnew_pkg::NEGNORM || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM || @@ -248,9 +267,11 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 
8 * (Width / 8) : NUM_LANES*8; - // Pad out unused vec_class bits - if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class - assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + // Pad out unused vec_class bits if each classify result is on 8 bits + if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin + if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class + assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + end end // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; @@ -272,7 +293,7 @@ module fpnew_opgroup_fmt_slice #( automatic fpnew_pkg::status_t temp_status; temp_status = '0; for (int i = 0; i < int'(NUM_LANES); i++) - temp_status |= lane_status[i]; + temp_status |= lane_status[i] & {5{lane_masks[i]}}; status_o = temp_status; end endmodule diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_multifmt_slice.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_multifmt_slice.sv index b6637e6fb..cdc3e34d9 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_multifmt_slice.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_opgroup_multifmt_slice.sv @@ -22,12 +22,17 @@ module fpnew_opgroup_multifmt_slice #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, parameter logic EnableVectors = 1'b1, + parameter logic PulpDivsqrt = 1'b1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter logic ExtRegEna = 1'b0, parameter type TagType = logic, // Do not change localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors), + localparam type MaskType = logic [NUM_SIMD_LANES-1:0], + localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs ) ( input logic clk_i, input logic rst_ni, @@ -42,6 +47,7 @@ module fpnew_opgroup_multifmt_slice #( input fpnew_pkg::int_format_e int_fmt_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -55,9 +61,18 @@ module fpnew_opgroup_multifmt_slice #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + // External register enable override + input logic [ExtRegEnaWidth-1:0] reg_ena_i ); + if ((OpGroup == fpnew_pkg::DIVSQRT) && !PulpDivsqrt && + !((FpFmtConfig[0] == 1) && (FpFmtConfig[1:NUM_FORMATS-1] == '0))) begin + $fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. \ +Set PulpDivsqrt to 1 not to use the PULP DivSqrt unit \ +or set Features.FpFmtMask to support only FP32"); + end + localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig); localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1); @@ -67,7 +82,7 @@ module fpnew_opgroup_multifmt_slice #( fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes + logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes logic vectorial_op; logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation logic [AUX_BITS-1:0] aux_data; @@ -75,19 +90,18 @@ module fpnew_opgroup_multifmt_slice #( // additional flags for CONV logic dst_fmt_is_int, dst_is_cpk; logic [1:0] dst_vec_op; // info for vectorial results (for packing) - logic [2:0] target_aux_d, target_aux_q; + logic [2:0] target_aux_d; logic is_up_cast, 
is_down_cast; logic [NUM_FORMATS-1:0][Width-1:0] fmt_slice_result; logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result; - logic [Width-1:0] conv_slice_result; - logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used logic [NUM_LANES-1:0] lane_busy; // dito @@ -96,6 +110,8 @@ module fpnew_opgroup_multifmt_slice #( logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) + logic simd_synch_rdy, simd_synch_done; + // ----------- // Input Side // ----------- @@ -121,6 +137,8 @@ module fpnew_opgroup_multifmt_slice #( // CONV passes one operand for assembly after the unit: opC for cpk, opB for others if (OpGroup == fpnew_pkg::CONV) begin : conv_target assign conv_target_d = dst_is_cpk ? 
operands_i[2] : operands_i[1]; + end else begin : not_conv_target + assign conv_target_d = '0; end // For 2-operand units, prepare boxing info @@ -173,7 +191,11 @@ module fpnew_opgroup_multifmt_slice #( // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin - local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i); + if (i == 2) begin + local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(dst_fmt_i); + end else begin + local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i); + end end // override operand 0 for some conversions @@ -215,6 +237,7 @@ module fpnew_opgroup_multifmt_slice #( .src_fmt_i, .dst_fmt_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( aux_data ), .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -223,41 +246,84 @@ module fpnew_opgroup_multifmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .busy_o ( lane_busy[lane] ), + .reg_ena_i ); end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - fpnew_divsqrt_multi #( - .FpFmtConfig ( LANE_FORMATS ), - .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) - ) i_fpnew_divsqrt_multi ( - .clk_i, - .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i, - .op_i, - .dst_fmt_i, - .tag_i, - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), 
- .busy_o ( lane_busy[lane] ) - ); + if (!PulpDivsqrt && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin + // The T-head-based DivSqrt unit is supported only in FP32-only configurations + fpnew_divsqrt_th_32 #( + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_divsqrt_multi_th ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i, + .op_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + .reg_ena_i + ); + end else begin + fpnew_divsqrt_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_divsqrt_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i, + .op_i, + .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .vectorial_op_i ( vectorial_op ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .divsqrt_done_o ( divsqrt_done[lane] ), + .simd_synch_done_i( simd_synch_done ), + .divsqrt_ready_o ( divsqrt_ready[lane] ), + .simd_synch_rdy_i ( simd_synch_rdy ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + 
.reg_ena_i + ); + end end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance @@ -280,6 +346,7 @@ module fpnew_opgroup_multifmt_slice #( .dst_fmt_i, .int_fmt_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( aux_data ), .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -288,10 +355,12 @@ module fpnew_opgroup_multifmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .busy_o ( lane_busy[lane] ), + .reg_ena_i ); end // ADD OTHER OPTIONS HERE @@ -300,14 +369,20 @@ module fpnew_opgroup_multifmt_slice #( assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = (lane_out_valid[lane] | ExtRegEna) ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = (lane_out_valid[lane] | ExtRegEna) ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane assign lane_out_valid[lane] = 1'b0; // unused lane assign lane_in_ready[lane] = 1'b0; // unused lane - assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box + assign lane_aux[lane] = 1'b0; // unused lane + assign lane_masks[lane] = 1'b1; // unused lane + assign lane_tags[lane] = 1'b0; // unused lane + assign divsqrt_done[lane] = 1'b0; // unused lane + assign divsqrt_ready[lane] = 1'b0; // unused lane + assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane + assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; assign lane_busy[lane] = 1'b0; end @@ -354,10 +429,17 @@ module fpnew_opgroup_multifmt_slice #( assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]}; end - // Mute int results if unused - for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : extend_or_mute_int_result + // Mute int results if unused if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result assign ifmt_slice_result[ifmt] = '0; + + // Extend slice result if needed + end else begin : extend_int_result + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + if (NUM_LANES*INT_WIDTH < Width) + assign ifmt_slice_result[ifmt][Width-1:NUM_LANES*INT_WIDTH] = '0; end end @@ -385,7 +467,7 @@ module fpnew_opgroup_multifmt_slice #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + assign reg_ena = (byp_pipe_ready[i] & byp_pipe_valid_q[i]) | reg_ena_i[i]; // Generate the pipeline registers within the 
stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) @@ -399,6 +481,17 @@ module fpnew_opgroup_multifmt_slice #( assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs]; end else begin : no_conv assign {result_vec_op, result_is_cpk} = '0; + assign conv_target_q = '0; + end + + if (PulpDivsqrt) begin + // Synch lanes if there is more than one + assign simd_synch_rdy = EnableVectors ? &divsqrt_ready : divsqrt_ready[0]; + assign simd_synch_done = EnableVectors ? &divsqrt_done : divsqrt_done[0]; + end else begin + // Unused (alternative divider only supported for scalar FP32 divsqrt) + assign simd_synch_rdy = '0; + assign simd_synch_done = '0; end // ------------ @@ -422,7 +515,8 @@ module fpnew_opgroup_multifmt_slice #( automatic fpnew_pkg::status_t temp_status; temp_status = '0; for (int i = 0; i < int'(NUM_LANES); i++) - temp_status |= lane_status[i]; + temp_status |= lane_status[i] & {5{lane_masks[i]}}; status_o = temp_status; end + endmodule diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_pkg.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_pkg.sv index 08dd8c409..7addc3e9b 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_pkg.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_pkg.sv @@ -133,6 +133,7 @@ package fpnew_pkg; RDN = 3'b010, RUP = 3'b011, RMM = 3'b100, + ROD = 3'b101, // This mode is not defined in RISC-V FP-SPEC DYN = 3'b111 } roundmode_e; diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_rounding.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_rounding.sv index 9f5be96b6..4e6772094 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_rounding.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_rounding.sv @@ -40,6 +40,7 @@ module fpnew_rounding #( // 010 | RDN | Round Down (towards -\infty) // 011 | RUP | Round Up (towards \infty) // 100 | RMM | Round to Nearest, ties to Max Magnitude + // 101 | ROD | Round towards odd (this mode is not 
define in RISC-V FP-SPEC) // others | | *invalid* always_comb begin : rounding_decision unique case (rnd_mode_i) @@ -55,6 +56,7 @@ module fpnew_rounding #( fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i : 1'b0; // to 0 if +, away if - fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ? ~sign_i : 1'b0; // to 0 if -, away if + fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up + fpnew_pkg::ROD: round_up = ~abs_value_i[0] & (| round_sticky_bits_i); default: round_up = fpnew_pkg::DONT_CARE; // propagate x endcase end diff --git a/rtl/vendor/pulp_platform_fpnew/src/fpnew_top.sv b/rtl/vendor/pulp_platform_fpnew/src/fpnew_top.sv index 35a3b8639..a6ff89a67 100644 --- a/rtl/vendor/pulp_platform_fpnew/src/fpnew_top.sv +++ b/rtl/vendor/pulp_platform_fpnew/src/fpnew_top.sv @@ -17,8 +17,14 @@ module fpnew_top #( // FPU configuration parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, + // PulpDivSqrt = 0 enables T-head-based DivSqrt unit. 
Supported only for FP32-only instances of Fpnew + parameter logic PulpDivsqrt = 1'b1, parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, + parameter int unsigned EnableSIMDMask = 0, // Do not change + localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors), + localparam type MaskType = logic [NumLanes-1:0], localparam int unsigned WIDTH = Features.Width, localparam int unsigned NUM_OPERANDS = 3 ) ( @@ -34,6 +40,7 @@ module fpnew_top #( input fpnew_pkg::int_format_e int_fmt_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -87,6 +94,10 @@ module fpnew_top #( end end + // Filter out the mask if not used + MaskType simd_mask; + assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}}; + // ------------------------- // Generate Operation Blocks // ------------------------- @@ -108,12 +119,14 @@ module fpnew_top #( .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), .Width ( WIDTH ), .EnableVectors ( Features.EnableVectors ), + .PulpDivsqrt ( PulpDivsqrt ), .FpFmtMask ( Features.FpFmtMask ), .IntFmtMask ( Features.IntFmtMask ), .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), .PipeConfig ( Implementation.PipeConfig ), - .TagType ( TagType ) + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) ) i_opgroup_block ( .clk_i, .rst_ni, @@ -127,6 +140,7 @@ module fpnew_top #( .int_fmt_i, .vectorial_op_i, .tag_i, + .simd_mask_i ( simd_mask ), .in_valid_i ( in_valid ), .in_ready_o ( opgrp_in_ready[opgrp] ), .flush_i, diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v new file mode 100644 index 000000000..c0e20e79e --- /dev/null +++ 
b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v
@@ -0,0 +1,50 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+module gated_clk_cell( // Clock-gate cell with the gating left out: clk_out is a direct copy of clk_in (see the last assign)
+  clk_in,
+  global_en,
+  module_en,
+  local_en,
+  external_en,
+  pad_yy_icg_scan_en,
+  clk_out
+);
+
+input clk_in;
+input global_en;
+input module_en;
+input local_en;
+input external_en;
+input pad_yy_icg_scan_en;
+output clk_out;
+
+wire clk_en_bf_latch; // combined enable; computed below but read by nothing in this module
+wire SE; // copy of pad_yy_icg_scan_en; also read by nothing in this module
+
+assign clk_en_bf_latch = (global_en && (module_en || local_en)) || external_en ; // external_en alone, or global_en together with module_en/local_en
+
+// SE driven from primary input, held constant
+assign SE = pad_yy_icg_scan_en;
+
+// // &Connect( .clk_in (clk_in), @50
+// // .SE (SE), @51
+// // .external_en (clk_en_bf_latch), @52
+// // .clk_out (clk_out) @53
+// // ) ; @54
+
+assign clk_out = clk_in; // pass-through: the &Connect lines above are commented out, so none of the enables affect clk_out
+
+endmodule
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ctrl.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ctrl.v
new file mode 100644
index 000000000..ce08eda94
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ctrl.v
@@ -0,0 +1,783 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// &ModuleBeg; @23
+module pa_fdsu_ctrl( // FDSU control: main SRT state machine, write-back state machine, pipeline-register enables and the gated clocks that feed them
+  cp0_fpu_icg_en,
+  cp0_yy_clk_en,
+  cpurst_b,
+  ctrl_fdsu_ex1_sel,
+  ctrl_xx_ex1_cmplt_dp,
+  ctrl_xx_ex1_inst_vld,
+  ctrl_xx_ex1_stall,
+  ctrl_xx_ex1_warm_up,
+  ctrl_xx_ex2_warm_up,
+  ctrl_xx_ex3_warm_up,
+  ex1_div,
+  ex1_expnt_adder_op0,
+  ex1_of_result_lfn,
+  ex1_op0_id,
+  ex1_op0_norm,
+  ex1_op1_id_vld,
+  ex1_op1_norm,
+  ex1_op1_sel,
+  ex1_oper_id_expnt,
+  ex1_oper_id_expnt_f,
+  ex1_pipedown,
+  ex1_pipedown_gate,
+  ex1_result_sign,
+  ex1_rm,
+  ex1_save_op0,
+  ex1_save_op0_gate,
+  ex1_sqrt,
+  ex1_srt_skip,
+  ex2_expnt_adder_op0,
+  ex2_of,
+  ex2_pipe_clk,
+  ex2_pipedown,
+  ex2_potnt_of,
+  ex2_potnt_uf,
+  ex2_result_inf,
+  ex2_result_lfn,
+  ex2_rslt_denorm,
+  ex2_srt_expnt_rst,
+  ex2_srt_first_round,
+  ex2_uf,
+  ex2_uf_srt_skip,
+  ex3_expnt_adjust_result,
+  ex3_pipedown,
+  ex3_rslt_denorm,
+  fdsu_ex1_sel,
+  fdsu_fpu_debug_info,
+  fdsu_fpu_ex1_cmplt,
+  fdsu_fpu_ex1_cmplt_dp,
+  fdsu_fpu_ex1_stall,
+  fdsu_fpu_no_op,
+  fdsu_frbus_wb_vld,
+  fdsu_yy_div,
+  fdsu_yy_expnt_rst,
+  fdsu_yy_of,
+  fdsu_yy_of_rm_lfn,
+  fdsu_yy_op0_norm,
+  fdsu_yy_op1_norm,
+  fdsu_yy_potnt_of,
+  fdsu_yy_potnt_uf,
+  fdsu_yy_result_inf,
+  fdsu_yy_result_lfn,
+  fdsu_yy_result_sign,
+  fdsu_yy_rm,
+  fdsu_yy_rslt_denorm,
+  fdsu_yy_sqrt,
+  fdsu_yy_uf,
+  fdsu_yy_wb_freg,
+  forever_cpuclk,
+  frbus_fdsu_wb_grant,
+  idu_fpu_ex1_dst_freg,
+  idu_fpu_ex1_eu_sel,
+  pad_yy_icg_scan_en,
+  rtu_xx_ex1_cancel,
+  rtu_xx_ex2_cancel,
+  rtu_yy_xx_async_flush,
+  rtu_yy_xx_flush,
+  srt_remainder_zero,
+  srt_sm_on
+);
+
+// &Ports; @24
+input cp0_fpu_icg_en;
+input cp0_yy_clk_en;
+input cpurst_b;
+input ctrl_fdsu_ex1_sel;
+input ctrl_xx_ex1_cmplt_dp;
+input ctrl_xx_ex1_inst_vld;
+input ctrl_xx_ex1_stall;
+input ctrl_xx_ex1_warm_up;
+input ctrl_xx_ex2_warm_up;
+input ctrl_xx_ex3_warm_up;
+input ex1_div;
+input [12:0] ex1_expnt_adder_op0;
+input ex1_of_result_lfn;
+input ex1_op0_id;
+input ex1_op0_norm; // declared but never read in this module
+input ex1_op1_id_vld;
+input ex1_op1_norm; // declared but never read in this module
+input [12:0] ex1_oper_id_expnt;
+input ex1_result_sign;
+input [2 :0] ex1_rm;
+input ex1_sqrt;
+input ex1_srt_skip;
+input ex2_of;
+input ex2_potnt_of;
+input ex2_potnt_uf;
+input ex2_result_inf;
+input ex2_result_lfn;
+input ex2_rslt_denorm;
+input [9 :0] ex2_srt_expnt_rst;
+input ex2_uf;
+input ex2_uf_srt_skip;
+input [9 :0] ex3_expnt_adjust_result;
+input ex3_rslt_denorm;
+input forever_cpuclk;
+input frbus_fdsu_wb_grant;
+input [4 :0] idu_fpu_ex1_dst_freg;
+input [2 :0] idu_fpu_ex1_eu_sel;
+input pad_yy_icg_scan_en;
+input rtu_xx_ex1_cancel;
+input rtu_xx_ex2_cancel;
+input rtu_yy_xx_async_flush;
+input rtu_yy_xx_flush;
+input srt_remainder_zero;
+output ex1_op1_sel;
+output [12:0] ex1_oper_id_expnt_f;
+output ex1_pipedown;
+output ex1_pipedown_gate;
+output ex1_save_op0;
+output ex1_save_op0_gate;
+output [9 :0] ex2_expnt_adder_op0;
+output ex2_pipe_clk;
+output ex2_pipedown;
+output ex2_srt_first_round;
+output ex3_pipedown;
+output fdsu_ex1_sel;
+output [4 :0] fdsu_fpu_debug_info;
+output fdsu_fpu_ex1_cmplt;
+output fdsu_fpu_ex1_cmplt_dp;
+output fdsu_fpu_ex1_stall;
+output fdsu_fpu_no_op;
+output fdsu_frbus_wb_vld;
+output fdsu_yy_div;
+output [9 :0] fdsu_yy_expnt_rst;
+output fdsu_yy_of;
+output fdsu_yy_of_rm_lfn;
+output fdsu_yy_op0_norm;
+output fdsu_yy_op1_norm;
+output fdsu_yy_potnt_of;
+output fdsu_yy_potnt_uf;
+output fdsu_yy_result_inf;
+output fdsu_yy_result_lfn;
+output fdsu_yy_result_sign;
+output [2 :0] fdsu_yy_rm;
+output fdsu_yy_rslt_denorm;
+output fdsu_yy_sqrt;
+output fdsu_yy_uf;
+output [4 :0] fdsu_yy_wb_freg;
+output srt_sm_on;
+
+// &Regs; @25
+reg ex2_srt_first_round;
+reg [2 :0] fdsu_cur_state;
+reg fdsu_div;
+reg [9 :0] fdsu_expnt_rst;
+reg [2 :0] fdsu_next_state;
+reg fdsu_of;
+reg fdsu_of_rm_lfn;
+reg fdsu_potnt_of;
+reg fdsu_potnt_uf;
+reg fdsu_result_inf;
+reg fdsu_result_lfn;
+reg fdsu_result_sign;
+reg [2 :0] fdsu_rm;
+reg fdsu_sqrt;
+reg fdsu_uf;
+reg [4 :0] fdsu_wb_freg;
+reg fdsu_yy_rslt_denorm;
+reg [4 :0] srt_cnt;
+reg [1 :0] wb_cur_state;
+reg [1 :0] wb_nxt_state;
+
+// &Wires; @26
+wire cp0_fpu_icg_en;
+wire cp0_yy_clk_en;
+wire cpurst_b;
+wire ctrl_fdsu_ex1_sel;
+wire ctrl_fdsu_ex1_stall;
+wire ctrl_fdsu_wb_vld;
+wire ctrl_iter_start;
+wire ctrl_iter_start_gate;
+wire ctrl_pack;
+wire ctrl_result_vld;
+wire ctrl_round;
+wire ctrl_sm_cmplt;
+wire ctrl_sm_ex1;
+wire ctrl_sm_idle;
+wire ctrl_sm_start;
+wire ctrl_sm_start_gate;
+wire ctrl_srt_idle;
+wire ctrl_srt_itering;
+wire ctrl_wb_idle;
+wire ctrl_wb_sm_cmplt;
+wire ctrl_wb_sm_ex2;
+wire ctrl_wb_sm_idle;
+wire ctrl_wfi2;
+wire ctrl_wfwb;
+wire ctrl_xx_ex1_cmplt_dp;
+wire ctrl_xx_ex1_inst_vld;
+wire ctrl_xx_ex1_stall;
+wire ctrl_xx_ex1_warm_up;
+wire ctrl_xx_ex2_warm_up;
+wire ctrl_xx_ex3_warm_up;
+wire ex1_div;
+wire [12:0] ex1_expnt_adder_op0;
+wire ex1_of_result_lfn;
+wire ex1_op0_id;
+wire ex1_op1_id_vld;
+wire ex1_op1_sel;
+wire [12:0] ex1_oper_id_expnt;
+wire [12:0] ex1_oper_id_expnt_f;
+wire ex1_pipe_clk;
+wire ex1_pipe_clk_en;
+wire ex1_pipedown;
+wire ex1_pipedown_gate;
+wire ex1_result_sign;
+wire [2 :0] ex1_rm;
+wire ex1_save_op0;
+wire ex1_save_op0_gate;
+wire ex1_sqrt;
+wire ex1_srt_skip;
+wire [4 :0] ex1_wb_freg;
+wire [9 :0] ex2_expnt_adder_op0;
+wire ex2_of;
+wire ex2_pipe_clk;
+wire ex2_pipe_clk_en;
+wire ex2_pipedown;
+wire ex2_potnt_of;
+wire ex2_potnt_uf;
+wire ex2_result_inf;
+wire ex2_result_lfn;
+wire ex2_rslt_denorm;
+wire [9 :0] ex2_srt_expnt_rst;
+wire ex2_uf;
+wire ex2_uf_srt_skip;
+wire [9 :0] ex3_expnt_adjust_result;
+wire ex3_pipedown;
+wire ex3_rslt_denorm;
+wire expnt_rst_clk;
+wire expnt_rst_clk_en;
+wire fdsu_busy;
+wire fdsu_clk;
+wire fdsu_clk_en;
+wire fdsu_dn_stall;
+wire fdsu_ex1_inst_vld;
+wire fdsu_ex1_res_vld;
+wire fdsu_ex1_sel;
+wire fdsu_flush;
+wire [4 :0] fdsu_fpu_debug_info;
+wire fdsu_fpu_ex1_cmplt;
+wire fdsu_fpu_ex1_cmplt_dp;
+wire fdsu_fpu_ex1_stall;
+wire fdsu_fpu_no_op;
+wire fdsu_frbus_wb_vld;
+wire fdsu_op0_norm;
+wire fdsu_op1_norm;
+wire fdsu_wb_grant;
+wire fdsu_yy_div;
+wire [9 :0] fdsu_yy_expnt_rst;
+wire fdsu_yy_of;
+wire fdsu_yy_of_rm_lfn;
+wire fdsu_yy_op0_norm;
+wire fdsu_yy_op1_norm;
+wire fdsu_yy_potnt_of;
+wire fdsu_yy_potnt_uf;
+wire fdsu_yy_result_inf;
+wire fdsu_yy_result_lfn;
+wire fdsu_yy_result_sign;
+wire [2 :0] fdsu_yy_rm;
+wire fdsu_yy_sqrt;
+wire fdsu_yy_uf;
+wire [4 :0] fdsu_yy_wb_freg;
+wire forever_cpuclk;
+wire frbus_fdsu_wb_grant;
+wire [4 :0] idu_fpu_ex1_dst_freg;
+wire [2 :0] idu_fpu_ex1_eu_sel;
+wire pad_yy_icg_scan_en;
+wire rtu_xx_ex1_cancel;
+wire rtu_xx_ex2_cancel;
+wire rtu_yy_xx_async_flush;
+wire rtu_yy_xx_flush;
+wire [4 :0] srt_cnt_ini;
+wire srt_cnt_zero;
+wire srt_last_round;
+wire srt_remainder_zero;
+wire srt_skip;
+wire srt_sm_on;
+
+
+//==========================================================
+// Input Signal
+//==========================================================
+assign ex1_wb_freg[4:0] = idu_fpu_ex1_dst_freg[4:0];
+assign fdsu_ex1_inst_vld = ctrl_xx_ex1_inst_vld && ctrl_fdsu_ex1_sel;
+assign fdsu_ex1_sel = idu_fpu_ex1_eu_sel[2]; // only bit 2 of the 3-bit select is read in this module
+// &Force("input", "idu_fpu_ex1_eu_sel"); &Force("bus", "idu_fpu_ex1_eu_sel", 2, 0); @34
+
+//==========================================================
+// FDSU Main State Machine
+//==========================================================
+assign fdsu_ex1_res_vld = fdsu_ex1_inst_vld && ex1_srt_skip;
+assign fdsu_wb_grant = frbus_fdsu_wb_grant;
+
+assign ctrl_iter_start = ctrl_sm_start && !fdsu_dn_stall // start now, unless the start is diverted through WFI2 ...
+                      || ctrl_wfi2; // ... in which case it is raised from the WFI2 state instead
+assign ctrl_iter_start_gate = ctrl_sm_start_gate && !fdsu_dn_stall // same as ctrl_iter_start but built from the _gate start, which ignores ex1_srt_skip
+                      || ctrl_wfi2;
+assign ctrl_sm_start = fdsu_ex1_inst_vld && ctrl_srt_idle
+                      && !ex1_srt_skip;
+assign ctrl_sm_start_gate = fdsu_ex1_inst_vld && ctrl_srt_idle;
+
+assign srt_last_round = (srt_skip ||
+                         srt_remainder_zero ||
+                         srt_cnt_zero) &&
+                        ctrl_srt_itering; // only meaningful while in ITER
+assign srt_skip = ex2_of ||
+                  ex2_uf_srt_skip;
+assign srt_cnt_zero = ~|srt_cnt[4:0];
+assign fdsu_dn_stall = ctrl_sm_start && ex1_op1_id_vld; // sends a start through WFI2 before ITER (see next-state logic)
+
+parameter IDLE = 3'b000; // reset and flush state
+parameter WFI2 = 3'b001; // reached when a start coincides with fdsu_dn_stall; always advances to ITER
+parameter ITER = 3'b010; // held until srt_last_round
+parameter RND = 3'b011; // always followed by PACK
+parameter PACK = 3'b100; // leaves on fdsu_wb_grant, otherwise moves to WFWB
+parameter WFWB = 3'b101; // held until fdsu_wb_grant
+
+always @ (posedge fdsu_clk or negedge cpurst_b)
+begin
+  if (!cpurst_b)
+    fdsu_cur_state[2:0] <= IDLE;
+  else if (fdsu_flush)
+    fdsu_cur_state[2:0] <= IDLE;
+  else
+    fdsu_cur_state[2:0] <= fdsu_next_state[2:0];
+end
+
+// &CombBeg; @76
+always @( ctrl_sm_start
+       or fdsu_dn_stall
+       or srt_last_round
+       or fdsu_cur_state[2:0]
+       or fdsu_wb_grant)
+begin
+case (fdsu_cur_state[2:0])
+  IDLE:
+  begin
+    if (ctrl_sm_start)
+      if (fdsu_dn_stall)
+        fdsu_next_state[2:0] = WFI2;
+      else
+        fdsu_next_state[2:0] = ITER;
+    else
+      fdsu_next_state[2:0] = IDLE;
+  end
+  WFI2:
+    fdsu_next_state[2:0] = ITER;
+  ITER:
+  begin
+    if (srt_last_round)
+      fdsu_next_state[2:0] = RND;
+    else
+      fdsu_next_state[2:0] = ITER;
+  end
+  RND:
+    fdsu_next_state[2:0] = PACK;
+  PACK:
+  begin
+    if (fdsu_wb_grant) // on grant a new start is accepted directly, without passing through IDLE
+      if (ctrl_sm_start)
+        if (fdsu_dn_stall)
+          fdsu_next_state[2:0] = WFI2;
+        else
+          fdsu_next_state[2:0] = ITER;
+      else
+        fdsu_next_state[2:0] = IDLE;
+    else
+      fdsu_next_state[2:0] = WFWB;
+  end
+  WFWB:
+  begin
+    if (fdsu_wb_grant) // same exits as PACK once the grant arrives
+      if (ctrl_sm_start)
+        if (fdsu_dn_stall)
+          fdsu_next_state[2:0] = WFI2;
+        else
+          fdsu_next_state[2:0] = ITER;
+      else
+        fdsu_next_state[2:0] = IDLE;
+    else
+      fdsu_next_state[2:0] = WFWB;
+  end
+  default:
+    fdsu_next_state[2:0] = IDLE;
+endcase
+// &CombEnd; @128
+end
+
+assign ctrl_sm_idle = fdsu_cur_state[2:0] == IDLE;
+assign ctrl_wfi2 = fdsu_cur_state[2:0] == WFI2;
+assign ctrl_srt_itering = fdsu_cur_state[2:0] == ITER;
+assign ctrl_round = fdsu_cur_state[2:0] == RND;
+assign ctrl_pack = fdsu_cur_state[2:0] == PACK;
+assign ctrl_wfwb = fdsu_cur_state[2:0] == WFWB;
+
+assign ctrl_sm_cmplt = ctrl_pack || ctrl_wfwb;
+assign ctrl_srt_idle = ctrl_sm_idle
+                    || fdsu_wb_grant; // NOTE(review): not qualified by state, so a grant in any state makes this true - confirm the grant is only raised in PACK/WFWB
+assign ctrl_sm_ex1 = ctrl_srt_idle || ctrl_wfi2;
+
+//==========================================================
+// Iteration Counter
+//==========================================================
+always @ (posedge fdsu_clk) // no reset term: srt_cnt is only cleared synchronously by fdsu_flush
+begin
+  if (fdsu_flush)
+    srt_cnt[4:0] <= 5'b0;
+  else if (ctrl_iter_start)
+    srt_cnt[4:0] <= srt_cnt_ini[4:0];
+  else if (ctrl_srt_itering)
+    srt_cnt[4:0] <= srt_cnt[4:0] - 5'b1;
+  else
+    srt_cnt[4:0] <= srt_cnt[4:0];
+end
+
+//srt_cnt_ini[4:0]
+//For Double, initial is 5'b11100('d28), calculate 29 round
+//For Single, initial is 5'b01110('d14), calculate 15 round
+assign srt_cnt_ini[4:0] = 5'b01110; // hard-wired to the "Single" value listed above
+
+//fdsu srt first round signal
+//For srt calculate special use
+always @(posedge fdsu_clk or negedge cpurst_b) // registered copy of ex1_pipedown, forced low by reset or flush
+begin
+  if(!cpurst_b)
+    ex2_srt_first_round <= 1'b0;
+  else if(fdsu_flush)
+    ex2_srt_first_round <= 1'b0;
+  else if(ex1_pipedown)
+    ex2_srt_first_round <= 1'b1;
+  else
+    ex2_srt_first_round <= 1'b0;
+end
+
+//==========================================================
+// Write Back State Machine
+//==========================================================
+parameter WB_IDLE = 2'b00, // reset and flush state
+          WB_EX2 = 2'b10, // entered when an EX1 instruction is taken without stall, skip or dn_stall
+          WB_CMPLT = 2'b01; // held until ctrl_fdsu_wb_vld
+
+always @ (posedge fdsu_clk or negedge cpurst_b)
+begin
+  if (!cpurst_b)
+    wb_cur_state[1:0] <= WB_IDLE;
+  else if (fdsu_flush)
+    wb_cur_state[1:0] <= WB_IDLE;
+  else
+    wb_cur_state[1:0] <= wb_nxt_state[1:0];
+end
+
+// &CombBeg; @215
+always @( ctrl_fdsu_wb_vld
+       or fdsu_dn_stall
+       or ctrl_xx_ex1_stall
+       or fdsu_ex1_inst_vld
+       or ctrl_iter_start
+       or fdsu_ex1_res_vld
+       or wb_cur_state[1:0])
+begin
+  case(wb_cur_state[1:0])
+    WB_IDLE:
+      if (fdsu_ex1_inst_vld)
+        if (ctrl_xx_ex1_stall || fdsu_ex1_res_vld || fdsu_dn_stall)
+          wb_nxt_state[1:0] = WB_IDLE;
+        else
+          wb_nxt_state[1:0] = WB_EX2;
+      else
+        wb_nxt_state[1:0] = WB_IDLE;
+    WB_EX2:
+      // if (ctrl_xx_ex2_stall)
+      //   wb_nxt_state[1:0] = WB_EX2;
+      // else
+      if (ctrl_fdsu_wb_vld)
+        if (ctrl_iter_start && !ctrl_xx_ex1_stall) // a new iteration starts in the same cycle as the write-back
+          wb_nxt_state[1:0] = WB_EX2;
+        else
+          wb_nxt_state[1:0] = WB_IDLE;
+      else
+        wb_nxt_state[1:0] = WB_CMPLT;
+    WB_CMPLT:
+      if (ctrl_fdsu_wb_vld)
+        if (ctrl_iter_start && !ctrl_xx_ex1_stall)
+          wb_nxt_state[1:0] = WB_EX2;
+        else
+          wb_nxt_state[1:0] = WB_IDLE;
+      else
+        wb_nxt_state[1:0] = WB_CMPLT;
+    default:
+      wb_nxt_state[1:0] = WB_IDLE;
+  endcase
+// &CombEnd; @247
+end
+
+assign ctrl_wb_idle = wb_cur_state[1:0] == WB_IDLE
+                   || wb_cur_state[1:0] == WB_CMPLT && ctrl_fdsu_wb_vld; // also counts as idle in the cycle WB_CMPLT is being written back
+assign ctrl_wb_sm_idle = wb_cur_state[1:0] == WB_IDLE;
+assign ctrl_wb_sm_ex2 = wb_cur_state[1:0] == WB_EX2;
+assign ctrl_wb_sm_cmplt = wb_cur_state[1:0] == WB_EX2 // despite the name, true in WB_EX2 as well as WB_CMPLT
+                       || wb_cur_state[1:0] == WB_CMPLT;
+
+assign ctrl_result_vld = ctrl_sm_cmplt && ctrl_wb_sm_cmplt; // main SM in PACK/WFWB and WB SM in WB_EX2/WB_CMPLT
+assign ctrl_fdsu_wb_vld = ctrl_result_vld && frbus_fdsu_wb_grant;
+
+assign ctrl_fdsu_ex1_stall = fdsu_ex1_inst_vld && !ctrl_sm_ex1 && !ctrl_wb_idle
+                          || fdsu_ex1_inst_vld && fdsu_dn_stall;
+
+//==========================================================
+// Flops
+//==========================================================
+always @(posedge ex1_pipe_clk) // captured on ex1_pipedown, held otherwise; no reset
+begin
+  if(ex1_pipedown)
+  begin
+    fdsu_wb_freg[4:0] <= ex1_wb_freg[4:0];
+    fdsu_result_sign <= ex1_result_sign;
+    fdsu_of_rm_lfn <= ex1_of_result_lfn;
+    fdsu_div <= ex1_div;
+    fdsu_sqrt <= ex1_sqrt;
+    fdsu_rm[2:0] <= ex1_rm[2:0];
+  end
+  else
+  begin
+    fdsu_wb_freg[4:0] <= fdsu_wb_freg[4:0];
+    fdsu_result_sign <= fdsu_result_sign;
+    fdsu_of_rm_lfn <= fdsu_of_rm_lfn;
+    fdsu_div <= fdsu_div;
+    fdsu_sqrt <= fdsu_sqrt;
+    fdsu_rm[2:0] <= fdsu_rm[2:0];
+  end
+end
+
+// In 906 FDSU, if one op0/1 is not norm, it will not enter EX2.
+assign fdsu_op0_norm = 1'b1; // constant; the ex1_op0_norm input is not used
+assign fdsu_op1_norm = 1'b1; // constant; the ex1_op1_norm input is not used
+// &Force("input", "ex1_op0_norm"); @337
+// &Force("input", "ex1_op1_norm"); @338
+
+// fdsu_expnt_rst is used to save:
+// 1. op0 denormal expnt;
+// 2. op0 expnt;
+// 3. result expnt.
+// &Force("bus", "ex1_oper_id_expnt", 12, 0); @378
+// &Force("bus", "ex1_expnt_adder_op0", 12, 0); @379
+
+
+always @ (posedge expnt_rst_clk) // priority: ex1_save_op0 > ex1_pipedown > ex2_pipedown > ex3_pipedown; no reset
+begin
+  if (ex1_save_op0)
+    fdsu_expnt_rst[9:0] <= ex1_oper_id_expnt[9:0]; // only the low 10 of 13 bits are stored
+  else if (ex1_pipedown)
+    fdsu_expnt_rst[9:0] <= ex1_expnt_adder_op0[9:0]; // only the low 10 of 13 bits are stored
+  else if (ex2_pipedown)
+    fdsu_expnt_rst[9:0] <= ex2_srt_expnt_rst[9:0];
+  else if (ex3_pipedown)
+    fdsu_expnt_rst[9:0] <= ex3_expnt_adjust_result[9:0];
+  else
+    fdsu_expnt_rst[9:0] <= fdsu_expnt_rst[9:0];
+end
+
+assign ex1_oper_id_expnt_f[12:0] = {3'b1, fdsu_expnt_rst[9:0]}; // NOTE(review): 3'b1 is 3'b001, so bits [12:10] are always 001 - confirm this upper-bit fill is intended
+
+always @ (posedge expnt_rst_clk)
+begin
+  if (ex2_pipedown)
+    fdsu_yy_rslt_denorm <= ex2_rslt_denorm;
+  else if (ex3_pipedown)
+    fdsu_yy_rslt_denorm <= ex3_rslt_denorm;
+  else
+    fdsu_yy_rslt_denorm <= fdsu_yy_rslt_denorm;
+end
+// &Force("output", "fdsu_yy_rslt_denorm"); @440
+
+// EX2 signal used in EX3 & EX4
+always @ (posedge ex2_pipe_clk) // captured on ex2_pipedown, held otherwise; no reset
+begin
+  if (ex2_pipedown)
+  begin
+    fdsu_result_inf <= ex2_result_inf;
+    fdsu_result_lfn <= ex2_result_lfn;
+    fdsu_of <= ex2_of;
+    fdsu_uf <= ex2_uf;
+    fdsu_potnt_of <= ex2_potnt_of;
+    fdsu_potnt_uf <= ex2_potnt_uf;
+  end
+  else
+  begin
+    fdsu_result_inf <= fdsu_result_inf;
+    fdsu_result_lfn <= fdsu_result_lfn;
+    fdsu_of <= fdsu_of;
+    fdsu_uf <= fdsu_uf;
+    fdsu_potnt_of <= fdsu_potnt_of;
+    fdsu_potnt_uf <= fdsu_potnt_uf;
+  end
+end
+
+//==========================================================
+// Flush
+//==========================================================
+assign fdsu_flush = rtu_xx_ex1_cancel && ctrl_wb_idle // rtu_yy_xx_flush is not a term here; it only feeds fdsu_clk_en below
+                 || rtu_xx_ex2_cancel && ctrl_wb_sm_ex2
+                 || ctrl_xx_ex1_warm_up
+                 || rtu_yy_xx_async_flush;
+
+//==========================================================
+// ICG
+//==========================================================
+assign fdsu_busy = fdsu_ex1_inst_vld
+                || !ctrl_sm_idle
+                || !ctrl_wb_sm_idle;
+assign fdsu_clk_en = fdsu_busy
+                  || !ctrl_sm_idle // already included in fdsu_busy
+                  || rtu_yy_xx_flush;
+// &Instance("gated_clk_cell", "x_fdsu_clk"); @514
+gated_clk_cell x_fdsu_clk ( // clock for both state machines, srt_cnt and ex2_srt_first_round
+  .clk_in (forever_cpuclk ),
+  .clk_out (fdsu_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (fdsu_clk_en ),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+// &Connect(.clk_in (forever_cpuclk), @515
+// .external_en (1'b0), @516
+// .global_en (cp0_yy_clk_en), @517
+// .module_en (cp0_fpu_icg_en), @518
+// .local_en (fdsu_clk_en), @519
+// .clk_out (fdsu_clk)); @520
+
+assign ex1_pipe_clk_en = ex1_pipedown_gate;
+// &Instance("gated_clk_cell","x_ex1_pipe_clk"); @523
+gated_clk_cell x_ex1_pipe_clk ( // clock for the registers captured on ex1_pipedown
+  .clk_in (forever_cpuclk ),
+  .clk_out (ex1_pipe_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (ex1_pipe_clk_en ),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+// &Connect( .clk_in (forever_cpuclk), @524
+// .clk_out (ex1_pipe_clk),//Out Clock @525
+// .external_en (1'b0), @526
+// .global_en (cp0_yy_clk_en), @527
+// .local_en (ex1_pipe_clk_en),//Local Condition @528
+// .module_en (cp0_fpu_icg_en) @529
+// ); @530
+
+assign ex2_pipe_clk_en = ex2_pipedown;
+// &Instance("gated_clk_cell","x_ex2_pipe_clk"); @533
+gated_clk_cell x_ex2_pipe_clk ( // clock for the registers captured on ex2_pipedown; also a module output
+  .clk_in (forever_cpuclk ),
+  .clk_out (ex2_pipe_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (ex2_pipe_clk_en ),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+// &Connect( .clk_in (forever_cpuclk), @534
+// .clk_out (ex2_pipe_clk),//Out Clock @535
+// .external_en (1'b0), @536
+// .global_en (cp0_yy_clk_en), @537
+// .local_en (ex2_pipe_clk_en),//Local Condition @538
+// .module_en (cp0_fpu_icg_en) @539
+// ); @540
+// &Force("output", "ex2_pipe_clk"); @541
+
+assign expnt_rst_clk_en = ex1_save_op0_gate // enabled for every event that can update fdsu_expnt_rst
+                       || ex1_pipedown_gate
+                       || ex2_pipedown
+                       || ex3_pipedown;
+// &Instance("gated_clk_cell", "x_expnt_rst_clk"); @547
+gated_clk_cell x_expnt_rst_clk ( // clock for fdsu_expnt_rst and fdsu_yy_rslt_denorm
+  .clk_in (forever_cpuclk ),
+  .clk_out (expnt_rst_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (expnt_rst_clk_en ),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+// &Connect(.clk_in (forever_cpuclk), @548
+// .external_en (1'b0), @549
+// .global_en (cp0_yy_clk_en), @550
+// .module_en (cp0_fpu_icg_en), @551
+// .local_en (expnt_rst_clk_en), @552
+// .clk_out (expnt_rst_clk)); @553
+
+//==========================================================
+// Output Signal
+//==========================================================
+assign fdsu_yy_wb_freg[4:0] = fdsu_wb_freg[4:0];
+assign fdsu_yy_result_sign = fdsu_result_sign;
+assign fdsu_yy_op0_norm = fdsu_op0_norm;
+assign fdsu_yy_op1_norm = fdsu_op1_norm;
+assign fdsu_yy_of_rm_lfn = fdsu_of_rm_lfn;
+assign fdsu_yy_div = fdsu_div;
+assign fdsu_yy_sqrt = fdsu_sqrt;
+assign fdsu_yy_rm[2:0] = fdsu_rm[2:0];
+
+assign fdsu_yy_expnt_rst[9:0] = fdsu_expnt_rst[9:0];
+assign ex2_expnt_adder_op0[9:0] = fdsu_expnt_rst[9:0]; // same register exported under a second name
+
+assign fdsu_yy_result_inf = fdsu_result_inf;
+assign fdsu_yy_result_lfn = fdsu_result_lfn;
+assign fdsu_yy_of = fdsu_of;
+assign fdsu_yy_uf = fdsu_uf;
+assign fdsu_yy_potnt_of = fdsu_potnt_of;
+assign fdsu_yy_potnt_uf = fdsu_potnt_uf;
+
+assign ex1_pipedown = ctrl_iter_start || ctrl_xx_ex1_warm_up;
+assign ex1_pipedown_gate = ctrl_iter_start_gate || ctrl_xx_ex1_warm_up; // clock-enable variant of ex1_pipedown
+assign ex2_pipedown = ctrl_srt_itering && srt_last_round || ctrl_xx_ex2_warm_up;
+assign ex3_pipedown = ctrl_round || ctrl_xx_ex3_warm_up;
+// &Force("output", "ex1_pipedown"); @589
+// &Force("output", "ex1_pipedown_gate"); @590
+// &Force("output", "ex2_pipedown"); @591
+// &Force("output", "ex3_pipedown"); @592
+
+assign srt_sm_on = ctrl_srt_itering;
+
+assign fdsu_fpu_ex1_cmplt = fdsu_ex1_inst_vld;
+assign fdsu_fpu_ex1_cmplt_dp = ctrl_xx_ex1_cmplt_dp && idu_fpu_ex1_eu_sel[2];
+assign fdsu_fpu_ex1_stall = ctrl_fdsu_ex1_stall;
+assign fdsu_frbus_wb_vld = ctrl_result_vld; // request side of the write-back handshake; the grant returns on frbus_fdsu_wb_grant
+// &Force("bus","idu_fpu_ex1_eu_sel",2,0); @600
+assign fdsu_fpu_no_op = !fdsu_busy;
+assign ex1_op1_sel = ctrl_wfi2;
+assign ex1_save_op0 = ctrl_sm_start && ex1_op0_id && ex1_op1_id_vld;
+assign ex1_save_op0_gate = ctrl_sm_start_gate && ex1_op0_id && ex1_op1_id_vld; // clock-enable variant of ex1_save_op0
+// &Force("output", "ex1_save_op0"); @605
+// &Force("output", "ex1_save_op0_gate"); @606
+
+assign fdsu_fpu_debug_info[4:0] = {wb_cur_state[1:0], fdsu_cur_state[2:0]}; // [4:3] = write-back state, [2:0] = main state
+
+// &ModuleEnd; @610
+endmodule
+
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ff1.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ff1.v
new file mode 100644
index 000000000..b8f2dc56e
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ff1.v
@@ -0,0 +1,163 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fdsu_ff1: find-first-one normalizer. Left-justifies the 52-bit fraction frac_num and reports the shift as a negated count.
+// &ModuleBeg; @23
+module pa_fdsu_ff1(
+  fanc_shift_num,
+  frac_bin_val,
+  frac_num
+);
+// Purely combinational. Port name "fanc_shift_num" (sic) is part of the interface: pa_fdsu_prepare connects to it by name.
+// &Ports; @24
+input [51:0] frac_num; // fraction to normalize
+output [51:0] fanc_shift_num; // frac_num shifted left so its most significant 1 lands in bit 51; all zeros when frac_num is 0
+output [12:0] frac_bin_val; // 13'h2000 - k for k leading zeros (i.e. -k in 13-bit two's complement); 13'h0 for k = 0, 13'h1fcc (-52) for frac_num == 0
+
+// &Regs; @25
+reg [51:0] fanc_shift_num;
+reg [12:0] frac_bin_val;
+
+// &Wires; @26
+wire [51:0] frac_num;
+
+// Leading-zero count table: one casez item per position of the first 1, scanning from bit 51 downwards.
+// &CombBeg; @28
+always @( frac_num[51:0])
+begin
+casez(frac_num[51:0]) // '?' marks don't-care bits below the first 1, so exactly one item matches any 0/1 input
+  52'b1???????????????????????????????????????????????????: frac_bin_val[12:0] = 13'h0; // no leading zero: no adjustment
+  52'b01??????????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1fff; // one leading zero: -1
+  52'b001?????????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ffe;
+  52'b0001????????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ffd;
+  52'b00001???????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ffc;
+  52'b000001??????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ffb;
+  52'b0000001?????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ffa;
+  52'b00000001????????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff9;
+  52'b000000001???????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff8;
+  52'b0000000001??????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff7;
+  52'b00000000001?????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff6;
+  52'b000000000001????????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff5;
+  52'b0000000000001???????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff4;
+  52'b00000000000001??????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff3;
+  52'b000000000000001?????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff2;
+  52'b0000000000000001????????????????????????????????????: frac_bin_val[12:0] = 13'h1ff1;
+  52'b00000000000000001???????????????????????????????????: frac_bin_val[12:0] = 13'h1ff0;
+  52'b000000000000000001??????????????????????????????????: frac_bin_val[12:0] = 13'h1fef;
+  52'b0000000000000000001?????????????????????????????????: frac_bin_val[12:0] = 13'h1fee;
+  52'b00000000000000000001????????????????????????????????: frac_bin_val[12:0] = 13'h1fed;
+  52'b000000000000000000001???????????????????????????????: frac_bin_val[12:0] = 13'h1fec;
+  52'b0000000000000000000001??????????????????????????????: frac_bin_val[12:0] = 13'h1feb;
+  52'b00000000000000000000001?????????????????????????????: frac_bin_val[12:0] = 13'h1fea;
+  52'b000000000000000000000001????????????????????????????: frac_bin_val[12:0] = 13'h1fe9;
+  52'b0000000000000000000000001???????????????????????????: frac_bin_val[12:0] = 13'h1fe8;
+  52'b00000000000000000000000001??????????????????????????: frac_bin_val[12:0] = 13'h1fe7;
+  52'b000000000000000000000000001?????????????????????????: frac_bin_val[12:0] = 13'h1fe6;
+  52'b0000000000000000000000000001????????????????????????: frac_bin_val[12:0] = 13'h1fe5;
+  52'b00000000000000000000000000001???????????????????????: frac_bin_val[12:0] = 13'h1fe4;
+  52'b000000000000000000000000000001??????????????????????: frac_bin_val[12:0] = 13'h1fe3;
+  52'b0000000000000000000000000000001?????????????????????: frac_bin_val[12:0] = 13'h1fe2;
+  52'b00000000000000000000000000000001????????????????????: frac_bin_val[12:0] = 13'h1fe1;
+  52'b000000000000000000000000000000001???????????????????: frac_bin_val[12:0] = 13'h1fe0;
+  52'b0000000000000000000000000000000001??????????????????: frac_bin_val[12:0] = 13'h1fdf;
+  52'b00000000000000000000000000000000001?????????????????: frac_bin_val[12:0] = 13'h1fde;
+  52'b000000000000000000000000000000000001????????????????: frac_bin_val[12:0] = 13'h1fdd;
+  52'b0000000000000000000000000000000000001???????????????: frac_bin_val[12:0] = 13'h1fdc;
+  52'b00000000000000000000000000000000000001??????????????: frac_bin_val[12:0] = 13'h1fdb;
+  52'b000000000000000000000000000000000000001?????????????: frac_bin_val[12:0] = 13'h1fda;
+  52'b0000000000000000000000000000000000000001????????????: frac_bin_val[12:0] = 13'h1fd9;
+  52'b00000000000000000000000000000000000000001???????????: frac_bin_val[12:0] = 13'h1fd8;
+  52'b000000000000000000000000000000000000000001??????????: frac_bin_val[12:0] = 13'h1fd7;
+  52'b0000000000000000000000000000000000000000001?????????: frac_bin_val[12:0] = 13'h1fd6;
+  52'b00000000000000000000000000000000000000000001????????: frac_bin_val[12:0] = 13'h1fd5;
+  52'b000000000000000000000000000000000000000000001???????: frac_bin_val[12:0] = 13'h1fd4;
+  52'b0000000000000000000000000000000000000000000001??????: frac_bin_val[12:0] = 13'h1fd3;
+  52'b00000000000000000000000000000000000000000000001?????: frac_bin_val[12:0] = 13'h1fd2;
+  52'b000000000000000000000000000000000000000000000001????: frac_bin_val[12:0] = 13'h1fd1;
+  52'b0000000000000000000000000000000000000000000000001???: frac_bin_val[12:0] = 13'h1fd0;
+  52'b00000000000000000000000000000000000000000000000001??: frac_bin_val[12:0] = 13'h1fcf;
+  52'b000000000000000000000000000000000000000000000000001?: frac_bin_val[12:0] = 13'h1fce;
+  52'b0000000000000000000000000000000000000000000000000001: frac_bin_val[12:0] = 13'h1fcd; // 51 leading zeros: -51
+  52'b0000000000000000000000000000000000000000000000000000: frac_bin_val[12:0] = 13'h1fcc; // all-zero input: -52
+  default : frac_bin_val[12:0] = 13'h000; // not reachable for 0/1-valued inputs: the 53 items above cover every value
+endcase
+// &CombEnd; @85
+end
+// Normalizing shifter: same item order as the table above; k leading zeros give {frac_num[51-k:0], k'b0}.
+// &CombBeg; @87
+always @( frac_num[51:0])
+begin
+casez(frac_num[51:0]) // the matched item drops the leading zeros and pads the low end with zeros
+  52'b1???????????????????????????????????????????????????: fanc_shift_num[51:0] = frac_num[51:0]; // already left-justified
+  52'b01??????????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[50:0],1'b0};
+  52'b001?????????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[49:0],2'b0};
+  52'b0001????????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[48:0],3'b0};
+  52'b00001???????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[47:0],4'b0};
+  52'b000001??????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[46:0],5'b0};
+  52'b0000001?????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[45:0],6'b0};
+  52'b00000001????????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[44:0],7'b0};
+  52'b000000001???????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[43:0],8'b0};
+  52'b0000000001??????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[42:0],9'b0};
+  52'b00000000001?????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[41:0],10'b0};
+  52'b000000000001????????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[40:0],11'b0};
+  52'b0000000000001???????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[39:0],12'b0};
+  52'b00000000000001??????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[38:0],13'b0};
+  52'b000000000000001?????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[37:0],14'b0};
+  52'b0000000000000001????????????????????????????????????: fanc_shift_num[51:0] = {frac_num[36:0],15'b0};
+  52'b00000000000000001???????????????????????????????????: fanc_shift_num[51:0] = {frac_num[35:0],16'b0};
+  52'b000000000000000001??????????????????????????????????: fanc_shift_num[51:0] = {frac_num[34:0],17'b0};
+  52'b0000000000000000001?????????????????????????????????: fanc_shift_num[51:0] = {frac_num[33:0],18'b0};
+  52'b00000000000000000001????????????????????????????????: fanc_shift_num[51:0] = {frac_num[32:0],19'b0};
+  52'b000000000000000000001???????????????????????????????: fanc_shift_num[51:0] = {frac_num[31:0],20'b0};
+  52'b0000000000000000000001??????????????????????????????: fanc_shift_num[51:0] = {frac_num[30:0],21'b0};
+  52'b00000000000000000000001?????????????????????????????: fanc_shift_num[51:0] = {frac_num[29:0],22'b0};
+  52'b000000000000000000000001????????????????????????????: fanc_shift_num[51:0] = {frac_num[28:0],23'b0};
+  52'b0000000000000000000000001???????????????????????????: fanc_shift_num[51:0] = {frac_num[27:0],24'b0};
+  52'b00000000000000000000000001??????????????????????????: fanc_shift_num[51:0] = {frac_num[26:0],25'b0};
+  52'b000000000000000000000000001?????????????????????????: fanc_shift_num[51:0] = {frac_num[25:0],26'b0};
+  52'b0000000000000000000000000001????????????????????????: fanc_shift_num[51:0] = {frac_num[24:0],27'b0};
+  52'b00000000000000000000000000001???????????????????????: fanc_shift_num[51:0] = {frac_num[23:0],28'b0};
+  52'b000000000000000000000000000001??????????????????????: fanc_shift_num[51:0] = {frac_num[22:0],29'b0};
+  52'b0000000000000000000000000000001?????????????????????: fanc_shift_num[51:0] = {frac_num[21:0],30'b0};
+  52'b00000000000000000000000000000001????????????????????: fanc_shift_num[51:0] = {frac_num[20:0],31'b0};
+  52'b000000000000000000000000000000001???????????????????: fanc_shift_num[51:0] = {frac_num[19:0],32'b0};
+  52'b0000000000000000000000000000000001??????????????????: fanc_shift_num[51:0] = {frac_num[18:0],33'b0};
+  52'b00000000000000000000000000000000001?????????????????: fanc_shift_num[51:0] = {frac_num[17:0],34'b0};
+  52'b000000000000000000000000000000000001????????????????: fanc_shift_num[51:0] = {frac_num[16:0],35'b0};
+  52'b0000000000000000000000000000000000001???????????????: fanc_shift_num[51:0] = {frac_num[15:0],36'b0};
+  52'b00000000000000000000000000000000000001??????????????: fanc_shift_num[51:0] = {frac_num[14:0],37'b0};
+  52'b000000000000000000000000000000000000001?????????????: fanc_shift_num[51:0] = {frac_num[13:0],38'b0};
+  52'b0000000000000000000000000000000000000001????????????: fanc_shift_num[51:0] = {frac_num[12:0],39'b0};
+  52'b00000000000000000000000000000000000000001???????????: fanc_shift_num[51:0] = {frac_num[11:0],40'b0};
+  52'b000000000000000000000000000000000000000001??????????: fanc_shift_num[51:0] = {frac_num[10:0],41'b0};
+  52'b0000000000000000000000000000000000000000001?????????: fanc_shift_num[51:0] = {frac_num[9:0],42'b0};
+  52'b00000000000000000000000000000000000000000001????????: fanc_shift_num[51:0] = {frac_num[8:0],43'b0};
+  52'b000000000000000000000000000000000000000000001???????: fanc_shift_num[51:0] = {frac_num[7:0],44'b0};
+  52'b0000000000000000000000000000000000000000000001??????: fanc_shift_num[51:0] = {frac_num[6:0],45'b0};
+  52'b00000000000000000000000000000000000000000000001?????: fanc_shift_num[51:0] = {frac_num[5:0],46'b0};
+  52'b000000000000000000000000000000000000000000000001????: fanc_shift_num[51:0] = {frac_num[4:0],47'b0};
+  52'b0000000000000000000000000000000000000000000000001???: fanc_shift_num[51:0] = {frac_num[3:0],48'b0};
+  52'b00000000000000000000000000000000000000000000000001??: fanc_shift_num[51:0] = {frac_num[2:0],49'b0};
+  52'b000000000000000000000000000000000000000000000000001?: fanc_shift_num[51:0] = {frac_num[1:0],50'b0};
+  52'b0000000000000000000000000000000000000000000000000001: fanc_shift_num[51:0] = {frac_num[0:0],51'b0};
+  52'b0000000000000000000000000000000000000000000000000000: fanc_shift_num[51:0] = {52'b0}; // nothing to normalize
+  default : fanc_shift_num[51:0] = {52'b0}; // not reachable for 0/1-valued inputs
+endcase
+// &CombEnd; @144
+end
+
+// &ModuleEnd; @146
+endmodule
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v
new file mode 100644
index 000000000..87139a253
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v
@@ -0,0 +1,275 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fdsu_pack_single: EX4 packer. Picks the denormal, infinity, largest-finite or normal 32-bit encoding and builds the 5-bit flag vector.
+// &ModuleBeg; @23
+module pa_fdsu_pack_single(
+  fdsu_ex4_denorm_to_tiny_frac,
+  fdsu_ex4_frac,
+  fdsu_ex4_nx,
+  fdsu_ex4_potnt_norm,
+  fdsu_ex4_result_nor,
+  fdsu_frbus_data,
+  fdsu_frbus_fflags,
+  fdsu_frbus_freg,
+  fdsu_yy_expnt_rst,
+  fdsu_yy_of,
+  fdsu_yy_of_rm_lfn,
+  fdsu_yy_potnt_of,
+  fdsu_yy_potnt_uf,
+  fdsu_yy_result_inf,
+  fdsu_yy_result_lfn,
+  fdsu_yy_result_sign,
+  fdsu_yy_rslt_denorm,
+  fdsu_yy_uf,
+  fdsu_yy_wb_freg
+);
+// Purely combinational: the module has no clock or reset port.
+// &Ports; @24
+input fdsu_ex4_denorm_to_tiny_frac; // selects 23'b1 instead of 23'b0 for exponents outside the denormal shift table
+input [25:0] fdsu_ex4_frac; // fraction; bits [25:24] choose the exponent correction and the 23-bit slice that is packed
+input fdsu_ex4_nx; // inexact indication; also gates the underflow flag
+input [1 :0] fdsu_ex4_potnt_norm; // bit 1 pairs with frac[24], bit 0 with frac[25], to detect a denormal that became normal
+input fdsu_ex4_result_nor; // qualifies the of_plus / uf_plus corrections
+input [9 :0] fdsu_yy_expnt_rst; // result exponent before the frac[25:24] correction
+input fdsu_yy_of;
+input fdsu_yy_of_rm_lfn; // on a late overflow: 1 selects the largest finite number, 0 selects infinity
+input fdsu_yy_potnt_of;
+input fdsu_yy_potnt_uf;
+input fdsu_yy_result_inf;
+input fdsu_yy_result_lfn;
+input fdsu_yy_result_sign;
+input fdsu_yy_rslt_denorm;
+input fdsu_yy_uf;
+input [4 :0] fdsu_yy_wb_freg; // forwarded unchanged to fdsu_frbus_freg
+output [31:0] fdsu_frbus_data; // packed result {sign, 8-bit exponent, 23-bit fraction}
+output [4 :0] fdsu_frbus_fflags; // {nv, dz, of, uf, nx}; nv and dz are tied to 0 in this module
+output [4 :0] fdsu_frbus_freg;
+
+// &Regs; @25
+reg [22:0] ex4_frac_23;
+reg [31:0] ex4_result;
+reg [22:0] ex4_single_denorm_frac;
+reg [9 :0] expnt_add_op1;
+
+// &Wires; @26
+wire ex4_cor_nx;
+wire ex4_cor_uf;
+wire ex4_denorm_potnt_norm;
+wire [31:0] ex4_denorm_result;
+wire [9 :0] ex4_expnt_rst;
+wire [4 :0] ex4_expt;
+wire ex4_final_rst_norm;
+wire [25:0] ex4_frac;
+wire ex4_of_plus;
+wire ex4_result_inf;
+wire ex4_result_lfn;
+wire ex4_rslt_denorm;
+wire [31:0] ex4_rst_inf;
+wire [31:0] ex4_rst_lfn;
+wire ex4_rst_nor;
+wire [31:0] ex4_rst_norm;
+wire ex4_uf_plus;
+wire fdsu_ex4_denorm_to_tiny_frac;
+wire fdsu_ex4_dz;
+wire [9 :0] fdsu_ex4_expnt_rst;
+wire [25:0] fdsu_ex4_frac;
+wire fdsu_ex4_nv;
+wire fdsu_ex4_nx;
+wire fdsu_ex4_of;
+wire fdsu_ex4_of_rst_lfn;
+wire [1 :0] fdsu_ex4_potnt_norm;
+wire fdsu_ex4_potnt_of;
+wire fdsu_ex4_potnt_uf;
+wire fdsu_ex4_result_inf;
+wire fdsu_ex4_result_lfn;
+wire fdsu_ex4_result_nor;
+wire fdsu_ex4_result_sign;
+wire fdsu_ex4_rslt_denorm;
+wire fdsu_ex4_uf;
+wire [31:0] fdsu_frbus_data;
+wire [4 :0] fdsu_frbus_fflags;
+wire [4 :0] fdsu_frbus_freg;
+wire [9 :0] fdsu_yy_expnt_rst;
+wire fdsu_yy_of;
+wire fdsu_yy_of_rm_lfn;
+wire fdsu_yy_potnt_of;
+wire fdsu_yy_potnt_uf;
+wire fdsu_yy_result_inf;
+wire fdsu_yy_result_lfn;
+wire fdsu_yy_result_sign;
+wire fdsu_yy_rslt_denorm;
+wire fdsu_yy_uf;
+wire [4 :0] fdsu_yy_wb_freg;
+
+// Rename the shared fdsu_yy_* inputs to stage-local fdsu_ex4_* names; the nv and dz flags are constant 0 here.
+assign fdsu_ex4_result_sign = fdsu_yy_result_sign;
+assign fdsu_ex4_of_rst_lfn = fdsu_yy_of_rm_lfn;
+assign fdsu_ex4_result_inf = fdsu_yy_result_inf;
+assign fdsu_ex4_result_lfn = fdsu_yy_result_lfn;
+assign fdsu_ex4_of = fdsu_yy_of;
+assign fdsu_ex4_uf = fdsu_yy_uf;
+assign fdsu_ex4_potnt_of = fdsu_yy_potnt_of;
+assign fdsu_ex4_potnt_uf = fdsu_yy_potnt_uf;
+assign fdsu_ex4_nv = 1'b0;
+assign fdsu_ex4_dz = 1'b0;
+assign fdsu_ex4_expnt_rst[9:0] = fdsu_yy_expnt_rst[9:0];
+assign fdsu_ex4_rslt_denorm = fdsu_yy_rslt_denorm;
+//============================EX4 STAGE=====================
+assign ex4_frac[25:0] = fdsu_ex4_frac[25:0];
+//exponent adder: the correction depends on where the leading 1 of ex4_frac sits (bit 25, bit 24, or below)
+// &CombBeg; @43
+always @( ex4_frac[25:24])
+begin
+casez(ex4_frac[25:24])
+  2'b00 : expnt_add_op1[9:0] = 10'h1ff; //the expnt sub 1 (10'h1ff is -1 only modulo 2^9; enough because only ex4_expnt_rst[7:0] is used below)
+  2'b01 : expnt_add_op1[9:0] = 10'h0; //the expnt stays at the original value
+  2'b1? : expnt_add_op1[9:0] = 10'h1; // the expnt add 1
+  default : expnt_add_op1[9:0] = 10'b0; // not reachable for 0/1-valued inputs
+endcase
+// &CombEnd; @50
+end
+assign ex4_expnt_rst[9:0] = fdsu_ex4_expnt_rst[9:0] +
+                            expnt_add_op1[9:0];
+
+//==========================Result Pack=====================
+
+// result denormal pack
+// shift to the denormal number: the lower the exponent, the further right ex4_frac is shifted (zero-filled from the top)
+// &CombBeg; @58
+always @( fdsu_ex4_expnt_rst[9:0]
+       or fdsu_ex4_denorm_to_tiny_frac
+       or ex4_frac[25:1])
+begin
+case(fdsu_ex4_expnt_rst[9:0]) // trailing comments: legacy label, then the case value read as a signed 10-bit number
+  10'h1: ex4_single_denorm_frac[22:0] = { ex4_frac[23:1]}; //-1022 1
+  10'h0: ex4_single_denorm_frac[22:0] = { ex4_frac[24:2]}; //-1023 0
+  10'h3ff:ex4_single_denorm_frac[22:0] = { ex4_frac[25:3]}; //-1024 -1
+  10'h3fe:ex4_single_denorm_frac[22:0] = {1'b0, ex4_frac[25:4]}; //-1025 -2
+  10'h3fd:ex4_single_denorm_frac[22:0] = {2'b0, ex4_frac[25:5]}; //-1026 -3
+  10'h3fc:ex4_single_denorm_frac[22:0] = {3'b0, ex4_frac[25:6]}; //-1027 -4
+  10'h3fb:ex4_single_denorm_frac[22:0] = {4'b0, ex4_frac[25:7]}; //-1028 -5
+  10'h3fa:ex4_single_denorm_frac[22:0] = {5'b0, ex4_frac[25:8]}; //-1029 -6
+  10'h3f9:ex4_single_denorm_frac[22:0] = {6'b0, ex4_frac[25:9]}; //-1030 -7
+  10'h3f8:ex4_single_denorm_frac[22:0] = {7'b0, ex4_frac[25:10]}; //-1031 -8
+  10'h3f7:ex4_single_denorm_frac[22:0] = {8'b0, ex4_frac[25:11]}; //-1032 -9
+  10'h3f6:ex4_single_denorm_frac[22:0] = {9'b0, ex4_frac[25:12]}; //-1033 -10
+  10'h3f5:ex4_single_denorm_frac[22:0] = {10'b0,ex4_frac[25:13]}; //-1034 -11
+  10'h3f4:ex4_single_denorm_frac[22:0] = {11'b0,ex4_frac[25:14]}; //-1035 -12
+  10'h3f3:ex4_single_denorm_frac[22:0] = {12'b0,ex4_frac[25:15]}; //-1036 -13
+  10'h3f2:ex4_single_denorm_frac[22:0] = {13'b0,ex4_frac[25:16]}; //-1037 -14
+  10'h3f1:ex4_single_denorm_frac[22:0] = {14'b0,ex4_frac[25:17]}; //-1038 -15
+  10'h3f0:ex4_single_denorm_frac[22:0] = {15'b0,ex4_frac[25:18]}; //-1039 -16
+  10'h3ef:ex4_single_denorm_frac[22:0] = {16'b0,ex4_frac[25:19]}; //-1040 -17
+  10'h3ee:ex4_single_denorm_frac[22:0] = {17'b0,ex4_frac[25:20]}; //-1041 -18
+  10'h3ed:ex4_single_denorm_frac[22:0] = {18'b0,ex4_frac[25:21]}; //-1042 -19
+  10'h3ec:ex4_single_denorm_frac[22:0] = {19'b0,ex4_frac[25:22]}; //-1043 -20
+  10'h3eb:ex4_single_denorm_frac[22:0] = {20'b0,ex4_frac[25:23]}; //-1044 -21
+  10'h3ea:ex4_single_denorm_frac[22:0] = {21'b0,ex4_frac[25:24]}; //-1045 -22
+  default :ex4_single_denorm_frac[22:0] = fdsu_ex4_denorm_to_tiny_frac ? 23'b1 : 23'b0; // every other exponent value: smallest denormal or zero
+endcase
+// &CombEnd; @86
+end
+//here when denormal number round to add1, it will become normal number
+assign ex4_denorm_potnt_norm = (fdsu_ex4_potnt_norm[1] && ex4_frac[24]) ||
+                               (fdsu_ex4_potnt_norm[0] && ex4_frac[25]) ;
+assign ex4_rslt_denorm = fdsu_ex4_rslt_denorm && !ex4_denorm_potnt_norm;
+assign ex4_denorm_result[31:0] = {fdsu_ex4_result_sign,
+                                  8'h0,ex4_single_denorm_frac[22:0]};
+
+// of_plus / uf_plus: a potential overflow / underflow is confirmed by ex4_frac[25:24] being non-zero / zero.
+//ex4 overflow/underflow plus
+assign ex4_rst_nor = fdsu_ex4_result_nor;
+assign ex4_of_plus = fdsu_ex4_potnt_of &&
+                     (|ex4_frac[25:24]) &&
+                     ex4_rst_nor;
+assign ex4_uf_plus = fdsu_ex4_potnt_uf &&
+                     (~|ex4_frac[25:24]) &&
+                     ex4_rst_nor;
+//ex4 overflow round result
+assign ex4_result_lfn = (ex4_of_plus && fdsu_ex4_of_rst_lfn) ||
+                        fdsu_ex4_result_lfn;
+assign ex4_result_inf = (ex4_of_plus && !fdsu_ex4_of_rst_lfn) ||
+                        fdsu_ex4_result_inf;
+//Special Result Form
+// result largest finite number
+assign ex4_rst_lfn[31:0] = {fdsu_ex4_result_sign,8'hfe,{23{1'b1}}};
+//result infinity
+assign ex4_rst_inf[31:0] = {fdsu_ex4_result_sign,8'hff,23'b0};
+//result normal: take the 23 bits directly below the leading 1 of ex4_frac
+// &CombBeg; @114
+always @( ex4_frac[25:0])
+begin
+casez(ex4_frac[25:24])
+  2'b00 : ex4_frac_23[22:0] = ex4_frac[22:0];
+  2'b01 : ex4_frac_23[22:0] = ex4_frac[23:1];
+  2'b1? : ex4_frac_23[22:0] = ex4_frac[24:2];
+  default : ex4_frac_23[22:0] = 23'b0; // not reachable for 0/1-valued inputs
+endcase
+// &CombEnd; @121
+end
+assign ex4_rst_norm[31:0] = {fdsu_ex4_result_sign,
+                             ex4_expnt_rst[7:0],
+                             ex4_frac_23[22:0]};
+assign ex4_cor_uf = (fdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus)
+                    && fdsu_ex4_nx; // underflow is reported only when the result is also inexact
+assign ex4_cor_nx = fdsu_ex4_nx
+                 || fdsu_ex4_of
+                 || ex4_of_plus; // any overflow also sets inexact
+// Flag vector, MSB first: {nv, dz, of, uf, nx}.
+assign ex4_expt[4:0] = {
+                        fdsu_ex4_nv,
+                        fdsu_ex4_dz,
+                        fdsu_ex4_of | ex4_of_plus,
+                        ex4_cor_uf,
+                        ex4_cor_nx};
+// Final select: the four conditions are decoded as a one-hot code; any other combination yields 32'b0.
+assign ex4_final_rst_norm = !ex4_result_inf &&
+                            !ex4_result_lfn &&
+                            !ex4_rslt_denorm;
+// &CombBeg; @141
+always @( ex4_denorm_result[31:0]
+       or ex4_result_lfn
+       or ex4_result_inf
+       or ex4_final_rst_norm
+       or ex4_rst_norm[31:0]
+       or ex4_rst_lfn[31:0]
+       or ex4_rst_inf[31:0]
+       or ex4_rslt_denorm)
+begin
+case({ex4_rslt_denorm,
+      ex4_result_inf,
+      ex4_result_lfn,
+      ex4_final_rst_norm})
+  4'b1000 : ex4_result[31:0] = ex4_denorm_result[31:0];
+  4'b0100 : ex4_result[31:0] = ex4_rst_inf[31:0];
+  4'b0010 : ex4_result[31:0] = ex4_rst_lfn[31:0];
+  4'b0001 : ex4_result[31:0] = ex4_rst_norm[31:0];
+  default : ex4_result[31:0] = 32'b0; // taken when more than one of denorm / inf / lfn is set at once
+endcase
+// &CombEnd; @152
+end
+
+//==========================================================
+//                     Result Generate
+//==========================================================
+assign fdsu_frbus_freg[4:0] = fdsu_yy_wb_freg[4:0];
+assign fdsu_frbus_data[31:0] = ex4_result[31:0];
+assign fdsu_frbus_fflags[4:0] = ex4_expt[4:0];
+
+// &ModuleEnd; @161
+endmodule
+
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_prepare.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_prepare.v
new file mode 100644
index 000000000..f7bc5d2ae
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_prepare.v
@@ -0,0 +1,286 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fdsu_prepare: EX1 operand preparation for div/sqrt. Derives sign, exponent-adder operands, initial remainder and divisor from the two sources.
+// &ModuleBeg; @23
+module pa_fdsu_prepare(
+  dp_xx_ex1_rm,
+  ex1_div,
+  ex1_divisor,
+  ex1_expnt_adder_op0,
+  ex1_expnt_adder_op1,
+  ex1_of_result_lfn,
+  ex1_op0_id,
+  ex1_op0_sign,
+  ex1_op1_id,
+  ex1_op1_id_vld,
+  ex1_op1_sel,
+  ex1_oper_id_expnt,
+  ex1_oper_id_expnt_f,
+  ex1_oper_id_frac,
+  ex1_oper_id_frac_f,
+  ex1_remainder,
+  ex1_result_sign,
+  ex1_rm,
+  ex1_sqrt,
+  fdsu_ex1_sel,
+  idu_fpu_ex1_func,
+  idu_fpu_ex1_srcf0,
+  idu_fpu_ex1_srcf1
+);
+// Purely combinational: the module has no clock or reset port.
+// &Ports; @24
+input [2 :0] dp_xx_ex1_rm; // rounding mode, forwarded unchanged as ex1_rm
+input ex1_op0_id; // selects the ff1-normalized exponent/fraction for operand 0 (presumably "operand 0 is denormal" -- confirm at the driver)
+input ex1_op1_id; // same, for operand 1
+input ex1_op1_sel; // 1: ff1 processes operand 1's fraction and operand 0 uses the *_f inputs; 0: ff1 processes operand 0
+input [12:0] ex1_oper_id_expnt_f; // operand-0 ff1 exponent used while ex1_op1_sel is 1 (presumably a saved copy -- confirm at the driver)
+input [51:0] ex1_oper_id_frac_f; // operand-0 ff1 fraction used while ex1_op1_sel is 1
+input fdsu_ex1_sel; // gates both source operands; they read as zero when this is 0
+input [9 :0] idu_fpu_ex1_func; // bit 0 = sqrt, bit 1 = div; other bits are not used in this module
+input [31:0] idu_fpu_ex1_srcf0;
+input [31:0] idu_fpu_ex1_srcf1;
+output ex1_div;
+output [23:0] ex1_divisor;
+output [12:0] ex1_expnt_adder_op0;
+output [12:0] ex1_expnt_adder_op1;
+output ex1_of_result_lfn;
+output ex1_op0_sign;
+output ex1_op1_id_vld;
+output [12:0] ex1_oper_id_expnt;
+output [51:0] ex1_oper_id_frac;
+output [31:0] ex1_remainder;
+output ex1_result_sign;
+output [2 :0] ex1_rm;
+output ex1_sqrt;
+
+// &Regs; @25
+reg [12:0] ex1_expnt_adder_op1;
+reg ex1_of_result_lfn;
+
+// &Wires; @26
+wire div_sign;
+wire [2 :0] dp_xx_ex1_rm;
+wire ex1_div;
+wire [52:0] ex1_div_noid_nor_srt_op0;
+wire [52:0] ex1_div_noid_nor_srt_op1;
+wire [52:0] ex1_div_nor_srt_op0;
+wire [52:0] ex1_div_nor_srt_op1;
+wire [12:0] ex1_div_op0_expnt;
+wire [12:0] ex1_div_op1_expnt;
+wire [52:0] ex1_div_srt_op0;
+wire [52:0] ex1_div_srt_op1;
+wire [23:0] ex1_divisor;
+wire ex1_double;
+wire [12:0] ex1_expnt_adder_op0;
+wire ex1_op0_id;
+wire ex1_op0_id_nor;
+wire ex1_op0_sign;
+wire ex1_op1_id;
+wire ex1_op1_id_nor;
+wire ex1_op1_id_vld;
+wire ex1_op1_sel;
+wire ex1_op1_sign;
+wire [63:0] ex1_oper0;
+wire [51:0] ex1_oper0_frac;
+wire [12:0] ex1_oper0_id_expnt;
+wire [51:0] ex1_oper0_id_frac;
+wire [63:0] ex1_oper1;
+wire [51:0] ex1_oper1_frac;
+wire [12:0] ex1_oper1_id_expnt;
+wire [51:0] ex1_oper1_id_frac;
+wire [51:0] ex1_oper_frac;
+wire [12:0] ex1_oper_id_expnt;
+wire [12:0] ex1_oper_id_expnt_f;
+wire [51:0] ex1_oper_id_frac;
+wire [51:0] ex1_oper_id_frac_f;
+wire [31:0] ex1_remainder;
+wire ex1_result_sign;
+wire [2 :0] ex1_rm;
+wire ex1_single;
+wire ex1_sqrt;
+wire ex1_sqrt_expnt_odd;
+wire ex1_sqrt_op0_expnt_0;
+wire [12:0] ex1_sqrt_op1_expnt;
+wire [52:0] ex1_sqrt_srt_op0;
+wire fdsu_ex1_sel;
+wire [9 :0] idu_fpu_ex1_func;
+wire [31:0] idu_fpu_ex1_srcf0;
+wire [31:0] idu_fpu_ex1_srcf1;
+wire [59:0] sqrt_remainder;
+wire sqrt_sign;
+
+// Precision is fixed to single here (ex1_double = 0, ex1_single = 1), so every {N{ex1_double}} term below is masked to zero.
+assign ex1_sqrt = idu_fpu_ex1_func[0];
+assign ex1_div = idu_fpu_ex1_func[1];
+assign ex1_oper0[63:0] = {32'b0, idu_fpu_ex1_srcf0[31:0] & {32{fdsu_ex1_sel}}};
+assign ex1_oper1[63:0] = {32'b0, idu_fpu_ex1_srcf1[31:0] & {32{fdsu_ex1_sel}}};
+assign ex1_double = 1'b0;
+assign ex1_single = 1'b1;
+// &Force("bus", "idu_fpu_ex1_func", 9, 0); @43
+assign ex1_op0_id_nor = ex1_op0_id;
+assign ex1_op1_id_nor = ex1_op1_id;
+
+//Sign bit prepare: div uses the XOR of both operand signs, sqrt uses operand 0's sign
+assign ex1_op0_sign = ex1_double && ex1_oper0[63]
+                   || ex1_single && ex1_oper0[31];
+assign ex1_op1_sign = ex1_double && ex1_oper1[63]
+                   || ex1_single && ex1_oper1[31];
+assign div_sign = ex1_op0_sign ^ ex1_op1_sign;
+assign sqrt_sign = ex1_op0_sign;
+assign ex1_result_sign = (ex1_div)
+                       ? div_sign
+                       : sqrt_sign;
+
+//=====================find first one=======================
+// this is for the denormal number
+assign ex1_oper_frac[51:0] = ex1_op1_sel ? ex1_oper1_frac[51:0]
+                                         : ex1_oper0_frac[51:0];
+// One shared pa_fdsu_ff1 instance normalizes whichever operand fraction ex1_op1_sel selects.
+// &Instance("pa_fdsu_ff1", "x_frac_expnt"); @63
+pa_fdsu_ff1 x_frac_expnt (
+  .fanc_shift_num (ex1_oper_id_frac[51:0] ),
+  .frac_bin_val (ex1_oper_id_expnt[12:0]),
+  .frac_num (ex1_oper_frac[51:0] )
+);
+
+// &Connect(.frac_num(ex1_oper_frac[51:0])); @64
+// &Connect(.frac_bin_val(ex1_oper_id_expnt[12:0])); @65
+// &Connect(.fanc_shift_num(ex1_oper_id_frac[51:0])); @66
+// &Force("output", "ex1_oper_id_expnt"); &Force("bus", "ex1_oper_id_expnt", 12, 0); @67
+// &Force("output", "ex1_oper_id_frac"); &Force("bus", "ex1_oper_id_frac", 51, 0); @68
+// While ex1_op1_sel is 1 the ff1 output belongs to operand 1, so operand 0 falls back to the *_f inputs.
+assign ex1_oper0_id_expnt[12:0] = ex1_op1_sel ? ex1_oper_id_expnt_f[12:0]
+                                              : ex1_oper_id_expnt[12:0];
+assign ex1_oper0_id_frac[51:0] = ex1_op1_sel ? ex1_oper_id_frac_f[51:0]
+                                             : ex1_oper_id_frac[51:0];
+assign ex1_oper1_id_expnt[12:0] = ex1_oper_id_expnt[12:0];
+assign ex1_oper1_id_frac[51:0] = ex1_oper_id_frac[51:0];
+// Single-precision fraction bits [22:0] are left-aligned into the 52-bit fraction format (29 zero bits appended).
+assign ex1_oper0_frac[51:0] = {52{ex1_double}} & ex1_oper0[51:0]
+                            | {52{ex1_single}} & {ex1_oper0[22:0],29'b0};
+assign ex1_oper1_frac[51:0] = {52{ex1_double}} & ex1_oper1[51:0]
+                            | {52{ex1_single}} & {ex1_oper1[22:0],29'b0};
+
+//=====================exponent add=========================
+//exponent number 0
+assign ex1_div_op0_expnt[12:0] = {13{ex1_double}} & {2'b0,ex1_oper0[62:52]}
+                               | {13{ex1_single}} & {5'b0,ex1_oper0[30:23]};
+assign ex1_expnt_adder_op0[12:0] = ex1_op0_id_nor ? ex1_oper0_id_expnt[12:0]
+                                                  : ex1_div_op0_expnt[12:0];
+//exponent number 1
+assign ex1_div_op1_expnt[12:0] = {13{ex1_double}} & {2'b0,ex1_oper1[62:52]}
+                               | {13{ex1_single}} & {5'b0,ex1_oper1[30:23]};
+assign ex1_sqrt_op1_expnt[12:0] = {13{ex1_double}} & {3'b0,{10{1'b1}}} //'d1023
+                                | {13{ex1_single}} & {6'b0,{7{1'b1}}}; //'d127
+// &CombBeg; @93
+always @( ex1_oper1_id_expnt[12:0]
+       or ex1_div
+       or ex1_op1_id_nor
+       or ex1_sqrt_op1_expnt[12:0]
+       or ex1_sqrt
+       or ex1_div_op1_expnt[12:0])
+begin
+case({ex1_div,ex1_sqrt})
+  2'b10: ex1_expnt_adder_op1[12:0] = ex1_op1_id_nor ? ex1_oper1_id_expnt[12:0]
+                                                    : ex1_div_op1_expnt[12:0]; // div: operand 1's exponent
+  2'b01: ex1_expnt_adder_op1[12:0] = ex1_sqrt_op1_expnt[12:0]; // sqrt: the constant 127
+  default: ex1_expnt_adder_op1[12:0] = 13'b0; // neither or both function bits set
+endcase
+// &CombEnd; @100
+end
+
+//ex1_sqrt_expnt_odd
+//fraction will shift left by 1
+// adder_op0/1 timing is bad.
+// assign ex1_sqrt_expnt_odd = ex1_expnt_adder_op0[0] ^ ex1_expnt_adder_op1[0];
+// NOTE(review): the line below reads ex1_oper_id_expnt[0], not ex1_oper0_id_expnt[0]; they differ only while ex1_op1_sel is 1 -- confirm that never happens for sqrt.
+// sqrt_odd is only used when is sqrt.
+assign ex1_sqrt_op0_expnt_0 = ex1_op0_id_nor ? ex1_oper_id_expnt[0]
+                                             : ex1_div_op0_expnt[0];
+// For sqrt, ex1_expnt_adder_op1[0] is always 1'b1 ('d127), so op0[0] ^ op1[0] reduces to !op0[0].
+assign ex1_sqrt_expnt_odd = !ex1_sqrt_op0_expnt_0;
+
+assign ex1_rm[2:0] = dp_xx_ex1_rm[2:0];
+//RNE : Always inc 1 because round to nearest of 1.111...11
+//RTZ : Always not inc 1
+//RUP : Always not inc 1 when positive
+//RDN : Always not inc 1 when negative
+//RMM : Always inc 1 because round to max magnitude
+// &CombBeg; @119
+always @( ex1_rm[2:0]
+       or ex1_result_sign)
+begin
+case(ex1_rm[2:0]) // presumably selects the largest finite number instead of infinity on overflow -- confirm where ex1_of_result_lfn is consumed
+  3'b000 : ex1_of_result_lfn = 1'b0;
+  3'b001 : ex1_of_result_lfn = 1'b1;
+  3'b010 : ex1_of_result_lfn = !ex1_result_sign; // set for positive results only
+  3'b011 : ex1_of_result_lfn = ex1_result_sign; // set for negative results only
+  3'b100 : ex1_of_result_lfn = 1'b0;
+  default: ex1_of_result_lfn = 1'b0; // rounding-mode codes 3'b101 to 3'b111
+endcase
+// &CombEnd; @128
+end
+
+//EX1 Remainder
+//div : 1/8 <= x < 1/4
+//sqrt : 1/16 <= x < 1/4
+assign ex1_remainder[31:0] = {32{ex1_div }} & {5'b0,ex1_div_srt_op0[52:28],2'b0} |
+                             {32{ex1_sqrt}} & sqrt_remainder[59:28];
+
+//EX1 Divisor
+//1/2 <= y < 1
+assign ex1_divisor[23:0] = ex1_div_srt_op1[52:29];
+
+//ex1_div_srt_op0
+assign ex1_div_srt_op0[52:0] = ex1_div_nor_srt_op0[52:0];
+//ex1_div_srt_op1
+assign ex1_div_srt_op1[52:0] = ex1_div_nor_srt_op1[52:0];
+//ex1_div_nor_srt_op0: bit 52 is a constant 1 when op0_id is 0; otherwise the ff1-shifted fraction is used with a zero appended
+assign ex1_div_noid_nor_srt_op0[52:0] = {53{ex1_double}} & {1'b1,ex1_oper0[51:0]}
+                                      | {53{ex1_single}} & {1'b1,ex1_oper0[22:0],29'b0};
+assign ex1_div_nor_srt_op0[52:0] = ex1_op0_id_nor ? {ex1_oper0_id_frac[51:0],1'b0}
+                                                  : ex1_div_noid_nor_srt_op0[52:0];
+//ex1_div_nor_srt_op1: same construction for operand 1
+assign ex1_div_noid_nor_srt_op1[52:0] = {53{ex1_double}} & {1'b1,ex1_oper1[51:0]}
+                                      | {53{ex1_single}} & {1'b1,ex1_oper1[22:0],29'b0};
+assign ex1_div_nor_srt_op1[52:0] = ex1_op1_id_nor ? {ex1_oper1_id_frac[51:0],1'b0}
+                                                  : ex1_div_noid_nor_srt_op1[52:0];
+//sqrt_remainder: the operand sits one bit further left when ex1_sqrt_expnt_odd is set
+assign sqrt_remainder[59:0] = (ex1_sqrt_expnt_odd)
+                            ? {5'b0,ex1_sqrt_srt_op0[52:0],2'b0}
+                            : {6'b0,ex1_sqrt_srt_op0[52:0],1'b0};
+//ex1_sqrt_srt_op0
+assign ex1_sqrt_srt_op0[52:0] = ex1_div_srt_op0[52:0];
+
+//========================Pipe to EX2=======================
+//exponent register cal result
+// &Force("output", "ex1_expnt_adder_op0"); &Force("bus", "ex1_expnt_adder_op0", 12, 0); @173
+// &Force("output", "ex1_expnt_adder_op1"); &Force("bus", "ex1_expnt_adder_op1", 12, 0); @174
+// &Force("output", "ex1_double"); @175
+// &Force("output", "ex1_expnt_adder_op0"); &Force("bus", "ex1_expnt_adder_op0", 12, 0); @177
+// &Force("output", "ex1_expnt_adder_op1"); &Force("bus", "ex1_expnt_adder_op1", 12, 0); @178
+// &Force("output", "ex1_result_sign"); @180
+// &Force("output", "ex1_div"); @181
+// &Force("output", "ex1_sqrt"); @182
+// &Force("output", "ex1_rm"); &Force("bus", "ex1_rm", 2, 0); @183
+// &Force("output", "ex1_op0_sign"); @184
+// Operand 1's id flag only matters for div: sqrt does not take operand 1's exponent or sign.
+assign ex1_op1_id_vld = ex1_op1_id_nor && ex1_div;
+
+// &ModuleEnd; @188
+endmodule
+
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_round_single.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_round_single.v
new file mode 100644
index 000000000..7d6acbb4b
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_round_single.v
@@ -0,0 +1,540 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +// &ModuleBeg; @23 +module pa_fdsu_round_single( + cp0_fpu_icg_en, + cp0_yy_clk_en, + ex3_expnt_adjust_result, + ex3_frac_final_rst, + ex3_pipedown, + ex3_rslt_denorm, + fdsu_ex3_id_srt_skip, + fdsu_ex3_rem_sign, + fdsu_ex3_rem_zero, + fdsu_ex3_result_denorm_round_add_num, + fdsu_ex4_denorm_to_tiny_frac, + fdsu_ex4_nx, + fdsu_ex4_potnt_norm, + fdsu_ex4_result_nor, + fdsu_yy_expnt_rst, + fdsu_yy_result_inf, + fdsu_yy_result_lfn, + fdsu_yy_result_sign, + fdsu_yy_rm, + fdsu_yy_rslt_denorm, + forever_cpuclk, + pad_yy_icg_scan_en, + total_qt_rt_30 +); + +// &Ports; @24 +input cp0_fpu_icg_en; +input cp0_yy_clk_en; +input ex3_pipedown; +input fdsu_ex3_id_srt_skip; +input fdsu_ex3_rem_sign; +input fdsu_ex3_rem_zero; +input [23:0] fdsu_ex3_result_denorm_round_add_num; +input [9 :0] fdsu_yy_expnt_rst; +input fdsu_yy_result_inf; +input fdsu_yy_result_lfn; +input fdsu_yy_result_sign; +input [2 :0] fdsu_yy_rm; +input fdsu_yy_rslt_denorm; +input forever_cpuclk; +input pad_yy_icg_scan_en; +input [29:0] total_qt_rt_30; +output [9 :0] ex3_expnt_adjust_result; +output [25:0] ex3_frac_final_rst; +output ex3_rslt_denorm; +output fdsu_ex4_denorm_to_tiny_frac; +output fdsu_ex4_nx; +output [1 :0] fdsu_ex4_potnt_norm; +output fdsu_ex4_result_nor; + +// &Regs; @25 +reg denorm_to_tiny_frac; +reg fdsu_ex4_denorm_to_tiny_frac; +reg fdsu_ex4_nx; +reg [1 :0] fdsu_ex4_potnt_norm; +reg fdsu_ex4_result_nor; +reg [25:0] frac_add1_op1; +reg frac_add_1; +reg frac_orig; +reg [25:0] frac_sub1_op1; +reg frac_sub_1; +reg [27:0] qt_result_single_denorm_for_round; +reg single_denorm_lst_frac; + +// &Wires; @26 +wire cp0_fpu_icg_en; +wire cp0_yy_clk_en; +wire ex3_denorm_eq; +wire ex3_denorm_gr; +wire ex3_denorm_lst_frac; +wire ex3_denorm_nx; +wire ex3_denorm_plus; +wire ex3_denorm_potnt_norm; +wire ex3_denorm_zero; +wire [9 :0] ex3_expnt_adjst; +wire [9 :0] ex3_expnt_adjust_result; +wire [25:0] ex3_frac_final_rst; +wire ex3_nx; +wire ex3_pipe_clk; +wire ex3_pipe_clk_en; +wire ex3_pipedown; +wire [1 
:0] ex3_potnt_norm; +wire ex3_qt_eq; +wire ex3_qt_gr; +wire ex3_qt_sing_lo3_not0; +wire ex3_qt_sing_lo4_not0; +wire ex3_qt_zero; +wire ex3_rslt_denorm; +wire ex3_rst_eq_1; +wire ex3_rst_nor; +wire ex3_single_denorm_eq; +wire ex3_single_denorm_gr; +wire ex3_single_denorm_zero; +wire ex3_single_low_not_zero; +wire [9 :0] fdsu_ex3_expnt_rst; +wire fdsu_ex3_id_srt_skip; +wire fdsu_ex3_rem_sign; +wire fdsu_ex3_rem_zero; +wire [23:0] fdsu_ex3_result_denorm_round_add_num; +wire fdsu_ex3_result_inf; +wire fdsu_ex3_result_lfn; +wire fdsu_ex3_result_sign; +wire [2 :0] fdsu_ex3_rm; +wire fdsu_ex3_rslt_denorm; +wire [9 :0] fdsu_yy_expnt_rst; +wire fdsu_yy_result_inf; +wire fdsu_yy_result_lfn; +wire fdsu_yy_result_sign; +wire [2 :0] fdsu_yy_rm; +wire fdsu_yy_rslt_denorm; +wire forever_cpuclk; +wire [25:0] frac_add1_op1_with_denorm; +wire [25:0] frac_add1_rst; +wire frac_denorm_rdn_add_1; +wire frac_denorm_rdn_sub_1; +wire frac_denorm_rmm_add_1; +wire frac_denorm_rne_add_1; +wire frac_denorm_rtz_sub_1; +wire frac_denorm_rup_add_1; +wire frac_denorm_rup_sub_1; +wire [25:0] frac_final_rst; +wire frac_rdn_add_1; +wire frac_rdn_sub_1; +wire frac_rmm_add_1; +wire frac_rne_add_1; +wire frac_rtz_sub_1; +wire frac_rup_add_1; +wire frac_rup_sub_1; +wire [25:0] frac_sub1_op1_with_denorm; +wire [25:0] frac_sub1_rst; +wire pad_yy_icg_scan_en; +wire [29:0] total_qt_rt_30; + + +assign fdsu_ex3_result_sign = fdsu_yy_result_sign; +assign fdsu_ex3_expnt_rst[9:0] = fdsu_yy_expnt_rst[9:0]; +assign fdsu_ex3_result_inf = fdsu_yy_result_inf; +assign fdsu_ex3_result_lfn = fdsu_yy_result_lfn; +assign fdsu_ex3_rm[2:0] = fdsu_yy_rm[2:0]; +assign fdsu_ex3_rslt_denorm = fdsu_yy_rslt_denorm; +//=======================Round Rule========================= +//1/8 <= x < 1/4, 1/2 <= y < 1, => 1/8 < z < 1/2 +//q[29:0] represent the fraction part result of quotient, q[29] for 1/2 +//Thus the first "1" in 30 bit quotient will be in q[28] or q[27] +//For Single Float +//15 round to get 30 bit quotient, 23+1 bit as 
valid result, other for round +//if q[28] is 1, q[28:5] as 1.xxxx valid result, [4:0] for round +//if q[28] is 0, q[27:4] as 1.xxxx valid result, [3:0] for round +// &Force("bus","total_qt_rt_30",29,0); @42 +assign ex3_qt_sing_lo4_not0 = |total_qt_rt_30[3:0]; +assign ex3_qt_sing_lo3_not0 = |total_qt_rt_30[2:0]; +//the quotient round bits are greater than "10000" (top round bit set, lower round bits not all 0) +assign ex3_qt_gr = (total_qt_rt_30[28]) + ? total_qt_rt_30[4] && ex3_qt_sing_lo4_not0 + : total_qt_rt_30[3] && ex3_qt_sing_lo3_not0; + +//the quotient round bits are equal to "10000" (top round bit set, lower round bits all 0) +assign ex3_qt_eq = (total_qt_rt_30[28]) + ? total_qt_rt_30[4] && !ex3_qt_sing_lo4_not0 + : total_qt_rt_30[3] && !ex3_qt_sing_lo3_not0; +//the quotient round bits is zero +assign ex3_qt_zero = (total_qt_rt_30[28]) + ? ~|total_qt_rt_30[4:0] + : ~|total_qt_rt_30[3:0]; +//quotient is 1.00000..00, which needs special handling in the following +assign ex3_rst_eq_1 = total_qt_rt_30[28] && ~|total_qt_rt_30[27:5]; +// for denormal result, first select the quotient bits for rounding +// specially for the result e=-126 and e=-1022,the denorm depends on the +// MSB of the quotient +assign ex3_denorm_plus = !total_qt_rt_30[28] && (fdsu_ex3_expnt_rst[9:0] == 10'h382); +assign ex3_denorm_potnt_norm = total_qt_rt_30[28] && (fdsu_ex3_expnt_rst[9:0] == 10'h381); +assign ex3_rslt_denorm = ex3_denorm_plus || fdsu_ex3_rslt_denorm; +// &Force("output", "ex3_rslt_denorm"); @66 + +//denormal result, check for rounding; further optimization can be done in +//future +// &CombBeg; @70 +always @( total_qt_rt_30[28:0] + or fdsu_ex3_expnt_rst[9:0]) +begin +case(fdsu_ex3_expnt_rst[9:0]) + 10'h382:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[4:0],23'b0}; //-126 1 + single_denorm_lst_frac = total_qt_rt_30[5]; + end//-1022 1 + 10'h381:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[5:0],22'b0}; //-127 0 + single_denorm_lst_frac = total_qt_rt_30[6]; + end//-1022 1 + 10'h380:begin 
qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[6:0],21'b0}; //-128 -1 + single_denorm_lst_frac = total_qt_rt_30[7]; + end//-1022 1 + 10'h37f:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[7:0],20'b0}; //-129 -2 + single_denorm_lst_frac = total_qt_rt_30[8]; + end//-1022 1 + 10'h37e:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[8:0],19'b0}; //-130 -3 + single_denorm_lst_frac = total_qt_rt_30[9]; + end//-1022 1 + 10'h37d:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[9:0],18'b0}; //-131 -4 + single_denorm_lst_frac = total_qt_rt_30[10]; + end//-1022 1 + 10'h37c:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[10:0],17'b0}; //-132 -5 + single_denorm_lst_frac = total_qt_rt_30[11]; + end//-1022 1 + 10'h37b:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[11:0],16'b0}; //-133 -6 + single_denorm_lst_frac = total_qt_rt_30[12]; + end//-1022 1 + 10'h37a:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[12:0],15'b0}; //-134 -7 + single_denorm_lst_frac = total_qt_rt_30[13]; + end//-1022 1 + 10'h379:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[13:0],14'b0}; //-135 -8 + single_denorm_lst_frac = total_qt_rt_30[14]; + end//-1022 1 + 10'h378:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[14:0],13'b0}; //-136 -9 + single_denorm_lst_frac = total_qt_rt_30[15]; + end//-1022 1 + 10'h377:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[15:0],12'b0}; //-137 -10 + single_denorm_lst_frac = total_qt_rt_30[16]; + end//-1022 1 + 10'h376:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[16:0],11'b0}; //-138 -11 + single_denorm_lst_frac = total_qt_rt_30[17]; + end//-1022 1 + 10'h375:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[17:0],10'b0}; //-139 -12 + single_denorm_lst_frac = total_qt_rt_30[18]; + end//-1022 1 + 10'h374:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[18:0],9'b0}; //-140 -13 + 
single_denorm_lst_frac = total_qt_rt_30[19]; + end//-1022 1 + 10'h373:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[19:0],8'b0}; // -141 + single_denorm_lst_frac = total_qt_rt_30[20]; + end//-1022 1 + 10'h372:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[20:0],7'b0};//-142 + single_denorm_lst_frac = total_qt_rt_30[21]; + end//-1022 1 + 10'h371:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[21:0],6'b0};//-143 + single_denorm_lst_frac = total_qt_rt_30[22]; + end//-1022 1 + 10'h370:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[22:0],5'b0}; //-144 + single_denorm_lst_frac = total_qt_rt_30[23]; + end//-1022 1 + 10'h36f:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[23:0],4'b0}; //-145 + single_denorm_lst_frac = total_qt_rt_30[24]; + end//-1022 1 + 10'h36e:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[24:0],3'b0}; //-146 + single_denorm_lst_frac = total_qt_rt_30[25]; + end//-1022 1 + 10'h36d:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[25:0],2'b0}; //-147 + single_denorm_lst_frac = total_qt_rt_30[26]; + end//-1022 1 + 10'h36c:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[26:0],1'b0}; //-148 + single_denorm_lst_frac = total_qt_rt_30[27]; + end//-1022 1 + 10'h36b: begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[27:0]}; + single_denorm_lst_frac = total_qt_rt_30[28] ; + end//-1022 1 + default: begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_30[28:1]}; + single_denorm_lst_frac = 1'b0; + end//-1022 1 +endcase +// &CombEnd; @148 +end +//rounding evaluation for single denormalize number +assign ex3_single_denorm_eq = qt_result_single_denorm_for_round[27] + && !ex3_single_low_not_zero; +assign ex3_single_low_not_zero = |qt_result_single_denorm_for_round[26:0]; +assign ex3_single_denorm_gr = qt_result_single_denorm_for_round[27] + && ex3_single_low_not_zero; +assign ex3_single_denorm_zero = 
!qt_result_single_denorm_for_round[27] + && !ex3_single_low_not_zero; + +//rounding check for denormalized result +assign ex3_denorm_eq = ex3_single_denorm_eq; +assign ex3_denorm_gr = ex3_single_denorm_gr; +assign ex3_denorm_zero = ex3_single_denorm_zero; +assign ex3_denorm_lst_frac = single_denorm_lst_frac; +//Different Round Mode with different rounding rule +//Here we call rounding bit as "rb", remainder as "rem" +//RNE : +// 1.+1 : rb>10000 || rb==10000 && rem>0 +// 2. 0 : Rest Condition +// 3.-1 : Never occur +//RTZ : +// 1.+1 : Never occur +// 2. 0 : Rest Condition +// 3.-1 : rb=0 && rem<0 +//RDN : +// 1.+1 : Q>0 Never occur ; Q<0 Rest condition +// 2. 0 : Q>0 Rest condition; Q<0 Rem<0 && rb=0 +// 3.-1 : Q>0 Rem<0 && rb=0 ; Q<0 Never occur +//RUP : +// 1.+1 : Q>0 Rest Condition; Q<0 Never occur +// 2. 0 : Q>0 Rem<0 && rb=0 ; Q<0 Rest condition +// 3.-1 : Q>0 Never occur ; Q<0 Rem<0 && rb=0 +//RMM : +// 1.+1 : rb>10000 || rb==10000 && rem>0 +// 2. 0 : Rest Condition +// 3.-1 : Never occur +assign frac_rne_add_1 = ex3_qt_gr || + (ex3_qt_eq && !fdsu_ex3_rem_sign); +assign frac_rtz_sub_1 = ex3_qt_zero && fdsu_ex3_rem_sign; +assign frac_rup_add_1 = !fdsu_ex3_result_sign && + (!ex3_qt_zero || + (!fdsu_ex3_rem_sign && !fdsu_ex3_rem_zero)); +assign frac_rup_sub_1 = fdsu_ex3_result_sign && + (ex3_qt_zero && fdsu_ex3_rem_sign); +assign frac_rdn_add_1 = fdsu_ex3_result_sign && + (!ex3_qt_zero || + (!fdsu_ex3_rem_sign && !fdsu_ex3_rem_zero)); +assign frac_rdn_sub_1 = !fdsu_ex3_result_sign && + (ex3_qt_zero && fdsu_ex3_rem_sign); +assign frac_rmm_add_1 = ex3_qt_gr || + (ex3_qt_eq && !fdsu_ex3_rem_sign); +//denormal result +assign frac_denorm_rne_add_1 = ex3_denorm_gr || + (ex3_denorm_eq && + ((fdsu_ex3_rem_zero && + ex3_denorm_lst_frac) || + (!fdsu_ex3_rem_zero && + !fdsu_ex3_rem_sign))); +assign frac_denorm_rtz_sub_1 = ex3_denorm_zero && fdsu_ex3_rem_sign; +assign frac_denorm_rup_add_1 = !fdsu_ex3_result_sign && + (!ex3_denorm_zero || + (!fdsu_ex3_rem_sign && 
!fdsu_ex3_rem_zero)); +assign frac_denorm_rup_sub_1 = fdsu_ex3_result_sign && + (ex3_denorm_zero && fdsu_ex3_rem_sign); +assign frac_denorm_rdn_add_1 = fdsu_ex3_result_sign && + (!ex3_denorm_zero || + (!fdsu_ex3_rem_sign && !fdsu_ex3_rem_zero)); +assign frac_denorm_rdn_sub_1 = !fdsu_ex3_result_sign && + (ex3_denorm_zero && fdsu_ex3_rem_sign); +assign frac_denorm_rmm_add_1 = ex3_denorm_gr || + (ex3_denorm_eq && !fdsu_ex3_rem_sign); + +//RM select +// &CombBeg; @222 +always @( fdsu_ex3_rm[2:0] + or frac_denorm_rdn_add_1 + or frac_rne_add_1 + or frac_denorm_rdn_sub_1 + or fdsu_ex3_result_sign + or frac_rup_add_1 + or frac_denorm_rup_sub_1 + or frac_rdn_sub_1 + or frac_rtz_sub_1 + or frac_rdn_add_1 + or fdsu_ex3_id_srt_skip + or frac_denorm_rtz_sub_1 + or ex3_rslt_denorm + or frac_rup_sub_1 + or frac_denorm_rmm_add_1 + or frac_denorm_rup_add_1 + or frac_denorm_rne_add_1 + or frac_rmm_add_1) +begin +case(fdsu_ex3_rm[2:0]) + 3'b000://round to nearest,ties to even + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rne_add_1 : frac_rne_add_1; + frac_sub_1 = 1'b0; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rne_add_1 : !frac_rne_add_1; + denorm_to_tiny_frac = fdsu_ex3_id_srt_skip ? 1'b0 : frac_denorm_rne_add_1; + end + 3'b001:// round to 0 + begin + frac_add_1 = 1'b0; + frac_sub_1 = ex3_rslt_denorm ? frac_denorm_rtz_sub_1 : frac_rtz_sub_1; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rtz_sub_1 : !frac_rtz_sub_1; + denorm_to_tiny_frac = 1'b0; + end + 3'b010://round to -inf + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rdn_add_1 : frac_rdn_add_1; + frac_sub_1 = ex3_rslt_denorm ? frac_denorm_rdn_sub_1 : frac_rdn_sub_1; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rdn_add_1 && !frac_denorm_rdn_sub_1 + : !frac_rdn_add_1 && !frac_rdn_sub_1; + denorm_to_tiny_frac = fdsu_ex3_id_srt_skip ? fdsu_ex3_result_sign + : frac_denorm_rdn_add_1; + end + 3'b011://round to +inf + begin + frac_add_1 = ex3_rslt_denorm ? 
frac_denorm_rup_add_1 : frac_rup_add_1; + frac_sub_1 = ex3_rslt_denorm ? frac_denorm_rup_sub_1 : frac_rup_sub_1; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rup_add_1 && !frac_denorm_rup_sub_1 + : !frac_rup_add_1 && !frac_rup_sub_1; + denorm_to_tiny_frac = fdsu_ex3_id_srt_skip ? !fdsu_ex3_result_sign + : frac_denorm_rup_add_1; + end + 3'b100://round to nearest,ties to max magnitude + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rmm_add_1 : frac_rmm_add_1; + frac_sub_1 = 1'b0; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rmm_add_1 : !frac_rmm_add_1; + denorm_to_tiny_frac = fdsu_ex3_id_srt_skip ? 1'b0 : frac_denorm_rmm_add_1; + end + default: + begin + frac_add_1 = 1'b0; + frac_sub_1 = 1'b0; + frac_orig = 1'b0; + denorm_to_tiny_frac = 1'b0; + end +endcase +// &CombEnd; @271 +end +//Add 1 or Sub 1 constant +// &CombBeg; @273 +always @( total_qt_rt_30[28]) +begin +case(total_qt_rt_30[28]) + 1'b0: + begin + frac_add1_op1[25:0] = {2'b0,24'b1}; + frac_sub1_op1[25:0] = {2'b11,{24{1'b1}}}; + end + 1'b1: + begin + frac_add1_op1[25:0] = {25'b1,1'b0}; + frac_sub1_op1[25:0] = {{25{1'b1}},1'b0}; + end + default: + begin + frac_add1_op1[25:0] = 26'b0; + frac_sub1_op1[25:0] = 26'b0; + end +endcase +// &CombEnd; @291 +end + +//Add 1 or Sub1 final result +//Corner case when quotient is 0.010000...00 and remainder is negative, +//The real quotient is actually 0.00fff..ff, +//The final result will need to sub 1 when +//RN : Never occur +//RP : sign of quotient is - +//RM : sign of quotient is + +assign frac_add1_rst[25:0] = {1'b0,total_qt_rt_30[28:4]} + + frac_add1_op1_with_denorm[25:0]; +assign frac_add1_op1_with_denorm[25:0] = ex3_rslt_denorm ? + {1'b0,fdsu_ex3_result_denorm_round_add_num[23:0],1'b0} : + frac_add1_op1[25:0]; +assign frac_sub1_rst[25:0] = (ex3_rst_eq_1) + ? {3'b0,{23{1'b1}}} + : {1'b0,total_qt_rt_30[28:4]} + + frac_sub1_op1_with_denorm[25:0] + {25'b0, ex3_rslt_denorm}; +assign frac_sub1_op1_with_denorm[25:0] = ex3_rslt_denorm ? 
+ ~{1'b0,fdsu_ex3_result_denorm_round_add_num[23:0],1'b0} : + frac_sub1_op1[25:0]; +assign frac_final_rst[25:0] = (frac_add1_rst[25:0] & {26{frac_add_1}}) | + (frac_sub1_rst[25:0] & {26{frac_sub_1}}) | + ({1'b0,total_qt_rt_30[28:4]} & {26{frac_orig}}); + +//===============Pipe down signal prepare=================== +// assign ex3_rst_nor = !fdsu_ex3_result_zero && +// !fdsu_ex3_result_qnan && +// !fdsu_ex3_result_inf && +// !fdsu_ex3_result_lfn; +assign ex3_rst_nor = !fdsu_ex3_result_inf && + !fdsu_ex3_result_lfn; +assign ex3_nx = ex3_rst_nor && + (!ex3_qt_zero || !fdsu_ex3_rem_zero || ex3_denorm_nx); +assign ex3_denorm_nx = ex3_rslt_denorm && (!ex3_denorm_zero || !fdsu_ex3_rem_zero); +//Adjust expnt +//Div:Actual expnt should plus 1 when op0 is id, sub 1 when op1 id +assign ex3_expnt_adjst[9:0] = 10'h7f; + +assign ex3_expnt_adjust_result[9:0] = fdsu_ex3_expnt_rst[9:0] + + ex3_expnt_adjst[9:0]; +//this information is for the packing, which determines whether the result is a normal +//number or not; +assign ex3_potnt_norm[1:0] = {ex3_denorm_plus,ex3_denorm_potnt_norm}; +//=======================Pipe to EX4======================== +//gate clk +// &Instance("gated_clk_cell","x_ex3_pipe_clk"); @337 +gated_clk_cell x_ex3_pipe_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex3_pipe_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex3_pipe_clk_en ), + .module_en (cp0_fpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @338 +// .clk_out (ex3_pipe_clk),//Out Clock @339 +// .external_en (1'b0), @340 +// .global_en (cp0_yy_clk_en), @341 +// .local_en (ex3_pipe_clk_en),//Local Condition @342 +// .module_en (cp0_fpu_icg_en) @343 +// ); @344 +assign ex3_pipe_clk_en = ex3_pipedown; + +always @(posedge ex3_pipe_clk) +begin + if(ex3_pipedown) + begin + fdsu_ex4_result_nor <= ex3_rst_nor; + fdsu_ex4_nx <= ex3_nx; + fdsu_ex4_denorm_to_tiny_frac + <= denorm_to_tiny_frac; + fdsu_ex4_potnt_norm[1:0] <= ex3_potnt_norm[1:0]; 
+ end + else + begin + fdsu_ex4_result_nor <= fdsu_ex4_result_nor; + fdsu_ex4_nx <= fdsu_ex4_nx; + fdsu_ex4_denorm_to_tiny_frac + <= fdsu_ex4_denorm_to_tiny_frac; + fdsu_ex4_potnt_norm[1:0] <= fdsu_ex4_potnt_norm[1:0]; + end +end + +// ex3_frac Pipedown to ex4 use srt_divisor. +assign ex3_frac_final_rst[25:0] = frac_final_rst[25:0]; +// &Force("output","fdsu_ex4_result_nor"); @397 +// &Force("output","fdsu_ex4_nx"); @398 +// &Force("output","fdsu_ex4_denorm_to_tiny_frac"); @399 +// &Force("output","fdsu_ex4_potnt_norm"); @400 + + +// &ModuleEnd; @403 +endmodule + + + diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_special.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_special.v new file mode 100644 index 000000000..38aebd9df --- /dev/null +++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_special.v @@ -0,0 +1,345 @@ +/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// &ModuleBeg; @23 +module pa_fdsu_special( + cp0_fpu_xx_dqnan, + dp_xx_ex1_cnan, + dp_xx_ex1_id, + dp_xx_ex1_inf, + dp_xx_ex1_qnan, + dp_xx_ex1_snan, + dp_xx_ex1_zero, + ex1_div, + ex1_op0_id, + ex1_op0_norm, + ex1_op0_sign, + ex1_op1_id, + ex1_op1_norm, + ex1_result_sign, + ex1_sqrt, + ex1_srt_skip, + fdsu_fpu_ex1_fflags, + fdsu_fpu_ex1_special_sel, + fdsu_fpu_ex1_special_sign +); + +// &Ports; @24 +input cp0_fpu_xx_dqnan; +input [2:0] dp_xx_ex1_cnan; +input [2:0] dp_xx_ex1_id; +input [2:0] dp_xx_ex1_inf; +input [2:0] dp_xx_ex1_qnan; +input [2:0] dp_xx_ex1_snan; +input [2:0] dp_xx_ex1_zero; +input ex1_div; +input ex1_op0_sign; +input ex1_result_sign; +input ex1_sqrt; +output ex1_op0_id; +output ex1_op0_norm; +output ex1_op1_id; +output ex1_op1_norm; +output ex1_srt_skip; +output [4:0] fdsu_fpu_ex1_fflags; +output [7:0] fdsu_fpu_ex1_special_sel; +output [3:0] fdsu_fpu_ex1_special_sign; + +// &Regs; @25 +reg ex1_result_cnan; +reg ex1_result_qnan_op0; +reg ex1_result_qnan_op1; + +// &Wires; @26 +wire cp0_fpu_xx_dqnan; +wire [2:0] dp_xx_ex1_cnan; +wire [2:0] dp_xx_ex1_id; +wire [2:0] dp_xx_ex1_inf; +wire [2:0] dp_xx_ex1_qnan; +wire [2:0] dp_xx_ex1_snan; +wire [2:0] dp_xx_ex1_zero; +wire ex1_div; +wire ex1_div_dz; +wire ex1_div_nv; +wire ex1_div_rst_inf; +wire ex1_div_rst_qnan; +wire ex1_div_rst_zero; +wire ex1_dz; +wire [4:0] ex1_fflags; +wire ex1_nv; +wire ex1_op0_cnan; +wire ex1_op0_id; +wire ex1_op0_inf; +wire ex1_op0_is_qnan; +wire ex1_op0_is_snan; +wire ex1_op0_norm; +wire ex1_op0_qnan; +wire ex1_op0_sign; +wire ex1_op0_snan; +wire ex1_op0_tt_zero; +wire ex1_op0_zero; +wire ex1_op1_cnan; +wire ex1_op1_id; +wire ex1_op1_inf; +wire ex1_op1_is_qnan; +wire ex1_op1_is_snan; +wire ex1_op1_norm; +wire ex1_op1_qnan; +wire ex1_op1_snan; +wire ex1_op1_tt_zero; +wire ex1_op1_zero; +wire ex1_result_inf; +wire ex1_result_lfn; +wire ex1_result_qnan; +wire ex1_result_sign; +wire ex1_result_zero; +wire ex1_rst_default_qnan; +wire [7:0] ex1_special_sel; +wire [3:0] 
ex1_special_sign; +wire ex1_sqrt; +wire ex1_sqrt_nv; +wire ex1_sqrt_rst_inf; +wire ex1_sqrt_rst_qnan; +wire ex1_sqrt_rst_zero; +wire ex1_srt_skip; +wire [4:0] fdsu_fpu_ex1_fflags; +wire [7:0] fdsu_fpu_ex1_special_sel; +wire [3:0] fdsu_fpu_ex1_special_sign; + + +//infinity number: bit 0 flags operand 0, bit 1 flags operand 1 +// &Force("bus", "dp_xx_ex1_inf", 2, 0); @29 +assign ex1_op0_inf = dp_xx_ex1_inf[0]; +assign ex1_op1_inf = dp_xx_ex1_inf[1]; + +//zero: bit 0 flags operand 0, bit 1 flags operand 1 +// &Force("bus", "dp_xx_ex1_zero", 2, 0); @34 +assign ex1_op0_zero = dp_xx_ex1_zero[0]; +assign ex1_op1_zero = dp_xx_ex1_zero[1]; + +//denormalize number: bit 0 flags operand 0, bit 1 flags operand 1 +// &Force("bus", "dp_xx_ex1_id", 2, 0); @39 +assign ex1_op0_id = dp_xx_ex1_id[0]; +assign ex1_op1_id = dp_xx_ex1_id[1]; + +//cNaN: bit 0 flags operand 0, bit 1 flags operand 1 +// &Force("bus", "dp_xx_ex1_cnan", 2, 0); @44 +assign ex1_op0_cnan = dp_xx_ex1_cnan[0]; +assign ex1_op1_cnan = dp_xx_ex1_cnan[1]; + +//sNaN: bit 0 flags operand 0, bit 1 flags operand 1 +// &Force("bus", "dp_xx_ex1_snan", 2, 0); @49 +assign ex1_op0_snan = dp_xx_ex1_snan[0]; +assign ex1_op1_snan = dp_xx_ex1_snan[1]; + +//qNaN: bit 0 flags operand 0, bit 1 flags operand 1 +// &Force("bus", "dp_xx_ex1_qnan", 2, 0); @54 +assign ex1_op0_qnan = dp_xx_ex1_qnan[0]; +assign ex1_op1_qnan = dp_xx_ex1_qnan[1]; + + +//======================EX1 expt detect===================== +//ex1_id_detect +//any operation is zero +// no input denormalize exception anymore +// +//ex1_nv_detect +//div_nv +// 1.any operation is sNaN +// 2.0/0(include DN flush to zero) +// 3.inf/inf +//sqrt_nv +// 1.any operation is sNaN +// 2.operation sign is 1 && operation is not zero/qNaN +assign ex1_nv = ex1_div && ex1_div_nv || + ex1_sqrt && ex1_sqrt_nv; +//ex1_div_nv +assign ex1_div_nv = ex1_op0_snan || + ex1_op1_snan || + (ex1_op0_tt_zero && ex1_op1_tt_zero)|| + (ex1_op0_inf && ex1_op1_inf); +assign ex1_op0_tt_zero = ex1_op0_zero; +assign ex1_op1_tt_zero = ex1_op1_zero; +//ex1_sqrt_nv +assign ex1_sqrt_nv = ex1_op0_snan || + ex1_op0_sign && + (ex1_op0_norm || + ex1_op0_inf ); + +// This 'norm' also include denorm. 
+assign ex1_op0_norm = !ex1_op0_inf && !ex1_op0_zero && !ex1_op0_snan && !ex1_op0_qnan && !ex1_op0_cnan; +assign ex1_op1_norm = !ex1_op1_inf && !ex1_op1_zero && !ex1_op1_snan && !ex1_op1_qnan && !ex1_op1_cnan; + +//ex1_of_detect +//div_of +// 1.only detect id overflow case +//assign ex1_of = ex1_div && ex1_div_of; +//assign ex1_div_of = ex1_op1_id_fm1 && +// ex1_op0_norm && +// ex1_div_id_of; +// +////ex1_uf_detect +////div_uf +//// 1.only detect id underflow case +//assign ex1_uf = ex1_div && ex1_div_uf; +//assign ex1_div_uf = ex1_op0_id && +// ex1_op1_norm && +// ex1_div_id_uf; +//ex1_dz_detect +//div_dz +// 1.op0 is normal && op1 zero +assign ex1_dz = ex1_div && ex1_div_dz; +assign ex1_div_dz = ex1_op1_tt_zero && ex1_op0_norm; + +//===================special cal result===================== +//ex1 result is zero +//div_zero +// 1.op0 is zero && op1 is normal +// 2.op0 is zero/normal && op1 is inf +//sqrt_zero +// 1.op0 is zero +assign ex1_result_zero = ex1_div_rst_zero && ex1_div || + ex1_sqrt_rst_zero && ex1_sqrt; +assign ex1_div_rst_zero = (ex1_op0_tt_zero && ex1_op1_norm ) || + // (!ex1_expnt0_max && !ex1_op0_cnan && ex1_op1_inf); + (!ex1_op0_inf && !ex1_op0_qnan && !ex1_op0_snan && !ex1_op0_cnan && ex1_op1_inf); +assign ex1_sqrt_rst_zero = ex1_op0_tt_zero; + +//ex1 result is qNaN +//ex1_nv +//div_qnan +// 1.op0 is qnan || op1 is qnan +//sqrt_qnan +// 1.op0 is qnan +assign ex1_result_qnan = ex1_div_rst_qnan && ex1_div || + ex1_sqrt_rst_qnan && ex1_sqrt || + ex1_nv; +assign ex1_div_rst_qnan = ex1_op0_qnan || + ex1_op1_qnan; +assign ex1_sqrt_rst_qnan = ex1_op0_qnan; + +//ex1_rst_default_qnan +//0/0, inf/inf, sqrt negative should get default qNaN +assign ex1_rst_default_qnan = (ex1_div && ex1_op0_zero && ex1_op1_zero) || + (ex1_div && ex1_op0_inf && ex1_op1_inf) || + (ex1_sqrt&& ex1_op0_sign && (ex1_op0_norm || ex1_op0_inf)); + +//ex1 result is inf +//ex1_dz +// +//div_inf +// 1.op0 is inf && op1 is normal/zero +//sqrt_inf +// 1.op0 is inf +assign ex1_result_inf 
= ex1_div_rst_inf && ex1_div || + ex1_sqrt_rst_inf && ex1_sqrt || + ex1_dz ; +// assign ex1_div_rst_inf = ex1_op0_inf && !ex1_expnt1_max && !ex1_op1_cnan; +assign ex1_div_rst_inf = ex1_op0_inf && !ex1_op1_inf && !ex1_op1_qnan && !ex1_op1_snan && !ex1_op1_cnan; +assign ex1_sqrt_rst_inf = ex1_op0_inf && !ex1_op0_sign; + +//ex1 result is lfn +//ex1_of && round result toward not inc 1 +assign ex1_result_lfn = 1'b0; + +//Default_qnan/Standard_qnan Select +assign ex1_op0_is_snan = ex1_op0_snan; +assign ex1_op1_is_snan = ex1_op1_snan && ex1_div; +assign ex1_op0_is_qnan = ex1_op0_qnan; +assign ex1_op1_is_qnan = ex1_op1_qnan && ex1_div; + +// &CombBeg; @169 +always @( ex1_op0_is_snan + or ex1_op0_cnan + or ex1_result_qnan + or ex1_op0_is_qnan + or ex1_rst_default_qnan + or cp0_fpu_xx_dqnan + or ex1_op1_cnan + or ex1_op1_is_qnan + or ex1_op1_is_snan) +begin +if(ex1_rst_default_qnan) +begin + ex1_result_qnan_op0 = 1'b0; + ex1_result_qnan_op1 = 1'b0; + ex1_result_cnan = ex1_result_qnan; +end +else if(ex1_op0_is_snan && cp0_fpu_xx_dqnan) +begin + ex1_result_qnan_op0 = ex1_result_qnan; + ex1_result_qnan_op1 = 1'b0; + ex1_result_cnan = 1'b0; +end +else if(ex1_op1_is_snan && cp0_fpu_xx_dqnan) +begin + ex1_result_qnan_op0 = 1'b0; + ex1_result_qnan_op1 = ex1_result_qnan; + ex1_result_cnan = 1'b0; +end +else if(ex1_op0_is_qnan && cp0_fpu_xx_dqnan) +begin + ex1_result_qnan_op0 = ex1_result_qnan && !ex1_op0_cnan; + ex1_result_qnan_op1 = 1'b0; + ex1_result_cnan = ex1_result_qnan && ex1_op0_cnan; +end +else if(ex1_op1_is_qnan && cp0_fpu_xx_dqnan) +begin + ex1_result_qnan_op0 = 1'b0; + ex1_result_qnan_op1 = ex1_result_qnan && !ex1_op1_cnan; + ex1_result_cnan = ex1_result_qnan && ex1_op1_cnan; +end +else +begin + ex1_result_qnan_op0 = 1'b0; + ex1_result_qnan_op1 = 1'b0; + ex1_result_cnan = ex1_result_qnan; +end +// &CombEnd; @206 +end + + +//Special result should skip SRT logic +assign ex1_srt_skip = ex1_result_zero || + ex1_result_qnan || + ex1_result_lfn || + ex1_result_inf; +// fflags: 
+// NV, DZ, OF, UF, NX +assign ex1_fflags[4:0] = {ex1_nv, ex1_dz, 3'b0}; +// Special Sel[7:0]: +// qnan_src2, qnan_src1, qnan_src0, cnan, lfn, inf, zero, src2 +assign ex1_special_sel[7:0] = {1'b0, ex1_result_qnan_op1, ex1_result_qnan_op0, + ex1_result_cnan, ex1_result_lfn, ex1_result_inf, + ex1_result_zero, 1'b0}; +// Special Sign[3:0] +// lfn, inf, zero, src2 +assign ex1_special_sign[3:0] = {ex1_result_sign, ex1_result_sign, ex1_result_sign, 1'b0}; + +//========================================================== +// Output Signal +//========================================================== +assign fdsu_fpu_ex1_fflags[4:0] = ex1_fflags[4:0]; +assign fdsu_fpu_ex1_special_sel[7:0] = ex1_special_sel[7:0]; +assign fdsu_fpu_ex1_special_sign[3:0] = ex1_special_sign[3:0]; + +// &Force("output", "ex1_op0_norm"); @233 +// &Force("output", "ex1_op1_norm"); @234 + +// &ModuleEnd; @236 +endmodule + + + diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_srt_single.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_srt_single.v new file mode 100644 index 000000000..8d947f8f9 --- /dev/null +++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_srt_single.v @@ -0,0 +1,824 @@ +/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+// pa_fdsu_srt_single: EX2 exponent/overflow/underflow logic plus the iterative SRT remainder, divisor and on-the-fly quotient/root registers, shared by divide and square root.
+// &ModuleBeg; @23
+module pa_fdsu_srt_single(
+  cp0_fpu_icg_en,
+  cp0_yy_clk_en,
+  ex1_divisor,
+  ex1_expnt_adder_op1,
+  ex1_oper_id_frac,
+  ex1_oper_id_frac_f,
+  ex1_pipedown,
+  ex1_pipedown_gate,
+  ex1_remainder,
+  ex1_save_op0,
+  ex1_save_op0_gate,
+  ex2_expnt_adder_op0,
+  ex2_of,
+  ex2_pipe_clk,
+  ex2_pipedown,
+  ex2_potnt_of,
+  ex2_potnt_uf,
+  ex2_result_inf,
+  ex2_result_lfn,
+  ex2_rslt_denorm,
+  ex2_srt_expnt_rst,
+  ex2_srt_first_round,
+  ex2_uf,
+  ex2_uf_srt_skip,
+  ex3_frac_final_rst,
+  ex3_pipedown,
+  fdsu_ex3_id_srt_skip,
+  fdsu_ex3_rem_sign,
+  fdsu_ex3_rem_zero,
+  fdsu_ex3_result_denorm_round_add_num,
+  fdsu_ex4_frac,
+  fdsu_yy_div,
+  fdsu_yy_of_rm_lfn,
+  fdsu_yy_op0_norm,
+  fdsu_yy_op1_norm,
+  fdsu_yy_sqrt,
+  forever_cpuclk,
+  pad_yy_icg_scan_en,
+  srt_remainder_zero,
+  srt_sm_on,
+  total_qt_rt_30
+);
+
+// &Ports; @24
+input         cp0_fpu_icg_en; // module_en of every gated_clk_cell in this module
+input         cp0_yy_clk_en; // global_en of every gated_clk_cell in this module
+input  [23:0] ex1_divisor; // loaded into srt_divisor[23:0] on ex1_pipedown
+input  [12:0] ex1_expnt_adder_op1; // only bits [9:0] are captured, on ex1_pipedown
+input  [51:0] ex1_oper_id_frac; // bits [51:29] are saved into srt_divisor on ex1_save_op0
+input         ex1_pipedown; // loads remainder/divisor and resets the quotient registers
+input         ex1_pipedown_gate; // clock-enable term for the three gated clocks below
+input  [31:0] ex1_remainder; // initial value of srt_remainder
+input         ex1_save_op0; // highest-priority load of srt_divisor
+input         ex1_save_op0_gate; // clock-enable term for srt_div_clk
+input  [9 :0] ex2_expnt_adder_op0; // minuend of the EX2 exponent subtraction
+input         ex2_pipe_clk; // clocks the fdsu_ex3_rem_sign/rem_zero/id_srt_skip registers
+input         ex2_pipedown; // captures EX2 results into the fdsu_ex3_* registers
+input         ex2_srt_first_round; // selects the first-iteration sqrt rules (K = 4'b1010, wider remainder slice)
+input  [25:0] ex3_frac_final_rst; // written into srt_divisor on ex3_pipedown
+input         ex3_pipedown;
+input         fdsu_yy_div;
+input         fdsu_yy_of_rm_lfn;
+input         fdsu_yy_op0_norm;
+input         fdsu_yy_op1_norm;
+input         fdsu_yy_sqrt;
+input         forever_cpuclk; // clk_in of every gated_clk_cell in this module
+input         pad_yy_icg_scan_en;
+input         srt_sm_on; // while high, remainder and quotient registers take their *_next values
+output [51:0] ex1_oper_id_frac_f; // srt_divisor[22:0] followed by 29 zero bits
+output        ex2_of;
+output        ex2_potnt_of;
+output        ex2_potnt_uf;
+output        ex2_result_inf;
+output        ex2_result_lfn;
+output        ex2_rslt_denorm; // same signal as ex2_uf
+output [9 :0] ex2_srt_expnt_rst;
+output        ex2_uf;
+output        ex2_uf_srt_skip;
+output        fdsu_ex3_id_srt_skip;
+output        fdsu_ex3_rem_sign;
+output        fdsu_ex3_rem_zero;
+output [23:0] fdsu_ex3_result_denorm_round_add_num;
+output [25:0] fdsu_ex4_frac; // direct view of srt_divisor[25:0]
+output        srt_remainder_zero;
+output [29:0] total_qt_rt_30; // accumulated quotient/root
+
+// &Regs; @25
+reg    [31:0] cur_rem;
+reg    [7 :0] digit_bound_1;
+reg    [7 :0] digit_bound_2;
+reg    [23:0] ex2_result_denorm_round_add_num;
+reg           fdsu_ex3_id_srt_skip;
+reg           fdsu_ex3_rem_sign;
+reg           fdsu_ex3_rem_zero;
+reg    [23:0] fdsu_ex3_result_denorm_round_add_num;
+reg    [29:0] qt_rt_const_shift_std;
+reg    [7 :0] qtrt_sel_rem;
+reg    [31:0] rem_add1_op1;
+reg    [31:0] rem_add2_op1;
+reg    [25:0] srt_divisor;
+reg    [31:0] srt_remainder;
+reg    [29:0] total_qt_rt_30;
+reg    [29:0] total_qt_rt_30_next;
+reg    [29:0] total_qt_rt_minus_30;
+reg    [29:0] total_qt_rt_minus_30_next;
+
+// &Wires; @26
+wire   [7 :0] bound1_cmp_result;
+wire          bound1_cmp_sign;
+wire   [7 :0] bound2_cmp_result;
+wire          bound2_cmp_sign;
+wire   [3 :0] bound_sel;
+wire          cp0_fpu_icg_en;
+wire          cp0_yy_clk_en;
+wire   [31:0] cur_doub_rem_1;
+wire   [31:0] cur_doub_rem_2;
+wire   [31:0] cur_rem_1;
+wire   [31:0] cur_rem_2;
+wire   [31:0] div_qt_1_rem_add_op1;
+wire   [31:0] div_qt_2_rem_add_op1;
+wire   [31:0] div_qt_r1_rem_add_op1;
+wire   [31:0] div_qt_r2_rem_add_op1;
+wire   [23:0] ex1_divisor;
+wire          ex1_ex2_pipe_clk;
+wire          ex1_ex2_pipe_clk_en;
+wire   [12:0] ex1_expnt_adder_op1;
+wire   [51:0] ex1_oper_id_frac;
+wire   [51:0] ex1_oper_id_frac_f;
+wire          ex1_pipedown;
+wire          ex1_pipedown_gate;
+wire   [31:0] ex1_remainder;
+wire          ex1_save_op0;
+wire          ex1_save_op0_gate;
+wire          ex2_div_of;
+wire          ex2_div_uf;
+wire   [9 :0] ex2_expnt_adder_op0;
+wire   [9 :0] ex2_expnt_adder_op1;
+wire          ex2_expnt_of;
+wire   [9 :0] ex2_expnt_result;
+wire          ex2_expnt_uf;
+wire          ex2_id_nor_srt_skip;
+wire          ex2_of;
+wire          ex2_of_plus;
+wire          ex2_pipe_clk;
+wire          ex2_pipedown;
+wire          ex2_potnt_of;
+wire          ex2_potnt_of_pre;
+wire          ex2_potnt_uf;
+wire          ex2_potnt_uf_pre;
+wire          ex2_result_inf;
+wire          ex2_result_lfn;
+wire          ex2_rslt_denorm;
+wire   [9 :0] ex2_sqrt_expnt_result;
+wire   [9 :0] ex2_srt_expnt_rst;
+wire          ex2_srt_first_round;
+wire          ex2_uf;
+wire          ex2_uf_plus;
+wire          ex2_uf_srt_skip;
+wire   [25:0] ex3_frac_final_rst;
+wire          ex3_pipedown;
+wire          fdsu_ex2_div;
+wire   [9 :0] fdsu_ex2_expnt_rst;
+wire          fdsu_ex2_of_rm_lfn;
+wire          fdsu_ex2_op0_norm;
+wire          fdsu_ex2_op1_norm;
+wire          fdsu_ex2_result_lfn;
+wire          fdsu_ex2_sqrt;
+wire   [25:0] fdsu_ex4_frac;
+wire          fdsu_yy_div;
+wire          fdsu_yy_of_rm_lfn;
+wire          fdsu_yy_op0_norm;
+wire          fdsu_yy_op1_norm;
+wire          fdsu_yy_sqrt;
+wire          forever_cpuclk;
+wire          pad_yy_icg_scan_en;
+wire          qt_clk;
+wire          qt_clk_en;
+wire   [29:0] qt_rt_const_pre_sel_q1;
+wire   [29:0] qt_rt_const_pre_sel_q2;
+wire   [29:0] qt_rt_const_q1;
+wire   [29:0] qt_rt_const_q2;
+wire   [29:0] qt_rt_const_q3;
+wire   [29:0] qt_rt_const_shift_std_next;
+wire   [29:0] qt_rt_mins_const_pre_sel_q1;
+wire   [29:0] qt_rt_mins_const_pre_sel_q2;
+wire          rem_sign;
+wire   [31:0] sqrt_qt_1_rem_add_op1;
+wire   [31:0] sqrt_qt_2_rem_add_op1;
+wire   [31:0] sqrt_qt_r1_rem_add_op1;
+wire   [31:0] sqrt_qt_r2_rem_add_op1;
+wire          srt_div_clk;
+wire          srt_div_clk_en;
+wire   [31:0] srt_remainder_nxt;
+wire   [31:0] srt_remainder_shift;
+wire          srt_remainder_sign;
+wire          srt_remainder_zero;
+wire          srt_sm_on;
+wire   [29:0] total_qt_rt_pre_sel;
+
+
+assign fdsu_ex2_div = fdsu_yy_div; // local EX2 aliases of the fdsu_yy_* inputs
+assign fdsu_ex2_sqrt = fdsu_yy_sqrt;
+assign fdsu_ex2_op0_norm = fdsu_yy_op0_norm;
+assign fdsu_ex2_op1_norm = fdsu_yy_op1_norm;
+assign fdsu_ex2_of_rm_lfn = fdsu_yy_of_rm_lfn;
+assign fdsu_ex2_result_lfn = 1'b0; // tied low, so ex2_result_lfn depends only on the overflow term
+
+//==========================================================
+//                    EX2 Expnt Generate
+//==========================================================
+//exponent of op0 minus exponent of op1 (10-bit two's complement result)
+assign ex2_expnt_result[9:0] = ex2_expnt_adder_op0[9:0] -
+                               ex2_expnt_adder_op1[9:0];
+
+//===================sqrt exponent prepare==================
+//sqrt exponent prepare
+//after the exponent subtraction, divide the exponent by 2 (arithmetic shift right by one, sign bit replicated)
+assign ex2_sqrt_expnt_result[9:0] = {ex2_expnt_result[9],
+                                     ex2_expnt_result[9:1]};
+
+assign ex2_srt_expnt_rst[9:0] = (fdsu_ex2_sqrt)
+                              ? ex2_sqrt_expnt_result[9:0]
+                              : ex2_expnt_result[9:0];
+// &Force("output", "ex2_srt_expnt_rst"); &Force("bus", "ex2_srt_expnt_rst", 9, 0); @51
+assign fdsu_ex2_expnt_rst[9:0] = ex2_srt_expnt_rst[9:0];
+
+
+//====================EX2 Expt info=========================
+//EX1 only detect of/uf under id condition
+//EX2 will deal with other condition
+
+//When input is normal, overflow when E1-E2 > 128/1024
+assign ex2_expnt_of = ~fdsu_ex2_expnt_rst[9] && (fdsu_ex2_expnt_rst[8]
+                      || (fdsu_ex2_expnt_rst[7] &&
+                      |fdsu_ex2_expnt_rst[6:0]));
+//potential overflow when E1-E2 = 128/1024
+assign ex2_potnt_of_pre = ~fdsu_ex2_expnt_rst[9] &&
+                          ~fdsu_ex2_expnt_rst[8] &&
+                          fdsu_ex2_expnt_rst[7] &&
+                          ~|fdsu_ex2_expnt_rst[6:0];
+assign ex2_potnt_of = ex2_potnt_of_pre &&
+                      fdsu_ex2_op0_norm &&
+                      fdsu_ex2_op1_norm &&
+                      fdsu_ex2_div;
+
+//When input is normal, underflow when E1-E2 <= -127/-1023
+assign ex2_expnt_uf = fdsu_ex2_expnt_rst[9] &&(fdsu_ex2_expnt_rst[8:0] <= 9'h181); // i.e. result <= 10'h381 = -127
+//potential underflow when E1-E2 = -126/-1022
+assign ex2_potnt_uf_pre = &fdsu_ex2_expnt_rst[9:7] &&
+                          ~|fdsu_ex2_expnt_rst[6:2] &&
+                          fdsu_ex2_expnt_rst[1] &&
+                          !fdsu_ex2_expnt_rst[0];
+assign ex2_potnt_uf = (ex2_potnt_uf_pre &&
+                      fdsu_ex2_op0_norm &&
+                      fdsu_ex2_op1_norm &&
+                      fdsu_ex2_div) ||
+                      (ex2_potnt_uf_pre &&
+                      fdsu_ex2_op0_norm); // NOTE(review): this term subsumes the first one, so the OR reduces to pre && op0_norm - confirm intended
+
+//========================EX2 Overflow======================
+//ex2 overflow when
+//  1.op0 & op1 both norm && expnt overflow
+//  2.ex1_id_of
+// &Force("output","ex2_of"); @91
+assign ex2_of = ex2_of_plus;
+assign ex2_of_plus = ex2_div_of && fdsu_ex2_div;
+assign ex2_div_of = fdsu_ex2_op0_norm &&
+                    fdsu_ex2_op1_norm &&
+                    ex2_expnt_of;
+
+//=======================EX2 Underflow======================
+//ex2 underflow when
+//  1.op0 & op1 both norm && expnt underflow
+//  2.ex1_id_uf
+//  and detect when to skip the srt, here, we have further optimization
+assign ex2_uf = ex2_uf_plus;
+assign ex2_uf_plus = ex2_div_uf && fdsu_ex2_div;
+assign ex2_div_uf = fdsu_ex2_op0_norm &&
+                    fdsu_ex2_op1_norm &&
+                    ex2_expnt_uf;
+assign ex2_id_nor_srt_skip = fdsu_ex2_expnt_rst[9]
+                             && (fdsu_ex2_expnt_rst[8:0]<9'h16a); // negative result below 10'h36a (-150), i.e. below every entry of the table that follows
+assign ex2_uf_srt_skip = ex2_id_nor_srt_skip;
+assign ex2_rslt_denorm = ex2_uf;
+//===============ex2 round prepare for denormal round====== (one-hot rounding increment chosen by the result exponent; first comment column is the signed exponent)
+// &CombBeg; @113
+always @( fdsu_ex2_expnt_rst[9:0])
+begin
+case(fdsu_ex2_expnt_rst[9:0])
+  10'h382:ex2_result_denorm_round_add_num[23:0] = 24'h1; //-126 1
+  10'h381:ex2_result_denorm_round_add_num[23:0] = 24'h2; //-127 0
+  10'h380:ex2_result_denorm_round_add_num[23:0] = 24'h4; //-128 -1
+  10'h37f:ex2_result_denorm_round_add_num[23:0] = 24'h8; //-129 -2
+  10'h37e:ex2_result_denorm_round_add_num[23:0] = 24'h10; //-130 -3
+  10'h37d:ex2_result_denorm_round_add_num[23:0] = 24'h20; //-131 -4
+  10'h37c:ex2_result_denorm_round_add_num[23:0] = 24'h40; //-132 -5
+  10'h37b:ex2_result_denorm_round_add_num[23:0] = 24'h80; //-133 -6
+  10'h37a:ex2_result_denorm_round_add_num[23:0] = 24'h100; //-134 -7
+  10'h379:ex2_result_denorm_round_add_num[23:0] = 24'h200; //-135 -8
+  10'h378:ex2_result_denorm_round_add_num[23:0] = 24'h400; //-136 -9
+  10'h377:ex2_result_denorm_round_add_num[23:0] = 24'h800; //-137 -10
+  10'h376:ex2_result_denorm_round_add_num[23:0] = 24'h1000; //-138 -11
+  10'h375:ex2_result_denorm_round_add_num[23:0] = 24'h2000; //-139 -12
+  10'h374:ex2_result_denorm_round_add_num[23:0] = 24'h4000; //-140 -13
+  10'h373:ex2_result_denorm_round_add_num[23:0] = 24'h8000; // -141 -14
+  10'h372:ex2_result_denorm_round_add_num[23:0] = 24'h10000;//-142 -15
+  10'h371:ex2_result_denorm_round_add_num[23:0] = 24'h20000;//-143 -16
+  10'h370:ex2_result_denorm_round_add_num[23:0] = 24'h40000; //-144 -17
+  10'h36f:ex2_result_denorm_round_add_num[23:0] = 24'h80000; //-145 -18
+  10'h36e:ex2_result_denorm_round_add_num[23:0] = 24'h100000; //-146 -19
+  10'h36d:ex2_result_denorm_round_add_num[23:0] = 24'h200000; //-147 -20
+  10'h36c:ex2_result_denorm_round_add_num[23:0] = 24'h400000; //-148 -21
+  10'h36b:ex2_result_denorm_round_add_num[23:0] = 24'h800000; //-149 -22
+  default: ex2_result_denorm_round_add_num[23:0] = 24'h0; // -23
+endcase
+// &CombEnd; @141
+end
+
+//===================special result========================
+assign ex2_result_inf = ex2_of_plus && !fdsu_ex2_of_rm_lfn;
+assign ex2_result_lfn = fdsu_ex2_result_lfn ||
+                        ex2_of_plus && fdsu_ex2_of_rm_lfn;
+
+
+
+//====================Pipe to EX3=========================== (this register is shared: it first holds the op1 exponent, then the denormal rounding increment)
+always @(posedge ex1_ex2_pipe_clk)
+begin
+  if(ex1_pipedown) // entering EX2: park ex1_expnt_adder_op1[9:0] in the low ten bits
+  begin
+    fdsu_ex3_result_denorm_round_add_num[23:0]
+      <= {14'b0, ex1_expnt_adder_op1[9:0]};
+  end
+  else if(ex2_pipedown) // leaving EX2: replace it with the rounding increment selected above
+  begin
+    fdsu_ex3_result_denorm_round_add_num[23:0]
+      <= ex2_result_denorm_round_add_num[23:0];
+  end
+  else // explicit hold
+  begin
+    fdsu_ex3_result_denorm_round_add_num[23:0]
+      <= fdsu_ex3_result_denorm_round_add_num[23:0];
+  end
+end
+assign ex2_expnt_adder_op1 = fdsu_ex3_result_denorm_round_add_num[9:0]; // reads back the op1 exponent parked by the ex1_pipedown branch
+// &Force("bus", "ex1_expnt_adder_op1", 12, 0); @193
+
+assign ex1_ex2_pipe_clk_en = ex1_pipedown_gate || ex2_pipedown;
+// &Instance("gated_clk_cell", "x_ex1_ex2_pipe_clk"); @196
+gated_clk_cell x_ex1_ex2_pipe_clk (
+  .clk_in (forever_cpuclk ),
+  .clk_out (ex1_ex2_pipe_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (ex1_ex2_pipe_clk_en),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en )
+);
+
+// &Connect(.clk_in (forever_cpuclk), @197
+//          .external_en (1'b0), @198
+//          .global_en (cp0_yy_clk_en), @199
+//          .module_en (cp0_fpu_icg_en), @200
+//          .local_en (ex1_ex2_pipe_clk_en), @201
+//          .clk_out (ex1_ex2_pipe_clk)); @202
+
+always @(posedge ex2_pipe_clk) // EX2 -> EX3 capture of remainder sign/zero and the skip flag
+begin
+  if(ex2_pipedown)
+  begin
+    fdsu_ex3_rem_sign <= srt_remainder_sign;
+    fdsu_ex3_rem_zero <= srt_remainder_zero;
+    fdsu_ex3_id_srt_skip <= ex2_id_nor_srt_skip;
+  end
+  else // explicit hold
+  begin
+    fdsu_ex3_rem_sign <= fdsu_ex3_rem_sign;
+    fdsu_ex3_rem_zero <= fdsu_ex3_rem_zero;
+    fdsu_ex3_id_srt_skip <= fdsu_ex3_id_srt_skip;
+  end
+end
+
+// &Force("output","fdsu_ex3_rem_sign"); @243
+// &Force("output","fdsu_ex3_rem_zero"); @244
+// &Force("output","fdsu_ex3_result_denorm_round_add_num"); @245
+// &Force("output","fdsu_ex3_id_srt_skip"); @246
+
+//==========================================================
+//    SRT Remainder & Divisor for Quotient/Root Generate
+//==========================================================
+
+//===================Remainder Generate=====================
+//gate clk (the dedicated remainder clock below is commented out; srt_remainder is clocked by qt_clk instead)
+// &Instance("gated_clk_cell","x_srt_rem_clk");
+// // &Connect( .clk_in (forever_cpuclk), @255
+// //           .clk_out (srt_rem_clk),//Out Clock @256
+// //           .external_en (1'b0), @257
+// //           .global_en (cp0_yy_clk_en), @258
+// //           .local_en (srt_rem_clk_en),//Local Condition @259
+// //           .module_en (cp0_fpu_icg_en) @260
+// //         ); @261
+// assign srt_rem_clk_en = ex1_pipedown ||
+//                         srt_sm_on;
+
+always @(posedge qt_clk)
+begin
+  if (ex1_pipedown) // load the initial remainder
+    srt_remainder[31:0] <= ex1_remainder[31:0];
+  else if (srt_sm_on) // one SRT iteration
+    srt_remainder[31:0] <= srt_remainder_nxt[31:0];
+  else // explicit hold
+    srt_remainder[31:0] <= srt_remainder[31:0];
+end
+
+//=====================Divisor Generate=====================
+//gate clk
+// &Instance("gated_clk_cell","x_srt_div_clk"); @291
+gated_clk_cell x_srt_div_clk (
+  .clk_in (forever_cpuclk ),
+  .clk_out (srt_div_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (srt_div_clk_en ),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+// &Connect( .clk_in (forever_cpuclk), @292
+//           .clk_out (srt_div_clk),//Out Clock @293
+//           .external_en (1'b0), @294
+//           .global_en (cp0_yy_clk_en), @295
+//           .local_en (srt_div_clk_en),//Local Condition @296
+//           .module_en (cp0_fpu_icg_en) @297
+//         ); @298
+assign srt_div_clk_en = ex1_pipedown_gate
+                     || ex1_save_op0_gate
+                     || ex3_pipedown;
+// final_rst saved in srt_divisor.
+// srt_divisor is 26 bits, final_rst is 24 bits. NOTE(review): ex3_frac_final_rst is declared [25:0] here and all 26 bits are stored - confirm which width is meant
+always @(posedge srt_div_clk) // srt_divisor is reused for three payloads, in priority order
+begin
+  if (ex1_save_op0) // 1) the top 23 bits of ex1_oper_id_frac
+    srt_divisor[25:0] <= {3'b0, {ex1_oper_id_frac[51:29]}};
+  else if (ex1_pipedown) // 2) the divisor for the SRT iterations
+    srt_divisor[25:0] <= {2'b0, ex1_divisor[23:0]};
+  else if (ex3_pipedown) // 3) ex3_frac_final_rst, exposed afterwards as fdsu_ex4_frac
+    srt_divisor[25:0] <= ex3_frac_final_rst[25:0];
+  else // explicit hold
+    srt_divisor[25:0] <= srt_divisor[25:0];
+end
+assign ex1_oper_id_frac_f[51:0] = {srt_divisor[22:0], 29'b0}; // returns the fraction saved by ex1_save_op0, left-aligned
+// &Force("bus", "ex1_oper_id_frac", 51, 0); @332
+assign fdsu_ex4_frac[25:0] = srt_divisor[25:0];
+
+//=======================Bound Select=======================
+//---------------------------------------+
+// K   | 8 | 9 | 10| 11| 12| 13| 14|15,16|
+//---------------------------------------+
+//32S1 | 7 | 7 | 8 | 9 | 9 | 10| 11| 12  |
+//---------------------------------------+
+//32S2 | 25| 28| 31| 33| 36| 39| 41| 47  |
+//---------------------------------------+
+
+//bound_sel[3:0]
+//For div, use divisor high four bit as K
+//For sqrt, use 2qi high four bit as K next round and
+//          use 1010 as K first round
+assign bound_sel[3:0] = (fdsu_ex2_div)
+                      ? srt_divisor[23:20]
+                      : (ex2_srt_first_round)
+                      ? 4'b1010
+                      : total_qt_rt_30[28:25];
+//Select bound as look up table
+//   K = bound_sel[3:0]
+//32S1 = digit_bound_1[7:0]
+//32s2 = digit_bound_2[7:0]
+// &CombBeg; @357 (the table stores the NEGATED bounds as 8-bit two's complement so the compare below is a plain add)
+always @( bound_sel[3:0])
+begin
+case(bound_sel[3:0])
+4'b0000: //when first iteration get "10", choose k=16
+  begin
+    digit_bound_1[7:0] = 8'b11110100;//-12
+    digit_bound_2[7:0] = 8'b11010001;//-47
+  end
+4'b1000:
+  begin
+    digit_bound_1[7:0] = 8'b11111001;//-7
+    digit_bound_2[7:0] = 8'b11100111;//-25
+  end
+4'b1001:
+  begin
+    digit_bound_1[7:0] = 8'b11111001;//-7
+    digit_bound_2[7:0] = 8'b11100100;//-28
+  end
+4'b1010:
+  begin
+    digit_bound_1[7:0] = 8'b11111000;//-8
+    digit_bound_2[7:0] = 8'b11100001;//-31
+  end
+4'b1011:
+  begin
+    digit_bound_1[7:0] = 8'b11110111;//-9
+    digit_bound_2[7:0] = 8'b11011111;//-33
+  end
+4'b1100:
+  begin
+    digit_bound_1[7:0] = 8'b11110111;//-9
+    digit_bound_2[7:0] = 8'b11011100;//-36
+  end
+4'b1101:
+  begin
+    digit_bound_1[7:0] = 8'b11110110;//-10
+    digit_bound_2[7:0] = 8'b11011001;//-39
+  end
+4'b1110:
+  begin
+    digit_bound_1[7:0] = 8'b11110101;//-11
+    digit_bound_2[7:0] = 8'b11010111;//-41
+  end
+4'b1111:
+  begin
+    digit_bound_1[7:0] = 8'b11110100;//-12
+    digit_bound_2[7:0] = 8'b11010001;//-47
+  end
+default: // any other K falls back to the K=8 row
+  begin
+    digit_bound_1[7:0] = 8'b11111001;//-7
+    digit_bound_2[7:0] = 8'b11100111;//-25
+  end
+endcase
+// &CombEnd; @410
+end
+
+//==============Prepare for quotient generate=============== (sel_rem + (-bound): a set sign bit means sel_rem is below that bound)
+assign bound1_cmp_result[7:0] = qtrt_sel_rem[7:0] + digit_bound_1[7:0];
+assign bound2_cmp_result[7:0] = qtrt_sel_rem[7:0] + digit_bound_2[7:0];
+assign bound1_cmp_sign = bound1_cmp_result[7];
+assign bound2_cmp_sign = bound2_cmp_result[7];
+assign rem_sign = srt_remainder[29];
+
+//qtrt_sel_rem is used to select quotient
+//Only when sqrt first round use 8R0 select quotient(special rule)
+//4R0 is used to select quotient on other condition
+//For negative remainder, we use ~rem not (~rem + 1)
+//Because bound1 <= rem < bound2, when positive rem
+//       -bound2 <= rem < -bound1, when negative rem
+//Thus    bound1 < -rem <= bound2, when negative rem
+//Thus    bound1 <= -rem-1 < bound2, when negative rem
+//Thus    bound1 <= ~rem < bound2, when negative rem
+//srt_remainder[29] used as sign bit
+// &CombBeg; @429
+always @( ex2_srt_first_round
+       or fdsu_ex2_sqrt
+       or srt_remainder[29:21])
+begin
+if(ex2_srt_first_round && fdsu_ex2_sqrt)
+  qtrt_sel_rem[7:0] = {srt_remainder[29], srt_remainder[27:21]};
+else
+  qtrt_sel_rem[7:0] = srt_remainder[29] ? ~srt_remainder[29:22]
+                                        : srt_remainder[29:22];
+// &CombEnd; @435
+end
+
+//==========================================================
+//     on fly round method to generate total quotient
+//==========================================================
+//gate clk
+// &Instance("gated_clk_cell","x_qt_clk"); @441
+gated_clk_cell x_qt_clk (
+  .clk_in (forever_cpuclk ),
+  .clk_out (qt_clk ),
+  .external_en (1'b0 ),
+  .global_en (cp0_yy_clk_en ),
+  .local_en (qt_clk_en ),
+  .module_en (cp0_fpu_icg_en ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+// &Connect( .clk_in (forever_cpuclk), @442
+//           .clk_out (qt_clk),//Out Clock @443
+//           .external_en (1'b0), @444
+//           .global_en (cp0_yy_clk_en), @445
+//           .local_en (qt_clk_en),//Local Condition @446
+//           .module_en (cp0_fpu_icg_en) @447
+//         ); @448
+assign qt_clk_en = srt_sm_on ||
+                   ex1_pipedown_gate;
+
+//qt_rt_const_shift_std[29:0] is const data for on fly round
+//  which is used to record the times of round
+//
+//total_qt_rt[29:0] is total quotient
+//total_qt_rt_minus[29:0] is total quotient minus
+//  which is used to generate quotient rapidly
+always @(posedge qt_clk)
+begin
+  if(ex1_pipedown) // restart: one-hot marker at bit 28, both accumulators cleared
+  begin
+    qt_rt_const_shift_std[29:0] <= {1'b0,1'b1,28'b0};
+    total_qt_rt_30[29:0] <= 30'b0;
+    total_qt_rt_minus_30[29:0] <= 30'b0;
+  end
+  else if(srt_sm_on) // one iteration: take the combinational *_next values
+  begin
+    qt_rt_const_shift_std[29:0] <= qt_rt_const_shift_std_next[29:0];
+    total_qt_rt_30[29:0] <= total_qt_rt_30_next[29:0];
+    total_qt_rt_minus_30[29:0] <= total_qt_rt_minus_30_next[29:0];
+  end
+  else // explicit hold
+  begin
+    qt_rt_const_shift_std[29:0] <= qt_rt_const_shift_std[29:0];
+    total_qt_rt_30[29:0] <= total_qt_rt_30[29:0];
+    total_qt_rt_minus_30[29:0] <= total_qt_rt_minus_30[29:0];
+  end
+end
+// &Force("output","total_qt_rt_30"); @508
+
+//qt_rt_const_q1/q2/q3 for shift 1/2/3 in
+assign qt_rt_const_q1[29:0] = qt_rt_const_shift_std[29:0];
+assign qt_rt_const_q2[29:0] = {qt_rt_const_shift_std[28:0],1'b0};
+assign qt_rt_const_q3[29:0] = qt_rt_const_q1[29:0] |
+                              qt_rt_const_q2[29:0];
+//qt_rt_const update value (the marker moves right by two bit positions per iteration)
+assign qt_rt_const_shift_std_next[29:0] = {2'b0, qt_rt_const_shift_std[29:2]};
+
+//========total_qt_rt & total_qt_rt_minus update value======
+//q(i+1) is the total quotient/root after the (i+1) digit
+//is calculated
+//            q(i+1)            qm(i+1)
+//d(i+1)=-2   qm(i)+2*shift     qm(i)+1*shift
+//d(i+1)=-1   qm(i)+3*shift     qm(i)+2*shift
+//d(i+1)=0    q(i)              qm(i)+3*shift
+//d(i+1)=1    q(i)+1*shift      q(i)
+//d(i+1)=2    q(i)+2*shift      q(i)+1*shift
+//Note:
+//shift = 4^(-i-1), qm(i+1)=q(i+1)-shift
+
+//pre select for quotient
+assign total_qt_rt_pre_sel[29:0] = (rem_sign) ?
+                                   total_qt_rt_minus_30[29:0] :
+                                   total_qt_rt_30[29:0];
+//when the quotient is 2 or -2
+assign qt_rt_const_pre_sel_q2[29:0] = qt_rt_const_q2[29:0];
+assign qt_rt_mins_const_pre_sel_q2[29:0] = qt_rt_const_q1[29:0];
+//when the quotient is 1 or -1
+assign qt_rt_const_pre_sel_q1[29:0] = (rem_sign) ?
+                                      qt_rt_const_q3[29:0] ://-1
+                                      qt_rt_const_q1[29:0]; //1
+assign qt_rt_mins_const_pre_sel_q1[29:0] = (rem_sign) ?
+                                           qt_rt_const_q2[29:0] : //-1
+                                           30'b0;
+
+//After bound compare, the final selection
+// &CombBeg; @546
+always @( qt_rt_const_q3[29:0]
+       or qt_rt_mins_const_pre_sel_q1[29:0]
+       or bound1_cmp_sign
+       or total_qt_rt_30[29:0]
+       or qt_rt_mins_const_pre_sel_q2[29:0]
+       or total_qt_rt_minus_30[29:0]
+       or bound2_cmp_sign
+       or qt_rt_const_pre_sel_q2[29:0]
+       or qt_rt_const_pre_sel_q1[29:0]
+       or total_qt_rt_pre_sel[29:0])
+begin
+casez({bound1_cmp_sign,bound2_cmp_sign})
+  2'b00:// the quotient is -2 or 2
+  begin
+    total_qt_rt_30_next[29:0] = total_qt_rt_pre_sel[29:0] |
+                                qt_rt_const_pre_sel_q2[29:0];
+    total_qt_rt_minus_30_next[29:0] = total_qt_rt_pre_sel[29:0] |
+                                      qt_rt_mins_const_pre_sel_q2[29:0];
+  end
+  2'b01:// quotient is -1 or 1
+  begin
+    total_qt_rt_30_next[29:0] = total_qt_rt_pre_sel[29:0] |
+                                qt_rt_const_pre_sel_q1[29:0];
+    total_qt_rt_minus_30_next[29:0] = total_qt_rt_pre_sel[29:0] |
+                                      qt_rt_mins_const_pre_sel_q1[29:0];
+  end
+  2'b1?: // quotient is 0
+  begin
+    total_qt_rt_30_next[29:0] = total_qt_rt_30[29:0];
+    total_qt_rt_minus_30_next[29:0] = total_qt_rt_minus_30[29:0] |
+                                      qt_rt_const_q3[29:0];
+  end
+  default: // only reachable when a compare sign is x/z
+  begin
+    total_qt_rt_30_next[29:0] = 30'b0;
+    total_qt_rt_minus_30_next[29:0] = 30'b0;
+  end
+endcase
+// &CombEnd; @574
+end
+
+//==========================================================
+//      on fly round method to generate cur remainder
+//==========================================================
+//Division remainder add value (positive-digit operands are bitwise-inverted; the +1 is supplied by the ~rem_sign term of the adders below)
+//Quotient digit 1
+assign div_qt_1_rem_add_op1[31:0] = ~{3'b0,srt_divisor[23:0],5'b0};
+//Quotient digit 2
+assign div_qt_2_rem_add_op1[31:0] = ~{2'b0,srt_divisor[23:0],6'b0};
+//Quotient digit -1
+assign div_qt_r1_rem_add_op1[31:0] = {3'b0,srt_divisor[23:0],5'b0};
+//Quotient digit -2
+assign div_qt_r2_rem_add_op1[31:0] = {2'b0,srt_divisor[23:0],6'b0};
+
+//Sqrt remainder add value op1
+//Quotient digit 1
+assign sqrt_qt_1_rem_add_op1[31:0] = ~({2'b0,total_qt_rt_30[29:0]} |
+                                       {3'b0,qt_rt_const_q1[29:1]});
+//Quotient digit 2
+assign sqrt_qt_2_rem_add_op1[31:0] = ~({1'b0,total_qt_rt_30[29:0],1'b0} |
+                                       {1'b0,qt_rt_const_q1[29:0],1'b0});
+//Quotient digit -1
+assign sqrt_qt_r1_rem_add_op1[31:0] = {2'b0,total_qt_rt_minus_30[29:0]} |
+                                      {1'b0,qt_rt_const_q1[29:0],1'b0} |
+                                      {2'b0,qt_rt_const_q1[29:0]} |
+                                      {3'b0,qt_rt_const_q1[29:1]};
+//Quotient digit -2
+assign sqrt_qt_r2_rem_add_op1[31:0] = {1'b0,
+                                      total_qt_rt_minus_30[29:0],1'b0} |
+                                      {qt_rt_const_q1[29:0],2'b0} |
+                                      {1'b0,qt_rt_const_q1[29:0],1'b0};
+//Remainder Adder select logic
+// &CombBeg; @607
+always @( div_qt_2_rem_add_op1[31:0]
+       or sqrt_qt_r2_rem_add_op1[31:0]
+       or sqrt_qt_r1_rem_add_op1[31:0]
+       or rem_sign
+       or div_qt_r2_rem_add_op1[31:0]
+       or div_qt_1_rem_add_op1[31:0]
+       or sqrt_qt_2_rem_add_op1[31:0]
+       or fdsu_ex2_sqrt
+       or div_qt_r1_rem_add_op1[31:0]
+       or sqrt_qt_1_rem_add_op1[31:0])
+begin
+case({rem_sign,fdsu_ex2_sqrt})
+  2'b01: // remainder non-negative, sqrt
+  begin
+    rem_add1_op1[31:0] = sqrt_qt_1_rem_add_op1[31:0];
+    rem_add2_op1[31:0] = sqrt_qt_2_rem_add_op1[31:0];
+  end
+  2'b00: // remainder non-negative, not sqrt
+  begin
+    rem_add1_op1[31:0] = div_qt_1_rem_add_op1[31:0];
+    rem_add2_op1[31:0] = div_qt_2_rem_add_op1[31:0];
+  end
+  2'b11: // remainder negative, sqrt
+  begin
+    rem_add1_op1[31:0] = sqrt_qt_r1_rem_add_op1[31:0];
+    rem_add2_op1[31:0] = sqrt_qt_r2_rem_add_op1[31:0];
+  end
+  2'b10: // remainder negative, not sqrt
+  begin
+    rem_add1_op1[31:0] = div_qt_r1_rem_add_op1[31:0];
+    rem_add2_op1[31:0] = div_qt_r2_rem_add_op1[31:0];
+  end
+  default : // only reachable when a select bit is x/z
+  begin
+    rem_add1_op1[31:0] = 32'b0;
+    rem_add2_op1[31:0] = 32'b0;
+  end
+  endcase
+// &CombEnd; @635
+end
+assign srt_remainder_shift[31:0] = {srt_remainder[31],
+                                    srt_remainder[28:0],2'b0};
+//Remainder add (the {31'b0, ~rem_sign} term is the +1 that completes the subtraction when the remainder is non-negative)
+assign cur_doub_rem_1[31:0] = srt_remainder_shift[31:0] +
+                              rem_add1_op1[31:0] +
+                              {31'b0, ~rem_sign};
+assign cur_doub_rem_2[31:0] = srt_remainder_shift[31:0] +
+                              rem_add2_op1[31:0] +
+                              {31'b0, ~rem_sign};
+assign cur_rem_1[31:0] = cur_doub_rem_1[31:0];
+assign cur_rem_2[31:0] = cur_doub_rem_2[31:0];
+//Generate srt remainder update value
+// &CombBeg; @648
+always @( cur_rem_2[31:0]
+       or bound1_cmp_sign
+       or srt_remainder_shift[31:0]
+       or bound2_cmp_sign
+       or cur_rem_1[31:0])
+begin
+case({bound1_cmp_sign,bound2_cmp_sign})
+  2'b00: cur_rem[31:0] = cur_rem_2[31:0]; //+-2
+  2'b01: cur_rem[31:0] = cur_rem_1[31:0]; //+-1
+  default: cur_rem[31:0] = srt_remainder_shift[31:0]; //0
+endcase
+// &CombEnd; @654
+end
+assign srt_remainder_nxt[31:0] = cur_rem[31:0];
+
+//Remainder is zero signal in EX3
+assign srt_remainder_zero = ~|srt_remainder[31:0];
+// &Force("output","srt_remainder_zero"); @659
+assign srt_remainder_sign = srt_remainder[31]; // NOTE(review): digit selection above uses bit 29 as the sign, this uses bit 31 - confirm both are intended
+
+// &Force("output", "ex2_uf"); @662
+// &ModuleEnd; @663
+endmodule
+
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_top.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_top.v
new file mode 100644
index 000000000..00ba00a50
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_top.v
@@ -0,0 +1,461 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fdsu_top: structural wrapper that wires together the special, prepare, srt_single, round_single, pack_single and ctrl sub-blocks; it declares no registers and contains no logic of its own.
+// &ModuleBeg; @23
+module pa_fdsu_top(
+  cp0_fpu_icg_en,
+  cp0_fpu_xx_dqnan,
+  cp0_yy_clk_en,
+  cpurst_b,
+  ctrl_fdsu_ex1_sel,
+  ctrl_xx_ex1_cmplt_dp,
+  ctrl_xx_ex1_inst_vld,
+  ctrl_xx_ex1_stall,
+  ctrl_xx_ex1_warm_up,
+  ctrl_xx_ex2_warm_up,
+  ctrl_xx_ex3_warm_up,
+  dp_xx_ex1_cnan,
+  dp_xx_ex1_id,
+  dp_xx_ex1_inf,
+  dp_xx_ex1_qnan,
+  dp_xx_ex1_rm,
+  dp_xx_ex1_snan,
+  dp_xx_ex1_zero,
+  fdsu_fpu_debug_info,
+  fdsu_fpu_ex1_cmplt,
+  fdsu_fpu_ex1_cmplt_dp,
+  fdsu_fpu_ex1_fflags,
+  fdsu_fpu_ex1_special_sel,
+  fdsu_fpu_ex1_special_sign,
+  fdsu_fpu_ex1_stall,
+  fdsu_fpu_no_op,
+  fdsu_frbus_data,
+  fdsu_frbus_fflags,
+  fdsu_frbus_freg,
+  fdsu_frbus_wb_vld,
+  forever_cpuclk,
+  frbus_fdsu_wb_grant,
+  idu_fpu_ex1_dst_freg,
+  idu_fpu_ex1_eu_sel,
+  idu_fpu_ex1_func,
+  idu_fpu_ex1_srcf0,
+  idu_fpu_ex1_srcf1,
+  pad_yy_icg_scan_en,
+  rtu_xx_ex1_cancel,
+  rtu_xx_ex2_cancel,
+  rtu_yy_xx_async_flush,
+  rtu_yy_xx_flush
+);
+
+// &Ports; @24
+input         cp0_fpu_icg_en; // fans out to srt, round and ctrl
+input         cp0_fpu_xx_dqnan; // used only by x_pa_fdsu_special
+input         cp0_yy_clk_en; // fans out to srt, round and ctrl
+input         cpurst_b; // used only by x_pa_fdsu_ctrl
+input         ctrl_fdsu_ex1_sel;
+input         ctrl_xx_ex1_cmplt_dp;
+input         ctrl_xx_ex1_inst_vld;
+input         ctrl_xx_ex1_stall;
+input         ctrl_xx_ex1_warm_up;
+input         ctrl_xx_ex2_warm_up;
+input         ctrl_xx_ex3_warm_up;
+input  [2 :0] dp_xx_ex1_cnan; // the dp_xx_ex1_* class flags (except _rm) go only to x_pa_fdsu_special
+input  [2 :0] dp_xx_ex1_id;
+input  [2 :0] dp_xx_ex1_inf;
+input  [2 :0] dp_xx_ex1_qnan;
+input  [2 :0] dp_xx_ex1_rm; // used only by x_pa_fdsu_prepare
+input  [2 :0] dp_xx_ex1_snan;
+input  [2 :0] dp_xx_ex1_zero;
+input         forever_cpuclk;
+input         frbus_fdsu_wb_grant;
+input  [4 :0] idu_fpu_ex1_dst_freg;
+input  [2 :0] idu_fpu_ex1_eu_sel;
+input  [9 :0] idu_fpu_ex1_func;
+input  [31:0] idu_fpu_ex1_srcf0; // 32-bit source operands, used only by x_pa_fdsu_prepare
+input  [31:0] idu_fpu_ex1_srcf1;
+input         pad_yy_icg_scan_en;
+input         rtu_xx_ex1_cancel;
+input         rtu_xx_ex2_cancel;
+input         rtu_yy_xx_async_flush;
+input         rtu_yy_xx_flush;
+output [4 :0] fdsu_fpu_debug_info; // connected only to x_pa_fdsu_ctrl
+output        fdsu_fpu_ex1_cmplt; // connected only to x_pa_fdsu_ctrl
+output        fdsu_fpu_ex1_cmplt_dp; // connected only to x_pa_fdsu_ctrl
+output [4 :0] fdsu_fpu_ex1_fflags; // connected only to x_pa_fdsu_special
+output [7 :0] fdsu_fpu_ex1_special_sel; // connected only to x_pa_fdsu_special
+output [3 :0] fdsu_fpu_ex1_special_sign; // connected only to x_pa_fdsu_special
+output        fdsu_fpu_ex1_stall; // connected only to x_pa_fdsu_ctrl
+output        fdsu_fpu_no_op; // connected only to x_pa_fdsu_ctrl
+output [31:0] fdsu_frbus_data; // connected only to x_pa_fdsu_pack
+output [4 :0] fdsu_frbus_fflags; // connected only to x_pa_fdsu_pack
+output [4 :0] fdsu_frbus_freg; // connected only to x_pa_fdsu_pack
+output        fdsu_frbus_wb_vld; // connected only to x_pa_fdsu_ctrl
+
+// &Regs; @25
+
+// &Wires; @26
+wire          cp0_fpu_icg_en;
+wire          cp0_fpu_xx_dqnan;
+wire          cp0_yy_clk_en;
+wire          cpurst_b;
+wire          ctrl_fdsu_ex1_sel;
+wire          ctrl_xx_ex1_cmplt_dp;
+wire          ctrl_xx_ex1_inst_vld;
+wire          ctrl_xx_ex1_stall;
+wire          ctrl_xx_ex1_warm_up;
+wire          ctrl_xx_ex2_warm_up;
+wire          ctrl_xx_ex3_warm_up;
+wire   [2 :0] dp_xx_ex1_cnan;
+wire   [2 :0] dp_xx_ex1_id;
+wire   [2 :0] dp_xx_ex1_inf;
+wire   [2 :0] dp_xx_ex1_qnan;
+wire   [2 :0] dp_xx_ex1_rm;
+wire   [2 :0] dp_xx_ex1_snan;
+wire   [2 :0] dp_xx_ex1_zero;
+wire          ex1_div;
+wire   [23:0] ex1_divisor;
+wire   [12:0] ex1_expnt_adder_op0;
+wire   [12:0] ex1_expnt_adder_op1;
+wire          ex1_of_result_lfn;
+wire          ex1_op0_id;
+wire          ex1_op0_norm;
+wire          ex1_op0_sign;
+wire          ex1_op1_id;
+wire          ex1_op1_id_vld;
+wire          ex1_op1_norm;
+wire          ex1_op1_sel;
+wire   [12:0] ex1_oper_id_expnt;
+wire   [12:0] ex1_oper_id_expnt_f;
+wire   [51:0] ex1_oper_id_frac;
+wire   [51:0] ex1_oper_id_frac_f;
+wire          ex1_pipedown;
+wire          ex1_pipedown_gate;
+wire   [31:0] ex1_remainder;
+wire          ex1_result_sign;
+wire   [2 :0] ex1_rm;
+wire          ex1_save_op0;
+wire          ex1_save_op0_gate;
+wire          ex1_sqrt;
+wire          ex1_srt_skip;
+wire   [9 :0] ex2_expnt_adder_op0;
+wire          ex2_of;
+wire          ex2_pipe_clk;
+wire          ex2_pipedown;
+wire          ex2_potnt_of;
+wire          ex2_potnt_uf;
+wire          ex2_result_inf;
+wire          ex2_result_lfn;
+wire          ex2_rslt_denorm;
+wire   [9 :0] ex2_srt_expnt_rst;
+wire          ex2_srt_first_round;
+wire          ex2_uf;
+wire          ex2_uf_srt_skip;
+wire   [9 :0] ex3_expnt_adjust_result;
+wire   [25:0] ex3_frac_final_rst;
+wire          ex3_pipedown;
+wire          ex3_rslt_denorm;
+wire          fdsu_ex1_sel;
+wire          fdsu_ex3_id_srt_skip;
+wire          fdsu_ex3_rem_sign;
+wire          fdsu_ex3_rem_zero;
+wire   [23:0] fdsu_ex3_result_denorm_round_add_num;
+wire          fdsu_ex4_denorm_to_tiny_frac;
+wire   [25:0] fdsu_ex4_frac;
+wire          fdsu_ex4_nx;
+wire   [1 :0] fdsu_ex4_potnt_norm;
+wire          fdsu_ex4_result_nor;
+wire   [4 :0] fdsu_fpu_debug_info;
+wire          fdsu_fpu_ex1_cmplt;
+wire          fdsu_fpu_ex1_cmplt_dp;
+wire   [4 :0] fdsu_fpu_ex1_fflags;
+wire   [7 :0] fdsu_fpu_ex1_special_sel;
+wire   [3 :0] fdsu_fpu_ex1_special_sign;
+wire          fdsu_fpu_ex1_stall;
+wire          fdsu_fpu_no_op;
+wire   [31:0] fdsu_frbus_data;
+wire   [4 :0] fdsu_frbus_fflags;
+wire   [4 :0] fdsu_frbus_freg;
+wire          fdsu_frbus_wb_vld;
+wire          fdsu_yy_div;
+wire   [9 :0] fdsu_yy_expnt_rst;
+wire          fdsu_yy_of;
+wire          fdsu_yy_of_rm_lfn;
+wire          fdsu_yy_op0_norm;
+wire          fdsu_yy_op1_norm;
+wire          fdsu_yy_potnt_of;
+wire          fdsu_yy_potnt_uf;
+wire          fdsu_yy_result_inf;
+wire          fdsu_yy_result_lfn;
+wire          fdsu_yy_result_sign;
+wire   [2 :0] fdsu_yy_rm;
+wire          fdsu_yy_rslt_denorm;
+wire          fdsu_yy_sqrt;
+wire          fdsu_yy_uf;
+wire   [4 :0] fdsu_yy_wb_freg;
+wire          forever_cpuclk;
+wire          frbus_fdsu_wb_grant;
+wire   [4 :0] idu_fpu_ex1_dst_freg;
+wire   [2 :0] idu_fpu_ex1_eu_sel;
+wire   [9 :0] idu_fpu_ex1_func;
+wire   [31:0] idu_fpu_ex1_srcf0;
+wire   [31:0] idu_fpu_ex1_srcf1;
+wire          pad_yy_icg_scan_en;
+wire          rtu_xx_ex1_cancel;
+wire          rtu_xx_ex2_cancel;
+wire          rtu_yy_xx_async_flush;
+wire          rtu_yy_xx_flush;
+wire          srt_remainder_zero;
+wire          srt_sm_on;
+wire   [29:0] total_qt_rt_30;
+
+
+
+// &Instance("pa_fdsu_special"); @29
+pa_fdsu_special x_pa_fdsu_special ( // receives the dp_xx_ex1_* operand-class flags; the only block tied to the fdsu_fpu_ex1_fflags/special_sel/special_sign outputs
+  .cp0_fpu_xx_dqnan (cp0_fpu_xx_dqnan ),
+  .dp_xx_ex1_cnan (dp_xx_ex1_cnan ),
+  .dp_xx_ex1_id (dp_xx_ex1_id ),
+  .dp_xx_ex1_inf (dp_xx_ex1_inf ),
+  .dp_xx_ex1_qnan (dp_xx_ex1_qnan ),
+  .dp_xx_ex1_snan (dp_xx_ex1_snan ),
+  .dp_xx_ex1_zero (dp_xx_ex1_zero ),
+  .ex1_div (ex1_div ),
+  .ex1_op0_id (ex1_op0_id ),
+  .ex1_op0_norm (ex1_op0_norm ),
+  .ex1_op0_sign (ex1_op0_sign ),
+  .ex1_op1_id (ex1_op1_id ),
+  .ex1_op1_norm (ex1_op1_norm ),
+  .ex1_result_sign (ex1_result_sign ),
+  .ex1_sqrt (ex1_sqrt ),
+  .ex1_srt_skip (ex1_srt_skip ),
+  .fdsu_fpu_ex1_fflags (fdsu_fpu_ex1_fflags ),
+  .fdsu_fpu_ex1_special_sel (fdsu_fpu_ex1_special_sel ),
+  .fdsu_fpu_ex1_special_sign (fdsu_fpu_ex1_special_sign)
+);
+
+// &Instance("pa_fdsu_prepare"); @30
+pa_fdsu_prepare x_pa_fdsu_prepare ( // the only block that sees idu_fpu_ex1_func/srcf0/srcf1 and dp_xx_ex1_rm
+  .dp_xx_ex1_rm (dp_xx_ex1_rm ),
+  .ex1_div (ex1_div ),
+  .ex1_divisor (ex1_divisor ),
+  .ex1_expnt_adder_op0 (ex1_expnt_adder_op0),
+  .ex1_expnt_adder_op1 (ex1_expnt_adder_op1),
+  .ex1_of_result_lfn (ex1_of_result_lfn ),
+  .ex1_op0_id (ex1_op0_id ),
+  .ex1_op0_sign (ex1_op0_sign ),
+  .ex1_op1_id (ex1_op1_id ),
+  .ex1_op1_id_vld (ex1_op1_id_vld ),
+  .ex1_op1_sel (ex1_op1_sel ),
+  .ex1_oper_id_expnt (ex1_oper_id_expnt ),
+  .ex1_oper_id_expnt_f (ex1_oper_id_expnt_f),
+  .ex1_oper_id_frac (ex1_oper_id_frac ),
+  .ex1_oper_id_frac_f (ex1_oper_id_frac_f ),
+  .ex1_remainder (ex1_remainder ),
+  .ex1_result_sign (ex1_result_sign ),
+  .ex1_rm (ex1_rm ),
+  .ex1_sqrt (ex1_sqrt ),
+  .fdsu_ex1_sel (fdsu_ex1_sel ),
+  .idu_fpu_ex1_func (idu_fpu_ex1_func ),
+  .idu_fpu_ex1_srcf0 (idu_fpu_ex1_srcf0 ),
+  .idu_fpu_ex1_srcf1 (idu_fpu_ex1_srcf1 )
+);
+
+// &Instance("pa_fdsu_srt"); @32
+// &Instance("pa_fdsu_round"); @33
+// &Instance("pa_fdsu_pack"); @34
+// &Instance("pa_fdsu_srt_single", "x_pa_fdsu_srt"); @36
+pa_fdsu_srt_single x_pa_fdsu_srt ( // the *_single variants are instantiated; the three plain-named &Instance lines above have no instance here
+  .cp0_fpu_icg_en (cp0_fpu_icg_en ),
+  .cp0_yy_clk_en (cp0_yy_clk_en ),
+  .ex1_divisor (ex1_divisor ),
+  .ex1_expnt_adder_op1 (ex1_expnt_adder_op1 ),
+  .ex1_oper_id_frac (ex1_oper_id_frac ),
+  .ex1_oper_id_frac_f (ex1_oper_id_frac_f ),
+  .ex1_pipedown (ex1_pipedown ),
+  .ex1_pipedown_gate (ex1_pipedown_gate ),
+  .ex1_remainder (ex1_remainder ),
+  .ex1_save_op0 (ex1_save_op0 ),
+  .ex1_save_op0_gate (ex1_save_op0_gate ),
+  .ex2_expnt_adder_op0 (ex2_expnt_adder_op0 ),
+  .ex2_of (ex2_of ),
+  .ex2_pipe_clk (ex2_pipe_clk ),
+  .ex2_pipedown (ex2_pipedown ),
+  .ex2_potnt_of (ex2_potnt_of ),
+  .ex2_potnt_uf (ex2_potnt_uf ),
+  .ex2_result_inf (ex2_result_inf ),
+  .ex2_result_lfn (ex2_result_lfn ),
+  .ex2_rslt_denorm (ex2_rslt_denorm ),
+  .ex2_srt_expnt_rst (ex2_srt_expnt_rst ),
+  .ex2_srt_first_round (ex2_srt_first_round ),
+  .ex2_uf (ex2_uf ),
+  .ex2_uf_srt_skip (ex2_uf_srt_skip ),
+  .ex3_frac_final_rst (ex3_frac_final_rst ),
+  .ex3_pipedown (ex3_pipedown ),
+  .fdsu_ex3_id_srt_skip (fdsu_ex3_id_srt_skip ),
+  .fdsu_ex3_rem_sign (fdsu_ex3_rem_sign ),
+  .fdsu_ex3_rem_zero (fdsu_ex3_rem_zero ),
+  .fdsu_ex3_result_denorm_round_add_num (fdsu_ex3_result_denorm_round_add_num),
+  .fdsu_ex4_frac (fdsu_ex4_frac ),
+  .fdsu_yy_div (fdsu_yy_div ),
+  .fdsu_yy_of_rm_lfn (fdsu_yy_of_rm_lfn ),
+  .fdsu_yy_op0_norm (fdsu_yy_op0_norm ),
+  .fdsu_yy_op1_norm (fdsu_yy_op1_norm ),
+  .fdsu_yy_sqrt (fdsu_yy_sqrt ),
+  .forever_cpuclk (forever_cpuclk ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en ),
+  .srt_remainder_zero (srt_remainder_zero ),
+  .srt_sm_on (srt_sm_on ),
+  .total_qt_rt_30 (total_qt_rt_30 )
+);
+
+// &Instance("pa_fdsu_round_single", "x_pa_fdsu_round"); @37
+pa_fdsu_round_single x_pa_fdsu_round ( // shares total_qt_rt_30, ex3_frac_final_rst and the fdsu_ex3_* nets with x_pa_fdsu_srt
+  .cp0_fpu_icg_en (cp0_fpu_icg_en ),
+  .cp0_yy_clk_en (cp0_yy_clk_en ),
+  .ex3_expnt_adjust_result (ex3_expnt_adjust_result ),
+  .ex3_frac_final_rst (ex3_frac_final_rst ),
+  .ex3_pipedown (ex3_pipedown ),
+  .ex3_rslt_denorm (ex3_rslt_denorm ),
+  .fdsu_ex3_id_srt_skip (fdsu_ex3_id_srt_skip ),
+  .fdsu_ex3_rem_sign (fdsu_ex3_rem_sign ),
+  .fdsu_ex3_rem_zero (fdsu_ex3_rem_zero ),
+  .fdsu_ex3_result_denorm_round_add_num (fdsu_ex3_result_denorm_round_add_num),
+  .fdsu_ex4_denorm_to_tiny_frac (fdsu_ex4_denorm_to_tiny_frac ),
+  .fdsu_ex4_nx (fdsu_ex4_nx ),
+  .fdsu_ex4_potnt_norm (fdsu_ex4_potnt_norm ),
+  .fdsu_ex4_result_nor (fdsu_ex4_result_nor ),
+  .fdsu_yy_expnt_rst (fdsu_yy_expnt_rst ),
+  .fdsu_yy_result_inf (fdsu_yy_result_inf ),
+  .fdsu_yy_result_lfn (fdsu_yy_result_lfn ),
+  .fdsu_yy_result_sign (fdsu_yy_result_sign ),
+  .fdsu_yy_rm (fdsu_yy_rm ),
+  .fdsu_yy_rslt_denorm (fdsu_yy_rslt_denorm ),
+  .forever_cpuclk (forever_cpuclk ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en ),
+  .total_qt_rt_30 (total_qt_rt_30 )
+);
+
+// &Instance("pa_fdsu_pack_single", "x_pa_fdsu_pack"); @38
+pa_fdsu_pack_single x_pa_fdsu_pack ( // no clock or reset connection; the only block tied to the fdsu_frbus_data/fflags/freg outputs
+  .fdsu_ex4_denorm_to_tiny_frac (fdsu_ex4_denorm_to_tiny_frac),
+  .fdsu_ex4_frac (fdsu_ex4_frac ),
+  .fdsu_ex4_nx (fdsu_ex4_nx ),
+  .fdsu_ex4_potnt_norm (fdsu_ex4_potnt_norm ),
+  .fdsu_ex4_result_nor (fdsu_ex4_result_nor ),
+  .fdsu_frbus_data (fdsu_frbus_data ),
+  .fdsu_frbus_fflags (fdsu_frbus_fflags ),
+  .fdsu_frbus_freg (fdsu_frbus_freg ),
+  .fdsu_yy_expnt_rst (fdsu_yy_expnt_rst ),
+  .fdsu_yy_of (fdsu_yy_of ),
+  .fdsu_yy_of_rm_lfn (fdsu_yy_of_rm_lfn ),
+  .fdsu_yy_potnt_of (fdsu_yy_potnt_of ),
+  .fdsu_yy_potnt_uf (fdsu_yy_potnt_uf ),
+  .fdsu_yy_result_inf (fdsu_yy_result_inf ),
+  .fdsu_yy_result_lfn (fdsu_yy_result_lfn ),
+  .fdsu_yy_result_sign (fdsu_yy_result_sign ),
+  .fdsu_yy_rslt_denorm (fdsu_yy_rslt_denorm ),
+  .fdsu_yy_uf (fdsu_yy_uf ),
+  .fdsu_yy_wb_freg (fdsu_yy_wb_freg )
+);
+
+
+// &Instance("pa_fdsu_ctrl"); @41
+pa_fdsu_ctrl x_pa_fdsu_ctrl ( // the only block connected to cpurst_b, the ctrl_xx_*/rtu_* inputs and the write-back handshake
+  .cp0_fpu_icg_en (cp0_fpu_icg_en ),
+  .cp0_yy_clk_en (cp0_yy_clk_en ),
+  .cpurst_b (cpurst_b ),
+  .ctrl_fdsu_ex1_sel (ctrl_fdsu_ex1_sel ),
+  .ctrl_xx_ex1_cmplt_dp (ctrl_xx_ex1_cmplt_dp ),
+  .ctrl_xx_ex1_inst_vld (ctrl_xx_ex1_inst_vld ),
+  .ctrl_xx_ex1_stall (ctrl_xx_ex1_stall ),
+  .ctrl_xx_ex1_warm_up (ctrl_xx_ex1_warm_up ),
+  .ctrl_xx_ex2_warm_up (ctrl_xx_ex2_warm_up ),
+  .ctrl_xx_ex3_warm_up (ctrl_xx_ex3_warm_up ),
+  .ex1_div (ex1_div ),
+  .ex1_expnt_adder_op0 (ex1_expnt_adder_op0 ),
+  .ex1_of_result_lfn (ex1_of_result_lfn ),
+  .ex1_op0_id (ex1_op0_id ),
+  .ex1_op0_norm (ex1_op0_norm ),
+  .ex1_op1_id_vld (ex1_op1_id_vld ),
+  .ex1_op1_norm (ex1_op1_norm ),
+  .ex1_op1_sel (ex1_op1_sel ),
+  .ex1_oper_id_expnt (ex1_oper_id_expnt ),
+  .ex1_oper_id_expnt_f (ex1_oper_id_expnt_f ),
+  .ex1_pipedown (ex1_pipedown ),
+  .ex1_pipedown_gate (ex1_pipedown_gate ),
+  .ex1_result_sign (ex1_result_sign ),
+  .ex1_rm (ex1_rm ),
+  .ex1_save_op0 (ex1_save_op0 ),
+  .ex1_save_op0_gate (ex1_save_op0_gate ),
+  .ex1_sqrt (ex1_sqrt ),
+  .ex1_srt_skip (ex1_srt_skip ),
+  .ex2_expnt_adder_op0 (ex2_expnt_adder_op0 ),
+  .ex2_of (ex2_of ),
+  .ex2_pipe_clk (ex2_pipe_clk ),
+  .ex2_pipedown (ex2_pipedown ),
+  .ex2_potnt_of (ex2_potnt_of ),
+  .ex2_potnt_uf (ex2_potnt_uf ),
+  .ex2_result_inf (ex2_result_inf ),
+  .ex2_result_lfn (ex2_result_lfn ),
+  .ex2_rslt_denorm (ex2_rslt_denorm ),
+  .ex2_srt_expnt_rst (ex2_srt_expnt_rst ),
+  .ex2_srt_first_round (ex2_srt_first_round ),
+  .ex2_uf (ex2_uf ),
+  .ex2_uf_srt_skip (ex2_uf_srt_skip ),
+  .ex3_expnt_adjust_result (ex3_expnt_adjust_result),
+  .ex3_pipedown (ex3_pipedown ),
+  .ex3_rslt_denorm (ex3_rslt_denorm ),
+  .fdsu_ex1_sel (fdsu_ex1_sel ),
+  .fdsu_fpu_debug_info (fdsu_fpu_debug_info ),
+  .fdsu_fpu_ex1_cmplt (fdsu_fpu_ex1_cmplt ),
+  .fdsu_fpu_ex1_cmplt_dp (fdsu_fpu_ex1_cmplt_dp ),
+  .fdsu_fpu_ex1_stall (fdsu_fpu_ex1_stall ),
+  .fdsu_fpu_no_op (fdsu_fpu_no_op ),
+  .fdsu_frbus_wb_vld (fdsu_frbus_wb_vld ),
+  .fdsu_yy_div (fdsu_yy_div ),
+  .fdsu_yy_expnt_rst (fdsu_yy_expnt_rst ),
+  .fdsu_yy_of (fdsu_yy_of ),
+  .fdsu_yy_of_rm_lfn (fdsu_yy_of_rm_lfn ),
+  .fdsu_yy_op0_norm (fdsu_yy_op0_norm ),
+  .fdsu_yy_op1_norm (fdsu_yy_op1_norm ),
+  .fdsu_yy_potnt_of (fdsu_yy_potnt_of ),
+  .fdsu_yy_potnt_uf (fdsu_yy_potnt_uf ),
+  .fdsu_yy_result_inf (fdsu_yy_result_inf ),
+  .fdsu_yy_result_lfn (fdsu_yy_result_lfn ),
+  .fdsu_yy_result_sign (fdsu_yy_result_sign ),
+  .fdsu_yy_rm (fdsu_yy_rm ),
+  .fdsu_yy_rslt_denorm (fdsu_yy_rslt_denorm ),
+  .fdsu_yy_sqrt (fdsu_yy_sqrt ),
+  .fdsu_yy_uf (fdsu_yy_uf ),
+  .fdsu_yy_wb_freg (fdsu_yy_wb_freg ),
+  .forever_cpuclk (forever_cpuclk ),
+  .frbus_fdsu_wb_grant (frbus_fdsu_wb_grant ),
+  .idu_fpu_ex1_dst_freg (idu_fpu_ex1_dst_freg ),
+  .idu_fpu_ex1_eu_sel (idu_fpu_ex1_eu_sel ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en ),
+  .rtu_xx_ex1_cancel (rtu_xx_ex1_cancel ),
+  .rtu_xx_ex2_cancel (rtu_xx_ex2_cancel ),
+  .rtu_yy_xx_async_flush (rtu_yy_xx_async_flush ),
+  .rtu_yy_xx_flush (rtu_yy_xx_flush ),
+  .srt_remainder_zero (srt_remainder_zero ),
+  .srt_sm_on (srt_sm_on )
+);
+
+
+
+// &ModuleEnd; @44
+endmodule
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v
b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v
new file mode 100644
index 000000000..f774511be
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v
@@ -0,0 +1,299 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fpu_dp: FPU EX1/EX2 datapath. Classifies the three single-precision sources and registers the EX1 special result (data, select, sign, fflags) into EX2.
+module pa_fpu_dp(
+  cp0_fpu_icg_en,
+  cp0_fpu_xx_rm,
+  cp0_yy_clk_en,
+  ctrl_xx_ex1_inst_vld,
+  ctrl_xx_ex1_stall,
+  ctrl_xx_ex1_warm_up,
+  dp_frbus_ex2_data,
+  dp_frbus_ex2_fflags,
+  dp_xx_ex1_cnan,
+  dp_xx_ex1_id,
+  dp_xx_ex1_inf,
+  dp_xx_ex1_norm,
+  dp_xx_ex1_qnan,
+  dp_xx_ex1_snan,
+  dp_xx_ex1_zero,
+  ex2_inst_wb,
+  fdsu_fpu_ex1_fflags,
+  fdsu_fpu_ex1_special_sel,
+  fdsu_fpu_ex1_special_sign,
+  forever_cpuclk,
+  idu_fpu_ex1_eu_sel,
+  idu_fpu_ex1_func,
+  idu_fpu_ex1_gateclk_vld,
+  idu_fpu_ex1_rm,
+  idu_fpu_ex1_srcf0,
+  idu_fpu_ex1_srcf1,
+  idu_fpu_ex1_srcf2,
+  pad_yy_icg_scan_en
+);
+
+input cp0_fpu_icg_en;
+input [2 :0] cp0_fpu_xx_rm;
+input cp0_yy_clk_en;
+input ctrl_xx_ex1_inst_vld;
+input ctrl_xx_ex1_stall;
+input ctrl_xx_ex1_warm_up;
+input [4 :0] fdsu_fpu_ex1_fflags;
+input [7 :0] fdsu_fpu_ex1_special_sel;
+input [3 :0] fdsu_fpu_ex1_special_sign;
+input forever_cpuclk;
+input [2 :0] idu_fpu_ex1_eu_sel;
+input [9 :0] idu_fpu_ex1_func;
+input idu_fpu_ex1_gateclk_vld;
+input [2 :0] idu_fpu_ex1_rm;
+input [31:0] idu_fpu_ex1_srcf0;
+input [31:0] idu_fpu_ex1_srcf1;
+input [31:0] idu_fpu_ex1_srcf2;
+input pad_yy_icg_scan_en;
+output [31:0] dp_frbus_ex2_data;
+output [4 :0] dp_frbus_ex2_fflags;
+output [2 :0] dp_xx_ex1_cnan; // per-source class flags: bit 0 = srcf0, bit 1 = srcf1, bit 2 = srcf2
+output [2 :0] dp_xx_ex1_id;
+output [2 :0] dp_xx_ex1_inf;
+output [2 :0] dp_xx_ex1_norm;
+output [2 :0] dp_xx_ex1_qnan;
+output [2 :0] dp_xx_ex1_snan;
+output [2 :0] dp_xx_ex1_zero;
+output ex2_inst_wb;
+
+reg [4 :0] ex1_fflags;
+reg [31:0] ex1_special_data;
+reg [8 :0] ex1_special_sel; // 9-bit EX1 select; compressed to 7 bits when registered into EX2
+reg [3 :0] ex1_special_sign;
+reg [4 :0] ex2_fflags;
+reg [31:0] ex2_result;
+reg [31:0] ex2_special_data;
+reg [6 :0] ex2_special_sel;
+reg [3 :0] ex2_special_sign;
+
+wire cp0_fpu_icg_en;
+wire [2 :0] cp0_fpu_xx_rm;
+wire cp0_yy_clk_en;
+wire ctrl_xx_ex1_inst_vld;
+wire ctrl_xx_ex1_stall;
+wire ctrl_xx_ex1_warm_up;
+wire [31:0] dp_frbus_ex2_data;
+wire [4 :0] dp_frbus_ex2_fflags;
+wire [2 :0] dp_xx_ex1_cnan;
+wire [2 :0] dp_xx_ex1_id;
+wire [2 :0] dp_xx_ex1_inf;
+wire [2 :0] dp_xx_ex1_norm;
+wire [2 :0] dp_xx_ex1_qnan;
+wire [2 :0] dp_xx_ex1_snan;
+wire [2 :0] dp_xx_ex1_zero;
+wire [2 :0] ex1_decode_rm;
+wire ex1_double;
+wire [2 :0] ex1_eu_sel;
+wire [9 :0] ex1_func;
+wire [2 :0] ex1_global_rm;
+wire [2 :0] ex1_rm;
+wire ex1_single;
+wire [31:0] ex1_special_data_final;
+wire [63:0] ex1_src0;
+wire [63:0] ex1_src1;
+wire [63:0] ex1_src2;
+wire ex1_src2_vld;
+wire [2 :0] ex1_src_cnan;
+wire [2 :0] ex1_src_id;
+wire [2 :0] ex1_src_inf;
+wire [2 :0] ex1_src_norm;
+wire [2 :0] ex1_src_qnan;
+wire [2 :0] ex1_src_snan;
+wire [2 :0] ex1_src_zero;
+wire ex2_data_clk;
+wire ex2_data_clk_en;
+wire ex2_inst_wb;
+wire [4 :0] fdsu_fpu_ex1_fflags;
+wire [7 :0] fdsu_fpu_ex1_special_sel;
+wire [3 :0] fdsu_fpu_ex1_special_sign;
+wire forever_cpuclk;
+wire [2 :0] idu_fpu_ex1_eu_sel;
+wire [9 :0] idu_fpu_ex1_func;
+wire idu_fpu_ex1_gateclk_vld;
+wire [2 :0] idu_fpu_ex1_rm;
+wire [31:0] idu_fpu_ex1_srcf0;
+wire [31:0] idu_fpu_ex1_srcf1;
+wire [31:0] idu_fpu_ex1_srcf2;
+wire pad_yy_icg_scan_en;
+
+
+parameter DOUBLE_WIDTH =64;
+parameter SINGLE_WIDTH =32;
+parameter FUNC_WIDTH =10;
+//==========================================================
+// EX1 special data path
+//==========================================================
+assign ex1_eu_sel[2:0] = idu_fpu_ex1_eu_sel[2:0]; //3'h4
+assign ex1_func[FUNC_WIDTH-1:0] = idu_fpu_ex1_func[FUNC_WIDTH-1:0];
+assign ex1_global_rm[2:0] = cp0_fpu_xx_rm[2:0];
+assign ex1_decode_rm[2:0] = idu_fpu_ex1_rm[2:0];
+
+assign ex1_rm[2:0] = (ex1_decode_rm[2:0]==3'b111) // decoded rm of 3'b111 selects the cp0-supplied rounding mode
+                   ? ex1_global_rm[2:0] : ex1_decode_rm[2:0]; // NOTE(review): ex1_rm has no reader inside this module
+
+assign ex1_src2_vld = idu_fpu_ex1_eu_sel[1] && ex1_func[0]; // srcf2 is used only when eu_sel[1] and func[0] are both set
+
+assign ex1_src0[DOUBLE_WIDTH-1:0] = { {SINGLE_WIDTH{1'b1}},idu_fpu_ex1_srcf0[SINGLE_WIDTH-1:0]}; // upper word forced to all ones, so pa_fpu_src_type never flags cnan
+assign ex1_src1[DOUBLE_WIDTH-1:0] = { {SINGLE_WIDTH{1'b1}},idu_fpu_ex1_srcf1[SINGLE_WIDTH-1:0]};
+assign ex1_src2[DOUBLE_WIDTH-1:0] = ex1_src2_vld ? { {SINGLE_WIDTH{1'b1}},idu_fpu_ex1_srcf2[SINGLE_WIDTH-1:0]}
+                                                 : { {SINGLE_WIDTH{1'b1}},{SINGLE_WIDTH{1'b0}} }; // unused src2 is replaced by +0.0 in the low word
+
+assign ex1_double = 1'b0; // precision is hard-wired: single only
+assign ex1_single = 1'b1;
+
+//==========================================================
+// EX1 special src data judge
+//==========================================================
+pa_fpu_src_type x_pa_fpu_ex1_srcf0_type (
+  .inst_double (ex1_double     ),
+  .inst_single (ex1_single     ),
+  .src_cnan    (ex1_src_cnan[0]),
+  .src_id      (ex1_src_id[0]  ),
+  .src_in      (ex1_src0       ),
+  .src_inf     (ex1_src_inf[0] ),
+  .src_norm    (ex1_src_norm[0]),
+  .src_qnan    (ex1_src_qnan[0]),
+  .src_snan    (ex1_src_snan[0]),
+  .src_zero    (ex1_src_zero[0])
+);
+
+pa_fpu_src_type x_pa_fpu_ex1_srcf1_type (
+  .inst_double (ex1_double     ),
+  .inst_single (ex1_single     ),
+  .src_cnan    (ex1_src_cnan[1]),
+  .src_id      (ex1_src_id[1]  ),
+  .src_in      (ex1_src1       ),
+  .src_inf     (ex1_src_inf[1] ),
+  .src_norm    (ex1_src_norm[1]),
+  .src_qnan    (ex1_src_qnan[1]),
+  .src_snan    (ex1_src_snan[1]),
+  .src_zero    (ex1_src_zero[1])
+);
+
+pa_fpu_src_type x_pa_fpu_ex1_srcf2_type (
+  .inst_double (ex1_double     ),
+  .inst_single (ex1_single     ),
+  .src_cnan    (ex1_src_cnan[2]),
+  .src_id      (ex1_src_id[2]  ),
+  .src_in      (ex1_src2       ),
+  .src_inf     (ex1_src_inf[2] ),
+  .src_norm    (ex1_src_norm[2]),
+  .src_qnan    (ex1_src_qnan[2]),
+  .src_snan    (ex1_src_snan[2]),
+  .src_zero    (ex1_src_zero[2])
+);
+
+assign dp_xx_ex1_cnan[2:0] = ex1_src_cnan[2:0];
+assign dp_xx_ex1_snan[2:0] = ex1_src_snan[2:0];
+assign dp_xx_ex1_qnan[2:0] = ex1_src_qnan[2:0];
+assign dp_xx_ex1_norm[2:0] = ex1_src_norm[2:0];
+assign dp_xx_ex1_zero[2:0] = ex1_src_zero[2:0];
+assign dp_xx_ex1_inf[2:0] = ex1_src_inf[2:0];
+assign dp_xx_ex1_id[2:0] = ex1_src_id[2:0];
+
+//==========================================================
+// EX1 special result judge
+//==========================================================
+// Combinational select of the special-result controls by execution unit; only eu_sel == 3'b100 (FDSU) contributes.
+always @( fdsu_fpu_ex1_special_sign[3:0]
+       or fdsu_fpu_ex1_fflags[4:0]
+       or ex1_eu_sel[2:0]
+       or fdsu_fpu_ex1_special_sel[7:0])
+begin
+case(ex1_eu_sel[2:0]) //3'h4
+   3'b100: begin//FDSU
+         ex1_fflags[4:0] = fdsu_fpu_ex1_fflags[4:0];
+         ex1_special_sel[8:0] ={1'b0,fdsu_fpu_ex1_special_sel[7:0]}; // FDSU drives bits [7:0]; bit 8 is tied low
+         ex1_special_sign[3:0] = fdsu_fpu_ex1_special_sign[3:0];
+         end
+default: begin// any other eu_sel: no special result, flags cleared
+         ex1_fflags[4:0] = {5{1'b0}};
+         ex1_special_sel[8:0] = {9{1'b0}};
+         ex1_special_sign[3:0] = {4{1'b0}};
+         end
+endcase
+end
+// Pick which source word is forwarded as special data, keyed on select bits [8:5].
+always @( ex1_special_sel[8:5]
+       or ex1_src0[31:0]
+       or ex1_src1[31:0]
+       or ex1_src2[31:0])
+begin
+case(ex1_special_sel[8:5])
+  4'b0001: ex1_special_data[SINGLE_WIDTH-1:0] = ex1_src0[SINGLE_WIDTH-1:0]; // sel[5]: src0
+  4'b0010: ex1_special_data[SINGLE_WIDTH-1:0] = ex1_src1[SINGLE_WIDTH-1:0]; // sel[6]: src1
+  4'b0100: ex1_special_data[SINGLE_WIDTH-1:0] = ex1_src2[SINGLE_WIDTH-1:0]; // sel[7]: src2
+default : ex1_special_data[SINGLE_WIDTH-1:0] = ex1_src2[SINGLE_WIDTH-1:0]; // everything else (including sel[8]) also forwards src2
+endcase
+end
+
+assign ex1_special_data_final[SINGLE_WIDTH-1:0] = ex1_special_data[SINGLE_WIDTH-1:0];
+
+//==========================================================
+// EX1-EX2 data pipedown
+//==========================================================
+assign ex2_data_clk_en = idu_fpu_ex1_gateclk_vld || ctrl_xx_ex1_warm_up; // local enable for the gated EX2 register clock
+
+gated_clk_cell x_fpu_data_ex2_gated_clk (
+  .clk_in             (forever_cpuclk    ),
+  .clk_out            (ex2_data_clk      ),
+  .external_en        (1'b0              ),
+  .global_en          (cp0_yy_clk_en     ),
+  .local_en           (ex2_data_clk_en   ),
+  .module_en          (cp0_fpu_icg_en    ),
+  .pad_yy_icg_scan_en (pad_yy_icg_scan_en)
+);
+
+always @(posedge ex2_data_clk) // EX2 registers have no reset term; they only load under the condition below
+begin
+  if(ctrl_xx_ex1_inst_vld && !ctrl_xx_ex1_stall || ctrl_xx_ex1_warm_up)
+  begin
+    ex2_fflags[4:0] <= ex1_fflags[4:0];
+    ex2_special_sign[3:0] <= ex1_special_sign[3:0];
+    ex2_special_sel[6:0] <={ex1_special_sel[8],|ex1_special_sel[7:5],ex1_special_sel[4:0]}; // 9 -> 7 bits: the three source-forward selects [7:5] are OR-reduced into bit 5
+    ex2_special_data[SINGLE_WIDTH-1:0] <= ex1_special_data_final[SINGLE_WIDTH-1:0];
+  end
+end
+
+assign ex2_inst_wb = (|ex2_special_sel[6:0]); // asserted when any EX2 special-select bit is set
+// Build the EX2 result word from the registered select; any pattern that is not one of the seven one-hot codes yields zero.
+always @( ex2_special_sel[6:0]
+       or ex2_special_data[31:0]
+       or ex2_special_sign[3:0])
+begin
+case(ex2_special_sel[6:0])
+  7'b0000_001: ex2_result[SINGLE_WIDTH-1:0] = { ex2_special_sign[0],ex2_special_data[SINGLE_WIDTH-2:0]};//src2
+  7'b0000_010: ex2_result[SINGLE_WIDTH-1:0] = { ex2_special_sign[1], {31{1'b0}} };//zero
+  7'b0000_100: ex2_result[SINGLE_WIDTH-1:0] = { ex2_special_sign[2], {8{1'b1}},{23{1'b0}} };//inf
+  7'b0001_000: ex2_result[SINGLE_WIDTH-1:0] = { ex2_special_sign[3], {7{1'b1}},1'b0,{23{1'b1}} };//lfn
+  7'b0010_000: ex2_result[SINGLE_WIDTH-1:0] = { 1'b0, {8{1'b1}},1'b1, {22{1'b0}} };//cnan
+  7'b0100_000: ex2_result[SINGLE_WIDTH-1:0] = { ex2_special_data[31],{8{1'b1}}, 1'b1, ex2_special_data[21:0]};//propagate qnan
+  7'b1000_000: ex2_result[SINGLE_WIDTH-1:0] = ex2_special_data[SINGLE_WIDTH-1:0]; //ex1 falu special result
+  default: ex2_result[SINGLE_WIDTH-1:0] = {SINGLE_WIDTH{1'b0}};
+endcase
+end
+
+assign dp_frbus_ex2_data[SINGLE_WIDTH-1:0] = ex2_result[SINGLE_WIDTH-1:0];
+assign dp_frbus_ex2_fflags[4:0] = ex2_fflags[4:0];
+
+endmodule
+
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v
new file mode 100644
index 000000000..ad586cf72
--- /dev/null
+++
b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v
@@ -0,0 +1,90 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fpu_frbus: combinational result bus. Muxes the FDSU result and the EX2 special result onto the forwarding outputs to the IDU.
+module pa_fpu_frbus(
+  ctrl_frbus_ex2_wb_req,
+  dp_frbus_ex2_data,
+  dp_frbus_ex2_fflags,
+  fdsu_frbus_data,
+  fdsu_frbus_fflags,
+  fdsu_frbus_wb_vld,
+  fpu_idu_fwd_data,
+  fpu_idu_fwd_fflags,
+  fpu_idu_fwd_vld
+);
+
+input ctrl_frbus_ex2_wb_req;
+input [31:0] dp_frbus_ex2_data;
+input [4 :0] dp_frbus_ex2_fflags;
+input [31:0] fdsu_frbus_data;
+input [4 :0] fdsu_frbus_fflags;
+input fdsu_frbus_wb_vld;
+output [31:0] fpu_idu_fwd_data;
+output [4 :0] fpu_idu_fwd_fflags;
+output fpu_idu_fwd_vld;
+
+reg [31:0] frbus_wb_data;
+reg [4 :0] frbus_wb_fflags;
+
+wire ctrl_frbus_ex2_wb_req;
+wire [31:0] fdsu_frbus_data;
+wire [4 :0] fdsu_frbus_fflags;
+wire fdsu_frbus_wb_vld;
+wire [31:0] fpu_idu_fwd_data;
+wire [4 :0] fpu_idu_fwd_fflags;
+wire fpu_idu_fwd_vld;
+wire frbus_ex2_wb_vld;
+wire frbus_fdsu_wb_vld;
+wire frbus_wb_vld;
+wire [3 :0] frbus_source_vld;
+
+
+//==========================================================
+// Input Signal Rename
+//==========================================================
+assign frbus_fdsu_wb_vld = fdsu_frbus_wb_vld;
+assign frbus_ex2_wb_vld = ctrl_frbus_ex2_wb_req;
+assign frbus_source_vld[3:0] = {1'b0, 1'b0, frbus_ex2_wb_vld, frbus_fdsu_wb_vld}; // bit 0 = FDSU, bit 1 = EX2, bits [3:2] tied low
+assign frbus_wb_vld = frbus_ex2_wb_vld | frbus_fdsu_wb_vld;
+// Source mux. NOTE(review): if both sources are valid at once (4'b0011) the default branch drives zeros while fwd_vld is still high - presumably excluded upstream; confirm.
+always @( frbus_source_vld[3:0]
+       or fdsu_frbus_data[31:0]
+       or dp_frbus_ex2_data[31:0]
+       or fdsu_frbus_fflags[4:0]
+       or dp_frbus_ex2_fflags[4:0])
+begin
+  case(frbus_source_vld[3:0])
+    4'b0001: begin // DIV
+      frbus_wb_data[31:0] = fdsu_frbus_data[31:0];
+      frbus_wb_fflags[4:0] = fdsu_frbus_fflags[4:0];
+    end
+    4'b0010: begin // EX2
+      frbus_wb_data[31:0] = dp_frbus_ex2_data[31:0];
+      frbus_wb_fflags[4:0] = dp_frbus_ex2_fflags[4:0];
+    end
+    default: begin // no source (or an unexpected combination): drive zeros
+      frbus_wb_data[31:0] = {32{1'b0}}; // explicit 32-bit zero, matching the register width
+      frbus_wb_fflags[4:0] = 5'b0;
+    end
+  endcase
+end
+
+assign fpu_idu_fwd_vld = frbus_wb_vld;
+assign fpu_idu_fwd_fflags[4:0] = frbus_wb_fflags[4:0];
+assign fpu_idu_fwd_data[31:0] = frbus_wb_data[31:0];
+
+endmodule
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v
new file mode 100644
index 000000000..6df3a584a
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v
@@ -0,0 +1,92 @@
+/*Copyright 2020-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// pa_fpu_src_type: combinational classifier for one floating-point operand held in a 64-bit container (single or double precision).
+// &ModuleBeg; @24
+module pa_fpu_src_type(
+  inst_double,
+  inst_single,
+  src_cnan,
+  src_id,
+  src_in,
+  src_inf,
+  src_norm,
+  src_qnan,
+  src_snan,
+  src_zero
+);
+
+// &Ports; @25
+input inst_double;
+input inst_single;
+input [63:0] src_in;
+output src_cnan;
+output src_id;
+output src_inf;
+output src_norm;
+output src_qnan;
+output src_snan;
+output src_zero;
+
+// &Regs; @26
+
+// &Wires; @27
+wire inst_double;
+wire inst_single;
+wire [63:0] src;
+wire src_cnan;
+wire src_expn_max;
+wire src_expn_zero;
+wire src_frac_msb;
+wire src_frac_zero;
+wire src_id;
+wire [63:0] src_in;
+wire src_inf;
+wire src_norm;
+wire src_qnan;
+wire src_snan;
+wire src_zero;
+
+
+// &Depend("cpu_cfig.h"); @29
+assign src[63:0] = src_in[63:0];
+
+assign src_cnan = !(&src[63:32]) && inst_single; // single-precision operand whose upper 32 bits are not all ones
+
+assign src_expn_zero = !(|src[62:52]) && inst_double || // exponent field all zeros: [62:52] for double, [30:23] for single
+                       !(|src[30:23]) && inst_single;
+
+assign src_expn_max = (&src[62:52]) && inst_double || // exponent field all ones
+                      (&src[30:23]) && inst_single;
+
+assign src_frac_zero = !(|src[51:0]) && inst_double || // fraction field all zeros: [51:0] for double, [22:0] for single
+                       !(|src[22:0]) && inst_single;
+
+assign src_frac_msb = src[51] && inst_double || src[22] && inst_single; // top fraction bit separates qnan (1) from snan (0)
+
+assign src_snan = src_expn_max && !src_frac_msb && !src_frac_zero && !src_cnan;
+assign src_qnan = src_expn_max && src_frac_msb || src_cnan; // a cnan source is reported as qnan and excluded from every other class
+assign src_zero = src_expn_zero && src_frac_zero && !src_cnan;
+assign src_id = src_expn_zero && !src_frac_zero && !src_cnan; // zero exponent with non-zero fraction
+assign src_inf = src_expn_max && src_frac_zero && !src_cnan;
+assign src_norm =!(src_expn_zero && src_frac_zero) && // any finite non-zero value; note this also covers the src_id case
+                 ! src_expn_max && !src_cnan;
+
+// &Force("output","src_cnan"); @53
+
+// &ModuleEnd; @55
+endmodule
+
+
+
diff --git a/rtl/vendor/pulp_platform_fpnew/vendor/opene906/LICENSE b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/LICENSE
new file mode 100644
index 000000000..261eeb9e9
--- /dev/null
+++ b/rtl/vendor/pulp_platform_fpnew/vendor/opene906/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From d3d06b7746c9e34af51da9706446f230ae86d71c Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Mon, 19 Jun 2023 16:56:51 +0200 Subject: [PATCH 25/38] Updated CVFPU instantiation to align to 0.8.0 release Signed-off-by: Pascal Gouedo --- rtl/cv32e40p_fp_wrapper.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtl/cv32e40p_fp_wrapper.sv b/rtl/cv32e40p_fp_wrapper.sv index 15be96e5e..042fa0f22 100644 --- a/rtl/cv32e40p_fp_wrapper.sv +++ b/rtl/cv32e40p_fp_wrapper.sv @@ -97,6 +97,7 @@ module cv32e40p_fp_wrapper fpnew_top #( .Features (FPU_FEATURES), .Implementation(FPU_IMPLEMENTATION), + .PulpDivsqrt (1'b0), .TagType (logic) ) i_fpnew_bulk ( .clk_i (clk_i), @@ -110,6 +111,7 @@ module cv32e40p_fp_wrapper .int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)), .vectorial_op_i(fpu_vec_op), .tag_i (1'b0), + .simd_mask_i ('b0), .in_valid_i (apu_req_i), .in_ready_o (apu_gnt_o), .flush_i (1'b0), From 4858cda191438daa3adc33d0ab68e7dcc071490d Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Mon, 19 Jun 2023 16:58:00 +0200 Subject: [PATCH 26/38] Updated manifest to align to CVFPU 0.8.0 release Signed-off-by: Pascal Gouedo --- cv32e40p_fpu_manifest.flist | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/cv32e40p_fpu_manifest.flist b/cv32e40p_fpu_manifest.flist index 8916f07a7..aca8c41b2 100644 --- a/cv32e40p_fpu_manifest.flist +++ b/cv32e40p_fpu_manifest.flist 
@@ -60,14 +60,20 @@ ${DESIGN_RTL_DIR}/cv32e40p_core.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_common_cells/src/cf_math_pkg.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_common_cells/src/rr_arb_tree.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_common_cells/src/lzc.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/control_mvp.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv -${DESIGN_RTL_DIR}/vendor/pulp_platform_fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_pkg.sv +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ctrl.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_ff1.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_pack_single.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_prepare.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_round_single.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_special.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_srt_single.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fdsu/rtl/pa_fdsu_top.v 
+${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v +${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_divsqrt_th_32.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_classifier.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_rounding.sv ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_cast_multi.sv From 5c16628e556a2aebcfbe69d6b3cc9187011fe34e Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Wed, 21 Jun 2023 16:59:02 +0200 Subject: [PATCH 27/38] Added a constraint about HWloop start, end and setup instructions alignment. Corrected assembly example wrt assembly alignment directives. Signed-off-by: Pascal Gouedo --- docs/source/corev_hw_loop.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/source/corev_hw_loop.rst b/docs/source/corev_hw_loop.rst index 848e63d88..6168c713f 100644 --- a/docs/source/corev_hw_loop.rst +++ b/docs/source/corev_hw_loop.rst @@ -50,7 +50,9 @@ must generate a fatal error`` with a meaningfull message related to Hardware Loo The HWLoop constraints are: -- Start and End addresses of an HWLoop must be 32-bit aligned. +- HWLoop start, end and setup instructions addresses must be 32-bit aligned (short or long commands). + +- Start and End addresses of an HWLoop body must be 32-bit aligned. - End Address must be strictly greater than Start Address.
@@ -102,11 +104,15 @@ Below an assembly code example of a nested HWLoop that computes a matrix additio "add %[i],x0, x0;" "add %[j],x0, x0;" "cv.count 1, %[N];" + ".balign 4;" "cv.endi 1, endO;" "cv.starti 1, startO;" + "any instructions here" + ".balign 4;" "cv.endi 0, endZ;" "cv.starti 0, startZ;" - ".align 4;" + "any instructions here" + ".balign 4;" ".option norvc;" "startO:;" " cv.count 0, %[N];" From 34f55d8a3d31e1f1db62985c9035f4fc90418c12 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Fri, 23 Jun 2023 14:24:18 +0200 Subject: [PATCH 28/38] Changed parameters values for Zfinx CSR presence. Signed-off-by: Pascal Gouedo --- docs/source/control_status_registers.rst | 312 +++++++++++------------ 1 file changed, 156 insertions(+), 156 deletions(-) diff --git a/docs/source/control_status_registers.rst b/docs/source/control_status_registers.rst index 891c7c2f3..3c3f951a5 100644 --- a/docs/source/control_status_registers.rst +++ b/docs/source/control_status_registers.rst @@ -64,159 +64,159 @@ instruction exception. :widths: 13 17 15 55 :class: no-scrollbar-table - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | **CSR Address** | **Name** | **Privilege** | **Description** | - +=================+===================+===============+==============================================================+ - | **User CSRs** | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x001 | ``fflags`` | URW | Floating-point accrued exceptions. | - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x002 | ``frm`` | URW | Floating-point dynamic rounding mode. 
| - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x003 | ``fcsr`` | URW | Floating-point control and status register. | - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC00 | ``cycle`` | URO | (HPM) Cycle Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC02 | ``instret`` | URO | (HPM) Instructions-Retired Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC03 | ``hpmcounter3`` | URO | (HPM) Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | . . . . | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC1F | ``hpmcounter31`` | URO | (HPM) Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC80 | ``cycleh`` | URO | (HPM) Upper 32 bits Cycle Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC82 | ``instreth`` | URO | (HPM) Upper 32 bits Instructions-Retired Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC83 | ``hpmcounterh3`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | . . . . 
| - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xC9F | ``hpmcounterh31`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | **User Custom CSRs** | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCC0 | ``lpstart0`` | URO | Hardware Loop 0 Start. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCC1 | ``lpend0`` | URO | Hardware Loop 0 End. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCC2 | ``lpcount0`` | URO | Hardware Loop 0 Counter. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCC4 | ``lpstart1`` | URO | Hardware Loop 1 Start. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCC5 | ``lpend1`` | URO | Hardware Loop 1 End. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCC6 | ``lpcount1`` | URO | Hardware Loop 1 Counter. 
| - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCD0 | ``uhartid`` | URO | Hardware Thread ID | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCD1 | ``privlv`` | URO | Privilege Level | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xCD2 | ``zfinx`` | URO | ``ZFINX`` ISA | - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | **Machine CSRs** | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x300 | ``mstatus`` | MRW | Machine Status | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x301 | ``misa`` | MRW | Machine ISA | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x304 | ``mie`` | MRW | Machine Interrupt Enable register | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x305 | ``mtvec`` | MRW | Machine Trap-Handler Base Address | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x320 | ``mcountinhibit`` | MRW | (HPM) Machine Counter-Inhibit register | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x323 | ``mhpmevent3`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 
3 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | . . . . | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x33F | ``mhpmevent31`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 31 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x340 | ``mscratch`` | MRW | Machine Scratch | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x341 | ``mepc`` | MRW | Machine Exception Program Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x342 | ``mcause`` | MRW | Machine Trap Cause | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x343 | ``mtval`` | MRW | Machine Trap Value | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x344 | ``mip`` | MRW | Machine Interrupt Pending register | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7A0 | ``tselect`` | MRW | Trigger Select register | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7A1 | ``tdata1`` | MRW | Trigger Data register 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7A2 | ``tdata2`` | MRW | Trigger Data register 2 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7A3 | ``tdata3`` | MRW | Trigger Data register 3 | - 
+-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7A4 | ``tinfo`` | MRO | Trigger Info | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7A8 | ``mcontext`` | MRW | Machine Context register | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7AA | ``scontext`` | MRW | Machine Context register | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7B0 | ``dcsr`` | DRW | Debug Control and Status | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7B1 | ``dpc`` | DRW | Debug PC | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7B2 | ``dscratch0`` | DRW | Debug Scratch register 0 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0x7B3 | ``dscratch1`` | DRW | Debug Scratch register 1 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB00 | ``mcycle`` | MRW | (HPM) Machine Cycle Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB02 | ``minstret`` | MRW | (HPM) Machine Instructions-Retired Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB03 | ``mhpmcounter3`` | MRW | (HPM) Machine Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | . . . . 
| - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB1F | ``mhpmcounter31`` | MRW | (HPM) Machine Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB80 | ``mcycleh`` | MRW | (HPM) Upper 32 bits Machine Cycle Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB82 | ``minstreth`` | MRW | (HPM) Upper 32 bits Machine Instructions-Retired Counter | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB83 | ``mhpmcounterh3`` | MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | . . . . | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xB9F | ``mhpmcounterh31``| MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 31| - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xF11 | ``mvendorid`` | MRO | Machine Vendor ID | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xF12 | ``marchid`` | MRO | Machine Architecture ID | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xF13 | ``mimpid`` | MRO | Machine Implementation ID | - +-----------------+-------------------+---------------+--------------------------------------------------------------+ - | 0xF14 | ``mhartid`` | MRO | Hardware Thread ID | - 
+-----------------+-------------------+---------------+--------------------------------------------------------------+ + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | **CSR Address** | **Name** | **Privilege** | **Description** | + +=================+===================+===============+================================================================+ + | **User CSRs** | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x001 | ``fflags`` | URW | Floating-point accrued exceptions. | + | | | | | + | | | | Only present if ``FPU`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x002 | ``frm`` | URW | Floating-point dynamic rounding mode. | + | | | | | + | | | | Only present if ``FPU`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x003 | ``fcsr`` | URW | Floating-point control and status register. | + | | | | | + | | | | Only present if ``FPU`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC00 | ``cycle`` | URO | (HPM) Cycle Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC02 | ``instret`` | URO | (HPM) Instructions-Retired Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC03 | ``hpmcounter3`` | URO | (HPM) Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | . . . . 
| + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC1F | ``hpmcounter31`` | URO | (HPM) Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC80 | ``cycleh`` | URO | (HPM) Upper 32 bits Cycle Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC82 | ``instreth`` | URO | (HPM) Upper 32 bits Instructions-Retired Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC83 | ``hpmcounterh3`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | . . . . | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xC9F | ``hpmcounterh31`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | **User Custom CSRs** | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCC0 | ``lpstart0`` | URO | Hardware Loop 0 Start. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCC1 | ``lpend0`` | URO | Hardware Loop 0 End. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCC2 | ``lpcount0`` | URO | Hardware Loop 0 Counter. 
| + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCC4 | ``lpstart1`` | URO | Hardware Loop 1 Start. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCC5 | ``lpend1`` | URO | Hardware Loop 1 End. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCC6 | ``lpcount1`` | URO | Hardware Loop 1 Counter. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCD0 | ``uhartid`` | URO | Hardware Thread ID | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCD1 | ``privlv`` | URO | Privilege Level | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xCD2 | ``zfinx`` | URO | ``ZFINX`` ISA | + | | | | | + | | | | Only present if ``FPU`` = 0 or (``FPU`` = 1 and ``ZFINX`` = 1) | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | **Machine CSRs** | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x300 | ``mstatus`` | MRW | Machine Status | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x301 | ``misa`` | MRW | Machine ISA | + 
+-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x304 | ``mie`` | MRW | Machine Interrupt Enable register | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x305 | ``mtvec`` | MRW | Machine Trap-Handler Base Address | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x320 | ``mcountinhibit`` | MRW | (HPM) Machine Counter-Inhibit register | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x323 | ``mhpmevent3`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 3 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | . . . . | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x33F | ``mhpmevent31`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 31 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x340 | ``mscratch`` | MRW | Machine Scratch | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x341 | ``mepc`` | MRW | Machine Exception Program Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x342 | ``mcause`` | MRW | Machine Trap Cause | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x343 | ``mtval`` | MRW | Machine Trap Value | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x344 | ``mip`` | 
MRW | Machine Interrupt Pending register | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7A0 | ``tselect`` | MRW | Trigger Select register | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7A1 | ``tdata1`` | MRW | Trigger Data register 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7A2 | ``tdata2`` | MRW | Trigger Data register 2 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7A3 | ``tdata3`` | MRW | Trigger Data register 3 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7A4 | ``tinfo`` | MRO | Trigger Info | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7A8 | ``mcontext`` | MRW | Machine Context register | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7AA | ``scontext`` | MRW | Machine Context register | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7B0 | ``dcsr`` | DRW | Debug Control and Status | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7B1 | ``dpc`` | DRW | Debug PC | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7B2 | ``dscratch0`` | DRW | Debug Scratch register 0 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0x7B3 | ``dscratch1`` | DRW | Debug Scratch 
register 1 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB00 | ``mcycle`` | MRW | (HPM) Machine Cycle Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB02 | ``minstret`` | MRW | (HPM) Machine Instructions-Retired Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB03 | ``mhpmcounter3`` | MRW | (HPM) Machine Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | . . . . | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB1F | ``mhpmcounter31`` | MRW | (HPM) Machine Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB80 | ``mcycleh`` | MRW | (HPM) Upper 32 bits Machine Cycle Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB82 | ``minstreth`` | MRW | (HPM) Upper 32 bits Machine Instructions-Retired Counter | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB83 | ``mhpmcounterh3`` | MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | . . . . 
| + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xB9F | ``mhpmcounterh31``| MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xF11 | ``mvendorid`` | MRO | Machine Vendor ID | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xF12 | ``marchid`` | MRO | Machine Architecture ID | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xF13 | ``mimpid`` | MRO | Machine Implementation ID | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ + | 0xF14 | ``mhartid`` | MRO | Hardware Thread ID | + +-----------------+-------------------+---------------+----------------------------------------------------------------+ .. only:: USER @@ -440,7 +440,7 @@ Detailed: | | | | | | | SD set to 1 if **FS** = 11 meaning Floating point State is dirty so save/restore is needed in case of context switch. | | | | | - | | | Hardwired to 0 if ``FPU`` = 0 or ``ZFINX`` = 1. | + | | | Hardwired to 0 if ``FPU`` = 0 or (``FPU`` = 1 and ``ZFINX`` = 1). | +-------------+-----------+-------------------------------------------------------------------------------------------------------------------------+ | 30:15 | RO | Unimplemented, hardwired to 0. | +-------------+-----------+-------------------------------------------------------------------------------------------------------------------------+ @@ -454,7 +454,7 @@ Detailed: | | | | | | | 11 = Dirty | | | | | - | | | Hardwired to 0 if ``FPU`` = 0 or ``ZFINX`` = 1. | + | | | Hardwired to 0 if ``FPU`` = 0 or (``FPU`` = 1 and ``ZFINX`` = 1). 
| +-------------+-----------+-------------------------------------------------------------------------------------------------------------------------+ | 12:11 | RO | **MPP:** Machine Previous Priviledge mode | | | | | @@ -1705,7 +1705,7 @@ Detailed: ZFINX ISA (``zfinx``) ~~~~~~~~~~~~~~~~~~~~~ -CSR Address: 0xCD2 (only present if ``FPU`` = 1) +CSR Address: 0xCD2 (only present if ``FPU`` = 0 or (``FPU`` = 1 and ``ZFINX`` = 1)) Reset Value: Defined From 02eea3a800e6d8edca1b18377b5dfec426cd98ab Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Fri, 23 Jun 2023 14:34:42 +0200 Subject: [PATCH 29/38] Aligned RTL to PR #823 (Zfinx CSR presence). Signed-off-by: Pascal Gouedo --- rtl/cv32e40p_decoder.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/cv32e40p_decoder.sv b/rtl/cv32e40p_decoder.sv index 406f52341..ca304c570 100644 --- a/rtl/cv32e40p_decoder.sv +++ b/rtl/cv32e40p_decoder.sv @@ -2923,7 +2923,7 @@ module cv32e40p_decoder // ZFINX CSR_ZFINX : - if (!FPU || csr_op != CSR_OP_READ) begin + if ((FPU && !ZFINX) || csr_op != CSR_OP_READ) begin csr_illegal = 1'b1; end From 00c12f8418604b4f9d95d84dbabed66110164853 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Fri, 23 Jun 2023 15:26:50 +0200 Subject: [PATCH 30/38] Final resolution of issue #170 Signed-off-by: Pascal Gouedo --- rtl/cv32e40p_decoder.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/cv32e40p_decoder.sv b/rtl/cv32e40p_decoder.sv index ca304c570..c27c384fe 100644 --- a/rtl/cv32e40p_decoder.sv +++ b/rtl/cv32e40p_decoder.sv @@ -2774,11 +2774,11 @@ module cv32e40p_decoder case (instr_rdata_i[31:20]) // Floating point CSR_FFLAGS : - if (FPU == 0) csr_illegal = 1'b1; + if (FPU == 0 || fs_off_i == 1'b1) csr_illegal = 1'b1; CSR_FRM, CSR_FCSR : - if (FPU == 0) begin + if (FPU == 0 || fs_off_i == 1'b1) begin csr_illegal = 1'b1; end else begin // FRM updated value needed by following FPU instruction From f31c20b7f6646ecdfe673f954d7007c54a65d2a9 Mon Sep 17 00:00:00 
2001 From: Pascal Gouedo Date: Mon, 26 Jun 2023 15:05:30 +0200 Subject: [PATCH 31/38] Added a condition so that new Zfinx CSR only present when COREV_PULP = 1 Signed-off-by: Pascal Gouedo --- rtl/cv32e40p_decoder.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/cv32e40p_decoder.sv b/rtl/cv32e40p_decoder.sv index c27c384fe..d03027bae 100644 --- a/rtl/cv32e40p_decoder.sv +++ b/rtl/cv32e40p_decoder.sv @@ -2923,7 +2923,7 @@ module cv32e40p_decoder // ZFINX CSR_ZFINX : - if ((FPU && !ZFINX) || csr_op != CSR_OP_READ) begin + if (!COREV_PULP || (FPU && !ZFINX) || csr_op != CSR_OP_READ) begin csr_illegal = 1'b1; end From 37b43acb9da3cf6ed73393287cd2506b1538fe0f Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Mon, 26 Jun 2023 15:27:38 +0200 Subject: [PATCH 32/38] Added a condition so that new Zfinx CSR only present when COREV_PULP = 1 Signed-off-by: Pascal Gouedo --- docs/source/control_status_registers.rst | 309 ++++++++++++----------- 1 file changed, 155 insertions(+), 154 deletions(-) diff --git a/docs/source/control_status_registers.rst b/docs/source/control_status_registers.rst index 3c3f951a5..c8565e3fc 100644 --- a/docs/source/control_status_registers.rst +++ b/docs/source/control_status_registers.rst @@ -64,159 +64,160 @@ instruction exception. :widths: 13 17 15 55 :class: no-scrollbar-table - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | **CSR Address** | **Name** | **Privilege** | **Description** | - +=================+===================+===============+================================================================+ - | **User CSRs** | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x001 | ``fflags`` | URW | Floating-point accrued exceptions. 
| - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x002 | ``frm`` | URW | Floating-point dynamic rounding mode. | - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x003 | ``fcsr`` | URW | Floating-point control and status register. | - | | | | | - | | | | Only present if ``FPU`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC00 | ``cycle`` | URO | (HPM) Cycle Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC02 | ``instret`` | URO | (HPM) Instructions-Retired Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC03 | ``hpmcounter3`` | URO | (HPM) Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | . . . . 
| - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC1F | ``hpmcounter31`` | URO | (HPM) Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC80 | ``cycleh`` | URO | (HPM) Upper 32 bits Cycle Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC82 | ``instreth`` | URO | (HPM) Upper 32 bits Instructions-Retired Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC83 | ``hpmcounterh3`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | . . . . | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xC9F | ``hpmcounterh31`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | **User Custom CSRs** | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCC0 | ``lpstart0`` | URO | Hardware Loop 0 Start. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCC1 | ``lpend0`` | URO | Hardware Loop 0 End. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCC2 | ``lpcount0`` | URO | Hardware Loop 0 Counter. 
| - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCC4 | ``lpstart1`` | URO | Hardware Loop 1 Start. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCC5 | ``lpend1`` | URO | Hardware Loop 1 End. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCC6 | ``lpcount1`` | URO | Hardware Loop 1 Counter. | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCD0 | ``uhartid`` | URO | Hardware Thread ID | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCD1 | ``privlv`` | URO | Privilege Level | - | | | | | - | | | | Only present if ``COREV_PULP`` = 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xCD2 | ``zfinx`` | URO | ``ZFINX`` ISA | - | | | | | - | | | | Only present if ``FPU`` = 0 or (``FPU`` = 1 and ``ZFINX`` = 1) | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | **Machine CSRs** | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x300 | ``mstatus`` | MRW | Machine Status | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x301 | ``misa`` | MRW | Machine ISA | - 
+-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x304 | ``mie`` | MRW | Machine Interrupt Enable register | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x305 | ``mtvec`` | MRW | Machine Trap-Handler Base Address | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x320 | ``mcountinhibit`` | MRW | (HPM) Machine Counter-Inhibit register | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x323 | ``mhpmevent3`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 3 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | . . . . | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x33F | ``mhpmevent31`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 31 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x340 | ``mscratch`` | MRW | Machine Scratch | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x341 | ``mepc`` | MRW | Machine Exception Program Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x342 | ``mcause`` | MRW | Machine Trap Cause | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x343 | ``mtval`` | MRW | Machine Trap Value | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x344 | ``mip`` | 
MRW | Machine Interrupt Pending register | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7A0 | ``tselect`` | MRW | Trigger Select register | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7A1 | ``tdata1`` | MRW | Trigger Data register 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7A2 | ``tdata2`` | MRW | Trigger Data register 2 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7A3 | ``tdata3`` | MRW | Trigger Data register 3 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7A4 | ``tinfo`` | MRO | Trigger Info | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7A8 | ``mcontext`` | MRW | Machine Context register | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7AA | ``scontext`` | MRW | Machine Context register | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7B0 | ``dcsr`` | DRW | Debug Control and Status | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7B1 | ``dpc`` | DRW | Debug PC | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7B2 | ``dscratch0`` | DRW | Debug Scratch register 0 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0x7B3 | ``dscratch1`` | DRW | Debug Scratch 
register 1 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB00 | ``mcycle`` | MRW | (HPM) Machine Cycle Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB02 | ``minstret`` | MRW | (HPM) Machine Instructions-Retired Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB03 | ``mhpmcounter3`` | MRW | (HPM) Machine Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | . . . . | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB1F | ``mhpmcounter31`` | MRW | (HPM) Machine Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB80 | ``mcycleh`` | MRW | (HPM) Upper 32 bits Machine Cycle Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB82 | ``minstreth`` | MRW | (HPM) Upper 32 bits Machine Instructions-Retired Counter | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB83 | ``mhpmcounterh3`` | MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 3 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | . . . . 
| - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xB9F | ``mhpmcounterh31``| MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 31 | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xF11 | ``mvendorid`` | MRO | Machine Vendor ID | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xF12 | ``marchid`` | MRO | Machine Architecture ID | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xF13 | ``mimpid`` | MRO | Machine Implementation ID | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ - | 0xF14 | ``mhartid`` | MRO | Hardware Thread ID | - +-----------------+-------------------+---------------+----------------------------------------------------------------+ + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | **CSR Address** | **Name** | **Privilege** | **Description** | + +=================+===================+===============+====================================================================+ + | **User CSRs** | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x001 | ``fflags`` | URW | Floating-point accrued exceptions. | + | | | | | + | | | | Only present if ``FPU`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x002 | ``frm`` | URW | Floating-point dynamic rounding mode. 
| + | | | | | + | | | | Only present if ``FPU`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x003 | ``fcsr`` | URW | Floating-point control and status register. | + | | | | | + | | | | Only present if ``FPU`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC00 | ``cycle`` | URO | (HPM) Cycle Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC02 | ``instret`` | URO | (HPM) Instructions-Retired Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC03 | ``hpmcounter3`` | URO | (HPM) Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | . . . . | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC1F | ``hpmcounter31`` | URO | (HPM) Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC80 | ``cycleh`` | URO | (HPM) Upper 32 bits Cycle Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC82 | ``instreth`` | URO | (HPM) Upper 32 bits Instructions-Retired Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC83 | ``hpmcounterh3`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | . . . . 
| + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xC9F | ``hpmcounterh31`` | URO | (HPM) Upper 32 bits Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | **User Custom CSRs** | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCC0 | ``lpstart0`` | URO | Hardware Loop 0 Start. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCC1 | ``lpend0`` | URO | Hardware Loop 0 End. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCC2 | ``lpcount0`` | URO | Hardware Loop 0 Counter. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCC4 | ``lpstart1`` | URO | Hardware Loop 1 Start. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCC5 | ``lpend1`` | URO | Hardware Loop 1 End. | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCC6 | ``lpcount1`` | URO | Hardware Loop 1 Counter. 
| + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCD0 | ``uhartid`` | URO | Hardware Thread ID | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCD1 | ``privlv`` | URO | Privilege Level | + | | | | | + | | | | Only present if ``COREV_PULP`` = 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xCD2 | ``zfinx`` | URO | ``ZFINX`` ISA | + | | | | | + | | | | Only present if | + | | | | ``COREV_PULP`` = 1 & (``FPU`` = 0 | (``FPU`` = 1 & ``ZFINX`` = 1)) | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | **Machine CSRs** | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x300 | ``mstatus`` | MRW | Machine Status | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x301 | ``misa`` | MRW | Machine ISA | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x304 | ``mie`` | MRW | Machine Interrupt Enable register | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x305 | ``mtvec`` | MRW | Machine Trap-Handler Base Address | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x320 | ``mcountinhibit`` | MRW | (HPM) Machine Counter-Inhibit register | + 
+-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x323 | ``mhpmevent3`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 3 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | . . . . | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x33F | ``mhpmevent31`` | MRW | (HPM) Machine Performance-Monitoring Event Selector 31 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x340 | ``mscratch`` | MRW | Machine Scratch | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x341 | ``mepc`` | MRW | Machine Exception Program Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x342 | ``mcause`` | MRW | Machine Trap Cause | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x343 | ``mtval`` | MRW | Machine Trap Value | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x344 | ``mip`` | MRW | Machine Interrupt Pending register | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7A0 | ``tselect`` | MRW | Trigger Select register | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7A1 | ``tdata1`` | MRW | Trigger Data register 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 
0x7A2 | ``tdata2`` | MRW | Trigger Data register 2 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7A3 | ``tdata3`` | MRW | Trigger Data register 3 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7A4 | ``tinfo`` | MRO | Trigger Info | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7A8 | ``mcontext`` | MRW | Machine Context register | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7AA | ``scontext`` | MRW | Supervisor Context register | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7B0 | ``dcsr`` | DRW | Debug Control and Status | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7B1 | ``dpc`` | DRW | Debug PC | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7B2 | ``dscratch0`` | DRW | Debug Scratch register 0 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0x7B3 | ``dscratch1`` | DRW | Debug Scratch register 1 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB00 | ``mcycle`` | MRW | (HPM) Machine Cycle Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB02 | ``minstret`` | MRW | (HPM) Machine Instructions-Retired Counter | +
+-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB03 | ``mhpmcounter3`` | MRW | (HPM) Machine Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | . . . . | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB1F | ``mhpmcounter31`` | MRW | (HPM) Machine Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB80 | ``mcycleh`` | MRW | (HPM) Upper 32 bits Machine Cycle Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB82 | ``minstreth`` | MRW | (HPM) Upper 32 bits Machine Instructions-Retired Counter | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB83 | ``mhpmcounterh3`` | MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 3 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | . . . . 
| + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xB9F | ``mhpmcounterh31``| MRW | (HPM) Upper 32 bits Machine Performance-Monitoring Counter 31 | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xF11 | ``mvendorid`` | MRO | Machine Vendor ID | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xF12 | ``marchid`` | MRO | Machine Architecture ID | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xF13 | ``mimpid`` | MRO | Machine Implementation ID | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ + | 0xF14 | ``mhartid`` | MRO | Hardware Thread ID | + +-----------------+-------------------+---------------+--------------------------------------------------------------------+ .. 
only:: USER @@ -1705,7 +1706,7 @@ Detailed: ZFINX ISA (``zfinx``) ~~~~~~~~~~~~~~~~~~~~~ -CSR Address: 0xCD2 (only present if ``FPU`` = 0 or (``FPU`` = 1 and ``ZFINX`` = 1)) +CSR Address: 0xCD2 (only present if ``COREV_PULP`` = 1 & (``FPU`` = 0 | (``FPU`` = 1 & ``ZFINX`` = 1)) ) Reset Value: Defined From ee78c3b6291dbb22c9e4d76e1093446031452efb Mon Sep 17 00:00:00 2001 From: Massimiliano Giacometti Date: Mon, 26 Jun 2023 22:34:29 +0200 Subject: [PATCH 33/38] add skip_aws branch --- .github/workflows/aws.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/aws.yml b/.github/workflows/aws.yml index 414f01980..c313bc796 100644 --- a/.github/workflows/aws.yml +++ b/.github/workflows/aws.yml @@ -23,4 +23,11 @@ jobs: uses: openhwgroup/aws-codebuild-run-build@master with: project-name: cv32e40p - hide-log: true \ No newline at end of file + hide-log: true + skip_aws: + name: no AWS + if: ${{ (github.actor == 'davideschiavone' || github.actor == 'MikeOpenHWGroup' || github.actor == 'zarubaf') $$ github.event.label.name == 'Component:Doc' }} + runs-on: ubuntu-latest + steps: + - name: info + run: echo "Documentation only, skipping LEC on AWS" From ee701f0632c8f934a377df27687409e0d0dcd91c Mon Sep 17 00:00:00 2001 From: Massimiliano Giacometti Date: Tue, 27 Jun 2023 00:08:17 +0200 Subject: [PATCH 34/38] fix --- .github/workflows/aws.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/aws.yml b/.github/workflows/aws.yml index c313bc796..1b0545631 100644 --- a/.github/workflows/aws.yml +++ b/.github/workflows/aws.yml @@ -7,7 +7,7 @@ on: jobs: aws: name: AWS Pipeline (private) - if: ${{ github.actor == 'davideschiavone' || github.actor == 'MikeOpenHWGroup' || github.actor == 'zarubaf' }} + if: ${{ (github.actor == 'davideschiavone' || github.actor == 'MikeOpenHWGroup' || github.actor == 'zarubaf') && github.event.label.name != 'Component:Doc' }} runs-on: ubuntu-latest # These permissions are needed 
to interact with GitHub's OIDC Token endpoint. permissions: @@ -26,7 +26,7 @@ jobs: hide-log: true skip_aws: name: no AWS - if: ${{ (github.actor == 'davideschiavone' || github.actor == 'MikeOpenHWGroup' || github.actor == 'zarubaf') $$ github.event.label.name == 'Component:Doc' }} + if: ${{ (github.actor == 'davideschiavone' || github.actor == 'MikeOpenHWGroup' || github.actor == 'zarubaf') && github.event.label.name == 'Component:Doc' }} runs-on: ubuntu-latest steps: - name: info From 967d549a8bc2edc135befafa4c1ab0eb3a098ebe Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Tue, 27 Jun 2023 14:24:06 +0200 Subject: [PATCH 35/38] Issues #721 and #723 resolution Signed-off-by: Pascal Gouedo --- rtl/cv32e40p_core.sv | 2 ++ rtl/cv32e40p_load_store_unit.sv | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/rtl/cv32e40p_core.sv b/rtl/cv32e40p_core.sv index 25179aea8..bd1f1bf51 100644 --- a/rtl/cv32e40p_core.sv +++ b/rtl/cv32e40p_core.sv @@ -901,6 +901,8 @@ module cv32e40p_core .data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline .data_misaligned_o (data_misaligned), + .apu_busy_i (apu_busy), + .p_elw_start_o (p_elw_start), .p_elw_finish_o(p_elw_finish), diff --git a/rtl/cv32e40p_load_store_unit.sv b/rtl/cv32e40p_load_store_unit.sv index 024d43699..83ed526bb 100644 --- a/rtl/cv32e40p_load_store_unit.sv +++ b/rtl/cv32e40p_load_store_unit.sv @@ -59,6 +59,8 @@ module cv32e40p_load_store_unit #( input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline output logic data_misaligned_o, // misaligned access was detected -> to controller + input logic apu_busy_i, + input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage output logic [5:0] data_atop_o, // atomic instruction signal -> core output @@ -74,6 +76,8 @@ module cv32e40p_load_store_unit #( localparam DEPTH = 2; // Maximum number of outstanding transactions + logic data_req_ex_filtered; // data request from 
ex stage filtered when it is misaligned and there is an on-going APU instruction + // Transaction request (to cv32e40p_obi_interface) logic trans_valid; logic trans_ready; @@ -348,12 +352,14 @@ module cv32e40p_load_store_unit #( // Busy if there are ongoing (or potentially outstanding) transfers assign busy_o = (cnt_q != 2'b00) || trans_valid; + assign data_req_ex_filtered = data_req_ex_i & !(apu_busy_i & (data_misaligned_o | data_misaligned_ex_i)); + ////////////////////////////////////////////////////////////////////////////// // Transaction request generation // // Assumes that corresponding response is at least 1 cycle after request // - // - Only request transaction when EX stage requires data transfer (data_req_ex_i), and + // - Only request transaction when EX stage requires data transfer (data_req_ex_filtered), and // - maximum number of outstanding transactions will not be exceeded (cnt_q < DEPTH) ////////////////////////////////////////////////////////////////////////////// @@ -370,12 +376,12 @@ module cv32e40p_load_store_unit #( // OBI compatible (avoids combinatorial path from data_rvalid_i to data_req_o). // Multiple trans_* transactions can be issued (and accepted) before a response // (resp_*) is received. - assign trans_valid = data_req_ex_i && (cnt_q < DEPTH); + assign trans_valid = data_req_ex_filtered && (cnt_q < DEPTH); end else begin : gen_pulp_obi // Legacy PULP OBI behavior, i.e. only issue subsequent transaction if preceding transfer // is about to finish (re-introducing timing critical path from data_rvalid_i to data_req_o) - assign trans_valid = (cnt_q == 2'b00) ? data_req_ex_i && (cnt_q < DEPTH) : - data_req_ex_i && (cnt_q < DEPTH) && resp_valid; + assign trans_valid = (cnt_q == 2'b00) ? 
data_req_ex_filtered && (cnt_q < DEPTH) : + data_req_ex_filtered && (cnt_q < DEPTH) && resp_valid; end endgenerate @@ -385,7 +391,7 @@ module cv32e40p_load_store_unit #( // LSU EX stage readyness requires two criteria to be met: // - // - A data request (data_req_ex_i) has been forwarded/accepted (trans_valid && trans_ready) + // - A data request (data_req_ex_filtered) has been forwarded/accepted (trans_valid && trans_ready) // - The LSU WB stage is available such that EX and WB can be updated in lock step // // Default (if there is not even a data request) LSU EX is signaled to be ready, else @@ -394,10 +400,11 @@ module cv32e40p_load_store_unit #( // in case there is already at least one outstanding transaction (so WB is full) the EX // and WB stage can only signal readiness in lock step (so resp_valid is used as well). - assign lsu_ready_ex_o = (data_req_ex_i == 1'b0) ? 1'b1 : - (cnt_q == 2'b00) ? ( trans_valid && trans_ready) : - (cnt_q == 2'b01) ? (resp_valid && trans_valid && trans_ready) : - resp_valid; + assign lsu_ready_ex_o = !(apu_busy_i & (data_misaligned_o | data_misaligned_ex_i)) & + ((data_req_ex_i == 1'b0) ? 1'b1 : + (cnt_q == 2'b00) ? ( trans_valid && trans_ready) : + (cnt_q == 2'b01) ? (resp_valid && trans_valid && trans_ready) : + resp_valid); // Update signals for EX/WB registers (when EX has valid data itself and is ready for next) assign ctrl_update = lsu_ready_ex_o && data_req_ex_i; From 00bb726c53ed45ab6ff9674bb9f71693141e6b22 Mon Sep 17 00:00:00 2001 From: Pascal Gouedo Date: Tue, 27 Jun 2023 14:43:42 +0200 Subject: [PATCH 36/38] verible !!! 
Signed-off-by: Pascal Gouedo --- rtl/cv32e40p_core.sv | 2 +- rtl/cv32e40p_load_store_unit.sv | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rtl/cv32e40p_core.sv b/rtl/cv32e40p_core.sv index bd1f1bf51..528ac1c8f 100644 --- a/rtl/cv32e40p_core.sv +++ b/rtl/cv32e40p_core.sv @@ -901,7 +901,7 @@ module cv32e40p_core .data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline .data_misaligned_o (data_misaligned), - .apu_busy_i (apu_busy), + .apu_busy_i(apu_busy), .p_elw_start_o (p_elw_start), .p_elw_finish_o(p_elw_finish), diff --git a/rtl/cv32e40p_load_store_unit.sv b/rtl/cv32e40p_load_store_unit.sv index 83ed526bb..8df1d8498 100644 --- a/rtl/cv32e40p_load_store_unit.sv +++ b/rtl/cv32e40p_load_store_unit.sv @@ -364,11 +364,11 @@ module cv32e40p_load_store_unit #( ////////////////////////////////////////////////////////////////////////////// // For last phase of misaligned transfer the address needs to be word aligned (as LSB of data_be will be set) - assign trans_addr = data_misaligned_ex_i ? {data_addr_int[31:2], 2'b00} : data_addr_int; - assign trans_we = data_we_ex_i; - assign trans_be = data_be; + assign trans_addr = data_misaligned_ex_i ? 
{data_addr_int[31:2], 2'b00} : data_addr_int; + assign trans_we = data_we_ex_i; + assign trans_be = data_be; assign trans_wdata = data_wdata; - assign trans_atop = data_atop_ex_i; + assign trans_atop = data_atop_ex_i; // Transaction request generation generate From f64a91174b3340651577e3e831bbb6ee669fda99 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Tue, 27 Jun 2023 10:37:56 +0800 Subject: [PATCH 37/38] Better dpc monitoring --- bhv/cv32e40p_rvfi.sv | 13 +++++++++++-- bhv/pipe_freeze_trace.sv | 5 +++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index 63d48bde0..c03cac387 100644 --- a/bhv/cv32e40p_rvfi.sv +++ b/bhv/cv32e40p_rvfi.sv @@ -1104,8 +1104,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; //those event are for debug purpose event e_dev_send_wb_1, e_dev_send_wb_2; event e_dev_commit_rf_to_ex_1, e_dev_commit_rf_to_ex_2, e_dev_commit_rf_to_ex_3; + event e_if_2_id_1, e_if_2_id_2; event e_ex_to_wb_1, e_ex_to_wb_2; event e_id_to_ex_1, e_id_to_ex_2; + event e_commit_dpc; //used to match memory response to memory request and corresponding instruction integer cnt_data_req, cnt_data_resp; @@ -1443,7 +1445,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, frm) `CSR_FROM_PIPE(id, fcsr) - + if (r_pipe_freeze_trace.csr.we) begin + `CSR_FROM_PIPE(id, dpc) + end if (s_fflags_we_non_apu) begin trace_id.m_fflags_we_non_apu = 1'b1; end @@ -1575,6 +1579,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; // trace_id.m_instret_cnt = r_instret_cnt; + `CSR_FROM_PIPE(id, dpc) + ->e_if_2_id_1; end else begin if (trace_id.m_valid) begin `CSR_FROM_PIPE(id, dscratch0) @@ -1593,6 +1599,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; + `CSR_FROM_PIPE(id, dpc) + ->e_if_2_id_2; // trace_id.m_instret_cnt = r_instret_cnt; end @@ -1613,8 +1621,9 @@ 
insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end if (!s_id_done) begin - `CSR_FROM_PIPE(id, dpc) + // `CSR_FROM_PIPE(id, dpc) dcsr_to_id(); + ->e_commit_dpc; end if (r_pipe_freeze_trace.pc_set) begin diff --git a/bhv/pipe_freeze_trace.sv b/bhv/pipe_freeze_trace.sv index 220f80261..b5d433df1 100644 --- a/bhv/pipe_freeze_trace.sv +++ b/bhv/pipe_freeze_trace.sv @@ -336,7 +336,7 @@ function compute_csr_we(); r_pipe_freeze_trace.csr.fflags_we = 1'b0; r_pipe_freeze_trace.csr.frm_we = 1'b0; r_pipe_freeze_trace.csr.fcsr_we = 1'b0; - + r_pipe_freeze_trace.csr.dpc_we = csr_dpc_we_i; if (r_pipe_freeze_trace.csr.we) begin case (r_pipe_freeze_trace.csr.addr) CSR_MSTATUS: r_pipe_freeze_trace.csr.mstatus_we = 1'b1; @@ -349,6 +349,7 @@ function compute_csr_we(); CSR_FFLAGS: r_pipe_freeze_trace.csr.fflags_we = 1'b1; CSR_FRM: r_pipe_freeze_trace.csr.frm_we = 1'b1; CSR_FCSR: r_pipe_freeze_trace.csr.fcsr_we = 1'b1; + CSR_DPC: r_pipe_freeze_trace.csr.dpc_we = 1'b1; endcase end // CSR_MCAUSE: r_pipe_freeze_trace.csr.mcause_we = r_pipe_freeze_trace.csr.mcause_n != r_pipe_freeze_trace.csr.mcause_q; //for debug purpose @@ -625,7 +626,7 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.dpc_n = csr_dpc_n_i; r_pipe_freeze_trace.csr.dpc_q = csr_dpc_q_i; - r_pipe_freeze_trace.csr.dpc_we = csr_dpc_we_i; + // r_pipe_freeze_trace.csr.dpc_we = csr_dpc_we_i; r_pipe_freeze_trace.csr.dscratch0_n = csr_dscratch0_n_i; r_pipe_freeze_trace.csr.dscratch0_q = csr_dscratch0_q_i; r_pipe_freeze_trace.csr.dscratch0_we = csr_dscratch0_we_i; From f804833c673d8a587f0bfbb5d0c3cac85d63fb17 Mon Sep 17 00:00:00 2001 From: Yoann Pruvost Date: Wed, 28 Jun 2023 11:36:13 +0800 Subject: [PATCH 38/38] Adding rvfi instruction counter for trap on fpu instructions --- bhv/cv32e40p_rvfi.sv | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/bhv/cv32e40p_rvfi.sv b/bhv/cv32e40p_rvfi.sv index c03cac387..33e45ed31 100644 --- a/bhv/cv32e40p_rvfi.sv +++ 
b/bhv/cv32e40p_rvfi.sv @@ -1169,18 +1169,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end endfunction - insn_trace_t lsu_trace_q[$]; - insn_trace_t trace_lsu_req, trace_lsu_resp; - bit s_is_misaligned_resp; - - // function void lsu_resp(); - // if(s_is_misaligned_resp) begin - - // end else if(trace_lsu_resp.size() > 0) begin - // trace_lsu_resp = lsu_trace_q.pop_front(); - // end - // endfunction - task compute_pipeline(); bit s_new_valid_insn; bit s_ex_valid_adjusted; @@ -1200,6 +1188,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_increase_instret_1; bit s_increase_instret_2; + bit s_test_for_dret; + trace_if = new(); trace_id = new(); trace_ex = new(); @@ -1233,6 +1223,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_increase_instret_1 = 1'b0; s_increase_instret_2 = 1'b0; + s_test_for_dret = 1'b0; + $display("*****Starting pipeline computing*****\n"); forever begin wait(e_pipe_monitor_ok.triggered); @@ -1339,8 +1331,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (!r_pipe_freeze_trace.data_rvalid) begin s_skip_wb = 1'b1; end - // end else if (r_pipe_freeze_trace.data_rvalid && (lsu_trace_q.size() > 0)) begin - // lsu_resp(); end if (trace_wb.m_valid && !s_skip_wb) begin if (r_pipe_freeze_trace.rf_we_wb) begin @@ -1421,8 +1411,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end - s_ex_valid_adjusted = r_pipe_freeze_trace.ex_valid && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) && (!r_pipe_freeze_trace.apu_rvalid || r_pipe_freeze_trace.data_req_ex); - + s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid || s_test_for_dret) && (r_pipe_freeze_trace.ctrl_fsm_cs == DECODE) && (!r_pipe_freeze_trace.apu_rvalid || r_pipe_freeze_trace.data_req_ex); + s_test_for_dret = r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF; //EX_STAGE if (trace_id.m_valid) begin mtvec_to_id(); @@ -1465,6 +1455,11 @@ 
insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_apu_req = new(); trace_apu_req.copy_full(trace_id); csr_to_apu_req(); + if(s_increase_instret_2) begin + trace_apu_req.m_instret_cnt = r_instret_cnt + 1; + end else begin + trace_apu_req.m_instret_cnt = r_instret_cnt; + end trace_apu_req.set_to_apu(); apu_trace_q.push_back(trace_apu_req); trace_id.m_valid = 1'b0; @@ -1556,7 +1551,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; end - ->e_id_to_ex_2; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); @@ -1577,8 +1571,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, dscratch1) mstatus_to_id(); - // trace_id.m_instret_cnt = r_instret_cnt; - `CSR_FROM_PIPE(id, dpc) ->e_if_2_id_1; end else begin @@ -1601,7 +1593,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_id_done = 1'b0; `CSR_FROM_PIPE(id, dpc) ->e_if_2_id_2; - // trace_id.m_instret_cnt = r_instret_cnt; end trace_if.m_insn = r_pipe_freeze_trace.instr_if; //Instr comes from if, buffer for one cycle @@ -1621,7 +1612,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end if (!s_id_done) begin - // `CSR_FROM_PIPE(id, dpc) dcsr_to_id(); ->e_commit_dpc; end