diff --git a/Makefile b/Makefile index ca1a02b0c..f65c4640b 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,7 @@ environment.yml: python-requirements.txt ## @section Installation ## Generates mcu files core-v-mini-mcu files and build the design with fusesoc -## @param CPU=[cv32e20(default),cv32e40p,cv32e40x] +## @param CPU=[cv32e20(default),cv32e40p,cv32e40x,cv32e40px] ## @param BUS=[onetoM(default),NtoM] ## @param MEMORY_BANKS=[2(default) to (16 - MEMORY_BANKS_IL)] ## @param MEMORY_BANKS_IL=[0(default),2,4,8] @@ -193,7 +193,7 @@ questasim-sim-opt-upf: questasim-sim $(MAKE) -C build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-modelsim opt-upf ## VCS simulation -## @param CPU=cv32e20(default),cv32e40p,cv32e40x +## @param CPU=cv32e20(default),cv32e40p,cv32e40x,cv32e40px ## @param BUS=onetoM(default),NtoM vcs-sim: $(FUSESOC) --cores-root . run --no-export --target=sim --tool=vcs $(FUSESOC_FLAGS) --build openhwgroup.org:systems:core-v-mini-mcu ${FUSESOC_PARAM} 2>&1 | tee buildsim.log diff --git a/docs/source/How_to/eXtendingHEEP.md b/docs/source/How_to/eXtendingHEEP.md index 0f875a5f1..41e9bed70 100644 --- a/docs/source/How_to/eXtendingHEEP.md +++ b/docs/source/How_to/eXtendingHEEP.md @@ -2,14 +2,14 @@ `X-HEEP` is meant to be extended with your own custom IPs. `X-HEEP` itself posseses a hardware-software framework capable of working standalone. If you want to extend it, you will need to merge your hardware and software with `X-HEEP`'s. -For this purpose we support the [CORE-V-XIF](https://docs.openhwgroup.org/projects/openhw-group-core-v-xif/en/latest/intro.html) interface with the [cv32e40x](https://github.com/openhwgroup/cv32e40x) RISCV-CPU, and we expose master and slave ports to/from the bus. 
+For this purpose we support the [CV-X-IF](https://docs.openhwgroup.org/projects/openhw-group-core-v-xif/en/latest/intro.html) interface with the [cv32e40x](https://github.com/openhwgroup/cv32e40x) or [cv32e40px](https://github.com/esl-epfl/cv32e40px) RISCV-CPU, and we expose master and slave ports to/from the bus. -> `X-HEEP` currently uses the revision [`0.9.0`](https://github.com/openhwgroup/cv32e40x/commit/f17028f2369373d9443e4636f2826218e8d54e0f) of OpenHW Groups's `cv32e40x` core to implement the `CORE-V-XIF`. It is recommended to use the same revision in peripheral IPs to prevent conflicts during RTL compilation. +> We recommend using the `cv32e40px` for pairing with your CV-X-IF compliant coprocessor. If you choose to use the `cv32e40x`, `X-HEEP` currently uses the revision [`0.9.0`](https://github.com/openhwgroup/cv32e40x/commit/f17028f2369373d9443e4636f2826218e8d54e0f). It is recommended to use the same revision in peripheral IPs to prevent conflicts during RTL compilation. Here you can find a list of `X-HEEP` based open-source examples. If you want to include your project in this list, please open an issue with a link to your repository. * [CGRA-X-HEEP](https://github.com/esl-epfl/cgra_x_heep): A CGRA loosely coupled with X-HEEP. -* [F-HEEP](https://github.com/davidmallasen/F-HEEP): System integrating [fpu_ss](https://github.com/pulp-platform/fpu_ss) into X-HEEP via the eXtension interface and cv32e40x. +* [F-HEEP](https://github.com/davidmallasen/F-HEEP): System integrating [fpu_ss](https://github.com/pulp-platform/fpu_ss) into X-HEEP via the eXtension interface and cv32e40px. * [KALIPSO](https://github.com/vlsi-lab/ntt_intt_kyber) and [KRONOS](https://github.com/vlsi-lab/keccak_integration/tree/keccak_xheep): Loosely-coupled, post-quantum cryptography accelerators for NTT/INTT and Keccak hash function integrated into X-HEEP. 
@@ -98,7 +98,7 @@ To achieve this: * Create a new top-level repository (`BASE`) and vendorize (or add as git submodules) both your `CORE-V-XIF/OBI` compliant coprocessor/accelerator and `X-HEEP`. * Copy the `x-heep/hw/system/x_heep_system.sv` as your new top-level module. Then modify it as needed to include your co-processor and connect it to the `core_v_mini_mcu` with the `XIF`. The `XIF` SystemVerilog interface must be instantiated in the top-level module, where `X-HEEP` and your co-processor are connected. See the `X-HEEP` [testbench](./../../../tb/testharness.sv) as an example. -* Before building software remember to run `make mcu-gen CPU=cv32e40x`. +* Before building software remember to run `make mcu-gen CPU=cv32e40px`. To add this new top-level module to the simulation/synthesis flow you can extend the [FuseSoC](https://fusesoc.readthedocs.io/en/stable/user/index.html) support of `X-HEEP`. @@ -328,7 +328,7 @@ include $(XHEEP_MAKE) * The `verilator-sim` rule will override the `X-HEEP` Makefile's one. * Any other target will be passed straight to `X-HEEP`'s Makefile. For example ```sh -make mcu-gen CPU=cv32e40x +make mcu-gen CPU=cv32e40px ``` @@ -401,3 +401,10 @@ If you plan to store source files in a different location that the one proposed, make app PROJECT=your_app SOURCE= ``` Consider that inside this `sw` folder the same structure than the one proposed is required. + + +## Inter-process communication using Verilator's DPI + +The following [repository](https://github.com/specs-feup/x-heep) uses X-HEEP and the Verilator simulator to model a CPU-CGRA hybrid system. This architecture simulates the CPU integrated into the X-HEEP system, and an external Java process simulates the accelerator. Both components require a communication channel to exchange instructions and data. 
Using the existing infrastructure to interact with an external OS process is not feasible at first sight, given that the X-HEEP ecosystem's pipeline encapsulates most of the simulation build and execution, with all modules supplied directly to Verilator. + +To circumvent this issue, this project uses [Direct Programming Interface (DPI)](https://verilator.org/guide/latest/connecting.html) calls (defined in `hw/ip_examples/cgraitf/cgraitfdpi.c`) to establish a connection and communicate with an external process through a Unix Domain Socket. This behavior mirrors the UART module (used as the skeleton code) that connects and outputs _printf_ information to the pseudo-terminal. These calls are embedded in a mock CGRA peripheral/interface, located in `hw/ip_examples/cgraitf/cgraitf.sv`. The module overrides reads and writes to the specified peripheral address, with the proper socket-based mechanism (_send_ or _recv_). The _simple_accelerator_ module could also be similarly customized to perform the same operations, using X-HEEP's interfaces and memory access protocols. A given user program executed in the CPU (such as `sw/applications/cgra_itf/main.c`) must then select assignments to or from the address to trigger the appropriate action. \ No newline at end of file diff --git a/docs/source/Peripherals/Timer.md b/docs/source/Peripherals/Timer.md new file mode 100644 index 000000000..052faf0de --- /dev/null +++ b/docs/source/Peripherals/Timer.md @@ -0,0 +1,107 @@ + +# Timer SDK + +This SDK provides utilities for execution time measurements using HW timers. It includes functions to start, stop, reset, and configure timers, as well as to enable timer interrupts and measure elapsed time. + +## Usage + +The SDK provides a set of functions to interact with the HW Timer for various timing operations. + +### Initialize Timer for Counting Cycles + +This function configures the counter at the running clock frequency to count the number of clock cycles. 
Call this function before any of the other timer SDK functions. + +```c +void timer_cycles_init(); +``` + +### Start Timer + +Start the HW timer. + +```c +void timer_start(); +``` + +### Get Current Timer Value + +Retrieve the current value of the HW timer without stopping it. + +```c +uint32_t timer_get_cycles(); +``` + +### Complete timer reset + +Completely resets the HW counter, disabling all IRQs, counters, and comparators. +```c +void timer_reset(); +``` + +### Stop and Reset Timer + +Retrieve the current value of the HW timer and stop it. + +```c +uint32_t timer_stop(); +``` + +### Set Timer Threshold + +Set the timer to go off once the counter value reaches the specified threshold. If the timer interrupts and the timer IRQ have been enabled, when the timer reaches that value an interrupt will be called. + +```c +void timer_arm_set(uint32_t threshold); +``` + +### Set Timer Threshold and Start + +Set the timer to go off once the counter value reaches the specified threshold, and start the timer. If the timer interrupts and the timer IRQ have been enabled, when the timer reaches that value an interrupt will be called. + +```c +void timer_arm_start(uint32_t threshold); +``` + +### Enable Timer IRQ + +Enable the timer interrupt request. + +```c +void timer_irq_enable(); +``` + +### Clear Timer IRQ + +Clear the timer interrupt request. + +```c +void timer_irq_clear(); +``` + +### Enable Timer Machine-level Interrupts + +Enable the timer machine-level interrupts for the X-Heep platform. + +```c +void enable_timer_interrupt(); +``` + +### Wait for Microseconds + +Block execution for a specified number of microseconds. This function is not precise for small numbers of microseconds. Enable timer interrupts with `enable_timer_interrupt()` before using this function. 
+ +```c +void timer_wait_us(uint32_t ms); +``` + +### Get Execution Time in Microseconds + +Get the time taken to execute a certain number of cycles, returned as a float representing the time in microseconds. + +```c +float get_time_from_cycles(uint32_t cycles); +``` + +## Example Usage + +An example of utilization of the timer SDK can be found in `sw/applications/example_timer_sdk/main.c`. diff --git a/hw/core-v-mini-mcu/peripheral_subsystem.sv b/hw/core-v-mini-mcu/peripheral_subsystem.sv index fd7ac4c31..4fa663e66 100644 --- a/hw/core-v-mini-mcu/peripheral_subsystem.sv +++ b/hw/core-v-mini-mcu/peripheral_subsystem.sv @@ -452,7 +452,7 @@ module peripheral_subsystem .reg_req_t(reg_pkg::reg_req_t), .reg_rsp_t(reg_pkg::reg_rsp_t) ) i2s_i ( - .clk_i, + .clk_i(clk_cg), .rst_ni, .reg_req_i(peripheral_slv_req[core_v_mini_mcu_pkg::I2S_IDX]), .reg_rsp_o(peripheral_slv_rsp[core_v_mini_mcu_pkg::I2S_IDX]), diff --git a/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl b/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl index 12fc624fd..4025cd88a 100644 --- a/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl +++ b/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl @@ -546,7 +546,7 @@ module peripheral_subsystem .reg_req_t(reg_pkg::reg_req_t), .reg_rsp_t(reg_pkg::reg_rsp_t) ) pdm2pcm_i ( - .clk_i, + .clk_i(clk_cg), .rst_ni, .reg_req_i(peripheral_slv_req[core_v_mini_mcu_pkg::PDM2PCM_IDX]), .reg_rsp_o(peripheral_slv_rsp[core_v_mini_mcu_pkg::PDM2PCM_IDX]), @@ -569,7 +569,7 @@ module peripheral_subsystem .reg_req_t(reg_pkg::reg_req_t), .reg_rsp_t(reg_pkg::reg_rsp_t) ) i2s_i ( - .clk_i, + .clk_i(clk_cg), .rst_ni, .reg_req_i(peripheral_slv_req[core_v_mini_mcu_pkg::I2S_IDX]), .reg_rsp_o(peripheral_slv_rsp[core_v_mini_mcu_pkg::I2S_IDX]), diff --git a/hw/fpga/prim_xilinx_clk.sv b/hw/fpga/prim_xilinx_clk.sv index 35165b880..2893d666c 100644 --- a/hw/fpga/prim_xilinx_clk.sv +++ b/hw/fpga/prim_xilinx_clk.sv @@ -9,15 +9,12 @@ module xilinx_clk_gating ( output logic clk_o ); - logic clk_en; - - // Use a 
latch based clock gate instead of BUFGCE. Otherwise we quickly run out of BUFGCTRL cells on the FPGAs. - always_latch begin - if (clk_i == 1'b0) clk_en <= en_i | test_en_i; - end - - assign clk_o = clk_i & clk_en; - + // In Zynq7000, just bypass the clock gating because there are not enough BUFGs that can be + // cascaded with the BUFG of the MMCM. + // In the Zynq UltraScale+, it can be implemented as BUFGCE without trouble, since there + // are > 500 BUFGCEs and the rules for cascading are more relaxed. + // NOTE: This **cannot** be substituted by a latch+and + assign clk_o = clk_i; endmodule diff --git a/hw/vendor/esl_epfl_cv32e40px.lock.hjson b/hw/vendor/esl_epfl_cv32e40px.lock.hjson index 2871b8ed4..e307c5ae8 100644 --- a/hw/vendor/esl_epfl_cv32e40px.lock.hjson +++ b/hw/vendor/esl_epfl_cv32e40px.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/esl-epfl/cv32e40px.git - rev: 15b9dd6077513342cf44e6853a5fc33098f2e73b + rev: 10b08065c50d44b5355c1535cb8f740e68e4f106 } } diff --git a/hw/vendor/esl_epfl_cv32e40px.vendor.hjson b/hw/vendor/esl_epfl_cv32e40px.vendor.hjson index d0e289ae0..4fc67fa65 100644 --- a/hw/vendor/esl_epfl_cv32e40px.vendor.hjson +++ b/hw/vendor/esl_epfl_cv32e40px.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/esl-epfl/cv32e40px.git", - rev: "15b9dd6077513342cf44e6853a5fc33098f2e73b", + rev: "10b08065c50d44b5355c1535cb8f740e68e4f106", }, exclude_from_upstream: [ diff --git a/hw/vendor/esl_epfl_cv32e40px/README.md b/hw/vendor/esl_epfl_cv32e40px/README.md index 0b23f8412..2272b6807 100644 --- a/hw/vendor/esl_epfl_cv32e40px/README.md +++ b/hw/vendor/esl_epfl_cv32e40px/README.md @@ -1,8 +1,8 @@ [![Build Status](https://travis-ci.com/pulp-platform/riscv.svg?branch=master)](https://travis-ci.com/pulp-platform/riscv) -# OpenHW Group CORE-V CV32E40P RISC-V IP +# OpenHW Group CORE-V CV32E40PX RISC-V IP -CV32E40P is a small and efficient, 32-bit, in-order RISC-V core with a 4-stage pipeline that implements +CV32E40PX is a 
small and efficient, 32-bit, in-order RISC-V core with a 4-stage pipeline that implements the RV32IM\[F|Zfinx\]C instruction set architecture, and the PULP custom extensions for achieving higher code density, performance, and energy efficiency \[[1](https://doi.org/10.1109/TVLSI.2017.2654506)\], \[[2](https://doi.org/10.1109/PATMOS.2017.8106976)\]. It started its life as a fork of the OR10N CPU core that is based on the OpenRISC ISA. @@ -14,12 +14,12 @@ when it has been contributed to [OpenHW Group](https://www.openhwgroup.org/). ## Documentation -The CV32E40P user manual can be found in the _docs_ folder and it is +The CV32E40PX user manual can be found in the _docs_ folder and it is captured in reStructuredText, rendered to html using [Sphinx](https://docs.readthedocs.io/en/stable/intro/getting-started-with-sphinx.html). These documents are viewable using readthedocs and can be viewed [here](https://docs.openhwgroup.org/projects/cv32e40p-user-manual/). ## Verification -The verification environment for the CV32E40P is _not_ in this Repository. There is a small, simple testbench here which is +The verification environment for the CV32E40PX is _not_ in this Repository. There is a small, simple testbench here which is useful for experimentation only and should not be used to validate any changes to the RTL prior to pushing to the master branch of this repo. @@ -31,7 +31,7 @@ The Makefiles supported in the **core-v-verif** project automatically clone the ## Changelog A changelog is generated automatically in the documentation from the individual pull requests. 
-In order to enable automatic changelog generation within the CV32E40P documentation, the committer is required to label each pull request +In order to enable automatic changelog generation within the CV32E40PX documentation, the committer is required to label each pull request that touches any file in 'rtl' (or any of its subdirectories) with *Component:RTL* and label each pull request that touches any file in 'docs' (or any of its subdirectories) with *Component:Doc*. Pull requests that are not labeled or labeled with *ignore-for-release* are ignored for the changelog generation. @@ -40,7 +40,7 @@ Only the person who actually performs the merge can add these labels (you need c 1 label is applied and therefore pull requests that touches both RTL and documentation files in the same pull request are not allowed. ## Constraints -Example synthesis constraints for the CV32E40P are provided. +Example synthesis constraints for the CV32E40PX are provided. ## Contributing @@ -71,7 +71,7 @@ Run `./util/format-verible` to format all the files. ## Issues and Troubleshooting -If you find any problems or issues with CV32E40P or the documentation, please check out the [issue +If you find any problems or issues with CV32E40PX or the documentation, please check out the [issue tracker](https://github.com/openhwgroup/cv32e40p/issues) and create a new issue if your problem is not yet tracked. 
diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv index e9807a381..19e210670 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv @@ -110,6 +110,10 @@ module cv32e40px_controller import cv32e40px_pkg::*; output logic apu_stall_o, + // X-IF signals + output logic x_branch_or_async_taken_o, + output logic x_control_illegal_reset_o, + // jump/branch signals input logic branch_taken_ex_i, // branch taken signal from EX ALU input logic [1:0] ctrl_transfer_insn_in_id_i, // jump is being calculated in ALU @@ -240,7 +244,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; logic debug_req_q; logic debug_req_pending; - // qualify wfi vs nosleep locally + // qualify wfi vs nosleep locally logic wfi_active; @@ -327,6 +331,9 @@ module cv32e40px_controller import cv32e40px_pkg::*; // ensures that the target is kept constant even if pc_id is no more HWLP_END hwlp_targ_addr_o = ((hwlp_start1_leq_pc && hwlp_end1_geq_pc) && !(hwlp_start0_leq_pc && hwlp_end0_geq_pc)) ? 
hwlp_start_addr_i[1] : hwlp_start_addr_i[0]; + x_branch_or_async_taken_o = 1'b0; + x_control_illegal_reset_o = 1'b0; + unique case (ctrl_fsm_cs) // We were just reset, wait for fetch_enable RESET: @@ -438,6 +445,8 @@ module cv32e40px_controller import cv32e40px_pkg::*; pc_mux_o = PC_BRANCH; pc_set_o = 1'b1; + x_branch_or_async_taken_o = 1'b1; + // if we want to debug, flush the pipeline // the current_pc_if will take the value of the next instruction to // be executed (NPC) @@ -496,6 +505,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; halt_id_o = 1'b1; ctrl_fsm_ns = DBG_FLUSH; debug_req_entry_n = 1'b1; + x_branch_or_async_taken_o = 1'b1; end else if (irq_req_ctrl_i && ~debug_mode_q) begin @@ -511,6 +521,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; exc_pc_mux_o = EXC_PC_IRQ; exc_cause_o = irq_id_ctrl_i; csr_irq_sec_o = irq_sec_ctrl_i; + x_branch_or_async_taken_o = 1'b1; // IRQ interface irq_ack_o = 1'b1; @@ -534,6 +545,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; halt_id_o = 1'b0; ctrl_fsm_ns = id_ready_i ? 
FLUSH_EX : DECODE; illegal_insn_n = 1'b1; + x_control_illegal_reset_o = 1'b1; end else begin @@ -679,6 +691,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; illegal_insn_i | ecall_insn_i: begin ctrl_fsm_ns = FLUSH_EX; + x_control_illegal_reset_o = illegal_insn_i; end (~ebrk_force_debug_mode & ebrk_insn_i): @@ -728,6 +741,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; halt_id_o = 1'b1; ctrl_fsm_ns = DBG_FLUSH; debug_req_entry_n = 1'b1; + x_branch_or_async_taken_o = 1'b1; end else if (irq_req_ctrl_i && ~debug_mode_q) begin @@ -743,6 +757,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; exc_pc_mux_o = EXC_PC_IRQ; exc_cause_o = irq_id_ctrl_i; csr_irq_sec_o = irq_sec_ctrl_i; + x_branch_or_async_taken_o = 1'b1; // IRQ interface irq_ack_o = 1'b1; @@ -768,6 +783,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; halt_id_o = 1'b1; ctrl_fsm_ns = FLUSH_EX; illegal_insn_n = 1'b1; + x_control_illegal_reset_o = 1'b1; end else begin @@ -865,6 +881,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; illegal_insn_i | ecall_insn_i: begin ctrl_fsm_ns = FLUSH_EX; + x_control_illegal_reset_o = illegal_insn_i; end (~ebrk_force_debug_mode & ebrk_insn_i): @@ -1207,7 +1224,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; exc_pc_mux_o = EXC_PC_DBD; csr_save_cause_o = 1'b1; debug_csr_save_o = 1'b1; - if (debug_force_wakeup_q) + if (debug_force_wakeup_q) debug_cause_o = DBG_CAUSE_HALTREQ; else if (debug_single_step_i) debug_cause_o = DBG_CAUSE_STEP; // pri 0 @@ -1479,7 +1496,7 @@ endgenerate assign debug_wfi_no_sleep_o = debug_mode_q || debug_req_pending || debug_single_step_i || trigger_match_i || COREV_CLUSTER; - // Gate off wfi + // Gate off wfi assign wfi_active = wfi_i & ~debug_wfi_no_sleep_o; // sticky version of debug_req (must be on clk_ungated_i such that incoming pulse before core is enabled is not missed) @@ -1600,7 +1617,7 @@ endgenerate // Ensure DBG_TAKEN_IF can only be enterred if in single step mode or woken // up from sleep 
by debug_req_i - + a_single_step_dbg_taken_if : assert property (@(posedge clk) disable iff (!rst_n) (ctrl_fsm_ns==DBG_TAKEN_IF) |-> ((~debug_mode_q && debug_single_step_i) || debug_force_wakeup_n)); // Ensure DBG_FLUSH state is only one cycle. This implies that cause is either trigger, debug_req_entry, or ebreak diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv index 9f1f668b5..883b5ff1c 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv @@ -436,6 +436,8 @@ module cv32e40px_id_stage // X-Interface logic illegal_insn; logic x_illegal_insn; + logic x_branch_or_async_taken; + logic x_control_illegal_reset; logic [4:0] waddr_id; logic [4:0] waddr_ex; logic [4:0] waddr_wb; @@ -1124,7 +1126,7 @@ module cv32e40px_id_stage .mem_instr_waddr_ex_i(regfile_waddr_ex_o[4:0]), .mem_instr_we_ex_i (regfile_we_ex_o), .regs_used_i (regs_used), - .branch_or_jump_i (pc_set_o), + .branch_or_jump_i (x_branch_or_async_taken), .instr_valid_i (instr_valid_i), .x_rs_addr_i (x_rs_addr), .x_ex_fwd_o (x_ex_fwd), @@ -1136,14 +1138,15 @@ module cv32e40px_id_stage .wb_ready_i (wb_ready_i), // additional status signals - .x_stall_o (x_stall), - .x_illegal_insn_o (x_illegal_insn), - .x_illegal_insn_dec_i(illegal_insn_dec), - .id_ready_i (id_ready_o), - .ex_valid_i (ex_valid_i), - .ex_ready_i (ex_ready_i), - .current_priv_lvl_i (current_priv_lvl_i), - .data_req_dec_i (data_req_id) + .x_stall_o (x_stall), + .x_illegal_insn_o (x_illegal_insn), + .x_illegal_insn_dec_i (illegal_insn_dec), + .x_control_illegal_reset_i(x_control_illegal_reset), + .id_ready_i (id_ready_o), + .ex_valid_i (ex_valid_i), + .ex_ready_i (ex_ready_i), + .current_priv_lvl_i (current_priv_lvl_i), + .data_req_dec_i (data_req_id) ); @@ -1445,6 +1448,8 @@ module cv32e40px_id_stage .apu_write_dep_i (apu_write_dep_i), .apu_stall_o(apu_stall), + 
.x_branch_or_async_taken_o(x_branch_or_async_taken), + .x_control_illegal_reset_o(x_control_illegal_reset), // jump/branch control .branch_taken_ex_i (branch_taken_ex), diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv index dc8907979..f326b3f90 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv @@ -90,6 +90,7 @@ module cv32e40px_x_disp output logic x_stall_o, output logic x_illegal_insn_o, input logic x_illegal_insn_dec_i, + input logic x_control_illegal_reset_i, input logic id_ready_i, input logic ex_valid_i, input logic ex_ready_i, @@ -108,6 +109,7 @@ module cv32e40px_x_disp logic x_if_memory_instr; logic illegal_forwarding_prevention; logic x_issue_illegal; + logic x_illegal_insn_q, x_illegal_insn_n; // issue interface assign x_issue_valid_o = x_illegal_insn_dec_i & ~branch_or_jump_i & ~instr_offloaded_q & instr_valid_i & ~illegal_forwarding_prevention; @@ -182,7 +184,7 @@ module cv32e40px_x_disp assign x_wb_fwd_o[3] = (x_rs_addr_i[0] | 5'b00001) == waddr_wb_i & we_wb_i & ex_valid_i & x_issue_resp_dualread_i[0]; assign x_wb_fwd_o[4] = (x_rs_addr_i[1] | 5'b00001) == waddr_wb_i & we_wb_i & ex_valid_i & x_issue_resp_dualread_i[1]; assign x_wb_fwd_o[5] = (x_rs_addr_i[2] | 5'b00001) == waddr_wb_i & we_wb_i & ex_valid_i & x_issue_resp_dualread_i[2]; - assign dep = ~x_illegal_insn_o & ((regs_used_i[0] & scoreboard_q[x_rs_addr_i[0]] & (x_result_rd_i != x_rs_addr_i[0])) + assign dep = ~x_illegal_insn_n & ((regs_used_i[0] & scoreboard_q[x_rs_addr_i[0]] & (x_result_rd_i != x_rs_addr_i[0])) | (regs_used_i[1] & scoreboard_q[x_rs_addr_i[1]] & (x_result_rd_i != x_rs_addr_i[1])) | (regs_used_i[2] & scoreboard_q[x_rs_addr_i[2]] & (x_result_rd_i != x_rs_addr_i[2])) | (((regs_used_i[0] & x_issue_resp_dualread_i[0]) & scoreboard_q[x_rs_addr_i[0] | 5'b00001] & (x_result_rd_i != (x_rs_addr_i[0] | 5'b00001))) & x_issue_resp_dualread_i[0]) @@ 
-195,7 +197,7 @@ module cv32e40px_x_disp assign x_wb_fwd_o[0] = x_rs_addr_i[0] == waddr_wb_i & we_wb_i & ex_valid_i; assign x_wb_fwd_o[1] = x_rs_addr_i[1] == waddr_wb_i & we_wb_i & ex_valid_i; assign x_wb_fwd_o[2] = x_rs_addr_i[2] == waddr_wb_i & we_wb_i & ex_valid_i; - assign dep = ~x_illegal_insn_o & ((regs_used_i[0] & scoreboard_q[x_rs_addr_i[0]] & (x_result_rd_i != x_rs_addr_i[0])) + assign dep = ~x_illegal_insn_n & ((regs_used_i[0] & scoreboard_q[x_rs_addr_i[0]] & (x_result_rd_i != x_rs_addr_i[0])) | (regs_used_i[1] & scoreboard_q[x_rs_addr_i[1]] & (x_result_rd_i != x_rs_addr_i[1])) | (regs_used_i[2] & scoreboard_q[x_rs_addr_i[2]] & (x_result_rd_i != x_rs_addr_i[2]))); end @@ -247,11 +249,12 @@ module cv32e40px_x_disp // illegal instruction assignment assign x_issue_illegal = x_illegal_insn_dec_i & ~instr_offloaded_q & instr_valid_i; always_comb begin - x_illegal_insn_o = 1'b0; + x_illegal_insn_n = 1'b0; if (x_issue_illegal & x_issue_ready_i & ~x_issue_resp_accept_i) begin - x_illegal_insn_o = 1'b1; + x_illegal_insn_n = 1'b1; end end + assign x_illegal_insn_o = x_illegal_insn_q; // scoreboard and status signal register always_ff @(posedge clk_i or negedge rst_ni) begin @@ -260,11 +263,17 @@ module cv32e40px_x_disp instr_offloaded_q <= 1'b0; id_q <= '0; mem_counter_q <= '0; + x_illegal_insn_q <= 1'b0; end else begin scoreboard_q <= scoreboard_d; instr_offloaded_q <= instr_offloaded_d; id_q <= id_d; mem_counter_q <= mem_counter_d; + if (x_control_illegal_reset_i) begin + x_illegal_insn_q <= 1'b0; + end else begin + x_illegal_insn_q <= x_illegal_insn_n; + end end end diff --git a/pad_cfg.hjson b/pad_cfg.hjson index d681e63c0..3cb3312d9 100644 --- a/pad_cfg.hjson +++ b/pad_cfg.hjson @@ -8,7 +8,7 @@ // The pads contains the list of all the pads available in the design. 
// Each pad is defined by its name and can have the following attributes: // num: (mandatory) - the number of pads of this type -// type: (mandatory) - the type of the pad +// type: (mandatory) - the type of the pad // num_offset: (optional) - the offset to the first pad of this type (default 0) // mapping: (optional) - the mapping of the pad in the design. Useful for ASICs (default top) // active: (optional) - the active level of the pad (default high) @@ -16,6 +16,12 @@ // mux: (optional) - the muxing options for the pad // skip_declaration: (optional) - skip the declaration of the pad in the top level (default False) // keep_internal: (optional) - keep the pad internal to the design (default False) +// layout_attributes: (optional) - collection of attributes related to the physical (ASIC) layout of the pads +// index: (mandatory) index of the pad on its side of the I/O ring +// orient: (optional) - orientation of the pad +// cell: (mandatory for type "supply") - specific cell to use if not a default pad cell (ex. for VDD/VSS pads) +// offset: (optional) - offset from edge (in um) +// skip: (optional) - distance from neighboring pad (in um) // // Add this field at the same level of pads (not inside) if you want to define PADs attributes // attributes: { diff --git a/sw/CMakeLists.txt b/sw/CMakeLists.txt index 549a2a064..cdb2a8f76 100644 --- a/sw/CMakeLists.txt +++ b/sw/CMakeLists.txt @@ -175,7 +175,7 @@ if( app_found EQUAL 0 ) if(NOT ${file_path} MATCHES ".*/crt/.*") SET(add 1) endif() - elseif( ( ${file_path} MATCHES "${ROOT_PROJECT}/applications/${PROJECT}/" ) AND ( NOT ${file_path} MATCHES "${ROOT_PROJECT}applications/${PROJECT}/.*${MAINFILE}\." ) AND ( NOT ${file_path} MATCHES "exclude" ) ) + elseif( ( ${file_path} MATCHES "${ROOT_PROJECT}applications/${PROJECT}/" ) AND ( NOT ${file_path} MATCHES "${ROOT_PROJECT}applications/${PROJECT}/.*${MAINFILE}\." 
) AND ( NOT ${file_path} MATCHES "exclude" ) ) SET(add 1) endif() diff --git a/sw/applications/example_data_processing_from_flash/gen_stimuly.py b/sw/applications/example_data_processing_from_flash/gen_stimuly.py new file mode 100644 index 000000000..5a4a6cd23 --- /dev/null +++ b/sw/applications/example_data_processing_from_flash/gen_stimuly.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +## Copyright 2024 EPFL +## Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +# type " python gen_stimuly.py " in the terminal to generate the matrices.h file + +import sys +import random +import numpy as np + +def write_arr(f, name, arr, ctype, size): + f.write("const " + ctype + " " + name + "[] = {\n") + + for row in arr: + for elem in row[:-1]: + f.write('%d,' % (elem)) + f.write('%d,\n' % (row[-1])) + + f.write('};\n\n') + return + +def write_arr_flash_only(f, name, arr, ctype, size): + f.write( ctype + " __attribute__((section(\".xheep_data_flash_only\"))) __attribute__ ((aligned (16)))" + name + "[] = {\n") + + for row in arr: + for elem in row[:-1]: + f.write('%d,' % (elem)) + f.write('%d,\n' % (row[-1])) + + f.write('};\n\n') + return + + +################################################################################ +f = open('matrices.h', 'w') +f.write('#ifndef MATRICES_H_\n') +f.write('#define MATRICES_H_\n') +f.write('// This file is automatically generated\n') + + +SIZE = 64 +RANGE = 10 + +m_a = [] +m_b = [] +m_exp = [] + +# Generate random 32-bit integers in the range [0, RANGE) for A and B +A = np.random.randint(0, RANGE, size=(SIZE, SIZE), dtype=np.int32) +B = np.random.randint(0, RANGE, size=(SIZE, SIZE), dtype=np.int32) +C = np.zeros((SIZE, SIZE), dtype=np.int32) + +# Test the function with A and B +C = np.matmul(A,B) + +write_arr_flash_only(f, 'A', A, 'int32_t', SIZE) +write_arr(f, 'B', B, 'int32_t', SIZE) +write_arr(f, 'C', C, 'int32_t', SIZE) + +f.write('#define MATRIX_SIZE %d\n' % SIZE) + 
+ +f.write('#endif') diff --git a/sw/applications/example_data_processing_from_flash/main.c b/sw/applications/example_data_processing_from_flash/main.c new file mode 100644 index 000000000..01f7dab7d --- /dev/null +++ b/sw/applications/example_data_processing_from_flash/main.c @@ -0,0 +1,87 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// File: sw/applications/example_data_processing_from_flash/main.c +// Author: Francesco Poluzzi +// Date: 29/07/2024 + +/** + * @file main.c + * @brief Example of data processing (matrix multiplication) reading data from flash memory + * + * Simple example that reads a matrix from flash memory in several steps and performs + * matrix multiplication. This is useful for applications where the + * data size does not fit in the available SRAM memory, so some data needs to be + * stored as "flash_only" and read through the SPI interface. This usually requires + * filling a buffer and tiling the data processing. +*/ + +#include +#include +#include + +#include "x-heep.h" +#include "w25q128jw.h" +#include "main.h" +#include "dma_sdk.h" + +#define TILING_ROWS 2 + + /* By default, printfs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) 
+#endif + +int32_t buffer_data[MATRIX_SIZE*TILING_ROWS] = {0}; +int32_t output_matrix[MATRIX_SIZE*MATRIX_SIZE] = {0}; + +int main(int argc, char *argv[]) { + + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + + #ifdef TARGET_SIM + PRINTF("This application is meant to run on FPGA only\n"); + return EXIT_SUCCESS; + #endif + + if ( get_spi_flash_mode(&soc_ctrl) == SOC_CTRL_SPI_FLASH_MODE_SPIMEMIO ) { + PRINTF("This application cannot work with the memory mapped SPI FLASH" + "module - do not use the FLASH_EXEC linker script for this application\n"); + return EXIT_SUCCESS; + } + + // Pick the correct spi device based on simulation type + spi_host_t* spi = spi_flash; + + // Init SPI host and SPI<->Flash bridge parameters + if (w25q128jw_init(spi) != FLASH_OK){ + PRINTF("Error initializing SPI flash\n"); + return EXIT_FAILURE; + } + + for (int i = 0; i < MATRIX_SIZE; i+=TILING_ROWS) { + // read first half matrix A from flash and perform matmul + if(fill_buffer(&A[i*MATRIX_SIZE], buffer_data, MATRIX_SIZE*TILING_ROWS)!=FLASH_OK){ + PRINTF("Error reading from flash\n"); + return EXIT_FAILURE; + } + matmul(buffer_data, B, &output_matrix[i*MATRIX_SIZE], TILING_ROWS, MATRIX_SIZE, MATRIX_SIZE); + } + + for(int i = 0; i < MATRIX_SIZE*MATRIX_SIZE; i++){ + if (output_matrix[i] != C[i]){ + PRINTF("Result[%d][%d]:golden model %d : %d\n", (i/MATRIX_SIZE), (i % MATRIX_SIZE), output_matrix[i], C[i]); + return EXIT_FAILURE; + } + } + PRINTF("All tests passed!\n"); + return EXIT_SUCCESS; +} diff --git a/sw/applications/example_data_processing_from_flash/main.h b/sw/applications/example_data_processing_from_flash/main.h new file mode 100644 index 000000000..0e588e78b --- /dev/null +++ b/sw/applications/example_data_processing_from_flash/main.h @@ -0,0 +1,42 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// File: sw/applications/example_data_processing_from_flash/main.h
+// Author: Francesco Poluzzi
+// Date: 29/07/2024
+
+#ifndef MAIN_H_
+#define MAIN_H_
+
+#include
+#include
+#include
+
+#include "x-heep.h"
+#include "w25q128jw.h"
+#include "matrices.h"
+
+w25q_error_codes_t fill_buffer(uint32_t *source, uint32_t *buffer, uint32_t len);
+void matmul(const int32_t *A, const int32_t *B, int32_t *res, int rowsA, int colsA, int colsB);
+
+/**
+ * @brief Copy `len` 32-bit words from flash into a RAM buffer.
+ *
+ * @param source Address of the data; it is translated with
+ *               heep_get_flash_address_offset() before the read.
+ * @param buffer Destination buffer, at least `len` words long.
+ * @param len    Number of 32-bit words to read (the driver is given
+ *               len * sizeof(uint32_t) as its length argument).
+ * @return Status returned by w25q128jw_read_standard().
+ */
+w25q_error_codes_t fill_buffer(uint32_t *source, uint32_t *buffer, uint32_t len){
+    uint32_t *source_flash = heep_get_flash_address_offset(source);
+    return w25q128jw_read_standard(source_flash, buffer, len * sizeof(uint32_t));
+}
+
+/**
+ * @brief Row-major integer matrix product: res = A x B.
+ *
+ * The inputs are only read, so they are const-qualified; this lets callers
+ * pass constant matrices without discarding the qualifier.
+ *
+ * @param A     Left operand, rowsA x colsA.
+ * @param B     Right operand, colsA x colsB.
+ * @param res   Output, rowsA x colsB; every element is overwritten.
+ * @param rowsA Number of rows of A (and of res).
+ * @param colsA Number of columns of A (and rows of B).
+ * @param colsB Number of columns of B (and of res).
+ */
+void matmul(const int32_t *A, const int32_t *B, int32_t *res, int rowsA, int colsA, int colsB) {
+    for (int i = 0; i < rowsA; i++) {
+        for (int j = 0; j < colsB; j++) {
+            int32_t sum = 0;
+            for (int k = 0; k < colsA; k++) {
+                sum += A[i*colsA + k] * B[k*colsB + j];
+            }
+            res[i*colsB + j] = sum;
+        }
+    }
+}
+
+
+#endif // MAIN_H_
diff --git a/sw/applications/example_data_processing_from_flash/matrices.h b/sw/applications/example_data_processing_from_flash/matrices.h
new file mode 100644
index 000000000..323d1a06b
--- /dev/null
+++ b/sw/applications/example_data_processing_from_flash/matrices.h
@@ -0,0 +1,206 @@
+#ifndef MATRICES_H_
+#define MATRICES_H_
+// This file is automatically generated
+int32_t __attribute__((section(".xheep_data_flash_only"))) __attribute__ ((aligned (16)))A[] = {
+8,0,9,3,0,0,7,3,4,4,4,0,5,1,9,3,9,1,0,4,1,1,9,4,6,9,8,1,3,3,2,6,5,6,8,7,4,3,3,1,1,9,8,9,6,3,1,6,4,3,6,1,9,0,6,7,5,0,6,5,1,5,6,2,
+3,1,8,2,9,8,1,3,0,2,4,6,6,5,7,1,5,5,5,8,6,5,4,9,2,6,7,1,7,2,5,0,9,9,7,6,2,1,6,5,4,9,5,3,1,5,6,9,3,4,4,1,1,7,2,5,8,0,6,4,3,1,8,2,
+3,8,8,0,7,5,1,9,1,2,2,2,0,6,4,6,2,4,7,6,0,9,9,8,5,1,9,5,9,8,2,2,7,7,7,5,9,0,3,4,5,3,9,0,2,2,0,7,2,3,1,1,5,5,3,6,4,4,3,7,5,6,0,4,
+9,4,4,2,0,4,3,5,7,5,7,5,3,6,5,4,5,7,4,2,4,0,2,8,6,8,4,0,1,3,8,5,4,8,5,1,6,0,7,6,3,1,1,6,2,5,4,9,4,9,5,1,3,7,0,1,0,2,6,3,8,0,1,6, +3,7,8,9,5,1,7,5,5,8,9,7,2,1,2,1,0,3,5,5,3,0,3,0,8,3,3,5,6,3,7,2,2,6,6,1,7,1,7,6,7,1,7,2,0,6,5,1,7,6,6,4,6,3,7,2,1,0,5,9,1,1,1,1, +4,5,6,7,5,3,5,0,2,9,1,7,1,5,6,6,8,8,1,4,5,8,5,2,5,8,8,5,9,5,2,5,6,4,1,2,9,7,1,1,0,6,9,1,2,2,6,3,7,6,6,1,2,0,1,1,8,9,7,0,4,9,0,3, +5,9,0,0,6,3,8,2,4,5,2,9,1,4,4,8,9,3,5,7,1,6,8,8,7,9,3,7,7,4,8,4,9,6,3,1,5,0,6,7,9,6,9,6,4,9,0,6,0,2,1,4,9,4,7,6,0,1,8,2,8,4,2,9, +5,0,0,9,3,7,3,3,8,4,2,0,1,9,5,5,0,0,4,9,8,7,1,3,3,4,3,0,0,3,3,0,2,5,1,8,6,0,2,0,4,2,3,2,9,5,5,2,8,9,4,2,9,7,7,8,8,3,8,9,3,9,1,9, +6,8,4,2,5,3,3,5,1,0,0,8,1,3,9,9,5,5,9,0,5,9,8,2,4,1,4,3,0,6,7,9,9,2,1,8,1,5,1,9,6,3,9,5,0,1,3,6,4,7,5,0,4,6,3,1,4,9,2,1,8,2,8,5, +9,8,5,1,8,0,0,0,1,5,2,6,2,3,2,6,6,7,3,6,6,3,5,9,8,8,1,7,4,4,1,2,0,2,4,3,0,0,5,5,3,8,6,1,4,6,4,8,5,9,7,9,8,1,0,9,8,1,9,6,4,4,0,2, +1,6,0,4,7,8,1,6,8,8,9,4,7,8,1,3,5,4,1,2,8,3,6,4,2,0,1,9,3,9,6,9,2,2,3,3,2,7,1,0,2,8,9,9,9,8,7,4,2,9,4,8,2,0,9,0,3,2,5,6,4,9,3,1, +0,9,9,8,5,4,5,4,1,9,3,3,0,1,6,3,8,6,6,6,3,8,5,7,8,9,4,1,8,2,6,0,5,3,6,7,1,9,8,7,9,0,5,5,3,7,0,1,2,9,4,3,8,3,2,6,1,0,5,5,6,0,4,8, +8,6,2,6,1,4,4,2,0,3,9,8,9,3,1,8,2,9,5,9,9,4,6,0,9,9,9,4,8,7,7,4,4,1,7,9,9,7,3,2,1,6,5,6,5,4,7,9,0,1,0,5,6,9,1,5,1,1,7,5,8,6,7,6, +7,6,5,8,6,2,3,1,2,8,3,9,5,9,3,9,8,8,2,4,7,0,7,4,1,1,9,8,9,7,4,9,3,4,8,3,6,1,1,7,4,9,7,6,8,3,9,1,2,8,2,1,1,6,0,1,2,1,2,2,2,1,0,1, +3,2,7,7,9,2,1,4,0,0,4,4,4,1,4,3,2,1,1,6,4,0,4,6,2,4,9,1,1,3,6,9,0,0,0,0,4,4,6,7,5,9,0,4,1,7,7,0,5,9,4,5,4,1,9,1,3,0,8,4,9,1,8,3, +9,9,3,9,1,9,4,5,3,6,0,5,8,4,3,9,0,2,7,6,1,9,7,4,0,0,9,4,1,6,3,6,7,9,3,9,4,6,6,1,1,7,0,8,0,3,1,0,5,9,3,9,4,7,7,1,1,7,7,1,1,0,2,1, +0,8,7,7,8,0,9,0,4,3,9,1,3,8,3,8,4,2,1,2,2,9,0,2,4,6,1,3,1,8,9,0,1,7,9,0,1,7,2,3,7,7,7,0,0,1,4,4,8,4,8,7,8,9,9,8,9,3,1,6,5,4,5,2, +6,1,9,3,1,9,0,6,0,0,1,5,3,0,0,4,7,7,2,4,8,8,9,3,8,8,9,4,5,3,8,4,1,2,7,5,4,4,9,3,7,5,2,9,4,5,0,3,2,3,7,2,2,8,7,1,4,4,9,2,1,7,7,6, 
+6,3,0,2,7,7,2,8,8,7,1,6,8,1,5,6,7,7,7,4,9,8,4,8,7,6,0,8,0,9,8,0,8,8,1,4,7,8,7,3,3,9,8,6,5,7,8,9,0,8,6,6,7,4,8,3,6,0,2,8,3,9,3,1, +5,9,0,5,3,9,5,4,6,8,4,9,9,6,8,2,5,8,0,1,3,8,5,9,8,3,1,4,9,0,9,8,1,6,6,9,9,5,0,4,9,4,2,5,3,0,2,9,8,1,6,3,7,3,8,2,9,9,9,6,0,7,4,5, +7,0,7,7,1,7,7,3,2,5,4,5,7,5,7,2,8,7,2,0,2,8,3,9,7,5,1,0,5,8,7,2,1,9,2,3,3,7,7,1,4,3,2,2,8,3,0,7,4,6,1,5,0,3,6,5,0,9,4,2,3,1,5,0, +9,4,2,7,3,2,5,8,5,1,7,2,1,9,1,6,7,3,7,0,2,7,9,4,3,1,9,6,1,3,2,5,9,9,1,9,0,5,4,2,3,3,3,3,1,7,2,2,9,9,8,4,1,1,1,1,6,6,5,4,0,3,1,2, +1,9,3,6,6,3,4,9,3,4,2,2,4,3,5,4,0,6,5,1,1,1,2,7,9,4,0,0,3,7,1,8,9,0,7,7,1,3,7,7,5,7,9,7,0,0,8,1,9,0,3,9,3,9,0,8,4,5,4,5,6,0,9,4, +5,6,4,1,1,8,9,4,3,2,5,9,5,4,5,6,9,4,1,7,1,3,0,0,5,9,5,2,3,2,5,5,7,2,2,9,3,1,2,8,5,0,1,0,8,9,2,9,3,4,6,9,5,4,7,4,7,9,6,1,0,8,2,8, +5,3,4,4,8,1,5,5,2,4,2,1,1,5,6,3,2,4,9,1,1,5,3,1,3,3,8,0,4,8,8,4,7,0,1,1,6,2,1,0,8,2,0,4,4,4,5,4,0,7,7,8,5,5,0,0,0,1,8,8,7,4,7,9, +1,7,2,6,1,6,7,6,2,9,6,2,5,7,7,8,1,5,7,3,3,4,9,8,7,6,5,5,2,8,7,8,7,1,6,0,5,2,2,4,2,3,3,2,0,0,3,4,5,2,2,9,2,9,7,7,9,5,9,0,1,5,3,0, +7,4,1,9,4,5,0,6,7,4,1,2,7,4,0,8,8,3,7,0,5,3,2,2,4,2,7,0,0,5,0,4,6,9,6,2,2,4,5,3,4,4,2,8,7,3,9,4,5,8,5,2,9,4,1,3,2,9,2,2,4,3,9,8, +6,7,8,8,8,8,9,1,1,5,0,5,5,4,2,0,3,0,4,2,1,0,2,6,2,8,1,4,1,6,8,0,0,5,1,3,8,0,2,3,6,6,3,3,4,8,9,2,8,3,1,3,7,2,1,4,9,9,4,4,3,1,2,1, +0,9,9,6,9,3,9,7,5,3,9,3,0,2,9,6,2,2,5,6,8,2,9,6,8,3,6,7,4,4,3,8,6,1,2,6,6,0,9,0,0,9,3,9,9,8,4,8,6,8,0,6,1,3,0,8,4,3,5,3,0,6,2,2, +1,7,1,4,3,5,6,3,7,2,3,1,6,3,5,2,9,3,7,0,6,9,6,5,3,9,6,0,2,3,1,0,6,2,5,8,2,3,0,0,0,4,3,9,4,6,3,5,2,0,3,8,8,2,8,2,5,6,9,7,3,2,9,9, +5,5,4,0,2,4,0,4,5,2,7,6,6,8,5,3,5,5,2,7,3,8,7,5,5,3,6,0,8,6,3,1,7,4,7,1,0,4,3,0,7,9,5,0,6,0,7,3,3,8,2,5,1,4,4,1,6,4,2,5,5,6,8,0, +8,5,9,6,7,9,8,2,9,1,5,1,0,5,9,6,1,9,8,7,6,9,1,0,1,2,8,4,7,4,5,6,9,1,5,8,7,2,4,3,8,3,8,9,0,6,3,1,6,5,7,0,3,1,3,8,5,7,0,8,8,2,7,3, +7,2,8,3,9,8,9,1,6,5,0,4,5,7,3,9,9,0,9,9,2,6,4,3,3,3,3,9,5,5,7,4,2,4,7,2,3,5,5,9,8,6,3,1,5,7,6,8,9,4,1,8,8,5,4,9,0,7,5,4,5,3,6,9, 
+0,4,8,3,0,9,2,7,1,2,8,5,6,2,8,5,2,0,4,8,2,5,8,2,0,5,7,8,8,0,8,9,3,7,9,5,4,9,4,2,6,9,7,1,5,4,6,3,2,6,3,9,8,4,1,0,1,6,2,0,5,3,7,6, +6,4,6,1,0,7,3,4,6,2,7,1,3,9,0,5,5,4,7,0,0,3,4,8,2,6,6,9,9,0,2,6,5,9,9,7,7,0,6,2,2,7,8,3,8,6,0,4,7,7,0,9,2,2,1,5,1,8,2,3,7,7,1,9, +0,4,2,8,9,7,3,4,1,5,3,3,0,6,4,1,1,8,1,4,9,6,4,0,8,0,2,7,2,2,1,2,8,3,2,3,7,5,9,1,7,8,5,5,6,2,0,0,1,4,3,5,2,8,4,2,7,7,0,5,7,3,4,0, +1,7,4,5,1,3,9,1,8,2,3,7,0,2,9,3,5,3,3,9,2,6,0,5,7,4,9,7,9,6,9,6,4,1,5,3,7,3,0,2,3,9,7,7,8,2,5,8,4,0,8,3,3,0,4,3,0,5,7,3,7,3,9,7, +4,5,5,0,8,1,5,7,8,1,2,5,8,3,0,6,2,9,6,4,5,5,6,6,0,3,8,0,2,0,9,9,1,7,9,4,8,8,7,5,8,1,5,0,0,4,7,1,2,0,2,0,6,7,8,0,1,3,4,0,8,6,9,5, +6,6,9,6,5,1,2,7,1,6,5,5,5,7,2,8,3,5,6,0,8,7,6,9,3,8,8,8,5,3,4,8,5,6,9,2,7,5,7,3,1,7,2,2,9,6,1,9,2,4,4,3,9,2,5,1,6,7,7,2,9,8,6,0, +9,4,4,3,0,3,4,7,1,0,8,4,8,3,1,4,2,5,9,0,0,1,8,5,1,9,3,0,8,9,9,7,4,2,0,3,7,5,8,8,9,1,6,8,3,2,1,6,7,1,6,9,5,9,9,3,5,2,3,4,7,2,1,1, +4,8,0,1,3,6,6,2,5,2,7,2,5,5,7,0,6,4,6,4,8,0,6,4,0,3,6,7,9,7,4,6,8,9,8,7,8,5,0,5,4,5,4,2,8,6,7,8,2,2,0,8,6,4,3,9,7,1,2,3,5,2,5,2, +8,7,0,9,4,4,0,7,5,9,1,8,7,6,9,3,5,6,8,7,8,8,6,9,4,8,6,1,2,8,5,1,1,3,3,3,6,2,8,0,7,3,1,3,2,4,1,1,3,5,6,1,0,9,7,7,7,9,3,4,3,3,6,7, +3,2,8,4,1,0,4,9,4,2,9,5,7,0,9,0,7,8,1,1,8,9,9,3,4,0,7,9,1,4,7,7,9,6,1,8,1,4,1,2,1,6,8,5,8,9,5,1,2,4,3,2,4,6,9,0,6,7,0,8,5,5,2,4, +2,8,7,9,4,3,4,7,0,1,6,9,2,4,9,0,4,2,4,7,6,2,7,7,6,1,5,7,0,2,4,8,4,9,0,1,4,0,1,5,4,9,1,6,8,6,7,9,6,7,6,5,2,0,1,7,1,0,8,0,1,2,5,2, +0,6,8,8,3,3,5,7,1,1,7,5,2,6,2,0,4,2,7,0,7,9,6,9,1,8,4,3,5,7,1,9,3,3,0,5,6,2,0,5,1,6,8,0,1,2,0,6,7,7,9,6,5,6,2,6,9,4,2,7,4,9,6,8, +3,3,9,1,5,2,9,1,4,4,8,9,3,8,1,4,7,9,0,2,4,8,4,5,6,0,7,4,8,9,6,9,9,3,4,9,6,2,2,6,4,0,5,2,5,3,2,3,4,1,3,4,1,9,5,4,9,3,0,3,4,1,2,8, +4,2,0,9,7,5,7,2,9,0,8,6,0,1,5,6,8,3,4,7,5,9,2,0,8,7,6,6,7,1,3,5,3,7,6,2,1,8,2,0,4,1,6,9,3,0,5,7,1,1,6,8,3,4,1,2,7,6,5,6,6,1,1,2, +6,3,5,8,4,9,2,0,9,7,5,4,3,5,7,0,5,7,7,4,4,0,5,3,4,6,0,0,2,2,7,7,0,5,7,2,6,5,3,5,9,9,4,1,6,7,7,3,4,8,6,6,1,1,7,1,3,4,4,4,4,2,1,1, 
+7,1,2,0,5,9,2,1,3,9,3,7,4,9,5,1,6,5,7,2,0,5,2,4,8,0,5,2,6,6,7,9,8,7,7,8,5,2,4,6,9,3,4,8,8,1,3,4,5,2,2,7,3,9,2,8,8,8,8,8,1,4,1,4, +8,6,3,2,5,1,1,0,9,9,9,8,3,9,5,4,6,2,0,0,4,6,7,1,0,0,0,2,5,2,5,4,9,1,1,5,6,7,6,0,6,4,3,6,5,2,8,7,3,4,0,7,5,4,6,8,8,9,9,6,2,3,2,9, +0,4,9,6,2,5,0,6,0,6,2,0,5,9,7,9,9,7,9,7,3,0,9,7,9,2,5,0,7,5,7,6,7,7,9,9,1,7,1,6,1,3,9,3,6,4,4,9,8,9,6,4,8,5,8,1,5,9,2,5,4,7,2,7, +1,1,3,6,0,1,9,3,5,0,5,0,0,3,0,3,7,7,5,7,5,8,0,4,9,8,8,2,3,7,1,9,8,3,2,1,4,7,8,4,3,7,6,0,2,7,6,2,4,6,2,4,9,2,5,0,0,0,7,5,3,5,7,1, +7,5,9,1,5,8,8,6,5,1,8,4,3,2,1,9,7,8,1,2,3,5,6,0,6,1,0,3,7,9,1,3,0,2,0,5,4,7,3,9,3,4,6,6,6,8,2,0,4,8,3,1,1,2,0,6,9,9,9,8,3,4,7,7, +4,9,4,5,0,9,6,5,5,2,5,5,5,5,1,9,4,9,8,1,5,2,9,8,1,9,9,0,0,7,4,8,7,1,8,8,7,3,3,2,2,4,5,0,1,2,3,6,9,0,0,2,1,5,5,7,7,0,7,6,5,5,7,6, +9,1,9,3,8,2,2,9,7,9,4,9,5,2,6,5,5,3,7,2,8,8,7,7,9,2,1,4,0,2,2,8,8,5,9,9,0,5,2,1,5,5,3,3,7,0,7,3,5,9,8,2,6,0,3,7,8,8,4,6,6,3,3,3, +0,8,6,6,5,9,4,8,7,8,4,8,3,8,7,9,4,4,5,5,6,0,9,4,9,2,8,7,0,1,2,5,3,6,1,9,2,7,4,6,4,7,9,5,6,0,4,1,1,2,6,5,2,1,1,9,6,2,0,8,0,4,7,5, +8,5,6,2,3,3,9,4,4,1,9,2,7,0,6,8,1,5,6,7,3,9,9,4,5,6,9,6,7,9,0,2,8,5,1,3,9,2,8,3,5,3,9,2,5,6,3,7,0,2,4,8,2,8,4,2,2,1,8,9,2,1,7,4, +3,8,3,6,2,1,1,6,1,1,7,9,3,4,5,4,8,7,2,3,9,8,8,1,8,0,8,5,8,1,1,2,2,3,9,5,6,1,0,3,1,5,2,1,6,4,4,7,0,4,9,8,7,6,6,9,6,3,0,6,7,6,3,5, +2,1,9,4,2,4,3,9,1,9,0,9,9,2,2,1,5,2,2,9,2,4,6,2,3,3,4,0,0,8,8,4,6,2,2,5,6,7,5,6,1,3,9,7,0,1,2,2,5,9,2,1,3,1,8,8,1,3,6,9,6,0,9,2, +5,4,8,8,6,8,9,1,7,6,2,0,7,8,6,1,0,6,9,2,7,6,8,1,3,6,2,1,5,1,0,4,9,1,5,2,4,4,4,5,6,4,4,6,8,5,6,7,8,5,3,1,3,3,6,9,6,0,0,8,1,8,8,6, +9,0,5,0,9,6,4,8,3,9,9,8,2,8,0,0,9,5,2,1,2,0,1,5,2,1,9,6,3,8,7,0,1,0,9,1,0,1,6,0,8,7,4,5,6,4,5,6,5,9,5,8,9,4,2,0,8,2,6,5,4,5,4,5, +8,7,5,6,8,4,8,2,4,6,5,0,2,1,1,0,8,3,0,0,6,7,6,5,5,7,0,9,4,2,1,0,8,1,5,0,4,5,5,1,4,6,6,5,5,2,2,7,9,8,6,6,4,3,9,5,7,6,5,8,5,7,7,3, +2,4,8,3,7,6,5,0,3,8,0,4,8,7,6,3,6,0,3,2,8,7,4,6,2,1,6,3,5,7,5,1,5,3,8,9,9,3,0,7,2,6,7,5,5,7,3,3,9,7,6,3,6,4,3,2,5,3,5,0,1,2,7,3, 
+6,0,1,2,3,9,7,8,4,6,3,8,3,0,5,2,9,1,9,6,9,6,2,1,0,6,0,1,2,5,6,4,9,8,9,4,1,2,6,6,4,7,1,2,7,3,7,9,9,2,6,0,4,4,2,5,0,0,4,6,2,4,8,0, +}; + +const int32_t B[] = { +1,1,9,1,9,6,7,6,2,8,2,8,5,6,3,0,2,4,0,1,7,5,9,1,7,4,7,3,2,2,8,0,3,1,3,7,1,9,2,5,6,5,2,3,7,1,6,3,3,2,1,4,2,4,8,9,4,0,8,3,6,4,0,9, +6,1,6,9,1,4,3,4,2,3,4,8,0,0,1,2,7,7,9,6,1,5,8,7,6,7,1,8,0,2,7,1,4,5,7,4,2,7,6,0,0,1,7,9,0,4,3,8,8,9,4,3,4,7,6,3,4,0,8,9,5,1,1,7, +3,3,4,7,0,4,1,5,2,1,2,5,6,6,1,4,4,2,8,7,5,4,0,5,8,1,7,2,3,4,7,7,6,3,1,5,6,2,8,5,7,8,7,7,9,3,5,9,9,1,1,1,6,8,5,5,9,7,9,5,8,9,0,9, +2,9,2,2,4,4,6,7,4,7,4,2,4,2,7,2,2,2,9,6,7,2,1,5,0,9,5,2,8,6,8,6,2,7,8,2,0,1,0,2,9,0,3,9,9,6,9,2,1,7,1,1,4,6,7,9,8,8,5,3,1,1,8,5, +2,2,8,2,0,6,7,3,0,1,2,0,5,2,6,9,3,5,2,0,9,7,6,7,2,5,2,3,0,6,0,4,5,1,6,4,7,8,0,1,0,7,1,6,6,5,4,7,2,3,0,3,8,0,7,4,5,0,3,8,6,3,5,0, +9,1,3,8,5,9,4,9,0,2,9,7,1,1,0,8,4,7,8,5,3,2,3,0,9,8,3,7,5,8,8,5,9,5,6,5,2,7,3,3,7,2,2,7,0,0,4,9,8,8,0,6,0,9,3,9,6,5,7,0,8,2,2,3, +4,3,2,5,2,0,4,6,7,1,5,1,6,4,1,0,4,6,1,1,3,8,8,9,1,1,9,9,7,5,0,8,5,5,9,0,4,1,7,5,2,2,7,8,5,1,1,0,0,2,0,1,7,9,2,0,3,7,7,8,6,9,4,0, +6,0,4,1,4,2,0,2,5,4,0,6,9,9,1,1,8,7,5,0,3,3,8,6,1,1,3,1,9,2,4,6,2,5,6,6,8,4,6,1,3,0,7,1,3,3,0,3,3,2,7,4,6,0,6,7,7,7,0,4,9,4,9,4, +1,9,5,5,4,5,7,7,2,4,9,4,6,0,3,5,2,6,8,0,3,0,5,5,1,6,9,2,6,9,7,3,8,0,6,1,0,0,4,4,2,3,3,7,8,1,7,4,7,0,9,2,1,4,2,7,0,3,0,7,7,7,4,0, +5,0,9,9,7,4,0,5,7,3,7,5,2,4,4,9,5,4,5,7,5,1,2,1,3,8,8,5,1,6,6,3,9,8,1,1,9,4,7,9,8,8,4,5,0,9,5,3,0,3,5,7,4,2,5,5,2,9,9,7,7,9,2,3, +4,9,9,2,5,3,1,7,6,2,5,2,5,9,3,1,1,9,0,3,1,1,2,5,0,7,6,0,4,0,0,5,5,9,8,4,1,0,9,8,9,7,1,5,8,5,7,1,1,3,7,1,3,4,7,8,7,8,8,5,2,7,5,5, +0,9,7,2,6,8,5,7,0,3,6,9,1,2,5,7,7,2,9,6,6,9,7,1,0,6,6,4,3,3,7,1,8,0,7,2,8,9,2,1,0,1,7,9,5,7,6,8,5,4,0,1,7,9,8,9,0,5,4,9,8,5,5,9, +9,9,3,4,5,3,7,5,7,3,3,2,1,5,7,5,0,8,4,5,8,9,4,3,9,8,2,6,3,9,3,1,3,6,5,3,9,0,8,4,5,5,5,9,4,0,9,6,0,4,4,6,2,5,8,2,0,9,0,0,0,0,7,9, +4,9,3,6,2,7,0,5,9,6,9,4,8,6,2,1,9,0,2,2,6,9,5,0,5,8,7,6,3,8,6,9,8,9,8,2,8,2,9,4,2,7,2,6,6,9,0,1,9,8,9,0,0,5,3,5,8,7,9,1,4,9,8,1, 
+3,5,1,8,4,8,7,4,0,4,9,8,0,8,6,7,0,7,9,1,6,6,4,8,2,0,8,9,7,8,9,0,1,4,2,0,9,4,9,5,3,0,4,0,6,8,2,0,3,9,7,4,8,3,4,9,4,6,9,6,2,9,9,6, +5,2,1,9,2,2,6,4,4,3,4,4,6,0,2,9,0,8,4,6,3,4,1,6,6,5,4,4,5,0,0,3,4,4,1,1,4,9,8,0,1,6,2,6,1,3,7,3,4,1,5,9,0,1,9,2,7,3,6,7,1,4,7,7, +7,7,9,0,8,0,2,4,2,3,7,5,7,4,2,0,8,8,3,7,1,4,7,2,7,9,4,3,0,8,1,6,9,8,6,0,3,4,7,8,8,6,7,1,7,9,9,9,6,0,3,9,4,8,8,3,8,9,5,0,4,4,4,7, +7,8,3,4,5,8,1,6,7,5,2,0,9,5,9,7,1,8,6,4,4,9,3,8,6,2,8,3,0,7,6,5,9,6,5,8,0,9,8,7,4,4,1,1,8,7,8,7,2,2,8,8,3,1,9,2,2,5,8,6,3,3,7,8, +1,3,6,6,2,8,8,1,2,5,2,4,8,3,8,5,0,8,4,9,9,3,4,4,3,6,6,7,3,3,8,5,0,9,6,9,0,2,5,8,9,3,1,0,8,8,4,4,3,5,9,8,6,1,2,0,9,4,2,9,4,0,0,1, +8,1,7,6,0,2,2,7,6,0,1,7,6,9,0,9,7,8,4,9,6,2,4,3,3,8,8,8,5,6,3,2,2,7,1,7,8,6,6,1,4,0,8,3,5,0,0,3,5,2,3,4,2,4,9,2,1,0,4,3,3,2,0,5, +3,8,0,3,8,6,8,3,8,8,7,2,5,4,6,5,5,8,2,8,8,1,4,0,8,9,3,2,0,9,7,2,9,4,6,2,5,0,2,5,9,1,7,7,9,9,4,5,3,7,6,3,3,9,4,8,5,4,6,4,0,8,8,2, +0,0,8,9,8,3,3,9,7,2,6,9,2,9,4,6,8,3,8,7,7,6,4,7,8,5,8,6,1,3,0,2,2,9,4,8,0,1,3,6,4,2,9,2,1,4,7,0,3,8,5,6,6,1,6,2,0,0,2,4,1,4,4,4, +6,8,7,2,1,9,3,3,8,5,8,2,7,5,9,8,5,6,5,9,8,6,3,2,7,1,1,2,7,6,7,1,1,6,8,5,0,8,4,4,3,6,8,1,7,6,0,9,2,6,0,2,2,8,6,2,6,9,7,3,3,3,5,4, +6,0,5,0,9,4,5,1,9,8,1,4,5,7,2,2,9,0,3,7,3,2,9,2,6,5,3,7,2,6,1,3,1,5,7,1,4,6,0,6,4,9,1,5,0,1,8,0,5,9,8,5,2,1,0,7,3,2,5,8,1,7,5,3, +0,5,8,3,4,8,3,3,6,1,2,9,6,8,3,6,6,9,8,1,5,2,0,9,6,7,1,7,6,3,0,9,0,3,5,6,2,4,2,8,2,7,5,7,6,5,2,2,0,2,8,9,3,6,7,6,1,5,2,9,0,1,7,5, +9,0,0,5,5,8,3,0,9,7,6,8,5,7,2,4,9,8,2,2,8,3,1,6,5,1,8,6,2,0,8,5,5,8,0,3,1,8,0,0,6,8,4,1,4,1,5,0,0,9,0,0,2,2,3,3,1,8,6,6,3,7,1,3, +6,6,0,9,4,8,7,5,0,3,1,4,0,1,1,2,8,7,4,4,8,3,1,1,6,3,7,9,2,0,8,9,6,5,8,7,0,8,8,6,3,4,9,1,5,0,9,8,8,2,0,4,2,5,7,0,7,5,1,6,5,8,6,9, +9,6,7,5,2,9,9,1,7,1,0,4,4,9,5,8,9,3,2,8,4,0,7,3,5,8,2,6,2,1,7,0,5,0,1,9,3,2,7,7,4,1,6,4,9,6,3,5,6,8,2,5,3,9,1,5,3,2,7,1,9,1,2,8, +1,0,7,9,9,1,7,7,0,5,6,8,6,8,7,4,9,6,3,7,9,3,2,5,2,4,6,7,8,9,7,5,6,9,7,4,3,7,0,1,7,1,1,7,8,4,9,8,7,1,0,2,6,8,4,9,1,9,1,3,9,7,4,5, 
+1,9,7,9,0,0,4,3,2,4,4,8,0,6,1,8,8,2,0,9,0,7,0,5,4,8,9,8,7,7,8,5,9,2,6,2,2,9,6,7,1,1,1,7,7,4,3,0,8,5,5,0,1,1,0,9,3,8,3,0,9,3,8,7, +4,7,5,5,2,4,5,1,3,4,6,7,7,4,3,0,7,7,8,4,0,3,4,9,0,4,6,6,4,3,9,5,5,5,2,6,2,1,8,0,8,5,0,8,2,5,7,2,6,4,3,4,7,4,3,5,0,0,1,3,0,6,7,4, +1,7,8,7,4,6,1,9,2,1,3,1,4,5,8,6,8,0,2,8,8,7,6,5,5,4,3,7,0,0,3,6,7,8,2,4,0,0,7,0,8,0,8,7,1,2,1,5,4,0,6,9,4,6,0,4,0,6,6,4,6,2,3,8, +3,2,5,5,6,6,0,8,2,5,8,5,8,9,8,6,6,2,5,0,4,4,5,9,6,3,9,5,7,6,1,6,2,6,5,8,2,2,4,1,0,6,2,1,9,7,9,5,9,7,8,3,1,2,1,0,7,5,4,4,6,4,5,8, +1,4,1,3,8,8,1,0,5,0,2,4,8,3,4,0,7,0,0,9,6,6,0,3,8,3,2,8,2,3,5,5,4,9,5,3,7,9,8,6,6,3,4,6,7,7,3,1,1,5,4,2,0,9,5,1,4,5,4,0,8,2,4,8, +5,4,4,9,9,1,0,6,9,8,4,6,0,3,8,9,4,9,5,6,2,7,0,5,7,8,0,3,7,0,1,3,1,4,7,4,7,6,3,4,7,2,4,8,9,2,4,1,7,1,9,9,5,5,4,0,5,4,5,8,6,8,1,2, +9,0,0,2,7,4,2,1,3,1,7,3,6,2,5,9,3,8,5,5,8,8,2,9,3,5,2,6,9,3,0,4,3,7,8,3,7,2,6,5,8,2,9,7,1,1,9,0,8,5,8,0,1,3,1,5,9,8,1,9,1,9,2,9, +6,7,5,6,2,5,7,6,0,9,5,7,4,3,0,6,1,8,7,9,5,7,2,8,1,5,4,3,4,7,4,7,5,0,0,4,2,8,0,2,9,9,5,1,7,3,0,7,3,3,3,0,9,7,6,8,5,6,0,2,1,5,3,5, +7,7,8,2,6,6,6,8,2,9,8,3,7,5,7,9,2,1,4,8,0,4,7,6,1,7,1,0,8,3,5,7,9,1,0,9,5,9,6,6,7,1,8,1,5,6,9,6,0,1,2,9,9,7,8,2,5,8,6,1,5,2,9,3, +1,7,5,1,6,1,4,2,8,5,1,7,0,3,0,4,1,3,2,8,0,3,6,1,8,9,4,1,7,1,2,0,3,8,7,8,1,6,9,6,5,7,6,3,3,3,9,8,2,1,0,6,6,2,4,2,5,0,8,2,6,4,6,3, +2,6,1,2,4,1,3,3,9,5,8,6,2,1,4,7,7,3,1,6,4,3,2,5,8,9,9,6,5,0,0,3,1,1,4,2,7,6,4,8,9,8,1,5,6,7,0,3,3,6,4,8,8,8,8,9,3,8,3,5,5,4,4,5, +2,3,0,1,6,0,0,1,6,3,0,9,6,8,9,5,1,4,0,7,0,5,5,2,2,8,5,1,8,2,9,9,1,9,4,5,9,6,6,5,9,2,7,6,4,4,5,2,9,5,4,6,8,4,0,7,6,1,5,3,0,9,5,1, +2,5,4,4,4,8,2,8,8,6,6,9,6,4,1,7,7,8,1,6,8,7,9,4,2,4,1,4,2,0,5,8,8,9,6,9,7,6,1,0,9,7,1,5,2,0,7,7,3,4,5,1,0,7,3,1,5,4,4,9,6,1,0,8, +2,6,8,2,6,9,8,4,3,6,5,9,8,0,0,6,7,1,8,4,0,1,8,1,2,8,2,2,2,8,1,6,1,5,9,8,9,5,6,3,2,1,4,2,9,3,2,0,9,6,8,8,3,3,7,6,7,0,0,4,8,4,0,6, +9,8,0,2,7,2,8,4,0,6,1,8,3,9,8,4,0,0,6,3,0,5,9,8,0,6,8,5,1,9,7,5,4,8,5,5,1,4,3,2,4,5,5,0,5,5,3,8,0,2,5,2,1,3,1,6,1,6,8,2,8,4,1,3, 
+1,0,1,5,5,2,9,9,4,1,0,0,3,6,4,0,8,5,0,6,2,9,4,5,6,2,8,8,5,5,6,0,7,4,6,3,6,9,6,1,4,3,5,4,0,3,2,6,6,5,8,1,5,7,3,0,4,5,4,8,0,0,5,4, +5,7,1,4,4,4,2,7,3,2,7,7,8,6,5,1,1,0,3,1,4,6,7,3,7,1,6,3,7,5,6,6,8,4,9,1,2,7,3,0,2,8,8,3,9,3,7,2,8,5,0,9,4,5,1,3,4,6,6,8,6,9,7,7, +1,4,8,6,7,9,7,5,6,7,4,2,0,5,4,8,2,2,4,3,4,2,3,7,2,0,4,9,6,9,1,2,6,3,2,0,4,9,9,4,9,7,7,8,4,3,4,7,8,4,9,1,9,3,1,3,4,2,5,6,2,2,8,6, +3,5,9,4,5,9,7,1,6,7,7,3,1,9,3,2,8,5,7,7,8,1,7,7,0,0,2,3,7,3,6,8,6,3,7,6,6,3,6,0,1,9,9,3,0,9,2,2,3,0,2,6,3,3,2,8,5,6,2,6,9,6,7,0, +6,5,6,4,9,5,8,1,4,2,1,7,0,7,2,9,7,7,3,8,3,2,3,7,0,0,2,7,6,7,0,6,6,8,4,9,8,5,6,3,4,4,7,0,6,6,8,2,0,4,8,2,4,2,6,0,9,7,0,9,5,2,4,1, +4,5,1,2,4,3,7,7,9,5,0,7,3,2,0,8,4,1,6,5,9,8,8,1,2,3,4,0,2,4,0,3,2,6,1,0,9,2,2,3,4,0,7,3,3,0,4,7,7,1,2,9,7,8,5,5,6,3,4,1,8,9,3,5, +1,2,9,2,1,6,5,1,5,7,5,0,2,6,5,7,6,6,8,9,1,8,3,3,6,8,2,3,0,7,8,0,9,9,8,9,7,6,1,8,8,3,7,5,5,7,6,5,5,9,0,8,6,3,1,9,5,6,2,3,4,9,2,7, +5,1,9,8,5,2,6,7,8,7,1,2,1,1,6,8,0,0,1,0,4,3,1,8,3,4,1,8,4,7,3,3,0,3,6,7,7,5,6,3,0,7,6,7,9,3,9,4,1,9,1,2,1,8,8,9,6,6,1,0,8,9,8,6, +9,4,9,5,4,3,6,1,2,9,8,6,9,1,4,4,6,0,3,6,4,6,5,0,4,2,0,1,6,8,2,6,4,6,5,5,6,8,6,8,8,2,9,9,2,9,7,4,9,3,1,3,2,1,2,2,5,8,2,2,4,2,7,0, +5,1,3,4,8,7,2,7,3,2,1,4,0,0,4,8,5,6,6,7,9,0,7,6,0,8,1,2,3,0,5,9,1,3,6,1,8,2,7,1,7,5,9,5,1,9,5,9,1,8,5,0,8,2,9,0,8,1,6,9,1,5,1,8, +4,6,2,3,5,2,7,1,6,9,9,5,2,6,1,3,6,3,3,3,7,6,5,1,4,6,2,3,0,6,0,7,4,6,0,1,9,1,5,3,3,7,3,9,0,2,8,8,8,1,1,9,0,9,7,5,3,8,8,6,7,4,0,9, +7,8,8,4,4,9,3,1,1,5,2,7,5,6,2,2,9,3,0,8,1,7,1,8,5,5,3,0,9,9,6,4,6,0,4,2,3,6,2,8,4,8,4,9,9,6,1,0,0,0,6,1,9,7,7,2,8,4,7,8,4,4,6,2, +2,4,9,4,4,3,2,9,1,4,5,2,2,4,1,9,0,0,8,0,1,5,8,5,9,0,8,3,7,8,3,3,2,1,9,5,6,0,7,6,2,1,8,4,4,4,3,2,9,3,2,8,4,1,6,3,6,2,1,4,3,0,0,5, +9,4,8,3,1,9,6,4,0,9,7,7,6,3,9,4,0,3,2,2,1,9,0,6,5,2,0,7,8,3,3,4,6,0,6,0,0,5,7,8,3,4,6,5,8,2,7,8,9,7,9,0,8,1,9,8,5,7,4,4,9,8,5,7, +3,3,3,1,4,2,9,1,7,3,7,7,9,2,7,3,1,8,6,3,6,2,0,2,0,7,6,9,6,1,9,6,2,3,7,0,1,6,7,0,0,5,8,1,0,2,2,5,8,3,3,4,1,6,8,9,3,8,0,9,2,1,6,6, 
+0,7,1,3,6,0,7,0,3,9,4,1,5,2,6,3,7,6,1,3,8,4,6,3,3,4,6,6,0,6,5,6,3,6,9,4,4,0,0,8,0,5,3,7,4,0,9,4,2,2,9,1,8,7,5,3,8,6,1,9,9,8,9,0, +4,8,3,0,7,6,1,6,3,7,6,5,0,1,3,4,7,3,7,0,7,8,2,8,4,0,3,3,3,2,6,3,9,1,2,6,5,6,1,7,2,8,8,5,8,0,1,4,3,8,4,4,5,6,3,4,2,9,1,7,4,0,4,5, +8,4,8,6,5,5,1,2,2,9,9,2,4,7,0,1,1,8,6,2,9,1,7,0,8,9,3,2,6,9,1,4,6,6,2,8,1,6,7,3,0,7,7,4,3,9,3,5,6,5,6,5,8,9,5,3,1,0,9,5,4,6,7,6, +7,8,3,7,7,2,5,3,4,1,7,3,8,0,0,5,2,3,6,4,5,2,0,3,3,3,1,2,6,1,1,1,4,7,6,0,4,9,1,2,8,5,0,3,3,2,5,0,1,4,5,9,0,3,6,8,2,2,4,1,2,3,7,3, +9,5,2,1,0,7,2,1,3,5,2,3,0,5,6,2,2,9,0,4,1,6,5,3,0,4,8,2,2,1,2,1,2,2,1,7,7,2,0,3,4,7,3,6,8,0,9,0,0,8,3,2,0,4,6,7,9,6,0,1,7,1,7,6, +}; + +const int32_t C[] = { +1223,1282,1286,1163,1319,1270,1192,1100,1142,1261,1291,1367,1260,1311,985,1232,1313,1316,1148,1320,1277,1275,1145,1199,1157,1203,1225,1211,1177,1281,1164,1239,1209,1454,1377,1145,1189,1335,1306,1108,1302,1233,1421,1194,1339,1065,1322,1063,1169,979,1109,1155,976,1341,1258,1173,1250,1486,1204,1322,1273,1303,1130,1377, +1184,1267,1312,1237,1443,1455,1188,1232,1260,1222,1312,1451,1244,1331,1078,1464,1396,1375,1279,1437,1466,1272,1208,1262,1248,1352,1314,1335,1209,1309,1263,1277,1264,1509,1546,1222,1369,1428,1334,1090,1392,1323,1378,1303,1411,1190,1443,1195,1305,1244,1177,1218,1132,1317,1370,1300,1356,1338,1210,1421,1310,1349,1236,1435, +1162,1146,1395,1317,1216,1378,1119,1124,1091,1251,1202,1479,1236,1287,1005,1364,1472,1348,1223,1445,1362,1273,1148,1252,1226,1323,1226,1294,1205,1235,1192,1299,1135,1351,1441,1303,1192,1422,1313,1142,1138,1213,1390,1262,1400,1148,1242,1149,1390,1197,1230,1127,1164,1269,1347,1231,1377,1294,1109,1386,1400,1285,1202,1375, +1085,1269,1161,1032,1288,1306,1093,1075,1208,1217,1133,1354,1087,1180,994,1148,1230,1282,1151,1255,1213,1176,1122,1190,1027,1230,1284,1184,1022,1073,1237,1162,1222,1300,1273,1038,1101,1271,1263,1053,1266,1276,1277,1185,1288,1092,1256,1066,1070,1100,1139,1110,1041,1201,1214,1307,1153,1334,1146,1282,1220,1292,1202,1281, 
+953,1262,1262,1113,1165,1134,1165,1034,1155,1150,1092,1340,1127,1129,991,1264,1190,1241,1146,1305,1178,1086,1038,1146,925,1348,1187,1146,1084,1170,1147,1201,1109,1331,1327,1101,1191,1195,1240,1088,1269,1097,1275,1367,1394,1097,1289,1092,1121,1008,1014,1078,1185,1328,1277,1274,1192,1304,1097,1331,1288,1287,1139,1227, +1201,1282,1518,1325,1281,1510,1265,1336,1105,1327,1407,1443,1225,1295,1066,1483,1340,1349,1354,1403,1371,1315,1151,1265,1189,1335,1343,1363,1137,1399,1248,1254,1465,1378,1333,1240,1131,1468,1371,1139,1253,1175,1496,1213,1473,1246,1402,1272,1330,1226,1126,1186,1212,1376,1462,1329,1229,1447,1227,1339,1347,1301,1240,1524, +1377,1419,1524,1328,1441,1503,1332,1263,1372,1402,1486,1771,1459,1449,1219,1424,1629,1493,1295,1539,1427,1451,1453,1399,1276,1583,1497,1480,1279,1354,1389,1403,1376,1571,1552,1346,1348,1654,1440,1173,1324,1504,1554,1499,1542,1332,1499,1277,1431,1402,1203,1382,1206,1560,1479,1448,1313,1516,1372,1582,1508,1404,1374,1553, +1180,1190,1170,1150,1253,1280,1261,1120,1132,1291,1259,1295,1148,1178,936,1279,1199,1307,1136,1266,1352,1219,1137,1104,1051,1323,1299,1210,1202,1416,1149,1190,1192,1345,1331,1059,1264,1186,1238,1078,1212,1136,1433,1338,1238,1081,1314,980,1253,1209,1241,996,1074,1264,1301,1235,1360,1275,1076,1355,1130,1254,1272,1195, +1159,1328,1348,1187,1256,1466,1220,1187,1064,1249,1357,1457,1216,1173,1179,1494,1289,1263,1350,1375,1310,1373,1248,1356,1142,1296,1226,1265,1158,1135,1179,1185,1210,1338,1376,1222,1213,1348,1378,1129,1272,1097,1444,1236,1363,1248,1324,1170,1316,1302,1266,1319,1195,1216,1356,1352,1313,1320,1173,1359,1298,1234,1253,1485, +1134,1192,1510,1136,1287,1364,1271,1096,1336,1308,1148,1414,1194,1245,1032,1431,1381,1276,1130,1384,1336,1264,1240,1175,1239,1336,1183,1249,1064,1289,1208,1102,1247,1302,1428,1224,1265,1470,1235,1161,1156,1305,1444,1343,1369,1177,1340,1132,1237,1177,1092,1239,1110,1324,1379,1318,1259,1278,1188,1466,1312,1262,1205,1355, 
+1266,1520,1489,1292,1362,1379,1379,1383,1309,1375,1416,1341,1262,1346,1139,1468,1367,1283,1245,1376,1341,1328,1405,1194,1162,1568,1256,1316,1109,1523,1281,1251,1550,1493,1500,1225,1303,1342,1463,1098,1279,1224,1440,1471,1384,1199,1433,1338,1395,1243,1313,1311,1093,1536,1311,1466,1223,1480,1327,1313,1491,1332,1396,1461, +1324,1267,1330,1251,1403,1288,1188,1214,1312,1302,1313,1591,1341,1331,1203,1456,1421,1408,1342,1529,1301,1337,1204,1372,1206,1499,1416,1278,1280,1291,1305,1313,1258,1588,1452,1262,1314,1473,1316,1285,1467,1271,1521,1386,1488,1250,1570,1182,1258,1285,1140,1369,1305,1405,1438,1359,1400,1532,1314,1458,1338,1451,1351,1364, +1508,1591,1551,1501,1560,1630,1454,1467,1378,1516,1488,1608,1383,1444,1297,1613,1483,1759,1421,1635,1655,1449,1270,1550,1260,1637,1451,1499,1405,1380,1486,1452,1522,1566,1612,1445,1283,1683,1534,1209,1565,1462,1665,1582,1626,1309,1625,1376,1302,1344,1358,1280,1260,1595,1679,1500,1399,1654,1363,1613,1365,1399,1545,1702, +1128,1402,1315,1329,1340,1389,1269,1399,1237,1236,1196,1382,1177,1206,1166,1486,1421,1321,1176,1538,1402,1373,1244,1194,1180,1494,1352,1365,1053,1302,1324,1224,1425,1421,1437,1106,1273,1479,1411,1069,1432,1156,1353,1414,1474,1224,1352,1368,1351,1155,1216,1164,1160,1468,1376,1301,1283,1407,1340,1365,1333,1314,1222,1562, +990,1242,1037,945,1056,1066,1122,1041,1072,1106,1025,1176,971,1006,867,1205,1076,1045,1000,1134,1225,1119,1020,1013,914,1099,1048,1067,931,982,1033,1069,1108,1191,1107,972,1094,1217,1063,856,1183,1130,1222,1157,1123,796,1215,1122,1048,969,815,1089,996,1210,1176,1192,1055,1217,1003,1157,1077,1052,1133,1251, +1284,1200,1269,1326,1294,1339,1278,1318,1181,1223,1209,1465,1143,1136,1127,1486,1193,1233,1248,1465,1402,1356,1180,1231,1176,1432,1200,1408,1160,1184,1267,1200,1191,1474,1389,1208,1165,1362,1425,1053,1281,1076,1472,1331,1252,1040,1517,1310,1238,1248,1084,1152,967,1341,1425,1250,1309,1399,1241,1257,1382,1239,1175,1577, 
+1157,1371,1479,1324,1290,1274,1173,1187,1301,1333,1326,1392,1165,1215,985,1438,1376,1284,1211,1396,1195,1319,1162,1383,1117,1438,1185,1208,1183,1294,1112,1369,1307,1449,1440,1203,1413,1294,1443,1230,1321,1264,1402,1575,1427,1266,1471,1020,1234,1245,1201,1196,1241,1327,1444,1219,1381,1343,1251,1407,1293,1369,1284,1303, +1316,1298,1260,1190,1373,1338,1219,1183,1240,1283,1256,1493,1236,1335,1122,1382,1292,1488,1302,1486,1338,1254,1162,1173,1297,1483,1244,1220,1166,1161,1344,1272,1290,1480,1411,1317,1103,1441,1318,1080,1426,1259,1527,1254,1328,1124,1486,1371,1274,1181,1027,1319,1105,1419,1442,1367,1249,1373,1263,1297,1258,1317,1227,1527, +1421,1598,1719,1432,1649,1684,1592,1420,1513,1671,1607,1690,1512,1581,1246,1737,1578,1567,1514,1660,1608,1495,1644,1490,1460,1731,1507,1492,1354,1732,1465,1436,1612,1657,1650,1527,1527,1676,1606,1398,1527,1522,1674,1585,1600,1498,1731,1449,1478,1458,1463,1573,1379,1600,1567,1585,1439,1563,1439,1575,1620,1505,1612,1670, +1491,1497,1680,1447,1639,1595,1469,1385,1371,1551,1666,1664,1403,1545,1403,1621,1512,1657,1560,1631,1536,1573,1433,1514,1314,1649,1409,1603,1415,1605,1462,1469,1523,1675,1690,1356,1478,1533,1601,1331,1565,1380,1688,1687,1424,1429,1693,1375,1481,1428,1447,1426,1364,1620,1623,1724,1355,1714,1367,1620,1474,1547,1523,1614, +1090,1249,1258,1115,1331,1239,1173,1167,1179,1179,1203,1371,1159,1269,1009,1150,1245,1181,1104,1338,1191,1301,1036,1221,1125,1263,1207,1256,1169,1232,1197,1194,1278,1346,1321,997,1116,1309,1316,1143,1253,1186,1246,1267,1269,1089,1401,1067,1059,1099,1098,1109,1089,1309,1330,1325,1136,1422,1227,1181,1224,1284,1296,1325, +1083,1217,1287,1048,1226,1301,1124,1133,1146,1134,1156,1229,1271,1223,1073,1232,1203,1169,1079,1252,1233,1235,1179,1147,1137,1240,1198,1207,1135,1142,1101,1184,1170,1434,1480,1202,1022,1193,1354,1134,1161,1055,1393,1101,1390,1119,1408,1067,1225,1142,1141,1134,982,1215,1252,1163,1391,1336,1110,1216,1293,1274,1213,1374, 
+1176,1248,1367,1209,1350,1327,1126,1169,1186,1257,1147,1409,1134,1158,1140,1539,1249,1262,1208,1267,1171,1228,1130,1483,1033,1346,1135,1305,1283,1225,1075,1250,1112,1364,1485,1199,1257,1356,1297,1102,1262,1223,1307,1367,1397,1109,1360,1064,1102,1259,1421,1132,1150,1122,1323,1293,1342,1352,1120,1478,1299,1183,1290,1328, +1358,1196,1378,1279,1255,1332,1182,1268,1170,1229,1412,1412,1192,1322,1065,1309,1291,1437,1206,1262,1288,1387,1202,1326,1237,1347,1386,1400,1304,1272,1176,1260,1337,1353,1438,1130,1291,1371,1493,1071,1218,1283,1566,1423,1335,1169,1460,1183,1396,1247,1090,1229,1152,1443,1453,1404,1296,1502,1227,1393,1356,1435,1306,1545, +970,1102,1135,1132,1034,1115,1110,1029,968,1150,1057,1154,1067,1083,1050,1148,1048,1194,1057,1091,1223,1172,1002,1175,842,1109,1236,1167,1003,1080,1207,1133,1084,1245,1215,1080,1015,1208,1115,949,1090,1113,1203,1106,1225,993,1243,1012,1082,1110,1004,989,1072,1076,1100,1213,1163,1220,937,1160,1132,1206,1271,1157, +1280,1299,1484,1423,1264,1417,1198,1259,1318,1303,1342,1413,1185,1310,1112,1504,1318,1388,1303,1392,1330,1217,1124,1375,1184,1439,1286,1431,1277,1300,1269,1324,1198,1410,1436,1153,1171,1297,1529,1139,1252,1276,1397,1376,1339,1280,1301,1146,1227,1303,1277,1239,1122,1313,1444,1336,1289,1461,1322,1437,1257,1302,1310,1431, +1213,1324,1265,1154,1335,1331,1249,1177,1102,1299,1174,1296,1159,1095,1117,1293,1148,1228,1118,1345,1221,1292,1112,1155,1161,1335,1152,1185,1161,1244,1133,1151,1287,1381,1380,1096,1145,1420,1294,1142,1364,1169,1359,1304,1370,1133,1458,1201,1186,1127,1257,1200,1028,1197,1323,1221,1332,1368,1115,1191,1290,1125,1327,1335, +1100,1107,1206,1076,1070,1217,1152,1037,976,1124,1102,1303,1049,1040,882,1175,1105,1061,1042,1170,1089,1200,1062,1149,1001,1083,1126,1249,1103,1233,1170,1129,1187,1130,1274,933,1092,1276,1166,982,1200,1136,1238,1307,1227,953,1190,1037,1182,1145,936,850,1108,1180,1121,1178,1162,1225,1059,1234,1170,1117,1057,1164, 
+1304,1421,1468,1398,1297,1510,1405,1379,1285,1250,1266,1486,1317,1492,1127,1487,1396,1510,1326,1485,1468,1341,1380,1525,1189,1391,1401,1442,1377,1416,1318,1342,1377,1510,1619,1288,1212,1407,1511,1086,1297,1355,1607,1403,1533,1261,1387,1344,1291,1234,1317,1152,1280,1513,1394,1379,1461,1464,1440,1650,1456,1457,1417,1467, +1311,1229,1205,1189,1294,1243,1263,1098,1123,1304,1394,1277,1192,1149,1133,1228,1157,1342,1180,1163,1268,1249,1113,1244,1099,1237,1215,1302,1102,1304,1141,1175,1185,1392,1483,1056,1062,1223,1203,1080,1165,1221,1384,1235,1295,1041,1492,1081,1176,1246,1169,1099,897,1234,1279,1242,1253,1482,1052,1325,1231,1235,1262,1267, +1040,1219,1342,1241,1334,1251,1081,1293,1204,1209,1230,1364,1124,1250,975,1353,1284,1270,1159,1251,1291,1236,1125,1121,1154,1273,1190,1220,1133,1240,1114,1134,1208,1417,1376,1149,1219,1342,1313,1052,1175,1109,1288,1249,1294,1029,1356,1157,1294,1130,1236,1131,1045,1275,1329,1190,1158,1222,1145,1239,1212,1243,1217,1354, +1330,1514,1426,1478,1427,1534,1396,1449,1113,1412,1455,1612,1408,1417,1252,1604,1356,1498,1443,1472,1434,1509,1309,1542,1292,1482,1586,1417,1344,1534,1440,1396,1500,1590,1601,1427,1239,1447,1447,1338,1503,1249,1509,1459,1763,1201,1585,1278,1501,1317,1409,1322,1357,1424,1436,1494,1504,1462,1386,1541,1526,1580,1334,1504, +1483,1510,1662,1516,1472,1567,1511,1383,1427,1441,1486,1693,1423,1445,1277,1653,1566,1564,1252,1712,1527,1504,1368,1474,1343,1638,1479,1562,1473,1475,1400,1462,1536,1550,1565,1394,1529,1659,1619,1351,1574,1540,1647,1672,1703,1358,1640,1358,1459,1364,1329,1423,1398,1645,1623,1495,1576,1619,1410,1576,1638,1504,1511,1513, +1378,1291,1411,1401,1335,1462,1228,1371,1250,1265,1322,1481,1251,1293,1165,1519,1344,1376,1263,1528,1354,1316,1168,1275,1161,1352,1151,1340,1317,1165,1286,1229,1292,1526,1369,1343,1357,1465,1506,1065,1534,1125,1546,1396,1442,1091,1466,1272,1352,1308,1147,1242,1180,1487,1382,1336,1331,1464,1227,1231,1398,1345,1305,1532, 
+1346,1268,1352,1280,1401,1465,1207,1261,1254,1325,1211,1445,1266,1284,1084,1275,1388,1362,1081,1393,1339,1344,1202,1257,1253,1346,1235,1335,1205,1225,1189,1267,1311,1467,1462,1328,1183,1386,1372,1146,1313,1354,1377,1324,1560,1051,1474,1183,1329,1275,1320,1132,1053,1471,1323,1273,1426,1453,1205,1349,1508,1382,1197,1463, +1022,1187,1170,1044,1151,1246,1027,1219,1050,1128,1098,1206,1071,1097,1007,1357,1094,1118,1119,1173,1158,1171,1093,1157,1052,1307,1048,1041,1028,1172,1076,1129,1159,1224,1297,1159,1100,1159,1144,1040,1137,1017,1259,1165,1290,1100,1200,1175,1063,1165,1072,1022,1080,1204,1242,1138,1170,1136,1153,1163,1132,1100,1126,1205, +1228,1387,1427,1403,1300,1425,1408,1294,1135,1292,1385,1536,1215,1375,1090,1372,1493,1488,1375,1443,1291,1323,1196,1449,1011,1387,1405,1524,1237,1315,1375,1284,1439,1430,1453,1275,1203,1455,1334,1082,1336,1212,1446,1398,1447,1109,1415,1167,1347,1258,1260,1182,1166,1460,1312,1474,1164,1448,1142,1484,1380,1336,1312,1462, +1229,1355,1264,1183,1314,1315,1133,1150,1185,1272,1267,1278,1256,1081,1103,1341,1182,1357,1212,1377,1320,1300,1159,1211,1155,1350,1139,1159,1123,1117,1120,1224,1267,1341,1296,1181,1190,1433,1408,1056,1392,1231,1416,1354,1320,1066,1387,1219,1231,1083,1171,1302,1194,1261,1386,1159,1144,1254,1142,1345,1164,1230,1272,1376, +1402,1490,1593,1472,1540,1586,1420,1415,1467,1578,1512,1550,1450,1479,1264,1497,1579,1482,1403,1634,1640,1530,1355,1386,1494,1515,1378,1446,1330,1336,1452,1408,1570,1617,1621,1422,1320,1645,1575,1282,1514,1496,1689,1515,1551,1392,1592,1417,1496,1390,1331,1436,1293,1555,1537,1461,1428,1662,1461,1603,1505,1478,1485,1641, +1190,1351,1391,1151,1359,1261,1272,1151,1193,1410,1220,1442,1176,1321,1145,1331,1298,1288,1138,1432,1265,1303,1234,1344,1097,1368,1299,1257,1225,1254,1342,1328,1216,1474,1431,1321,1194,1382,1412,1126,1384,1312,1382,1311,1391,1184,1442,1234,1173,1217,1092,1163,1187,1294,1353,1395,1298,1525,1167,1290,1331,1287,1237,1428, 
+1304,1416,1446,1406,1444,1463,1304,1344,1205,1344,1408,1391,1263,1330,1190,1426,1445,1377,1141,1490,1381,1376,1240,1404,1244,1378,1310,1436,1337,1454,1351,1288,1410,1408,1562,1208,1295,1477,1469,1192,1434,1246,1467,1463,1576,1277,1378,1198,1375,1277,1300,1163,1179,1544,1309,1334,1374,1525,1258,1383,1395,1343,1357,1443, +1337,1425,1424,1300,1416,1506,1314,1263,1283,1442,1345,1599,1210,1381,1185,1491,1360,1514,1366,1549,1449,1436,1297,1274,1278,1592,1432,1358,1241,1418,1492,1221,1363,1523,1521,1200,1309,1466,1375,1285,1432,1284,1492,1419,1474,1272,1567,1251,1258,1427,1298,1269,1276,1309,1573,1506,1367,1432,1366,1467,1307,1432,1391,1526, +1245,1479,1359,1192,1393,1455,1247,1338,1192,1281,1363,1339,1314,1433,1229,1360,1377,1332,1284,1331,1385,1395,1346,1348,1263,1295,1340,1301,1159,1368,1222,1257,1361,1454,1507,1271,1256,1206,1537,1251,1334,1178,1573,1343,1532,1225,1547,1291,1358,1286,1343,1219,1201,1442,1395,1281,1329,1515,1276,1401,1372,1362,1373,1679, +1088,1321,1295,1136,1190,1421,1235,1158,1223,1093,1118,1345,1176,1361,1049,1237,1373,1226,1196,1435,1316,1245,1186,1244,1044,1246,1207,1342,1137,1222,1302,1160,1238,1386,1398,1125,1219,1352,1315,976,1279,1157,1435,1272,1340,1212,1151,1148,1147,1207,1109,1078,1104,1456,1245,1357,1238,1354,1232,1349,1244,1180,1291,1399, +1233,1346,1483,1244,1296,1440,1206,1207,1228,1387,1335,1338,1222,1302,1034,1380,1488,1306,1266,1492,1447,1316,1269,1265,1181,1368,1261,1244,1149,1343,1204,1310,1313,1503,1547,1292,1255,1242,1249,1197,1355,1234,1505,1377,1462,1245,1408,1104,1243,1398,1216,1132,1221,1395,1361,1429,1450,1494,1145,1375,1395,1375,1295,1414, +1150,1376,1431,1299,1328,1327,1091,1375,1134,1150,1289,1330,1179,1324,1130,1429,1436,1311,1203,1401,1274,1414,1187,1409,1205,1395,1449,1332,1164,1307,1128,1333,1378,1352,1505,1192,1250,1254,1451,1217,1289,1257,1433,1468,1507,1226,1471,1181,1337,1189,1240,1139,1172,1376,1394,1321,1324,1470,1213,1393,1332,1401,1269,1547, 
+1086,1274,1427,1184,1336,1347,1301,1288,1076,1234,1239,1313,1176,1249,1188,1370,1234,1309,1243,1253,1244,1162,1137,1358,1024,1365,1262,1301,1150,1258,1228,1221,1250,1336,1448,1214,1060,1271,1290,1135,1225,1045,1454,1293,1480,1147,1399,1131,1121,1131,1116,1150,1134,1318,1381,1327,1183,1343,1102,1294,1311,1241,1247,1276, +1055,1338,1289,1170,1280,1362,1196,1278,1246,1310,1316,1358,1177,1180,1127,1391,1160,1306,1197,1344,1291,1294,1137,1177,1097,1365,1251,1240,1111,1305,1330,1218,1345,1410,1325,1109,1232,1329,1290,1099,1453,1200,1285,1355,1391,1092,1356,1205,1247,1158,1168,1232,1166,1421,1269,1371,1216,1398,1251,1336,1261,1328,1173,1321, +1241,1362,1541,1375,1528,1481,1285,1364,1179,1334,1358,1569,1284,1388,1337,1545,1414,1401,1245,1493,1384,1470,1260,1417,1267,1581,1456,1544,1325,1443,1378,1432,1360,1517,1612,1293,1353,1448,1561,1323,1417,1354,1493,1485,1468,1348,1504,1294,1413,1278,1469,1253,1298,1438,1467,1442,1423,1520,1282,1510,1463,1438,1355,1512, +1191,1405,1548,1192,1355,1385,1281,1306,1116,1400,1443,1404,1205,1296,1223,1375,1174,1249,1177,1294,1233,1342,1279,1290,1032,1370,1345,1204,1249,1404,1243,1196,1357,1300,1416,1119,1227,1290,1448,1139,1231,1347,1457,1401,1401,1252,1517,1270,1319,1150,1263,1046,1187,1352,1436,1425,1350,1436,1271,1350,1371,1359,1370,1384, +1533,1515,1630,1488,1528,1655,1407,1376,1435,1595,1587,1596,1514,1517,1354,1704,1603,1606,1510,1715,1590,1563,1342,1497,1435,1593,1392,1554,1467,1607,1335,1566,1479,1775,1667,1446,1608,1589,1735,1394,1572,1454,1692,1573,1659,1501,1695,1421,1608,1406,1575,1572,1313,1565,1659,1484,1697,1825,1410,1592,1599,1564,1547,1710, +1030,1264,1235,1155,1239,1140,1107,1169,1228,1181,1199,1289,1264,1173,908,1223,1267,1189,1040,1262,1244,1091,1075,1137,1063,1274,1222,1202,1090,1111,969,1306,1205,1427,1291,1148,953,1258,1254,1058,1239,1108,1343,1147,1290,1008,1357,1026,1152,999,1101,1209,994,1239,1195,1079,1115,1298,1073,1232,1165,1155,1254,1254, 
+1243,1402,1384,1183,1246,1257,1266,1330,1115,1206,1321,1437,1353,1240,1078,1420,1196,1383,1183,1344,1188,1398,1211,1321,1227,1379,1400,1246,1291,1312,1190,1233,1403,1285,1511,1135,1150,1454,1366,1224,1258,1221,1410,1342,1498,1066,1384,1252,1288,1102,1202,1247,1224,1436,1536,1461,1352,1494,1232,1360,1440,1282,1332,1466, +1376,1432,1337,1363,1390,1476,1213,1241,1248,1363,1401,1387,1244,1207,1079,1454,1381,1566,1314,1472,1399,1322,1191,1356,1247,1391,1336,1360,1269,1270,1294,1360,1366,1462,1550,1256,1130,1431,1423,1110,1321,1340,1438,1383,1399,1126,1480,1170,1290,1251,1342,1232,1060,1371,1438,1266,1461,1509,1254,1567,1297,1286,1327,1459, +1238,1391,1654,1282,1535,1583,1349,1371,1349,1438,1396,1503,1400,1483,1352,1670,1451,1452,1393,1511,1507,1553,1343,1431,1373,1456,1367,1364,1333,1462,1290,1251,1432,1511,1557,1338,1462,1445,1472,1429,1484,1300,1658,1546,1579,1316,1561,1295,1411,1270,1516,1399,1360,1430,1484,1458,1469,1569,1336,1635,1471,1468,1405,1548, +1328,1454,1420,1339,1301,1584,1286,1280,1242,1267,1371,1399,1361,1255,1125,1574,1424,1452,1350,1505,1338,1342,1278,1317,1284,1571,1323,1374,1275,1421,1342,1274,1397,1487,1589,1303,1386,1468,1493,1298,1385,1243,1476,1502,1498,1285,1414,1261,1274,1307,1370,1280,1183,1533,1502,1422,1490,1479,1370,1480,1419,1377,1417,1481, +1222,1364,1443,1416,1382,1408,1431,1338,1335,1312,1306,1510,1331,1404,1159,1485,1364,1526,1255,1513,1490,1320,1246,1408,1261,1466,1481,1469,1303,1305,1353,1331,1239,1519,1619,1349,1219,1522,1492,1206,1306,1366,1531,1346,1561,1139,1528,1252,1285,1321,1207,1237,1181,1438,1533,1434,1404,1478,1258,1462,1425,1431,1400,1589, +1207,1377,1473,1278,1359,1422,1204,1284,1197,1383,1316,1329,1130,1287,1145,1371,1422,1446,1296,1427,1422,1393,1162,1300,1248,1385,1149,1205,1164,1347,1224,1188,1328,1362,1530,1244,1269,1378,1281,1228,1304,1222,1561,1470,1516,1302,1417,1272,1254,1233,1184,1200,1226,1510,1500,1344,1353,1468,1197,1424,1285,1343,1358,1503, 
+1191,1379,1246,1083,1210,1174,1160,1093,1102,1200,1219,1429,1167,1147,923,1368,1318,1166,1231,1362,1215,1256,1084,1156,1002,1320,1218,1141,1110,1254,1116,1175,1212,1297,1225,1082,1283,1247,1229,1081,1218,1143,1336,1235,1273,986,1269,1137,1107,975,1089,1161,1112,1296,1385,1268,1172,1429,1053,1191,1338,1190,1197,1337, +1259,1436,1359,1390,1411,1479,1330,1340,1282,1402,1434,1360,1272,1425,1205,1464,1312,1423,1284,1440,1498,1374,1303,1352,1285,1402,1470,1302,1314,1571,1313,1321,1376,1566,1517,1295,1295,1339,1354,1200,1421,1357,1432,1424,1544,1293,1445,1228,1226,1258,1330,1254,1275,1477,1365,1285,1438,1490,1414,1492,1384,1388,1358,1277, +1203,1315,1492,1177,1365,1245,1219,1234,1241,1305,1174,1385,1160,1270,1017,1348,1315,1297,1118,1263,1216,1289,1330,1083,1083,1391,1298,1197,1140,1290,1254,1276,1399,1398,1534,1247,1368,1413,1429,1207,1287,1267,1412,1361,1376,1170,1480,1240,1385,1103,1117,1211,1131,1291,1320,1385,1357,1369,1198,1306,1478,1417,1300,1326, +1277,1318,1539,1185,1440,1354,1365,1235,1312,1446,1350,1412,1227,1379,1083,1407,1381,1347,1233,1348,1337,1269,1350,1304,1212,1405,1226,1215,1164,1413,1163,1254,1348,1431,1470,1297,1250,1341,1255,1221,1268,1348,1484,1347,1474,1210,1514,1269,1228,1253,1159,1259,1213,1468,1432,1361,1359,1421,1314,1402,1464,1348,1241,1340, +1276,1309,1326,1354,1357,1323,1325,1275,1202,1251,1403,1463,1174,1212,1064,1536,1341,1326,1305,1491,1351,1375,1168,1253,1217,1436,1278,1363,1195,1441,1184,1246,1358,1420,1478,1120,1412,1381,1337,1141,1416,1238,1449,1368,1412,1217,1425,1182,1378,1282,1199,1218,1140,1381,1329,1357,1345,1478,1152,1357,1294,1417,1230,1416, +1071,1204,1308,1204,1446,1269,1175,1132,1209,1188,1315,1368,1169,1283,1042,1331,1380,1409,1113,1452,1307,1186,1166,1205,1120,1335,1265,1279,1207,1236,1194,1144,1336,1483,1401,1154,1251,1352,1293,1073,1423,1138,1343,1257,1280,1237,1323,1077,1118,1105,1228,1177,1158,1301,1205,1194,1234,1343,1162,1376,1295,1242,1234,1213, +}; + +#define MATRIX_SIZE 64 +#endif \ No newline at end of file diff 
--git a/sw/applications/example_fft/data.h b/sw/applications/example_fft/data.h new file mode 100644 index 000000000..dc65b63d1 --- /dev/null +++ b/sw/applications/example_fft/data.h @@ -0,0 +1,80 @@ +// This file is automatically generated +// Type " python datagen.py " in the terminal to generate the data.h file. Configuration parameters can be changed in the datagen.py file. + +#ifndef DATA_H_ +#define DATA_H_ + +#define FFT_LEN 16 +#define DECIMAL_BITS 10 + +int16_t A[32] = { + 0xfff6, 0x0008, 0x0000, 0x0003, 0x0000, 0x0000, 0xfff9, 0x0004, 0xfff9, 0xfffe, 0xfffb, 0x0003, 0xffff, 0x0006, 0xfffb, 0x0000, 0xfff8, 0x0009, 0xfffd, 0x0006, 0xfffa, 0x0009, 0x0007, 0xfffa, 0x0009, 0x0000, 0xfff7, 0x0009, 0x0004, 0x0003, 0x0001, 0x0007 +}; + +int16_t R[32] = { + 0xffd6, 0x003e, 0x0003, 0x001d, 0xffea, 0x0015, 0xfffe, 0xffe9, 0x0006, 0x0009, 0xffd6, 0x0024, 0xffdc, 0xfffb, 0x000d, 0xffe8, 0x0000, 0x0009, 0xfffe, 0xfff0, 0xfff0, 0x0023, 0xffec, 0x0004, 0xffe2, 0xffec, 0x0018, 0x000d, 0xfffa, 0x0019, 0x000e, 0xffe6 +}; + +int16_t W_radix2[32] = { + 0x0400, + 0x0000, + 0x03b2, + 0xfe78, + 0x02d4, + 0xfd2c, + 0x0188, + 0xfc4e, + 0x0000, + 0xfc00, + 0xfe78, + 0xfc4e, + 0xfd2c, + 0xfd2c, + 0xfc4e, + 0xfe78, + 0xfc00, + 0x0000, + 0xfc4e, + 0x0188, + 0xfd2c, + 0x02d4, + 0xfe78, + 0x03b2, + 0x0000, + 0x0400, + 0x0188, + 0x03b2, + 0x02d4, + 0x02d4, + 0x03b2, + 0x0188 +}; + +int16_t W_radix4[24] = { + 0x0400, + 0x0000, + 0x0400, + 0x0000, + 0x0400, + 0x0000, + 0x03fb, + 0xff9c, + 0x03ec, + 0xff38, + 0x03d4, + 0xfed7, + 0x03ec, + 0xff38, + 0x03b2, + 0xfe78, + 0x0353, + 0xfdc7, + 0x03d4, + 0xfed7, + 0x0353, + 0xfdc7, + 0x028a, + 0xfce8 +}; + +#endif \ No newline at end of file diff --git a/sw/applications/example_fft/datagen.py b/sw/applications/example_fft/datagen.py new file mode 100644 index 000000000..03659f75a --- /dev/null +++ b/sw/applications/example_fft/datagen.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python + +## Copyright 2024 EPFL +## Solderpad Hardware License, Version 2.1, 
see LICENSE.md for details. +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +# Author: Francesco Poluzzi +# This script generates the data.h file for the example_fft application, that contains the FFT input and the golden output. +# type " python datagen.py " in the terminal from the example_fft application folder to generate the data.h file. + +import sys +import random +import numpy as np + +# CONFIGURABLE PARAMETERS +SIZE = 16 +decimal_bits = 10 +seed = 9 + +def format_matrix(matrix: np.ndarray, name: str) -> str: + # Ensure the matrix is int16 (signed 16-bit integer) + if matrix.dtype != np.int16: + raise ValueError("Input matrix must be of dtype int16") + + num_bits = matrix.dtype.itemsize * 8 + array_ctype = "int16_t" + + # Convert each element to its 2's complement hexadecimal representation + rows = [] + for row in matrix: + row = np.atleast_1d(row) # Ensure row is array-like for iteration + # Format each element as a 16-bit signed integer in hex format + hex_values = [f"{element if element >= 0 else (1 << num_bits) + element:#06x}" for element in row] + rows.append(hex_values) + + # Format the matrix into a C-style array + matrix_contents = f"{array_ctype} {name}[{matrix.size}] = {{\n" + if len(rows) > 1: + matrix_contents += ',\n'.join([f" {', '.join(row)}" for row in rows]) + else: + matrix_contents += f" {', '.join(rows[0])}" + matrix_contents += '\n};\n\n' + + return matrix_contents + + +def generate_fft_twiddle_factors_radix2(N): + # Number of twiddle factors is N + num_twiddle_factors = N + + # Generate the angles for the twiddle factors + angles = np.linspace(0, -2 * np.pi, num_twiddle_factors, endpoint=False) + + # Compute the real and imaginary parts + real_parts = np.cos(angles) + imaginary_parts = np.sin(angles) + + # Concatenate real and imaginary parts + twiddle_factors = np.empty(2 * num_twiddle_factors) + twiddle_factors[0::2] = real_parts + twiddle_factors[1::2] = imaginary_parts + + return twiddle_factors + +import numpy as np + 
+def generate_fft_twiddle_factors_radix4(N): + # Ensure N is divisible by 4 for Radix-4 FFT + if N % 4 != 0: + raise ValueError("N must be a multiple of 4 for Radix-4 FFT") + + num_twiddle_groups = N // 4 + angles_k1 = np.linspace(0, -2 * np.pi / N, num_twiddle_groups, endpoint=False) + angles_k2 = 2 * angles_k1 # Corresponds to W_N^{2k} + angles_k3 = 3 * angles_k1 # Corresponds to W_N^{3k} + real_parts_k1 = np.cos(angles_k1) + imag_parts_k1 = np.sin(angles_k1) + real_parts_k2 = np.cos(angles_k2) + imag_parts_k2 = np.sin(angles_k2) + real_parts_k3 = np.cos(angles_k3) + imag_parts_k3 = np.sin(angles_k3) + twiddle_factors = np.empty(6 * num_twiddle_groups) + twiddle_factors[0::6] = real_parts_k1 + twiddle_factors[1::6] = imag_parts_k1 + twiddle_factors[2::6] = real_parts_k2 + twiddle_factors[3::6] = imag_parts_k2 + twiddle_factors[4::6] = real_parts_k3 + twiddle_factors[5::6] = imag_parts_k3 + + return twiddle_factors + + +def write_arr(f, name, arr, ctype, size): + f.write("const " + ctype + " " + name + "[2*FFT_LEN] = {\n") + + for row in arr: + for elem in row[:-1]: + f.write('%d,' % (elem)) + f.write('%d,\n' % (row[-1])) + + f.write('};\n\n') + return + +def generate_random_matrix(num_channels, length, decimal_bits): + """ + Generate a random matrix with num_channels rows and length columns. + """ + + real_part = np.random.uniform(-0.01, 0.01, (num_channels, length)) #* (2**(-decimal_bits)) + imag_part = np.random.uniform(-0.01, 0.01, (num_channels, length)) #* (2**(-decimal_bits)) + + matrix = real_part + 1j * imag_part + + return matrix + +def perform_fft(matrix): + """ + Perform FFT on each row of the matrix and return the result matrix. + """ + return np.fft.fft(matrix, axis=1) + +def convert_to_fixed_point(matrix, decimal_places=8): + """ + Convert the matrix to fixed-point format with a specified number of decimal bits. + Each element in the output matrix is of type 'int16'. 
+ """ + # Scaling factor for conversion + scaling_factor = 1 << decimal_places # 2^decimal_bits (256 for Q1.8 format) + + # Convert to fixed-point representation + real_part = np.real(matrix) * scaling_factor + imag_part = np.imag(matrix) * scaling_factor + + # Clip values to ensure they fit within the range of int16 + real_part = np.clip(real_part, -32768, 32767) + imag_part = np.clip(imag_part, -32768, 32767) + + # Convert to int16 + real_part = real_part.astype(np.int16) + imag_part = imag_part.astype(np.int16) + + # Combine real and imaginary parts into a single matrix + fixed_point_matrix = np.empty((matrix.shape[0], matrix.shape[1] * 2), dtype=np.int16) + fixed_point_matrix[:, 0::2] = real_part + fixed_point_matrix[:, 1::2] = imag_part + + return fixed_point_matrix + +def convert_to_fixed_point_twiddles(array, decimal_bits=8): + # The input 'array' is a 1D array with interleaved real and imaginary parts + fixed_point_array = np.empty_like(array, dtype=np.int16) + + # Scale and convert to fixed-point for real and imaginary parts + scale_factor = 1 << decimal_bits + fixed_point_array = np.round(array * scale_factor).astype(np.int16) + + return fixed_point_array + +################################################################################ +f = open('data.h', 'w') +f.write('// This file is automatically generated\n// Type " python datagen.py " in the terminal to generate the data.h file. 
Configuration parameters can be changed in the datagen.py file.\n') +f.write('\n#ifndef DATA_H_\n') +f.write('#define DATA_H_\n\n') + +np.random.seed(seed) + +# Generate random input +input = generate_random_matrix(1, SIZE, decimal_bits) + +# Perform FFT +fft_output = perform_fft(input) + +# Comput twiddles +twiddles_radix2 = generate_fft_twiddle_factors_radix2(SIZE) +twiddles_radix4 = generate_fft_twiddle_factors_radix4(SIZE) + +# Convert FFT result to fixed-point format +R = convert_to_fixed_point(fft_output, decimal_bits) +A = convert_to_fixed_point(input, decimal_bits) +W_radix2 = convert_to_fixed_point_twiddles(twiddles_radix2, decimal_bits) +W_radix4 = convert_to_fixed_point_twiddles(twiddles_radix4, decimal_bits) + +print("Input:") +print(input) +print("A (fixed point) :") +print(A*2**-decimal_bits) +print("FFT output:") +print(fft_output) +print("R (fixed point):") +print(R*2**-decimal_bits) +print("Twiddles Radix-2 (fixed point):") +print([hex(x) for x in W_radix2.flatten()]) # Print in hexadecimal format +print("Twiddles Radix-4 (fixed point):") +print([hex(x) for x in W_radix4.flatten()]) # Print in hexadecimal format + +f.write('#define FFT_LEN %d\n' % SIZE) +f.write('#define DECIMAL_BITS %d\n\n' % decimal_bits) + +f.write(format_matrix( A, 'A')) +f.write(format_matrix( R, 'R')) +f.write(format_matrix( W_radix2, 'W_radix2')) +f.write(format_matrix( W_radix4, 'W_radix4')) + +f.write('#endif') \ No newline at end of file diff --git a/sw/applications/example_fft/fft.h b/sw/applications/example_fft/fft.h new file mode 100644 index 000000000..6d079bda9 --- /dev/null +++ b/sw/applications/example_fft/fft.h @@ -0,0 +1,331 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// File: fft.h +// Author: Francesco Poluzzi +// Date: 04/09/2024 +// Description: FFT-related functions for X-Heep + +#ifndef FFT_H +#define FFT_H + +#include +#include +#include +#include "data.h" + +#define PI 3.14159265358979323846 + +uint8_t log_floor(uint16_t N, uint8_t base) { + if (N == 0) { + return 0; // Log of 0 is undefined, return 0 for safety + } + uint8_t log_val = 0; + + if (base == 2) { + while (N >>= 1) { // Right shift by 1 bit (equivalent to dividing by 2) + log_val++; + } + } else if (base == 4) { + while (N >= 4) { // Right shift by 2 bits (equivalent to dividing by 4) + N >>= 2; + log_val++; + } + } + return log_val; +} + +bool is_power_of(uint16_t N, uint8_t base) { + if (N == 0) { + return false; + } + if (base == 2) { + return (N & (N - 1)) == 0; + } else if (base == 4) { + return (N & (N - 1)) == 0 && (N & 0x5555) != 0; + } + return false; +} + +// Function to perform bit reversal based on radix (base) +uint16_t bit_reversal(uint16_t n, uint8_t num_bits, uint8_t base) { + uint16_t result = 0; + + // Calculate the number of bits to shift based on the base + uint8_t shiftamt = (base == 2) ? 
1 : 2; // Shift by 1 for Radix-2, by 2 for Radix-4 + uint16_t mask = base - 1; // Mask is 1 for Radix-2, 3 for Radix-4 + + // Perform the bit reversal by repeatedly shifting and applying the mask + for (uint8_t i = 0; i < num_bits; i++) { + result <<= shiftamt; // Shift result to the left by `shiftamt` bits + result |= n & mask; // Add the masked bits from `n` to `result` + n >>= shiftamt; // Shift `n` to the right by `shiftamt` bits + } + + return result; +} + +// Function to generate bit-reversed sequence +void get_bit_reversed_seq(uint16_t *seq, uint16_t N, uint8_t num_bits, uint8_t base) { + for (uint16_t i = 0; i < N; i++) { + seq[i] = bit_reversal(i, num_bits, base); + } +} + +void compute_twiddle_factors_radix2(int16_t* twiddle_factors, uint32_t N, uint8_t decimal_bits) { + for (uint16_t j = 0; j < N / 2; j++) { + float angle = -2.0 * PI * j / N; + twiddle_factors[2 * j] = (int16_t)(cos(angle) * (1 << decimal_bits)); // Real part in Q1.8 format + twiddle_factors[2 * j + 1] = (int16_t)(sin(angle) * (1 << decimal_bits)); // Imaginary part in Q1.8 format + } +} + +void compute_twiddle_factors_radix4(int16_t *twiddle_factors, uint16_t N, int16_t decimal_bits) { + // Twiddle factor for radix-4 FFT + // N is the FFT size, which must be a power of 4 + // twiddle_factors is the output array where real and imaginary parts are interleaved + uint16_t twiddle_count = N / 4; // Radix-4 FFT needs twiddles for N/4 size + + for (uint16_t k = 0; k < twiddle_count; k++) { + // Compute the real and imaginary parts of W_N^k = e^(-2*pi*i*k/N) + double angle = -2.0 * PI * k / N; + double real_part = cos(angle); + double imag_part = sin(angle); + + // Convert to fixed-point with the specified number of decimal bits + int16_t real_fixed = (int16_t)(real_part * (1 << decimal_bits)); + int16_t imag_fixed = (int16_t)(imag_part * (1 << decimal_bits)); + + // Store the values in the interleaved array + twiddle_factors[2 * k] = real_fixed; // Real part + twiddle_factors[2 * k + 1] = 
imag_fixed; // Imaginary part + } +} + +void __attribute__((noinline, aligned(4))) iterative_FFT_radix2(int16_t *x, int16_t *X, uint16_t N, int16_t *twiddle_factors, int16_t decimal_bits, + int16_t *w_real_fixed, int16_t *w_imag_fixed, int16_t *xrev, int16_t *bit_reversed_seq) { + + // Perform bit reversal on the input array + uint8_t num_bits = log_floor(N, 2); + + for (uint16_t i = 0; i < N; i++) { + // Access real and imaginary parts independently + xrev[2 * i] = x[2 * bit_reversed_seq[i]]; // Real part + xrev[2 * i + 1] = x[2 * bit_reversed_seq[i] + 1]; // Imaginary part + } + + // Stage 0 unrolled: simple additions and subtractions + for (uint16_t i = 0; i < N; i += 2) { + // Access real and imaginary parts for two points independently + int16_t a_real = xrev[2 * i]; + int16_t a_imag = xrev[2 * i + 1]; + int16_t b_real = xrev[2 * (i + 1)]; + int16_t b_imag = xrev[2 * (i + 1) + 1]; + + // No twiddle factor for the first stage (equivalent to multiplying by 1 + 0j) + xrev[2 * i] = a_real + b_real; + xrev[2 * i + 1] = a_imag + b_imag; + xrev[2 * (i + 1)] = a_real - b_real; + xrev[2 * (i + 1) + 1] = a_imag - b_imag; + } + + // FFT processing for remaining stages + uint16_t stage_count = num_bits; + uint16_t twiddle_step = N/2; + + for (uint16_t stage = 1, step = 4; stage < stage_count; stage++, step *= 2) { + uint16_t halfstep = step / 2; + twiddle_step /= 2; + + for (uint16_t j = 0; j < halfstep; j++) { + // Access precomputed twiddle factors from the interleaved array + w_real_fixed[j] = twiddle_factors[2 * (j * twiddle_step)]; + w_imag_fixed[j] = twiddle_factors[2 * (j * twiddle_step) + 1]; + } + + // Modify the outer loop to increment from 0 to N, increasing by step + for (uint16_t i = 0; i < N; i += step) { + // Calculate the starting index for this iteration + uint16_t idx = i; + for (uint16_t j = 0; j < halfstep; j++) { + // Butterfly operations - Access real and imag parts independently + int16_t a_real = xrev[2 * (idx + j)]; + int16_t a_imag = xrev[2 * 
(idx + j) + 1]; + int16_t b_real = xrev[2 * (idx + j + halfstep)]; + int16_t b_imag = xrev[2 * (idx + j + halfstep) + 1]; + + // Complex multiplication (b * w) using 32-bit intermediate values + int32_t temp_real = ((int32_t)b_real * w_real_fixed[j] - (int32_t)b_imag * w_imag_fixed[j]) >> decimal_bits; + int32_t temp_imag = ((int32_t)b_real * w_imag_fixed[j] + (int32_t)b_imag * w_real_fixed[j]) >> decimal_bits; + + // Store the results directly in the array, independently for real and imag parts + xrev[2 * (idx + j)] = a_real + temp_real; + xrev[2 * (idx + j) + 1] = a_imag + temp_imag; + xrev[2 * (idx + j + halfstep)] = a_real - temp_real; + xrev[2 * (idx + j + halfstep) + 1] = a_imag - temp_imag; + } + } + } + + // Copy the result to output array X + for (uint16_t i = 0; i < 2 * N; i++) { + X[i] = xrev[i]; + } +} + +// Radix-4 FFT +void __attribute__((noinline, aligned(4))) iterative_FFT_radix4(int16_t *x, int16_t *X, uint16_t N, int16_t *twiddle_factors, int16_t *w_real_fixed, int16_t * w_imag_fixed, int16_t *xrev, int8_t decimal_bits, int16_t * bit_reversed_seq) { + + uint16_t stage_count = log_floor(N, 4); // Radix-4 halves the number of stages + uint16_t twiddle_step = N; + + for (uint16_t i = 0; i < N; i++) { + xrev[2 * i] = x[2 * bit_reversed_seq[i]]; // Real part + xrev[2 * i + 1] = x[2 * bit_reversed_seq[i] + 1]; // Imaginary part + } + + // Unroll the first stage: radix-4 butterfly (4 points per butterfly) + uint16_t quarterstep = 1; + for (uint16_t i = 0; i < N; i += 4) { + uint16_t idx = i; + // Load the real and imaginary parts for the 4 input points + int16_t a_real = xrev[2 * idx]; + int16_t a_imag = xrev[2 * idx + 1]; + int16_t b_real = xrev[2 * (idx + quarterstep)]; + int16_t b_imag = xrev[2 * (idx + quarterstep) + 1]; + int16_t c_real = xrev[2 * (idx + 2 * quarterstep)]; + int16_t c_imag = xrev[2 * (idx + 2 * quarterstep) + 1]; + int16_t d_real = xrev[2 * (idx + 3 * quarterstep)]; + int16_t d_imag = xrev[2 * (idx + 3 * quarterstep) + 1]; + + // 
Radix-4 butterfly calculations (without twiddle factors) + int16_t t0_real = a_real + c_real; + int16_t t0_imag = a_imag + c_imag; + int16_t t1_real = b_real + d_real; + int16_t t1_imag = b_imag + d_imag; + + int16_t t2_real = a_real - c_real; + int16_t t2_imag = a_imag - c_imag; + int16_t t3_real = b_real - d_real; + int16_t t3_imag = b_imag - d_imag; + + // Output without twiddle factors + xrev[2 * idx] = t0_real + t1_real; // Result 1 real part + xrev[2 * idx + 1] = t0_imag + t1_imag; // Result 1 imag part + + xrev[2 * (idx + quarterstep)] = t2_real - t3_imag; // Result 2 real part + xrev[2 * (idx + quarterstep) + 1] = t2_imag + t3_real; // Result 2 imag part + + xrev[2 * (idx + 2 * quarterstep)] = t0_real - t1_real; // Result 3 real part + xrev[2 * (idx + 2 * quarterstep) + 1] = t0_imag - t1_imag; // Result 3 imag part + + xrev[2 * (idx + 3 * quarterstep)] = t2_real + t3_imag; // Result 4 real part + xrev[2 * (idx + 3 * quarterstep) + 1] = t2_imag - t3_real; // Result 4 imag part + } + + // Perform the remaining stages + for (uint16_t stage = 1, step = 16; stage < stage_count; stage++, step *= 4) { + uint16_t quarterstep = step / 4; + twiddle_step /= 4; + + // Precompute the twiddle factors for this stage + for (uint16_t j = 0; j < quarterstep; j++) { + w_real_fixed[j] = twiddle_factors[2 * (j * twiddle_step)]; + w_imag_fixed[j] = twiddle_factors[2 * (j * twiddle_step) + 1]; + } + + // Loop over groups of 4 points per butterfly + for (uint16_t i = 0; i < N; i += step) { + uint16_t idx = i; + + // Perform load, butterfly, and store operations in a single loop + for (uint16_t j = 0; j < quarterstep; j++) { + // Load the data for the 4 points + int16_t a_real = xrev[2 * (idx + j)]; + int16_t a_imag = xrev[2 * (idx + j) + 1]; + int16_t b_real = xrev[2 * (idx + j + quarterstep)]; + int16_t b_imag = xrev[2 * (idx + j + quarterstep) + 1]; + int16_t c_real = xrev[2 * (idx + j + 2 * quarterstep)]; + int16_t c_imag = xrev[2 * (idx + j + 2 * quarterstep) + 1]; + int16_t 
d_real = xrev[2 * (idx + j + 3 * quarterstep)]; + int16_t d_imag = xrev[2 * (idx + j + 3 * quarterstep) + 1]; + + // Apply twiddle factors to b + int16_t tw_b_real = ((int32_t)b_real * w_real_fixed[j] - (int32_t)b_imag * w_imag_fixed[j]) >> decimal_bits; + int16_t tw_b_imag = ((int32_t)b_real * w_imag_fixed[j] + (int32_t)b_imag * w_real_fixed[j]) >> decimal_bits; + + int16_t res1_real = a_real + tw_b_real; // a + Tw(b) + int16_t res1_imag = a_imag + tw_b_imag; + + int16_t res2_real = a_real - tw_b_real; // a - Tw(b) + int16_t res2_imag = a_imag - tw_b_imag; + + // Apply twiddle factors to c + int16_t tw_c_real = ((int32_t)c_real * w_real_fixed[2 * j] - (int32_t)c_imag * w_imag_fixed[2 * j]) >> decimal_bits; + int16_t tw_c_imag = ((int32_t)c_real * w_imag_fixed[2 * j] + (int32_t)c_imag * w_real_fixed[2 * j]) >> decimal_bits; + + int16_t res3_real = a_real + tw_c_real; // a + Tw(c) + int16_t res3_imag = a_imag + tw_c_imag; + + int16_t res4_real = a_real - tw_c_real; // a - Tw(c) + int16_t res4_imag = a_imag - tw_c_imag; + + // Apply twiddle factors to d + int16_t tw_d_real = ((int32_t)d_real * w_real_fixed[3 * j] - (int32_t)d_imag * w_imag_fixed[3 * j]) >> decimal_bits; + int16_t tw_d_imag = ((int32_t)d_real * w_imag_fixed[3 * j] + (int32_t)d_imag * w_real_fixed[3 * j]) >> decimal_bits; + + // Store the results + xrev[2 * (idx + j)] = res1_real; + xrev[2 * (idx + j) + 1] = res1_imag; + xrev[2 * (idx + j + quarterstep)] = res2_real; + xrev[2 * (idx + j + quarterstep) + 1] = res2_imag; + xrev[2 * (idx + j + 2 * quarterstep)] = res3_real; + xrev[2 * (idx + j + 2 * quarterstep) + 1] = res3_imag; + xrev[2 * (idx + j + 3 * quarterstep)] = res4_real; + xrev[2 * (idx + j + 3 * quarterstep) + 1] = res4_imag; + } + } + } + + // Copy the result to output array X + for (uint16_t i = 0; i < 2 * N; i++) { + X[i] = xrev[i]; + } +} + +#define FIXED_POINT_SCALE (1 << DECIMAL_BITS) + +// Helper function to print the FFT result +void print_complex_array(int16_t *array, uint16_t N) { + 
for (uint16_t i = 0; i < N; i++) { + // Convert fixed-point to decimal by dividing by FIXED_POINT_SCALE + int16_t real_part = array[2 * i]; + int16_t imag_part = array[2 * i + 1]; + + // Calculate integer and fractional parts + int16_t real_int = real_part / FIXED_POINT_SCALE; + int16_t imag_int = imag_part / FIXED_POINT_SCALE; + + // Fractional part needs to be scaled and displayed with leading zeros if necessary + int16_t real_frac = (real_part % FIXED_POINT_SCALE) * 1000 / FIXED_POINT_SCALE; + int16_t imag_frac = (imag_part % FIXED_POINT_SCALE) * 1000 / FIXED_POINT_SCALE; + + // Adjust the fractional part for negative values, and ensure fractional part is positive + if (real_part < 0 && real_frac > 0) { + real_frac = 1000 - real_frac; + } + if (imag_part < 0 && imag_frac > 0) { + imag_frac = 1000 - imag_frac; + } + + // Print the complex number in the correct format + // Use separate handling for real and imaginary signs + printf("X[%d] = %c%d.%03d %c %d.%03dj\n", + i, + (real_part < 0) ? '-' : '+', abs(real_int), abs(real_frac), // Real part + (imag_part < 0) ? '-' : '+', abs(imag_int), abs(imag_frac)); // Imaginary part + } +} + +#endif // FFT_H \ No newline at end of file diff --git a/sw/applications/example_fft/main.c b/sw/applications/example_fft/main.c new file mode 100644 index 000000000..2f5ebb2ab --- /dev/null +++ b/sw/applications/example_fft/main.c @@ -0,0 +1,112 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// File: main.c +// Author: Francesco Poluzzi +// Date: 04/09/2024 +// Description: FFT example for X-Heep +// Functions for the FFT computation are in fft.h +// Parameters such as FFT_LEN and DECIMAL_BITS can be changed in data.h. To also automate the input and +// golden model generation with different parameters, the script datagen.py can be used.
+ +#include +#include +#include + +#include "data.h" +#include "x-heep.h" +#include "timer_sdk.h" +#include "fft.h" + +/* By default, PRINTs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 1 + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) +#endif + +// Tolerance for the comparison of the results in fixed point (needs to be adjusted based on the number of decimal bits). +// The error is due to shifts and roundings in the fixed-point computation. +#define TOLERANCE 0x000000f + +int16_t __attribute__((aligned(4))) R_radix_2[2 * FFT_LEN] ; +int16_t __attribute__((aligned(4))) R_radix_4[2 * FFT_LEN] ; +int16_t __attribute__((aligned(4))) twiddle_factors_radix2[FFT_LEN]; +int16_t __attribute__((aligned(4))) twiddle_factors_radix4[FFT_LEN]; +int16_t __attribute__((aligned(4))) w_real_fixed[FFT_LEN / 2]; +int16_t __attribute__((aligned(4))) w_imag_fixed[FFT_LEN / 2]; +int32_t __attribute__((aligned(4))) xrev_32[ FFT_LEN]; +int16_t __attribute__((aligned(4))) xrev[2* FFT_LEN]; +int16_t __attribute__((aligned(4))) bit_reversed_seq_radix2[FFT_LEN]; +int16_t __attribute__((aligned(4))) bit_reversed_seq_radix4[FFT_LEN]; + +int main(void) +{ + uint32_t radix2_cycles, radix4_cycles; + + if(!is_power_of(FFT_LEN, 2)){ + PRINTF("FFT_LEN must be a power of 2, FFT radix 2 cannot be performed.\n"); + return EXIT_FAILURE; + } + + PRINTF("Starting radix 2 FFT\n"); + + // precompute twiddle factors for radix-2 FFT + compute_twiddle_factors_radix2(twiddle_factors_radix2, FFT_LEN, DECIMAL_BITS); + + // precompute bit reversed sequence + get_bit_reversed_seq(bit_reversed_seq_radix2, FFT_LEN, log_floor(FFT_LEN, 2), 2); + + timer_cycles_init(); + timer_start(); + + iterative_FFT_radix2(A, R_radix_2, FFT_LEN, twiddle_factors_radix2, DECIMAL_BITS, w_real_fixed, w_imag_fixed, xrev, 
bit_reversed_seq_radix2); + + radix2_cycles = timer_stop(); + + for(int i = 0; i < 2 * FFT_LEN; i++){ + if(abs(R_radix_2[i] - R[i]) > TOLERANCE){ // fail when |computed - golden| exceeds TOLERANCE + PRINTF("Error: R_gold[%d] = %x, R_radix_2[%d] = %x\n", i, R[i], i, R_radix_2[i]); + return 1; + } + } + + PRINTF("Radix-2 FFT took %d cycles\n", radix2_cycles); + + if(!is_power_of(FFT_LEN, 4)){ + PRINTF("FFT_LEN must be a power of 4, FFT radix 4 cannot be performed.\n"); + return EXIT_FAILURE; + } + + PRINTF("Starting radix 4 FFT\n"); + + // precompute twiddle factors for radix-4 FFT + compute_twiddle_factors_radix4(twiddle_factors_radix4, FFT_LEN, DECIMAL_BITS); + + // precompute bit reversed sequence + get_bit_reversed_seq(bit_reversed_seq_radix4, FFT_LEN, log_floor(FFT_LEN, 4), 4); + + timer_cycles_init(); + timer_start(); + + iterative_FFT_radix4(A, R_radix_4, FFT_LEN, twiddle_factors_radix4, w_real_fixed, w_imag_fixed, xrev, DECIMAL_BITS, bit_reversed_seq_radix4); + + radix4_cycles = timer_stop(); + + for(int i = 0; i < 2 * FFT_LEN; i++){ + if(abs(R_radix_4[i] - R[i]) > TOLERANCE){ // validate the radix-4 output (not radix-2) against the golden result + PRINTF("Error: R_gold[%d] = %x, R_radix_4[%d] = %x\n", i, R[i], i, R_radix_4[i]); + return 1; + } + } + + PRINTF("Radix-4 FFT took %d cycles\n", radix4_cycles); + + return EXIT_SUCCESS; +} diff --git a/sw/applications/example_timer_sdk/main.c b/sw/applications/example_timer_sdk/main.c index 8e6c6c2c5..cb5bb5ae6 100644 --- a/sw/applications/example_timer_sdk/main.c +++ b/sw/applications/example_timer_sdk/main.c @@ -3,20 +3,27 @@ // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // // File: example_timer_sdk.c -// Author: Juan Sapriza -// Date: 15/07/2024 +// Author: Juan Sapriza, Francesco Poluzzi +// Date: 23/07/2024 // Description: Example application to test the Timer SDK. Will count the time to execute a few short tasks. #include #include +#include #include "core_v_mini_mcu.h" #include "timer_sdk.h" #include "x-heep.h" +#include "soc_ctrl.h" /* By default, printfs are activated for FPGA and disabled for simulation.
*/ #define PRINTF_IN_FPGA 1 #define PRINTF_IN_SIM 0 +/* Error tolerances for the tests. */ +#define CYCLE_TOLERANCE 2 // cycles tolerance for simple timer reads +#define INTERRUPT_TOLERANCE 70 // cycles tolerance for timer interrupt +#define TIMER_WAIT_TOLERANCE 20 // milliseconds tolerance for timer wait + #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #elif PRINTF_IN_FPGA && !TARGET_SIM @@ -25,32 +32,95 @@ #define PRINTF(...) #endif +void __attribute__((aligned(4), interrupt)) handler_irq_timer(void) { + timer_arm_stop(); + timer_irq_clear(); + return; +} + int main(){ - uint8_t i = 0; - uint32_t cpu_cycles; + uint32_t i = 0; + uint32_t timer_cycles; + uint32_t nop_cycles[4]; + + // Get current Frequency + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + uint32_t freq_hz = soc_ctrl_get_frequency(&soc_ctrl); - timer_init(); // Init the timer SDK + timer_cycles_init(); // Init the timer SDK for clock cycles timer_start(); // Start counting the time - cpu_cycles = timer_stop(); // Stop counting the time - PRINTF("0 NOPs:\t%d cc\n\r", cpu_cycles ); - - timer_start(); + nop_cycles[0] = timer_stop(); // Stop counting the time + PRINTF("0 NOPs:\t%d cc\n\r", nop_cycles[0] ); + + timer_start(); asm volatile ("nop"); - cpu_cycles = timer_stop(); - PRINTF("1 NOP:\t%d cc\n\r", cpu_cycles ); + nop_cycles[1] = timer_stop(); + PRINTF("1 NOP:\t%d cc\n\r", nop_cycles[1] ); timer_start(); asm volatile ("nop"); asm volatile ("nop"); - cpu_cycles = timer_stop(); - PRINTF("2 NOPs:\t%d cc\n\r", cpu_cycles ); + nop_cycles[2] = timer_stop(); + PRINTF("2 NOPs:\t%d cc\n\r", nop_cycles[2] ); timer_start(); asm volatile ("nop"); asm volatile ("nop"); asm volatile ("nop"); - cpu_cycles = timer_stop(); - PRINTF("3 NOPs:\t%d cc\n\r", cpu_cycles ); + nop_cycles[3] = timer_stop(); + PRINTF("3 NOPs:\t%d cc\n\r", nop_cycles[3] ); + + if( abs(nop_cycles[1] - nop_cycles[0])>CYCLE_TOLERANCE || 
abs(nop_cycles[2] - nop_cycles[1])>CYCLE_TOLERANCE || abs(nop_cycles[3] - nop_cycles[2])>CYCLE_TOLERANCE){ + PRINTF("Clock count failed\n\r"); + return EXIT_FAILURE; + } + + enable_timer_interrupt(); // Enable the timer machine-level interrupt + + timer_cycles_init(); + timer_irq_enable(); + timer_arm_start(1000); + asm volatile ("wfi"); // Wait for interrupt + timer_cycles = timer_stop(); + if(abs(timer_cycles-1000) < INTERRUPT_TOLERANCE){ + PRINTF("Timer threshold interrupt working\n" ); + } else { + PRINTF("Timer threshold interrupt failed\n\r"); + return EXIT_FAILURE; + } + timer_cycles_init(); // Init the timer SDK for microseconds + timer_start(); + for(i = 0; i < 1000; i++){ + asm volatile ("nop"); + } + timer_cycles = timer_stop(); + PRINTF("Microseconds for 1000 NOPs:\t%d μs\n\r", (uint32_t)get_time_from_cycles(timer_cycles) ); + + #ifdef TARGET_IS_FPGA + PRINTF("Wait 5 second\n\r"); + timer_wait_us(5000000); // Wait for 5 seconds + timer_cycles = timer_stop(); + PRINTF("Done\n\r"); + + if(abs(timer_cycles-(5*freq_hz)) > TIMER_WAIT_TOLERANCE){ + PRINTF("Timer wait failed\n\r"); + return EXIT_FAILURE; + } + #endif + #ifdef TARGET_SIM // Reduced time for simulation for faster testing + PRINTF("Wait 0.001 second\n\r"); + timer_wait_us(1000); // Wait for 1 millisecond + timer_cycles = timer_stop(); + PRINTF("Done\n\r"); + + if(abs(timer_cycles-(0.001*freq_hz)) > TIMER_WAIT_TOLERANCE){ + PRINTF("Timer wait failed\n\r"); + return EXIT_FAILURE; + } + #endif + + PRINTF("All tests passed\n\r"); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/sw/device/lib/base/base.h b/sw/device/lib/base/base.h index e0582e74a..0d56792bd 100644 --- a/sw/device/lib/base/base.h +++ b/sw/device/lib/base/base.h @@ -106,7 +106,7 @@ typedef enum dif_toggle { // * @param val A potential dif_toggle_t value. // * @return Bool indicating validity of toggle value. 
// */ -// inline bool dif_is_valid_toggle(dif_toggle_t val) { +// static inline bool dif_is_valid_toggle(dif_toggle_t val) { // switch (val) { // case kDifToggleEnabled: // return true; @@ -123,7 +123,7 @@ typedef enum dif_toggle { // * @param val A dif_toggle_t value. // * @return Corresponding bool value. // */ -// inline bool dif_toggle_to_bool(dif_toggle_t val) { +// static inline bool dif_toggle_to_bool(dif_toggle_t val) { // switch (val) { // case kDifToggleEnabled: // return true; @@ -140,7 +140,7 @@ typedef enum dif_toggle { // * @param val A bool value. // * @return Corresponding dif_toggle_t value. // */ -// inline dif_toggle_t dif_bool_to_toggle(bool val) { +// static inline dif_toggle_t dif_bool_to_toggle(bool val) { // return val ? kDifToggleEnabled : kDifToggleDisabled; // } @@ -150,7 +150,7 @@ typedef enum dif_toggle { // * @param val A multi-bit bool value. // * @return Corresponding dif_toggle_t value. // */ -// inline dif_toggle_t dif_multi_bit_bool_to_toggle(multi_bit_bool_t val) { +// static inline dif_toggle_t dif_multi_bit_bool_to_toggle(multi_bit_bool_t val) { // switch (val) { // case kMultiBitBool4True: // case kMultiBitBool8True: @@ -169,7 +169,7 @@ typedef enum dif_toggle { // * @return Corresponding `multi_bit_bool_t` value. Invalid values resolve to // * "false". // */ -// inline multi_bit_bool_t dif_toggle_to_multi_bit_bool4(dif_toggle_t val) { +// static inline multi_bit_bool_t dif_toggle_to_multi_bit_bool4(dif_toggle_t val) { // if (val == kDifToggleEnabled) { // return kMultiBitBool4True; // } else { @@ -184,7 +184,7 @@ typedef enum dif_toggle { // * @return Corresponding `multi_bit_bool_t` value. Invalid values resolve to // * "false". 
// */ -// inline multi_bit_bool_t dif_toggle_to_multi_bit_bool8(dif_toggle_t val) { +// static inline multi_bit_bool_t dif_toggle_to_multi_bit_bool8(dif_toggle_t val) { // if (val == kDifToggleEnabled) { // return kMultiBitBool8True; // } else { @@ -199,7 +199,7 @@ typedef enum dif_toggle { // * @return Corresponding `multi_bit_bool_t` value. Invalid values resolve to // * "false". // */ -// inline multi_bit_bool_t dif_toggle_to_multi_bit_bool12(dif_toggle_t val) { +// static inline multi_bit_bool_t dif_toggle_to_multi_bit_bool12(dif_toggle_t val) { // if (val == kDifToggleEnabled) { // return kMultiBitBool12True; // } else { @@ -214,7 +214,7 @@ typedef enum dif_toggle { // * @return Corresponding `multi_bit_bool_t` value. Invalid values resolve to // * "false". // */ -// inline multi_bit_bool_t dif_toggle_to_multi_bit_bool16(dif_toggle_t val) { +// static inline multi_bit_bool_t dif_toggle_to_multi_bit_bool16(dif_toggle_t val) { // if (val == kDifToggleEnabled) { // return kMultiBitBool16True; // } else { diff --git a/sw/device/lib/base/bitfield.c b/sw/device/lib/base/bitfield.c deleted file mode 100644 index b2562c0d5..000000000 --- a/sw/device/lib/base/bitfield.c +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright lowRISC contributors. -// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 - -#include "bitfield.h" - -// `extern` declarations to give the inline functions in the -// corresponding header a link location. 
- -extern uint32_t bitfield_field32_read(uint32_t bitfield, - bitfield_field32_t field); -extern uint32_t bitfield_field32_write(uint32_t bitfield, - bitfield_field32_t field, - uint32_t value); - -extern bitfield_field32_t bitfield_bit32_to_field32( - bitfield_bit32_index_t bit_index); - -extern bool bitfield_bit32_read(uint32_t bitfield, - bitfield_bit32_index_t bit_index); -extern uint32_t bitfield_bit32_write(uint32_t bitfield, - bitfield_bit32_index_t bit_index, - bool value); - -extern int32_t bitfield_find_first_set32(int32_t bitfield); -extern int32_t bitfield_count_leading_zeroes32(uint32_t bitfield); -extern int32_t bitfield_count_trailing_zeroes32(uint32_t bitfield); -extern int32_t bitfield_popcount32(uint32_t bitfield); -extern int32_t bitfield_parity32(uint32_t bitfield); -extern uint32_t bitfield_byteswap32(uint32_t bitfield); diff --git a/sw/device/lib/base/bitfield.h b/sw/device/lib/base/bitfield.h index e2b81b838..19377e35f 100644 --- a/sw/device/lib/base/bitfield.h +++ b/sw/device/lib/base/bitfield.h @@ -74,7 +74,7 @@ typedef struct bitfield_field32 { * @return Zero-extended `field` from `bitfield`. */ BITFIELD_WARN_UNUSED_RESULT -inline uint32_t bitfield_field32_read(uint32_t bitfield, +static inline uint32_t bitfield_field32_read(uint32_t bitfield, bitfield_field32_t field) { return (bitfield >> field.index) & field.mask; } @@ -92,7 +92,7 @@ inline uint32_t bitfield_field32_read(uint32_t bitfield, * @return `bitfield` with `field` set to `value`. */ BITFIELD_WARN_UNUSED_RESULT -inline uint32_t bitfield_field32_write(uint32_t bitfield, +static inline uint32_t bitfield_field32_write(uint32_t bitfield, bitfield_field32_t field, uint32_t value) { bitfield &= ~(field.mask << field.index); @@ -123,7 +123,7 @@ typedef uint32_t bitfield_bit32_index_t; * @return A 1-bit field that corresponds to `bit_index`. 
*/ BITFIELD_WARN_UNUSED_RESULT -inline bitfield_field32_t bitfield_bit32_to_field32( +static inline bitfield_field32_t bitfield_bit32_to_field32( bitfield_bit32_index_t bit_index) { return (bitfield_field32_t){ .mask = 0x1, .index = bit_index, @@ -138,7 +138,7 @@ inline bitfield_field32_t bitfield_bit32_to_field32( * @return `true` if the bit was one, `false` otherwise. */ BITFIELD_WARN_UNUSED_RESULT -inline bool bitfield_bit32_read(uint32_t bitfield, +static inline bool bitfield_bit32_read(uint32_t bitfield, bitfield_bit32_index_t bit_index) { return bitfield_field32_read(bitfield, bitfield_bit32_to_field32(bit_index)) == 0x1u; @@ -153,7 +153,7 @@ inline bool bitfield_bit32_read(uint32_t bitfield, * @return `bitfield` with the `bit_index`th bit set to `value`. */ BITFIELD_WARN_UNUSED_RESULT -inline uint32_t bitfield_bit32_write(uint32_t bitfield, +static inline uint32_t bitfield_bit32_write(uint32_t bitfield, bitfield_bit32_index_t bit_index, bool value) { return bitfield_field32_write(bitfield, bitfield_bit32_to_field32(bit_index), @@ -174,7 +174,7 @@ inline uint32_t bitfield_bit32_write(uint32_t bitfield, * @return Zero-extended `field` from `bitfield`. */ BITFIELD_WARN_UNUSED_RESULT -inline uint32_t bitfield_read(uint32_t bitfield, +static inline uint32_t bitfield_read(uint32_t bitfield, uint32_t mask, uint32_t index) { @@ -195,7 +195,7 @@ inline uint32_t bitfield_read(uint32_t bitfield, * @return `bitfield` with `field` set to `value`. */ BITFIELD_WARN_UNUSED_RESULT -inline uint32_t bitfield_write(uint32_t bitfield, +static inline uint32_t bitfield_write(uint32_t bitfield, uint32_t mask, uint32_t index, uint32_t value) @@ -223,7 +223,7 @@ inline uint32_t bitfield_write(uint32_t bitfield, * @return One plus the index of the least-significant 1-bit of `bitfield`. 
*/ BITFIELD_WARN_UNUSED_RESULT -inline int32_t bitfield_find_first_set32(int32_t bitfield) { +static inline int32_t bitfield_find_first_set32(int32_t bitfield) { return __builtin_ffs(bitfield); } @@ -247,7 +247,7 @@ inline int32_t bitfield_find_first_set32(int32_t bitfield) { * @return The number of leading 0-bits in `bitfield`. */ BITFIELD_WARN_UNUSED_RESULT -inline int32_t bitfield_count_leading_zeroes32(uint32_t bitfield) { +static inline int32_t bitfield_count_leading_zeroes32(uint32_t bitfield) { return (bitfield != 0) ? __builtin_clz(bitfield) : 32; } @@ -271,7 +271,7 @@ inline int32_t bitfield_count_leading_zeroes32(uint32_t bitfield) { * @return The number of trailing 0-bits in `bitfield`. */ BITFIELD_WARN_UNUSED_RESULT -inline int32_t bitfield_count_trailing_zeroes32(uint32_t bitfield) { +static inline int32_t bitfield_count_trailing_zeroes32(uint32_t bitfield) { return (bitfield != 0) ? __builtin_ctz(bitfield) : 32; } @@ -293,7 +293,7 @@ inline int32_t bitfield_count_trailing_zeroes32(uint32_t bitfield) { * @return The number of 1-bits in `bitfield`. */ BITFIELD_WARN_UNUSED_RESULT -inline int32_t bitfield_popcount32(uint32_t bitfield) { +static inline int32_t bitfield_popcount32(uint32_t bitfield) { return __builtin_popcount(bitfield); } @@ -315,7 +315,7 @@ inline int32_t bitfield_popcount32(uint32_t bitfield) { * @return The number of 1-bits in `bitfield`, modulo 2. */ BITFIELD_WARN_UNUSED_RESULT -inline int32_t bitfield_parity32(uint32_t bitfield) { +static inline int32_t bitfield_parity32(uint32_t bitfield) { return __builtin_parity(bitfield); } @@ -338,7 +338,7 @@ inline int32_t bitfield_parity32(uint32_t bitfield) { * @return `bitfield` with the order of bytes reversed. 
*/ BITFIELD_WARN_UNUSED_RESULT -inline uint32_t bitfield_byteswap32(uint32_t bitfield) { +static inline uint32_t bitfield_byteswap32(uint32_t bitfield) { return __builtin_bswap32(bitfield); } diff --git a/sw/device/lib/base/memory.c b/sw/device/lib/base/memory.c index e0234bf1b..a534074e4 100644 --- a/sw/device/lib/base/memory.c +++ b/sw/device/lib/base/memory.c @@ -8,9 +8,6 @@ extern "C" { #include "memory.h" -extern uint32_t read_32(const void *); -extern void write_32(uint32_t, void *); - // Some symbols below are only defined for device builds. For host builds, we // their implementations will be provided by the host's libc implementation. // diff --git a/sw/device/lib/base/memory.h b/sw/device/lib/base/memory.h index 3ff64ff76..7f91de70e 100644 --- a/sw/device/lib/base/memory.h +++ b/sw/device/lib/base/memory.h @@ -55,7 +55,7 @@ extern "C" { * @param ptr a word-aligned pointer pointed to at least four bytes of memory. * @return the word `ptr` points to. */ -inline uint32_t read_32(const void *ptr) { +static inline uint32_t read_32(const void *ptr) { // Both GCC and Clang optimize the code below into a single word-load on most // platforms. It is necessary and sufficient to indicate to the compiler that // the pointer points to four bytes of four-byte-aligned memory. @@ -86,7 +86,7 @@ inline uint32_t read_32(const void *ptr) { * @param value the value to store. * @param ptr a word-aligned pointer pointed to at least four bytes of memory. */ -inline void write_32(uint32_t value, void *ptr) { +static inline void write_32(uint32_t value, void *ptr) { // Both GCC and Clang optimize the code below into a single word-store on most // platforms. See the comment in `read_32()` for more implementation-private // information. 
diff --git a/sw/device/lib/base/mmio.c b/sw/device/lib/base/mmio.c index dff7f78aa..be75927d9 100644 --- a/sw/device/lib/base/mmio.c +++ b/sw/device/lib/base/mmio.c @@ -125,45 +125,6 @@ void mmio_region_memcpy_to_mmio32(mmio_region_t base, uint32_t offset, mmio_region_memcpy32(base, offset, (void *)src, len, false); } -// `extern` declarations to give the inline functions in the -// corresponding header a link location. -extern uint8_t mmio_region_read8(mmio_region_t base, ptrdiff_t offset); -extern uint32_t mmio_region_read32(mmio_region_t base, ptrdiff_t offset); -extern void mmio_region_write8(mmio_region_t base, ptrdiff_t offset, - uint8_t value); -extern void mmio_region_write32(mmio_region_t base, ptrdiff_t offset, - uint32_t value); -extern uint32_t mmio_region_read_mask32(mmio_region_t base, ptrdiff_t offset, - uint32_t mask, uint32_t mask_index); -extern bool mmio_region_get_bit32(mmio_region_t base, ptrdiff_t offset, - uint32_t bit_index); -extern void mmio_region_nonatomic_clear_mask32(mmio_region_t base, - ptrdiff_t offset, uint32_t mask, - uint32_t mask_index); -extern void mmio_region_nonatomic_set_mask32(mmio_region_t base, - ptrdiff_t offset, uint32_t mask, - uint32_t mask_index); -extern void mmio_region_write_only_set_mask32(mmio_region_t base, - ptrdiff_t offset, uint32_t mask, - uint32_t mask_index); -extern void mmio_region_nonatomic_set_field32(mmio_region_t base, - ptrdiff_t offset, - bitfield_field32_t field, - uint32_t value); -extern void mmio_region_write_only_set_field32(mmio_region_t base, - ptrdiff_t offset, - bitfield_field32_t field, - uint32_t value); -extern void mmio_region_nonatomic_clear_bit32(mmio_region_t base, - ptrdiff_t offset, - uint32_t bit_index); -extern void mmio_region_nonatomic_set_bit32(mmio_region_t base, - ptrdiff_t offset, - uint32_t bit_index); -extern void mmio_region_write_only_set_bit32(mmio_region_t base, - ptrdiff_t offset, - uint32_t bit_index); - #ifdef __cplusplus } diff --git a/sw/device/lib/base/mmio.h 
b/sw/device/lib/base/mmio.h index d0b94a467..511737812 100644 --- a/sw/device/lib/base/mmio.h +++ b/sw/device/lib/base/mmio.h @@ -63,7 +63,7 @@ typedef struct mmio_region { volatile void *base; } mmio_region_t; * @return a `mmio_region_t` value representing that region. */ MMIO_WARN_UNUSED_RESULT -inline mmio_region_t mmio_region_from_addr(uintptr_t address) { +static inline mmio_region_t mmio_region_from_addr(uintptr_t address) { return (mmio_region_t){ .base = (volatile void *)address, }; @@ -81,7 +81,7 @@ inline mmio_region_t mmio_region_from_addr(uintptr_t address) { * @return the read value. */ MMIO_WARN_UNUSED_RESULT -inline uint8_t mmio_region_read8(mmio_region_t base, ptrdiff_t offset) { +static inline uint8_t mmio_region_read8(mmio_region_t base, ptrdiff_t offset) { return ((volatile uint8_t *)base.base)[offset / sizeof(uint8_t)]; } @@ -97,7 +97,7 @@ inline uint8_t mmio_region_read8(mmio_region_t base, ptrdiff_t offset) { * @return the read value. */ MMIO_WARN_UNUSED_RESULT -inline uint32_t mmio_region_read32(mmio_region_t base, ptrdiff_t offset) { +static inline uint32_t mmio_region_read32(mmio_region_t base, ptrdiff_t offset) { return ((volatile uint32_t *)base.base)[offset / sizeof(uint32_t)]; } @@ -112,7 +112,7 @@ inline uint32_t mmio_region_read32(mmio_region_t base, ptrdiff_t offset) { * @param offset the offset to write at, in bytes. * @param value the value to write. */ -inline void mmio_region_write8(mmio_region_t base, ptrdiff_t offset, +static inline void mmio_region_write8(mmio_region_t base, ptrdiff_t offset, uint8_t value) { ((volatile uint8_t *)base.base)[offset / sizeof(uint8_t)] = value; } @@ -128,7 +128,7 @@ inline void mmio_region_write8(mmio_region_t base, ptrdiff_t offset, * @param offset the offset to write at, in bytes. * @param value the value to write. 
*/ -inline void mmio_region_write32(mmio_region_t base, ptrdiff_t offset, +static inline void mmio_region_write32(mmio_region_t base, ptrdiff_t offset, uint32_t value) { ((volatile uint32_t *)base.base)[offset / sizeof(uint32_t)] = value; } @@ -171,7 +171,7 @@ void mmio_region_write32(mmio_region_t base, ptrdiff_t offset, uint32_t value); */ MMIO_WARN_UNUSED_RESULT MMIO_DEPRECATED -inline uint32_t mmio_region_read_mask32(mmio_region_t base, ptrdiff_t offset, +static inline uint32_t mmio_region_read_mask32(mmio_region_t base, ptrdiff_t offset, uint32_t mask, uint32_t mask_index) { return bitfield_field32_read( mmio_region_read32(base, offset), @@ -192,7 +192,7 @@ inline uint32_t mmio_region_read_mask32(mmio_region_t base, ptrdiff_t offset, */ MMIO_WARN_UNUSED_RESULT MMIO_DEPRECATED -inline bool mmio_region_get_bit32(mmio_region_t base, ptrdiff_t offset, +static inline bool mmio_region_get_bit32(mmio_region_t base, ptrdiff_t offset, uint32_t bit_index) { return bitfield_bit32_read(mmio_region_read32(base, offset), bit_index); } @@ -209,7 +209,7 @@ inline bool mmio_region_get_bit32(mmio_region_t base, ptrdiff_t offset, * @param mask_index mask position within the selected register. */ MMIO_DEPRECATED -inline void mmio_region_nonatomic_clear_mask32(mmio_region_t base, +static inline void mmio_region_nonatomic_clear_mask32(mmio_region_t base, ptrdiff_t offset, uint32_t mask, uint32_t mask_index) { uint32_t register_value = mmio_region_read32(base, offset); @@ -231,7 +231,7 @@ inline void mmio_region_nonatomic_clear_mask32(mmio_region_t base, * @param mask_index mask position within the selected register. 
*/ MMIO_DEPRECATED -inline void mmio_region_nonatomic_set_mask32(mmio_region_t base, +static inline void mmio_region_nonatomic_set_mask32(mmio_region_t base, ptrdiff_t offset, uint32_t mask, uint32_t mask_index) { uint32_t register_value = mmio_region_read32(base, offset); @@ -253,7 +253,7 @@ inline void mmio_region_nonatomic_set_mask32(mmio_region_t base, * @param mask_index mask position within the selected register. */ MMIO_DEPRECATED -inline void mmio_region_write_only_set_mask32(mmio_region_t base, +static inline void mmio_region_write_only_set_mask32(mmio_region_t base, ptrdiff_t offset, uint32_t mask, uint32_t mask_index) { uint32_t register_value = 0x0u; @@ -278,7 +278,7 @@ inline void mmio_region_write_only_set_mask32(mmio_region_t base, * @param value value to set the field to. */ MMIO_DEPRECATED -inline void mmio_region_nonatomic_set_field32(mmio_region_t base, +static inline void mmio_region_nonatomic_set_field32(mmio_region_t base, ptrdiff_t offset, bitfield_field32_t field, uint32_t value) { @@ -299,7 +299,7 @@ inline void mmio_region_nonatomic_set_field32(mmio_region_t base, * @param value value to set field to. */ MMIO_DEPRECATED -inline void mmio_region_write_only_set_field32(mmio_region_t base, +static inline void mmio_region_write_only_set_field32(mmio_region_t base, ptrdiff_t offset, bitfield_field32_t field, uint32_t value) { @@ -319,7 +319,7 @@ inline void mmio_region_write_only_set_field32(mmio_region_t base, * @param bit_index the bit to clear. */ MMIO_DEPRECATED -inline void mmio_region_nonatomic_clear_bit32(mmio_region_t base, +static inline void mmio_region_nonatomic_clear_bit32(mmio_region_t base, ptrdiff_t offset, uint32_t bit_index) { uint32_t register_value = mmio_region_read32(base, offset); @@ -337,7 +337,7 @@ inline void mmio_region_nonatomic_clear_bit32(mmio_region_t base, * @param bit_index the bit to set. 
*/ MMIO_DEPRECATED -inline void mmio_region_nonatomic_set_bit32(mmio_region_t base, +static inline void mmio_region_nonatomic_set_bit32(mmio_region_t base, ptrdiff_t offset, uint32_t bit_index) { uint32_t register_value = mmio_region_read32(base, offset); @@ -358,7 +358,7 @@ inline void mmio_region_nonatomic_set_bit32(mmio_region_t base, * @param bit_index the bit to set. */ MMIO_DEPRECATED -inline void mmio_region_write_only_set_bit32(mmio_region_t base, +static inline void mmio_region_write_only_set_bit32(mmio_region_t base, ptrdiff_t offset, uint32_t bit_index) { uint32_t register_value = 0x0u; diff --git a/sw/device/lib/drivers/dma/dma.h b/sw/device/lib/drivers/dma/dma.h index ad87434a8..d91f93ab2 100644 --- a/sw/device/lib/drivers/dma/dma.h +++ b/sw/device/lib/drivers/dma/dma.h @@ -421,7 +421,7 @@ __attribute__((optimize("O0"))) void fic_irq_dma(void); */ /* @ToDo: Consider changing the "mask" parameter for a bitfield definition (see dma_regs.h) */ -inline void write_register( uint32_t p_val, +static inline void write_register( uint32_t p_val, uint32_t p_offset, uint32_t p_mask, uint8_t p_sel, diff --git a/sw/device/lib/drivers/rv_timer/rv_timer.h b/sw/device/lib/drivers/rv_timer/rv_timer.h index 09f309452..cba449743 100644 --- a/sw/device/lib/drivers/rv_timer/rv_timer.h +++ b/sw/device/lib/drivers/rv_timer/rv_timer.h @@ -105,6 +105,12 @@ typedef enum rv_timer_approximate_tick_params_result { * @param[out] out Tick parameters that will approximately produce the desired * counter frequency. * @return The result of the operation. + * + * The minimum value for `counter_freq` is given by: + * counter_freq_min = (255/4096) * clock_freq + * For example, if the clock frequency is 15MHz, the minimum value for `counter_freq` is about + * 1 MHz. + * The maximum value for `counter_freq` is given by the clock frequency. 
*/ rv_timer_approximate_tick_params_result_t rv_timer_approximate_tick_params(uint64_t clock_freq, uint64_t counter_freq, diff --git a/sw/device/lib/runtime/hart.h b/sw/device/lib/runtime/hart.h index 9759edfb4..64e4d0902 100644 --- a/sw/device/lib/runtime/hart.h +++ b/sw/device/lib/runtime/hart.h @@ -27,7 +27,7 @@ extern "C" { * * This function may behave as if it is a no-op. */ -inline void wait_for_interrupt(void) { asm volatile("wfi"); } +static inline void wait_for_interrupt(void) { asm volatile("wfi"); } #ifdef __cplusplus diff --git a/sw/device/lib/sdk/timer/timer_sdk.c b/sw/device/lib/sdk/timer/timer_sdk.c index f6caf1b37..3e4e27195 100644 --- a/sw/device/lib/sdk/timer/timer_sdk.c +++ b/sw/device/lib/sdk/timer/timer_sdk.c @@ -3,22 +3,23 @@ // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // // File: timer_sdk.c -// Author: Michele Caon -// Date: 31/07/2023 +// Author: Michele Caon, Francesco Poluzzi +// Date: 23/07/2024 // Description: Timer functions #include #include "timer_sdk.h" #include "csr.h" +#include "soc_ctrl.h" + /******************************/ /* ---- GLOBAL VARIABLES ---- */ /******************************/ // Timer value -uint32_t timer_value = 0; -uint32_t hw_timer_value = 0; +int32_t hw_timer_value = 0; rv_timer_t timer; @@ -33,41 +34,109 @@ mmio_region_t timer_base = { rv_timer_tick_params_t tick_params; + /*************************************/ /* ---- FUNCTION IMPLEMENTATION ---- */ /*************************************/ -// Initialize the hardware timer -void hw_timer_init() -{ - // Initialize the timer - rv_timer_init(timer_base, timer_cfg, &timer); - rv_timer_approximate_tick_params(REFERENCE_CLOCK_Hz, REFERENCE_CLOCK_Hz, &tick_params); - rv_timer_set_tick_params(&timer, 0, tick_params); - rv_timer_counter_set_enabled(&timer, 0, kRvTimerEnabled); -} - -uint32_t hw_timer_get_cycles() +uint32_t timer_get_cycles() { uint64_t cycle_count; rv_timer_counter_read(&timer, 0, &cycle_count); return (uint32_t)cycle_count; } -void 
hw_timer_start() +void timer_irq_enable() +{ + rv_timer_irq_enable(&timer, 0, 0, kRvTimerEnabled); +} + +void timer_irq_clear() +{ + rv_timer_irq_clear(&timer, 0, 0); +} + +void timer_arm_start(uint32_t threshold) +{ + rv_timer_arm(&timer, 0, 0, threshold); + timer_start(); +} + +void timer_arm_stop() +{ + rv_timer_counter_set_enabled(&timer, 0, kRvTimerDisabled); +} + + +void timer_arm_set(uint32_t threshold) { - hw_timer_value = -hw_timer_get_cycles(); + rv_timer_arm(&timer, 0, 0, threshold); } -uint32_t hw_timer_stop() +void timer_start() { - hw_timer_value += hw_timer_get_cycles(); - return hw_timer_value; + hw_timer_value = -timer_get_cycles(); + rv_timer_counter_set_enabled(&timer, 0, kRvTimerEnabled); +} + +void timer_reset() +{ + hw_timer_value = 0; + rv_timer_reset(&timer); +} + +uint32_t timer_stop() +{ + hw_timer_value += timer_get_cycles(); + rv_timer_counter_set_enabled(&timer, 0, kRvTimerDisabled); + return hw_timer_value; } // Initialize the timer -void timer_init() +void timer_cycles_init() { - // Enable MCYCLE counter - CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + // Get current Frequency + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + uint32_t freq_hz = soc_ctrl_get_frequency(&soc_ctrl); + + // Initialize the timer + timer_reset(); + rv_timer_init(timer_base, timer_cfg, &timer); + rv_timer_approximate_tick_params(freq_hz, freq_hz, &tick_params); + rv_timer_set_tick_params(&timer, 0, tick_params); } + +// Wait for approximately `us` microseconds: arm the timer and sleep (wfi) until its interrupt +void timer_wait_us(uint32_t us) +{ + // Get current Frequency + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + uint32_t freq_hz = soc_ctrl_get_frequency(&soc_ctrl); + + timer_cycles_init(); + timer_irq_enable(); + timer_arm_start(us*(freq_hz/1000000)-50); // 50 cycles for taking into account initialization + asm volatile ("wfi"); + timer_irq_clear(); + return; +} + +void enable_timer_interrupt() +{ + //enable 
timer interrupt + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + // Set mie.MTIE bit (bit 7) to one to enable machine-level timer interrupts + const uint32_t mask = 1 << 7; + CSR_SET_BITS(CSR_REG_MIE, mask); +} + +float get_time_from_cycles(uint32_t cycles){ + // Get current Frequency + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + uint32_t freq_hz = soc_ctrl_get_frequency(&soc_ctrl); + + return (float)cycles/((float)freq_hz/1000000); +} \ No newline at end of file diff --git a/sw/device/lib/sdk/timer/timer_sdk.h b/sw/device/lib/sdk/timer/timer_sdk.h index 1f16df64f..154ef30bc 100644 --- a/sw/device/lib/sdk/timer/timer_sdk.h +++ b/sw/device/lib/sdk/timer/timer_sdk.h @@ -1,10 +1,11 @@ + // Copyright 2023 EPFL and Politecnico di Torino. // Solderpad Hardware License, Version 2.1, see LICENSE.md for details. // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // // File: timer_sdk.h -// Authors: Michele Caon, Luigi Giuffrida -// Date: 31/07/2023 +// Authors: Michele Caon, Luigi Giuffrida, Francesco Poluzzi +// Date: 23/07/2024 // Description: Execution time measurements utilities #ifndef TIMER_SDK_H_ @@ -19,15 +20,14 @@ #include "core_v_mini_mcu.h" #include "x-heep.h" -#define TICK_FREQ 1000000 +#define FREQ_1MHz 1000000 /******************************/ /* ---- GLOBAL VARIABLES ---- */ /******************************/ // Timer value -extern uint32_t timer_value; -extern uint32_t hw_timer_value; +extern int32_t hw_timer_value; extern rv_timer_t timer; @@ -36,65 +36,86 @@ extern rv_timer_t timer; /********************************/ /** - * @brief Initialize the hardware timer + * @brief Get the current value of the HW timer * +* @return uint32_t Current value of the HW timer (lower 32 bits of the counter) */ -void hw_timer_init(); +uint32_t timer_get_cycles(); /** - * @brief Get the current value of the HW timer + * @brief Start the timer * -* @return int64_t Current value of the HW timer */ -uint32_t hw_timer_get_cycles(); +void timer_start(); /** - * @brief 
Start the HW timer + * @brief Stop and reset the timer to 0 * */ -void hw_timer_start(); +void timer_reset(); /** * @brief Stop the HW timer * * @return int64_t Elapsed time in clock cycles */ -uint32_t hw_timer_stop(); - +uint32_t timer_stop(); /** - * @brief Initialize the timer + * @brief Initialize the timer for counting clock cycles + * (the counter is configured to tick at the system clock frequency) * */ -void timer_init(); +void timer_cycles_init(); /** - * @brief Get the current value of the MCYCLE CSR - * - * @return int64_t Current value of the MCYCLE CSR + * @brief Enable the timer IRQ */ -inline uint32_t timer_get_cycles() { - uint32_t cycle_count; - CSR_READ(CSR_REG_MCYCLE, &cycle_count); - return cycle_count; -} +void timer_irq_enable(); /** - * @brief Start the timer - * + * @brief Clear the timer IRQ */ -inline void timer_start() { - timer_value = -timer_get_cycles(); -} +void timer_irq_clear(); /** - * @brief Stop the timer + * @brief Arms the timer to go off once the counter value is greater than or equal to threshold + * and starts the timer + */ +void timer_arm_start(uint32_t threshold); + +/** + * @brief Stop the timer counter, so that the previously armed threshold no longer triggers + */ +void timer_arm_stop(); + +/** + * @brief Set the timer to go off once the counter value is greater than or equal to threshold, + * without starting the timer + */ +void timer_arm_set(uint32_t threshold); + +/** + * @brief Enable the timer machine-level interrupts for X-Heep + */ +void enable_timer_interrupt(); + +/** + * @brief Wait for a certain amount of microseconds. 
+ * You need to enable timer interrupts with enable_timer_interrupt() before using this function + */ +void timer_wait_us(uint32_t ms); + +/** + * @brief Enable the timer machine-level interrupts for X-Heep + */ +void enable_timer_interrupt(); + +/** + * @brief Get the time taken to execute a certain number of cycles * - * @return int64_t Elapsed time in clock cycles + * @return float value representing the time taken in microseconds */ -inline uint32_t timer_stop() { - timer_value += timer_get_cycles(); - return timer_value; -} +float get_time_from_cycles(uint32_t cycles); -#endif /* TIMER_SDK_H_ */ +#endif /* TIMER_SDK_H_ */ \ No newline at end of file diff --git a/util/mcu_gen.py b/util/mcu_gen.py index 0474187fe..07a93112d 100755 --- a/util/mcu_gen.py +++ b/util/mcu_gen.py @@ -209,7 +209,7 @@ def create_pad_ring_bonding(self): self.pad_ring_bonding_bonding += ' .' + self.signal_name + 'oe_i(' + oe_internal_signals + '),' self.x_heep_system_interface += ' inout wire ' + self.signal_name + 'io,' - def __init__(self, name, cell_name, pad_type, pad_mapping, index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, has_attribute, attribute_bits): + def __init__(self, name, cell_name, pad_type, pad_mapping, index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, has_attribute, attribute_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip): self.name = name self.cell_name = cell_name @@ -240,6 +240,12 @@ def __init__(self, name, cell_name, pad_type, pad_mapping, index, pad_active, pa self.is_driven_manually = pad_driven_manually self.do_skip_declaration = pad_skip_declaration + self.layout_index = pad_layout_index + self.layout_orient = pad_layout_orient + self.layout_cell = pad_layout_cell + self.layout_offset = pad_layout_offset + self.layout_skip = pad_layout_skip + if(len(pad_mux_list) == 0): self.signal_name_drive.append(self.signal_name) self.pad_type_drive.append(pad_type) @@ 
-629,6 +635,31 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_keep_internal = False + try: + pad_layout_orient = pads[key]['layout_attributes']['orient'] + except KeyError: + pad_layout_orient = None + + try: + pad_layout_cell = pads[key]['layout_attributes']['cell'] + except KeyError: + pad_layout_cell = None + + try: + pad_layout_offset = pads[key]['layout_attributes']['offset'] + except KeyError: + pad_layout_offset = None + + try: + pad_layout_skip = pads[key]['layout_attributes']['skip'] + except KeyError: + pad_layout_skip = None + + try: + pad_layout_index = pads[key]['layout_attributes']['index'] + except KeyError: + pad_layout_index = None + pad_mux_list = [] for pad_mux in pad_mux_list_hjson: @@ -654,13 +685,13 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_skip_declaration_mux = False - p = Pad(pad_mux, '', pads[key]['mux'][pad_mux]['type'], pad_mapping, 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits) + p = Pad(pad_mux, '', pads[key]['mux'][pad_mux]['type'], pad_mapping, 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip) pad_mux_list.append(p) if pad_num > 1: for p in range(pad_num): pad_cell_name = "pad_" + key + "_" + str(p+pad_offset) + "_i" - pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_mapping, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_mapping, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip) if not 
pad_keep_internal: pad_obj.create_pad_ring() pad_obj.create_core_v_mini_mcu_ctrl() @@ -679,7 +710,7 @@ def len_extracted_peripherals(peripherals): else: pad_cell_name = "pad_" + key + "_i" - pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_mapping, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_mapping, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip) if not pad_keep_internal: pad_obj.create_pad_ring() pad_obj.create_core_v_mini_mcu_ctrl() @@ -740,6 +771,31 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_skip_declaration = False + try: + pad_layout_orient = external_pads[key]['layout_attributes']['orient'] + except KeyError: + pad_layout_orient = None + + try: + pad_layout_cell = external_pads[key]['layout_attributes']['cell'] + except KeyError: + pad_layout_cell = None + + try: + pad_layout_offset = external_pads[key]['layout_attributes']['offset'] + except KeyError: + pad_layout_offset = None + + try: + pad_layout_skip = external_pads[key]['layout_attributes']['skip'] + except KeyError: + pad_layout_skip = None + + try: + pad_layout_index = external_pads[key]['layout_attributes']['index'] + except KeyError: + pad_layout_index = None + pad_mux_list = [] for pad_mux in pad_mux_list_hjson: @@ -765,13 +821,13 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_skip_declaration_mux = False - p = Pad(pad_mux, '', external_pads[key]['mux'][pad_mux]['type'], pad_mapping, 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits) + p = Pad(pad_mux, '', external_pads[key]['mux'][pad_mux]['type'], pad_mapping, 0, pad_active_mux, pad_driven_manually_mux, 
pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip) pad_mux_list.append(p) if pad_num > 1: for p in range(pad_num): pad_cell_name = "pad_" + key + "_" + str(p+pad_offset) + "_i" - pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_mapping, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_mapping, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip) pad_obj.create_pad_ring() pad_obj.create_pad_ring_bonding() pad_obj.create_internal_signals() @@ -787,7 +843,7 @@ def len_extracted_peripherals(peripherals): else: pad_cell_name = "pad_" + key + "_i" - pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_mapping, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_mapping, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits, pad_layout_index, pad_layout_orient, pad_layout_cell, pad_layout_offset, pad_layout_skip) pad_obj.create_pad_ring() pad_obj.create_pad_ring_bonding() pad_obj.create_internal_signals()