Skip to content

Commit

Permalink
Merge pull request #108 from pulp-platform/mbertuletti/mimo_receiver
Browse files Browse the repository at this point in the history
PUSCH receiver kernels
  • Loading branch information
mbertuletti authored Jan 6, 2025
2 parents 668bc99 + d00e81b commit 133d5ae
Show file tree
Hide file tree
Showing 80 changed files with 9,349 additions and 528 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
path: riscv-gnu-toolchain.tzst

tc-llvm:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Recover the submodule commit hash
Expand Down Expand Up @@ -240,7 +240,7 @@ jobs:
git diff --exit-code
check-control-registers:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Install Python requirements
Expand All @@ -266,7 +266,7 @@ jobs:
# Build Software #
####################
build-apps-gcc:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: tc-gcc
strategy:
matrix:
Expand Down Expand Up @@ -297,7 +297,7 @@ jobs:
path: apps-gcc.tzst

build-apps-llvm:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [tc-gcc, tc-llvm]
strategy:
matrix:
Expand Down Expand Up @@ -377,7 +377,7 @@ jobs:
# Run Software #
##################
run-apps-gcc:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 20
needs: [build-apps-gcc, riscv-isa-sim, verilator-model]
strategy:
Expand Down Expand Up @@ -415,7 +415,7 @@ jobs:
make trace
run-apps-llvm:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 20
needs: [build-apps-llvm, riscv-isa-sim, verilator-model]
strategy:
Expand Down Expand Up @@ -453,7 +453,7 @@ jobs:
make trace
run-apps-halide:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 20
needs: [build-apps-halide, riscv-isa-sim, verilator-model]
strategy:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
# Check License #
#################
check-license:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Install Python requirements
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Add pv.pack.h xpulpv2 instruction
- Add a script to generate random data to preload the L2 memory
- Add stack overflow simulator warning using dedicated CSR
- Add mimo_mmse_f16 kernels
- Add cmatmul_f16 kernels
- Add cfft_radix4_f16 kernels
- Add chest_f16 kernels

### Fixed
- Measure the `wfi` stalls and stalls caused by `opc` properly
Expand Down
1 change: 1 addition & 0 deletions python-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ progressbar2
tabulate
sympy
scipy
pyflexfloat
9 changes: 7 additions & 2 deletions software/apps/baremetal/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,13 @@ APPS := $(patsubst $(APPS_DIR)/%/main.c,%,$(shell find $(APPS_DIR) -name "main.c
BINARIES := $(addprefix $(BIN_DIR)/,$(APPS))
ALL := $(APPS)

ALL_GCC := $(filter-out matmul_f16 matmul_f32, $(ALL))
ALL_LLVM := $(filter-out synth_i32 chest_q16 cfft_radix2_q16 cfft_radix4_q16, $(ALL))
# Name suffixes used to classify applications by data type:
# floating-point (f16/f32/f8) versus integer / fixed-point (q16/q32/i16/i32/i8).
FP_SUFFIXES := f16 f32 f8
I_SUFFIXES := q16 q32 i16 i32 i8
# NOTE(review): the two variable names below look swapped relative to their
# contents. I_APPS is built from FP_SUFFIXES, so it collects the apps whose
# name ends in _f16/_f32/_f8; FP_APPS is built from I_SUFFIXES, so it collects
# the apps whose name ends in _q16/_q32/_i16/_i32/_i8. The filters further
# down use the variables as defined here, so renaming must be done in both
# places at once (and in any other user of these names) -- confirm.
I_APPS := $(foreach suf, $(FP_SUFFIXES), $(filter %_$(suf), $(ALL)))
FP_APPS := $(foreach suf, $(I_SUFFIXES), $(filter %_$(suf), $(ALL)))
# Filter out applications
# As written, ALL_GCC excludes every app matching an FP suffix and ALL_LLVM
# excludes every app matching an integer / fixed-point suffix.
ALL_GCC := $(filter-out $(I_APPS), $(ALL))
ALL_LLVM := $(filter-out $(FP_APPS), $(ALL))

# Make all applications
all: $(ALL_GCC)
Expand Down
58 changes: 58 additions & 0 deletions software/apps/baremetal/axpy_f16/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Author: Marco Bertuletti, ETH Zurich

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"

#include "data_axpy_f16.h"

#include "baremetal/mempool_axpy_f16.h"
#include "baremetal/mempool_checks.h"

// Half-precision working copies of the AXPY operands. Both arrays hold array_N
// elements, are placed in the ".l1_prio" linker section and are aligned to
// NUM_BANKS bytes. main() fills them from l2_X / l2_Y before the kernel runs.
__fp16 l1_X[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio")));
__fp16 l1_Y[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio")));

/**
 * Entry point of the half-precision AXPY benchmark. Every core executes it.
 *
 * Core 0 copies the operands from l2_X / l2_Y into l1_X / l1_Y, all cores
 * then run axpy_f16vecp_local_unrolled4() inside the benchmark markers, core 0
 * prints the measured cycle count, and l1_Y is compared against l2_Z with
 * mempool_check_f16().
 *
 * @return Always 0.
 */
int main() {
  uint32_t core_id = mempool_get_core_id();
  uint32_t num_cores = mempool_get_core_count();
  mempool_barrier_init(core_id);

  // Only core 0 issues the DMA transfers; the element size is taken from the
  // destination arrays so it always follows their declared type.
  if (core_id == 0) {
    dma_memcpy_blocking(l1_X, l2_X, array_N * sizeof(l1_X[0]));
    dma_memcpy_blocking(l1_Y, l2_Y, array_N * sizeof(l1_Y[0]));
  }

  // The kernel takes the scalar as a raw 16-bit pattern in the low half of a
  // 32-bit word. Copy exactly two bytes instead of dereferencing a uint32_t
  // pointer to l2_A: that avoids a strict-aliasing violation and a 4-byte
  // (possibly misaligned / out-of-object) load. Equivalent to the previous
  // "& 0x0000FFFF" masking on a little-endian target.
  // NOTE(review): assumes l2_A is a 16-bit value declared in data_axpy_f16.h
  // -- confirm.
  uint16_t a_bits;
  memcpy(&a_bits, &l2_A, sizeof(a_bits));
  uint32_t register volatile a = a_bits;
  mempool_barrier(num_cores);

  // PARALLEL, LOCAL ACCESSES
  uint32_t time_init = mempool_get_timer();
  mempool_start_benchmark();
  axpy_f16vecp_local_unrolled4(a, l1_X, l1_Y, array_N);
  mempool_stop_benchmark();
  uint32_t time_end = mempool_get_timer();

  // Wait for every core before core 0 reports and the results are checked.
  mempool_barrier(num_cores);
  // Check results
  if (core_id == 0) {
    uint32_t clock_cycles = (time_end - time_init);
    printf("\nKernel execution takes %d clock cycles\n", clock_cycles);
  }
  // NOTE(review): the meaning of the trailing arguments (100, 0.1f, 0) is
  // defined by mempool_checks.h and is not visible here -- confirm.
  mempool_check_f16(l1_Y, l2_Z, 100, 0.1f, 0);
  mempool_barrier(num_cores);

  return 0;
}
57 changes: 57 additions & 0 deletions software/apps/baremetal/axpy_f32/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Author: Marco Bertuletti, ETH Zurich

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"

#include "data_axpy_f32.h"

#include "baremetal/mempool_axpy_f32.h"
#include "baremetal/mempool_checks.h"

// Single-precision working copies of the AXPY operands. Both arrays hold
// array_N elements, are placed in the ".l1_prio" linker section and are aligned
// to NUM_BANKS bytes. main() fills them from l2_X / l2_Y before the kernel runs.
float l1_X[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio")));
float l1_Y[array_N] __attribute__((aligned(NUM_BANKS), section(".l1_prio")));

/**
 * Entry point of the single-precision AXPY benchmark. Every core executes it.
 *
 * Core 0 copies the operands from l2_X / l2_Y into l1_X / l1_Y, all cores
 * then run axpy_f32p_local_unrolled4() inside the benchmark markers, core 0
 * prints the measured cycle count, and l1_Y is compared against l2_Z with
 * mempool_check_f32().
 *
 * @return Always 0.
 */
int main() {
  uint32_t core_id = mempool_get_core_id();
  uint32_t num_cores = mempool_get_core_count();
  mempool_barrier_init(core_id);

  // Only core 0 issues the DMA transfers; the element size is taken from the
  // destination arrays so it always follows their declared type.
  if (core_id == 0) {
    dma_memcpy_blocking(l1_X, l2_X, array_N * sizeof(l1_X[0]));
    dma_memcpy_blocking(l1_Y, l2_Y, array_N * sizeof(l1_Y[0]));
  }
  float register volatile a = l2_A;
  mempool_barrier(num_cores);

  // PARALLEL
  uint32_t time_init = mempool_get_timer();
  mempool_start_benchmark();
  axpy_f32p_local_unrolled4(a, l1_X, l1_Y, array_N);
  mempool_stop_benchmark();
  uint32_t time_end = mempool_get_timer();

  // Make sure every core has finished writing its part of l1_Y before the
  // result is reported and checked. The barrier sits outside the timed
  // region, so it does not affect the measured cycle count.
  // NOTE(review): redundant if the kernel already ends with a barrier --
  // confirm against mempool_axpy_f32.h.
  mempool_barrier(num_cores);
  // Check results
  if (core_id == 0) {
    uint32_t clock_cycles = (time_end - time_init);
    printf("\nKernel execution takes %d clock cycles\n", clock_cycles);
  }
  // NOTE(review): the meaning of the trailing arguments (100, 0.1f, 0) is
  // defined by mempool_checks.h and is not visible here -- confirm.
  mempool_check_f32(l1_Y, l2_Z, 100, 0.1f, 0);
  mempool_barrier(num_cores);

  return 0;
}
16 changes: 7 additions & 9 deletions software/apps/baremetal/axpy_i32/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,19 @@
#include <stdlib.h>
#include <string.h>

/* Mempool runtime libraries */
#include "builtins_v2.h"
#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"

#include "baremetal/mempool_axpy_i32p.h"
#include "baremetal/mempool_checks.h"
#include "data_axpy_i32.h"

int32_t l1_X[array_N]
__attribute__((aligned(NUM_CORES * sizeof(uint32_t)), section(".l1")));
int32_t l1_Y[array_N]
__attribute__((aligned(NUM_CORES * sizeof(uint32_t)), section(".l1")));
#include "baremetal/mempool_axpy_i32.h"
#include "baremetal/mempool_checks.h"

int32_t l1_X[array_N] __attribute__((aligned(NUM_BANKS), section(".l1")));
int32_t l1_Y[array_N] __attribute__((aligned(NUM_BANKS), section(".l1")));
int volatile error __attribute__((section(".l1")));

int main() {
Expand All @@ -38,11 +35,12 @@ int main() {
dma_memcpy_blocking(l1_Y, l2_Y, array_N * sizeof(int32_t));
error = 0;
}
register volatile int32_t a = l2_A;
mempool_barrier(num_cores);

// Benchmark
mempool_start_benchmark();
calc_axpy_unloop_x4_localbank(l1_X, l1_Y, ALPHA, array_N, core_id, num_cores);
calc_axpy_unloop_x4_localbank(l1_X, l1_Y, a, array_N, core_id, num_cores);
mempool_barrier(num_cores);
mempool_stop_benchmark();

Expand Down
15 changes: 10 additions & 5 deletions software/apps/baremetal/cfft_radix2_q16/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include <stdlib.h>
#include <string.h>

/* Mempool runtime libraries */
#include "builtins_v2.h"
#include "dma.h"
#include "encoding.h"
Expand All @@ -19,15 +18,21 @@
#include "synchronization.h"

#include "data_cfft_radix2_q16.h"
#define N_BANKS (NUM_CORES * BANKING_FACTOR)

/* CFFT mempool libraries */
/*
======================
Parameters and defines
SINGLE: When defined runs single-core CFFT.
PARALLEL: When defined runs parallel CFFT.
*/

#define PARALLEL

#include "baremetal/mempool_cfft_q16_bitreversal.h"
#include "baremetal/mempool_checks.h"
#include "baremetal/mempool_radix2_cfft_q16.h"

#define PARALLEL

/* CFFT mempool data */
int16_t l1_pSrc[2 * N_CSAMPLES]
__attribute__((aligned(sizeof(int32_t)), section(".l1_prio")));
Expand Down
Loading

0 comments on commit 133d5ae

Please sign in to comment.