Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#MERGE: fix some compilation errors I saw #121

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Root CMake file for the openGPMP project
# *************************************************************************/

cmake_minimum_required(VERSION 3.25)
cmake_minimum_required(VERSION 3.20)
set (CMAKE_CXX_STANDARD 20)
include(CheckIncludeFileCXX)

Expand Down Expand Up @@ -196,7 +196,7 @@ if(NOT BUILD_TINYGPMP AND NOT BUILD_PYGPMP OR BUILD_OPENGPMP)

# run C++ and Fortran unit tests
add_dependencies(${PROJECT_NAME} RUN_CPP_TESTS)
add_dependencies(${PROJECT_NAME} RUN_FORTRAN_TESTS)
add_dependencies(${PROJECT_NAME} RUN_FORTRAN_TESTS)
endif()

# uninstall target
Expand Down
115 changes: 115 additions & 0 deletions experiment/gemm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#include <immintrin.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <vector>

// Edge length (in elements) of the square tiles used by the blocked GEMM.
// constexpr (rather than const) so the packed-buffer index arithmetic can be
// folded at compile time and the constant is usable in constant expressions.
constexpr size_t BLOCK_SIZE = 256;

// Naive GEMM implementation (no optimizations)
void gemm_naive(const float* A, const float* B, float* C, size_t M, size_t N, size_t K) {
for (size_t i = 0; i < M; ++i) {
for (size_t j = 0; j < N; ++j) {
C[i * N + j] = 0.0f;
for (size_t k = 0; k < K; ++k) {
C[i * N + j] += A[i * K + k] * B[k * N + j];
}
}
}
}

// Tiled GEMM implementation (optimized with blocking and packing)

// Copy one BLOCK_SIZE x BLOCK_SIZE tile of A, whose top-left corner is
// (block_start_row, block_start_col), into the contiguous buffer packed_A.
// BUG FIX: entries that fall outside A's bounds are now explicitly
// zero-filled. Previously only in-bounds entries were written, so a partial
// edge tile (when M or K is not a multiple of BLOCK_SIZE) reused stale
// values left in packed_A by the previous tile, corrupting the product.
void pack_matrix_A(const float* A, float* packed_A, size_t M, size_t K, size_t block_start_row, size_t block_start_col) {
    for (size_t i = 0; i < BLOCK_SIZE; ++i) {
        for (size_t j = 0; j < BLOCK_SIZE; ++j) {
            const bool in_bounds =
                (block_start_row + i) < M && (block_start_col + j) < K;
            packed_A[i * BLOCK_SIZE + j] =
                in_bounds ? A[(block_start_row + i) * K + block_start_col + j]
                          : 0.0f;
        }
    }
}

// Copy one BLOCK_SIZE x BLOCK_SIZE tile of B, whose top-left corner is
// (block_start_row, block_start_col), into the contiguous buffer packed_B.
// BUG FIX: out-of-bounds entries are now explicitly zero-filled. Previously
// only in-bounds entries were written, so a partial edge tile (when K or N
// is not a multiple of BLOCK_SIZE) reused stale values left in packed_B by
// the previous tile, corrupting the product.
void pack_matrix_B(const float* B, float* packed_B, size_t K, size_t N, size_t block_start_row, size_t block_start_col) {
    for (size_t i = 0; i < BLOCK_SIZE; ++i) {
        for (size_t j = 0; j < BLOCK_SIZE; ++j) {
            const bool in_bounds =
                (block_start_row + i) < K && (block_start_col + j) < N;
            packed_B[i * BLOCK_SIZE + j] =
                in_bounds ? B[(block_start_row + i) * N + block_start_col + j]
                          : 0.0f;
        }
    }
}

// Multiply one packed tile of A by one packed tile of B and accumulate the
// result into the (block_row, block_col) region of C. Bounds checks clamp
// the tile to the real matrix edges for M and N; the k loop runs over the
// full padded tile width whenever K > BLOCK_SIZE.
// NOTE(review): this assumes the padded tail of the packed buffers is
// zero-filled — if the pack routines leave stale data there, partial edge
// tiles accumulate garbage. Confirm the packing contract before reuse.
void gemm_block(const float* packed_A, const float* packed_B, float* C, size_t M, size_t N, size_t K, size_t block_row, size_t block_col) {
    for (size_t r = 0; r < BLOCK_SIZE; ++r) {
        const size_t row = block_row + r;
        if (row >= M) {
            break;
        }
        for (size_t c = 0; c < BLOCK_SIZE; ++c) {
            const size_t col = block_col + c;
            if (col >= N) {
                break;
            }
            float acc = 0.0f;
            for (size_t k = 0; k < BLOCK_SIZE; ++k) {
                if (k >= K) {
                    break;
                }
                acc += packed_A[r * BLOCK_SIZE + k] * packed_B[k * BLOCK_SIZE + c];
            }
            C[row * N + col] += acc;
        }
    }
}

// Blocked GEMM: C += A * B computed tile by tile, packing each tile of A
// and B into contiguous scratch buffers before the inner product.
// NOTE: C must be zero-initialized by the caller (main does this); this
// routine accumulates into C rather than overwriting it.
void gemm_tiled(const float* A, const float* B, float* C, size_t M, size_t N, size_t K) {
    std::vector<float> packed_A(BLOCK_SIZE * BLOCK_SIZE, 0.0f);
    std::vector<float> packed_B(BLOCK_SIZE * BLOCK_SIZE, 0.0f);

    for (size_t block_row = 0; block_row < M; block_row += BLOCK_SIZE) {
        for (size_t block_col = 0; block_col < N; block_col += BLOCK_SIZE) {
            for (size_t block_k = 0; block_k < K; block_k += BLOCK_SIZE) {
                // BUG FIX: clear the scratch buffers before refilling them.
                // The pack routines only write entries that lie inside the
                // matrix bounds, so a partial edge tile (dimensions not a
                // multiple of BLOCK_SIZE) would otherwise leave stale values
                // from the previous iteration in the buffers, and gemm_block
                // would accumulate that garbage into C.
                std::fill(packed_A.begin(), packed_A.end(), 0.0f);
                std::fill(packed_B.begin(), packed_B.end(), 0.0f);
                pack_matrix_A(A, packed_A.data(), M, K, block_row, block_k);
                pack_matrix_B(B, packed_B.data(), K, N, block_k, block_col);
                gemm_block(packed_A.data(), packed_B.data(), C, M, N, K, block_row, block_col);
            }
        }
    }
}

// Write a row-major rows x cols matrix to stdout, one row per line,
// with every element followed by a single space.
void print_matrix(const std::vector<float>& mat, size_t rows, size_t cols) {
    for (size_t r = 0; r < rows; ++r) {
        const size_t base = r * cols;
        for (size_t c = 0; c < cols; ++c) {
            std::cout << mat[base + c] << " ";
        }
        std::cout << "\n";
    }
}

// Benchmark driver: times the naive and the tiled GEMM on square matrices
// and cross-checks their outputs element by element. Returns 0 regardless
// of the comparison outcome (the result is only printed).
int main() {
    // Matrix dimensions. 384 is deliberately NOT a multiple of BLOCK_SIZE
    // (256), so the partial-edge-tile paths are exercised too.
    size_t M = 384, N = 384, K = 384;

    // Initialize matrices with a known value (all ones), despite what a
    // random generator would give: with all-ones inputs every element of
    // C must equal exactly K, which makes verification exact in float.
    std::vector<float> A(M * K, 1.0f);
    std::vector<float> B(K * N, 1.0f);
    std::vector<float> C_naive(M * N, 0.0f);
    std::vector<float> C_optimized(M * N, 0.0f);

    // Measure time for naive GEMM
    auto start_naive = std::chrono::high_resolution_clock::now();
    gemm_naive(A.data(), B.data(), C_naive.data(), M, N, K);
    auto end_naive = std::chrono::high_resolution_clock::now();
    std::chrono::duration<float> duration_naive = end_naive - start_naive;
    std::cout << "Naive GEMM Time: " << duration_naive.count() << " seconds\n";

    // Measure time for tiled GEMM (C_optimized starts zeroed; gemm_tiled
    // accumulates into it)
    auto start_optimized = std::chrono::high_resolution_clock::now();
    gemm_tiled(A.data(), B.data(), C_optimized.data(), M, N, K);
    auto end_optimized = std::chrono::high_resolution_clock::now();
    std::chrono::duration<float> duration_optimized = end_optimized - start_optimized;
    std::cout << "Optimized GEMM Time: " << duration_optimized.count() << " seconds\n";

    // Verify results. std::fabs (from <cmath>) guarantees the float
    // overload; plain std::abs without <cmath> may resolve to the int
    // overload and silently truncate the difference.
    bool correct = true;
    for (size_t i = 0; i < M * N; ++i) {
        if (std::fabs(C_naive[i] - C_optimized[i]) > 1e-6f) {
            correct = false;
            break;
        }
    }

    if (correct) {
        std::cout << "Results match between naive and optimized GEMM.\n";
    } else {
        std::cout << "Results do not match between naive and optimized GEMM.\n";
    }

    return 0;
}

Binary file added experiment/gemm_comparison
Binary file not shown.
2 changes: 2 additions & 0 deletions modules/linalg/dgemm_arr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ void gpmp::linalg::DGEMM::dgemm_micro_kernel(long kc,
long kb = kc / 4;
long kl = kc % 4;

#ifdef __x86_64__
dgemm_kernel_asm(A,
B,
C,
Expand All @@ -109,6 +110,7 @@ void gpmp::linalg::DGEMM::dgemm_micro_kernel(long kc,
incColC,
alpha,
beta);
#endif
}

// MATRIX BUFFERS
Expand Down
3 changes: 2 additions & 1 deletion modules/optim/function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@
*
************************************************************************/
#include <iostream>
#include <openGPMP/optim/function.hpp>
#include <algorithm>
#include <vector>
#include <openGPMP/optim/function.hpp>

std::vector<double> gpmp::optim::Func::generate_random_point(
const std::vector<double> &lower_bounds,
Expand Down
15 changes: 0 additions & 15 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,14 @@ set(CPP_TEST_FILES
linalg/t_eigen.cpp
linalg/t_linsys.cpp

linalg/t_matrix_vector_i32.cpp
linalg/t_matrix_vector_f64.cpp

linalg/t_matrix_arr_naive.cpp

linalg/t_matrix_arr_i8.cpp
linalg/t_matrix_arr_i16.cpp
linalg/t_matrix_arr_i32.cpp
linalg/t_matrix_arr_f64.cpp

linalg/t_matrix_arr_f90.cpp

linalg/t_igemm_arr.cpp
linalg/t_sgemm_arr.cpp
linalg/t_dgemm_arr.cpp

linalg/t_vector_vector_naive.cpp

linalg/t_vector_vector_i8.cpp
linalg/t_vector_vector_i32.cpp
linalg/t_vector_vector_f64.cpp


nt/t_cipher.cpp
nt/t_rc4.cpp
nt/t_primes.cpp
Expand Down
2 changes: 2 additions & 0 deletions tests/linalg/t_eigen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <openGPMP/linalg/eigen.hpp>
#include <stdexcept>
#include <vector>
#include <algorithm>


const double TOLERANCE = 1e-3;

Expand Down
5 changes: 5 additions & 0 deletions tinygpmp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
`tinygpmp` aims to support low-voltage and resource constrained devices,
primarily microcontrollers. For now, focus on support for AVR and STM32
series devices is planned.

## Testing

Check notice on line 6 in tinygpmp/README.md

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

tinygpmp/README.md#L6

Expected: 1; Actual: 0; Below
The `fixture` directory contains instructions and source code for an example application,
along with steps for debugging via the UART pins on an STM32 and monitoring that
connection using `screen` or `minicom` on the corresponding `/dev` device.
2 changes: 2 additions & 0 deletions tinygpmp/include/fec/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Forward Error Correction

Check notice on line 1 in tinygpmp/include/fec/README.md

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

tinygpmp/include/fec/README.md#L1

Expected: [None]; Actual: # Forward Error Correction

2 changes: 2 additions & 0 deletions tinygpmp/modules/fec/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Forward Error Correction

Check notice on line 1 in tinygpmp/modules/fec/README.md

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

tinygpmp/modules/fec/README.md#L1

Expected: [None]; Actual: # Forward Error Correction

Loading