Skip to content

Commit

Permalink
Add support for struct kernel parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
mikex86 committed Sep 1, 2024
1 parent 2ed31f1 commit 3187e8c
Show file tree
Hide file tree
Showing 11 changed files with 412 additions and 5 deletions.
16 changes: 15 additions & 1 deletion driverapi/src/cmdqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,21 @@ NvCommandQueue::launchFunction(LibreCUFunction function,
kernargs_buf[j++] = param_value;
break;
}
default: LIBRECUDA_FAIL(LIBRECUDA_ERROR_INVALID_VALUE)
default: {
// Fallback for parameter types with no dedicated case above (structs /
// arrays passed by value): copy the argument's raw bytes into the
// kernarg buffer as a sequence of 32-bit words.
if (param_size % sizeof(NvU32) != 0) {
// CUDA encodes kernel arguments in 32-bit words, and C/C++ compilers
// pad struct sizes up to a multiple of their alignment (>= 4 for any
// struct containing an int-sized member), so a parameter size that is
// not a multiple of 4 indicates a malformed descriptor rather than a
// legitimate kernel.
// NOTE(review): if LIBRECUDA_DEBUG forwards its argument to a
// printf-style formatter, the "% 4" in this message would be parsed
// as a conversion specification — confirm the macro's implementation.
LIBRECUDA_DEBUG("Encountered kernel with array parameter with size % 4 != 0! This should not be possible");
LIBRECUDA_FAIL(LIBRECUDA_ERROR_INVALID_VALUE);
}
// Append the argument verbatim, one 32-bit word at a time.
auto *param_ptr = reinterpret_cast<NvU32 *>(params[i]);
size_t num_words = param_size / sizeof(NvU32);
for (size_t k = 0; k < num_words; k++) {
kernargs_buf[j++] = param_ptr[k];
}
break;
}
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ add_subdirectory(memcopy)
# Test suites; each subdirectory builds one standalone test executable.
add_subdirectory(dynamic_shared_mem)
add_subdirectory(compute_chronological_consistency)
add_subdirectory(test_async_kernels)
# Fixed: this entry appeared twice (stale pre-change diff line); adding the
# same subdirectory twice is a CMake configuration error.
add_subdirectory(dma_chronological_consistency)
add_subdirectory(kernel_struct_param)
2 changes: 1 addition & 1 deletion tests/compute_chronological_consistency/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ target_link_libraries(
driverapi
)

configure_file("${CMAKE_CURRENT_LIST_DIR}/write_float.cubin" ${CMAKE_BINARY_DIR}/tests/compute_chronological_consistency COPYONLY)
2 changes: 1 addition & 1 deletion tests/dynamic_shared_mem/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ target_link_libraries(
driverapi
)

configure_file("${CMAKE_CURRENT_LIST_DIR}/write_float.cubin" ${CMAKE_BINARY_DIR}/tests/dynamic_shared_mem COPYONLY)
11 changes: 11 additions & 0 deletions tests/kernel_struct_param/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Test executable for struct (by-value) kernel parameters.
add_executable(test_kernel_struct_param main.cpp)
target_link_libraries(test_kernel_struct_param PRIVATE driverapi)

# Copy the precompiled kernel image next to the test binaries so the test can
# load it from its working directory at runtime.
configure_file(
    "${CMAKE_CURRENT_LIST_DIR}/read_from_struct.cubin"
    ${CMAKE_BINARY_DIR}/tests/kernel_struct_param
    COPYONLY
)
126 changes: 126 additions & 0 deletions tests/kernel_struct_param/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include <librecuda.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <fstream>
#include <cstring>

/// Checks a LibreCUDA status code; on failure resolves it to a human-readable
/// string, reports it together with the call site, and aborts the process.
/// @param error status returned by a libreCu* call
/// @param file  source file of the call site (supplied by CUDA_CHECK)
/// @param line  source line of the call site (supplied by CUDA_CHECK)
inline void cudaCheck(libreCudaStatus_t error, const char *file, int line) {
    if (error != LIBRECUDA_SUCCESS) {
        const char *error_string;
        libreCuGetErrorString(error, &error_string);
        // Fixed: report to stderr (was printf/stdout) so failures stay visible
        // when stdout is redirected or buffered.
        fprintf(stderr, "[CUDA ERROR] at file %s:%d: %s\n", file, line, error_string);
        exit(EXIT_FAILURE);
    }
}  // fixed: removed stray ';' after the function definition

#define CUDA_CHECK(err) (cudaCheck(err, __FILE__, __LINE__))

// Test payload passed by value as a kernel parameter. The exact size/layout is
// the point of the test: 6 ints (24 bytes) + char[32] (32 bytes) + 1 char
// = 57 bytes, padded to 60 by the struct's trailing alignment padding
// (alignof == 4 from the int members) on typical ABIs.
struct struct_t {
int x, y, z;
int w, h, d;
char str[32];
// deliberately awkward trailing byte — forces tail padding (57 -> 60 bytes)
char me_ugly;
};
// Locks in the layout the kernel-parameter encoding path is tested against.
static_assert(sizeof(struct_t) == 60);

int main() {
CUDA_CHECK(libreCuInit(0));

int device_count{};
CUDA_CHECK(libreCuDeviceGetCount(&device_count));
std::cout << "Device count: " + std::to_string(device_count) << std::endl;

LibreCUdevice device{};
CUDA_CHECK(libreCuDeviceGet(&device, 0));

LibreCUcontext ctx{};
CUDA_CHECK(libreCuCtxCreate_v2(&ctx, CU_CTX_SCHED_YIELD, device));

char name_buffer[256] = {};
libreCuDeviceGetName(name_buffer, 256, device);
std::cout << "Device Name: " + std::string(name_buffer) << std::endl;
LibreCUmodule module{};

// read cubin file
uint8_t *image;
size_t n_bytes;
{
std::ifstream input("read_from_struct.cubin", std::ios::binary);
std::vector<uint8_t> bytes(
(std::istreambuf_iterator<char>(input)),
(std::istreambuf_iterator<char>()));
input.close();
image = new uint8_t[bytes.size()];
memcpy(image, bytes.data(), bytes.size());
n_bytes = bytes.size();
}
CUDA_CHECK(libreCuModuleLoadData(&module, image, n_bytes));

// read functions
uint32_t num_funcs{};
CUDA_CHECK(libreCuModuleGetFunctionCount(&num_funcs, module));
std::cout << "Num functions: " << num_funcs << std::endl;

auto *functions = new LibreCUFunction[num_funcs];
CUDA_CHECK(libreCuModuleEnumerateFunctions(functions, num_funcs, module));

for (size_t i = 0; i < num_funcs; i++) {
LibreCUFunction func = functions[i];
const char *func_name{};
CUDA_CHECK(libreCuFuncGetName(&func_name, func));
std::cout << " function \"" << func_name << "\"" << std::endl;
}

delete[] functions;

// find function
LibreCUFunction func{};
CUDA_CHECK(libreCuModuleGetFunction(&func, module, "read_from_struct"));
// create stream
LibreCUstream stream{};
CUDA_CHECK(libreCuStreamCreate(&stream, 0));

void *w_dst_va{};
CUDA_CHECK(libreCuMemAlloc(&w_dst_va, sizeof(int), true));

struct_t s = {
.w=64,
};

void *params[] = {
&s, // struct
&w_dst_va, // dst
};

CUDA_CHECK(
libreCuLaunchKernel(func,
1, 1, 1,
1, 1, 1,
8192,
stream,
params, sizeof(params) / sizeof(void *),
nullptr
)
);

// dispatch built up command buffer to GPU
CUDA_CHECK(libreCuStreamCommence(stream));

// wait for work to complete
CUDA_CHECK(libreCuStreamAwait(stream));
std::cout << "Dst value (post exec): " << *(int *) (w_dst_va) << std::endl;

// free memory
CUDA_CHECK(libreCuMemFree(w_dst_va));

// destroy stream
CUDA_CHECK(libreCuStreamDestroy(stream));

// unload module
CUDA_CHECK(libreCuModuleUnload(module));

// destroy ctx
CUDA_CHECK(libreCuCtxDestroy(ctx));
return 0;
}
Loading

0 comments on commit 3187e8c

Please sign in to comment.