Skip to content

Commit

Permalink
use distinct kernels in many kernels launch
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikex86 committed Nov 27, 2024
1 parent 7080e42 commit e71d3ac
Showing 1 changed file with 13 additions and 23 deletions.
36 changes: 13 additions & 23 deletions tests/many_kernels_launch/main.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,6 @@ int main() {
libreCuDeviceGetName(name_buffer, 256, device);
std::cout << "Device Name: " + std::string(name_buffer) << std::endl;

LibreCUmodule module{};

// read cubin file
uint8_t *image;
Expand All @@ -46,39 +45,28 @@ int main() {
std::memcpy(image, bytes.data(), bytes.size());
n_bytes = bytes.size();
}
CUDA_CHECK(libreCuModuleLoadData(&module, image, n_bytes));

// read functions
uint32_t num_funcs{};
CUDA_CHECK(libreCuModuleGetFunctionCount(&num_funcs, module));
std::cout << "Num functions: " << num_funcs << std::endl;

auto *functions = new LibreCUFunction[num_funcs];
CUDA_CHECK(libreCuModuleEnumerateFunctions(functions, num_funcs, module));

for (size_t i = 0; i < num_funcs; i++) {
LibreCUFunction func = functions[i];
const char *func_name{};
CUDA_CHECK(libreCuFuncGetName(&func_name, func));
std::cout << " function \"" << func_name << "\"" << std::endl;
size_t num_kernels = 1025;
LibreCUmodule modules[num_kernels];
for (int i = 0; i < num_kernels; i++) {
CUDA_CHECK(libreCuModuleLoadData(modules + i, image, n_bytes));
}

delete[] functions;

// find function
LibreCUFunction func{};
CUDA_CHECK(libreCuModuleGetFunction(&func, module, "emtpy_kernel"));
// find functions
LibreCUFunction funcs[num_kernels];
for (int i = 0; i < num_kernels; i++) {
CUDA_CHECK(libreCuModuleGetFunction(funcs + i, modules[i], "emtpy_kernel"));
}

// create stream
LibreCUstream stream{};
CUDA_CHECK(libreCuStreamCreate(&stream, 0));

void *params[] = {};
size_t num_kernels = 1025;

auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < num_kernels; ++i) {
CUDA_CHECK(libreCuLaunchKernel(func,
CUDA_CHECK(libreCuLaunchKernel(funcs[i],
1, 1, 1,
1, 1, 1,
0,
Expand All @@ -105,7 +93,9 @@ int main() {
CUDA_CHECK(libreCuStreamDestroy(stream));

// unload module
CUDA_CHECK(libreCuModuleUnload(module));
for (int i = 0; i < num_kernels; ++i) {
CUDA_CHECK(libreCuModuleUnload(modules[i]));
}

// destroy ctx
CUDA_CHECK(libreCuCtxDestroy(ctx));
Expand Down

0 comments on commit e71d3ac

Please sign in to comment.