Showing 8 changed files with 405 additions and 5 deletions.
@@ -0,0 +1,11 @@
add_executable(
        test_indexing
        main.cpp
)
target_link_libraries(
        test_indexing
        PRIVATE
        driverapi
)

configure_file("${CMAKE_CURRENT_LIST_DIR}/write_float.cubin" ${CMAKE_BINARY_DIR}/tests/test_indexing/ COPYONLY)
@@ -0,0 +1,130 @@
#include <librecuda.h>

#include <iostream>
#include <iomanip>
#include <vector>
#include <fstream>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include <cstdint>

// Abort with a readable error message if a LibreCuda call fails.
inline void cudaCheck(libreCudaStatus_t error, const char *file, int line) {
    if (error != LIBRECUDA_SUCCESS) {
        const char *error_string;
        libreCuGetErrorString(error, &error_string);
        printf("[CUDA ERROR] at file %s:%d: %s\n", file, line, error_string);
        exit(EXIT_FAILURE);
    }
}

#define CUDA_CHECK(err) (cudaCheck(err, __FILE__, __LINE__))

int main() {
    CUDA_CHECK(libreCuInit(0));

    int device_count{};
    CUDA_CHECK(libreCuDeviceGetCount(&device_count));
    std::cout << "Device count: " << device_count << std::endl;

    LibreCUdevice device{};
    CUDA_CHECK(libreCuDeviceGet(&device, 0));

    LibreCUcontext ctx{};
    CUDA_CHECK(libreCuCtxCreate_v2(&ctx, CU_CTX_SCHED_YIELD, device));

    char name_buffer[256] = {};
    libreCuDeviceGetName(name_buffer, 256, device);
    std::cout << "Device Name: " << name_buffer << std::endl;

    LibreCUmodule module{};

    // read cubin file into a host buffer
    uint8_t *image;
    size_t n_bytes;
    {
        std::ifstream input("write_float.cubin", std::ios::binary);
        std::vector<uint8_t> bytes(
                (std::istreambuf_iterator<char>(input)),
                (std::istreambuf_iterator<char>()));
        input.close();
        image = new uint8_t[bytes.size()];
        memcpy(image, bytes.data(), bytes.size());
        n_bytes = bytes.size();
    }
    CUDA_CHECK(libreCuModuleLoadData(&module, image, n_bytes));

    // enumerate functions contained in the module
    uint32_t num_funcs{};
    CUDA_CHECK(libreCuModuleGetFunctionCount(&num_funcs, module));
    std::cout << "Num functions: " << num_funcs << std::endl;

    auto *functions = new LibreCUFunction[num_funcs];
    CUDA_CHECK(libreCuModuleEnumerateFunctions(functions, num_funcs, module));

    for (size_t i = 0; i < num_funcs; i++) {
        LibreCUFunction func = functions[i];
        const char *func_name{};
        CUDA_CHECK(libreCuFuncGetName(&func_name, func));
        std::cout << "  function \"" << func_name << "\"" << std::endl;
    }

    delete[] functions;

    // find function
    LibreCUFunction func{};
    CUDA_CHECK(libreCuModuleGetFunction(&func, module, "write_float"));

    // set dynamic shared memory
    CUDA_CHECK(libreCuFuncSetAttribute(func, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 8192));

    // create stream
    LibreCUstream stream{};
    CUDA_CHECK(libreCuStreamCreate(&stream, 0));

    // allocate device memory for the kernel to write into
    void *float_dst_va{};
    size_t n_elements = 50256 * 768;
    CUDA_CHECK(libreCuMemAlloc(&float_dst_va, n_elements * sizeof(float), true));

    auto *host_dst = new float[n_elements];

    // kernel parameters: pointers to the argument values
    void *params[] = {
            &float_dst_va, // dst
            &n_elements    // n_elements
    };
    CUDA_CHECK(
            libreCuLaunchKernel(func,
                                n_elements / 256, 1, 1,
                                256, 1, 1,
                                8192,
                                stream,
                                params, sizeof(params) / sizeof(void *),
                                nullptr
            )
    );
    CUDA_CHECK(libreCuMemCpy(host_dst, float_dst_va, n_elements * sizeof(float), stream, false));

    // dispatch built up command buffer to GPU
    CUDA_CHECK(libreCuStreamCommence(stream));

    // wait for work to complete
    CUDA_CHECK(libreCuStreamAwait(stream));

    // validate that the kernel filled the whole buffer
    for (size_t i = 0; i < n_elements; i++) {
        if (host_dst[i] != 1.0f) {
            std::cerr << "Not all values were filled!" << std::endl;
            break;
        }
    }

    // free memory
    CUDA_CHECK(libreCuMemFree(float_dst_va));

    delete[] host_dst;

    // destroy stream
    CUDA_CHECK(libreCuStreamDestroy(stream));

    // unload module
    CUDA_CHECK(libreCuModuleUnload(module));
    delete[] image; // free the host copy of the cubin image

    // destroy ctx
    CUDA_CHECK(libreCuCtxDestroy(ctx));
    return 0;
}
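For context, here is a minimal sketch of the kind of kernel this test assumes is compiled into write_float.cubin: the host-side check above expects every element of the destination buffer to equal 1.0f, and the launch passes exactly two parameters (a device pointer and an element count). The kernel source, its signature, and the nvcc invocation below are assumptions for illustration; the commit itself only ships the prebuilt cubin.

// write_float.cu -- hypothetical source for the write_float kernel
// (an assumption; this commit only contains the compiled write_float.cubin).
// Build sketch: nvcc -cubin -arch=sm_89 write_float.cu -o write_float.cubin
// (the sm_89 target is an assumption; use whatever the target GPU requires).

// extern "C" keeps the symbol unmangled so it matches the "write_float"
// string passed to libreCuModuleGetFunction in the test above.
extern "C" __global__ void write_float(float *dst, size_t n_elements) {
    size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (i < n_elements) {
        dst[i] = 1.0f; // matches the host-side check host_dst[i] != 1.0f
    }
}

The launch configuration in the test (n_elements / 256 blocks of 256 threads, with n_elements = 50256 * 768 divisible by 256) covers every element exactly once, so a plain bounds check suffices and no grid-stride loop is needed.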