-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpythonEntry.cpp
102 lines (68 loc) · 2.89 KB
/
pythonEntry.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include "vector"
#include "gpuInterface.h"
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;
using namespace std;
// The hash table is implemented for uint32_t only. Float values are multiplied by this
// factor before being converted to an integer so that part of their fractional precision
// survives the float -> int conversion (any further precision loss is inconsequential).
const float scalarOffset = 1.0e6f;
/**
 * Converts a Python lookup table into hash-table entries.
 *
 * Each dict key is cast to a string and parsed as a base-10 integer marker; each dict
 * value is cast to float, multiplied by scalarOffset and truncated to uint32_t.
 *
 * @param inputLUT   dict whose keys are marker strings and whose values are floats.
 * @param insert_kvs receives one appended KeyValue per dict item (existing content is kept).
 * @return number of entries appended to insert_kvs.
 * @throws std::invalid_argument if a key does not start with a parsable integer.
 * @throws std::out_of_range     if a marker lies outside [INT_MIN, UINT32_MAX].
 * @throws std::range_error      if a scaled value cannot be represented as uint32_t.
 */
uint32_t loadMarkersFromDict(const py::dict& inputLUT,std::vector<KeyValue>& insert_kvs){
    uint32_t num_entries = 0;
    insert_kvs.reserve(insert_kvs.size() + inputLUT.size());

    for (auto it : inputLUT) {
        const std::string markerStr = py::cast<std::string>(it.first);

        // Parse wide and range-check: std::stoi rejected valid uint32 markers above INT_MAX.
        // Negative markers are still accepted and wrap modulo 2^32, as the old int cast did.
        const long long parsed = std::stoll(markerStr);
        if (parsed < static_cast<long long>(std::numeric_limits<int>::min()) ||
            parsed > static_cast<long long>(std::numeric_limits<uint32_t>::max())) {
            throw std::out_of_range("loadMarkersFromDict: marker out of range: " + markerStr);
        }
        const uint32_t marker = static_cast<uint32_t>(parsed);

        const float value = py::cast<float>(it.second);
        const float scaled = value * scalarOffset;
        // A float -> unsigned conversion is undefined when the truncated value does not fit,
        // so reject negatives, NaN and anything at or above 2^32 explicitly.
        if (!(scaled > -1.0f && scaled < 4294967296.0f)) {
            throw std::range_error(
                "loadMarkersFromDict: scaled value does not fit in uint32 for marker " + markerStr);
        }

        insert_kvs.push_back(KeyValue{marker, static_cast<uint32_t>(scaled)});
        ++num_entries;
    }
    return num_entries;
}
// Number of elements batch `index` receives when `total` elements are split into `batches`
// contiguous batches: the first (total % batches) batches get one extra element, so the
// batch lengths always sum to `total`. Requires batches > 0.
static uint32_t batchLength(uint32_t total, uint32_t batches, uint32_t index) {
    return total / batches + (index < total % batches ? 1u : 0u);
}

/**
 * Loads the lookup table into the hash table and runs a lookup over every element of
 * inputArray, in up to 16 batches for insertion and up to 16 batches for lookup.
 *
 * @param inputLUT   dict of marker string -> float value (see loadMarkersFromDict).
 * @param inputArray uint32 array whose buffer is handed to lookup_hashtable_on_array.
 * @return scalarOffset converted to int (the factor applied to the stored values).
 * @throws std::length_error if inputArray has more elements than fit in uint32_t.
 */
int run(const py::dict& inputLUT,py::array_t<uint32_t> inputArray){
    const uint32_t kMaxBatches = 16;

    py::buffer_info buf1 = inputArray.request();
    if (buf1.size < 0 ||
        static_cast<uint64_t>(buf1.size) > std::numeric_limits<uint32_t>::max()) {
        throw std::length_error("run: inputArray has too many elements for 32-bit batch sizes");
    }
    const uint32_t ARRAY_SIZE = static_cast<uint32_t>(buf1.size);
    // NOTE(review): the pointer arithmetic below treats the buffer as densely packed
    // uint32_t; strides are not checked here - confirm callers pass contiguous arrays.
    uint32_t* ptrToNumpyArray = static_cast<uint32_t*>(buf1.ptr);

    std::vector<KeyValue> insert_kvs;
    loadMarkersFromDict(inputLUT, insert_kvs);
    const uint32_t numInserts = static_cast<uint32_t>(insert_kvs.size());

    // Debug output; guarded so a table with fewer than three entries is not indexed out of bounds.
    if (!insert_kvs.empty()) {
        cout << "\n\nfirst in insert_kvs: " << insert_kvs.front().key << " , "<< float(insert_kvs.front().value)/scalarOffset << endl;
    }
    if (insert_kvs.size() > 2) {
        // NOTE(review): the label says "2nd" but index 2 is the third entry; kept as in the original.
        cout << "2nd in insert_kvs: " << insert_kvs[2].key << " , "<< float(insert_kvs[2].value)/scalarOffset <<endl;
    }
    if (!insert_kvs.empty()) {
        cout << "last in insert_kvs: " << insert_kvs.back().key << " , "<< float(insert_kvs.back().value)/scalarOffset << "\n\n"<<endl;
    }

    cout<<"starting insertion"<<endl;
    KeyValue* pHashTable = create_hashtable(); //TODO add check for hash table size vs marker count

    // Insert items into the hash table. The batch count is derived from the number of
    // entries, so no batch is ever empty (empty inserts cause a silent crash) and the
    // remainder of the division is spread over the first batches instead of being dropped.
    if (numInserts > 0) {
        const uint32_t insertBatches = std::min(kMaxBatches, numInserts);
        uint32_t offset = 0;
        for (uint32_t i = 0; i < insertBatches; ++i) {
            const uint32_t count = batchLength(numInserts, insertBatches, i);
            insert_hashtable(
                pHashTable,
                insert_kvs.data() + offset,
                count
            );
            offset += count;
        }
    }

    cout << "\n\n lookup directly on array, parallel: \n" << endl;
    // Same batching scheme for the lookups, so every array element is covered.
    if (ARRAY_SIZE > 0) {
        const uint32_t lookupBatches = std::min(kMaxBatches, ARRAY_SIZE);
        uint32_t offset = 0;
        for (uint32_t i = 0; i < lookupBatches; ++i) {
            const uint32_t count = batchLength(ARRAY_SIZE, lookupBatches, i);
            lookup_hashtable_on_array(
                pHashTable,
                ptrToNumpyArray + offset,
                count,
                count * sizeof(uint32_t)
            );
            offset += count;
        }
    }

    destroy_hashtable(pHashTable);
    return static_cast<int>(scalarOffset);
}
// Python module definition: exposes run() as cudaProcesses.run(inputLUT, inputArray).
PYBIND11_MODULE(cudaProcesses, m) {
    m.doc() = "cudaProcesses pybind11 example"; // optional module docstring
    // The previous docstring described image loading and disk writes, which run() does not do.
    m.def("run", &run,
          "Insert the {marker string: float value} entries of the dict into the hash table, "
          "look up the elements of the uint32 array in it, and return the value scale factor.");
}