-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcaldgemm_cal.h
182 lines (160 loc) · 6.14 KB
/
caldgemm_cal.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/**
* Interface of the CALDGEMM library.
*
* Copyright 2015:
* - David Rohr ([email protected])
* - Matthias Bach ([email protected])
* - Matthias Kretz ([email protected])
*
* This file is part of CALDGEMM.
*
* CALDGEMM is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CALDGEMM is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with CALDGEMM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CALDGEMM_CAL_H
#define CALDGEMM_CAL_H
#include <cal.h>
#include <cal_ext.h>
#include <calcl.h>
#include "cal_private_ext.h"
#include <emmintrin.h>
#include "caldgemm.h"
/**
 * CAL flavour of the caldgemm class.
 *
 * Derives from caldgemm and adds the CAL handles (devices, contexts, modules,
 * resources, events) plus a set of virtual member functions.
 * NOTE(review): the virtual functions below presumably override hooks declared
 * in caldgemm (see caldgemm.h) - confirm there; only the declarations are
 * visible in this header.
 *
 * Member order is kept stable on purpose so that object layout and the order
 * of virtual functions do not change.
 */
class caldgemm_cal : public caldgemm
{
public:
	caldgemm_cal();
	virtual ~caldgemm_cal();

	virtual double getMaxGPUTemperature();

private:
	// NOTE(review): presumably a flag for the ADL utility state - confirm in the implementation file.
	int adl_util_initialized;

	virtual int UseOutputPthreads();
	virtual int UseInputPthreads();
	virtual int UseMutexPerDevice();

	unsigned int numInputs, numOutputs, numConstantBuffers;

	// Compile-time constants dwBuffersA / dwBuffersB / dwBuffersC.
	// Their values depend only on the configuration macros tested below.
#ifdef CALDGEMM_44
	// dwBuffersA: 1 with CALDGEMM_SINGLE_BUFFER, 2 when neither CALDGEMM_48 nor
	// CALDGEMM_DOUBLE_BUFFERS is defined, 4 otherwise.
#ifdef CALDGEMM_SINGLE_BUFFER
	static const unsigned int dwBuffersA = 1;
#elif !defined(CALDGEMM_48) && !defined(CALDGEMM_DOUBLE_BUFFERS)
	static const unsigned int dwBuffersA = 2;
#else
	static const unsigned int dwBuffersA = 4;
#endif
	// dwBuffersB: 1 with CALDGEMM_SINGLE_BUFFER, 2 when neither CALDGEMM_84 nor
	// CALDGEMM_DOUBLE_BUFFERS is defined, 4 otherwise.
#ifdef CALDGEMM_SINGLE_BUFFER
	static const unsigned int dwBuffersB = 1;
#elif !defined(CALDGEMM_84) && !defined(CALDGEMM_DOUBLE_BUFFERS)
	static const unsigned int dwBuffersB = 2;
#else
	static const unsigned int dwBuffersB = 4;
#endif
#else //CALDGEMM_44
	// Without CALDGEMM_44: dwBuffersA is 2 with CALDGEMM_TRANSPOSED_A and 8
	// otherwise; dwBuffersB is always 2.
#ifdef CALDGEMM_TRANSPOSED_A
	static const unsigned int dwBuffersA = 2;
#else
	static const unsigned int dwBuffersA = 8;
#endif
	static const unsigned int dwBuffersB = 2;
#endif //CALDGEMM_44

	// dwBuffersC: 1 with CALDGEMM_USE_MEMEXPORT, 8 otherwise.
#ifdef CALDGEMM_USE_MEMEXPORT
	static const unsigned int dwBuffersC = 1;
#else
	static const unsigned int dwBuffersC = 8;
#endif

	/**
	 * Per-buffer bookkeeping record.
	 *
	 * The anonymous union exposes one and the same pointer under several
	 * element types; the remaining fields carry dimensions and the CAL
	 * resource/memory handles associated with the buffer.
	 */
	struct BufferProperties
	{
		union
		{
			float* ptr_float;
			unsigned int* ptr_uint;
			int* ptr_int;
			double* ptr_double;
			char* ptr_char;
			void* ptr_void;
		};
		unsigned int Width;
		unsigned int Height;
		unsigned int VectorSize;
		unsigned int DataSize;
		bool CALMemory;
		CALresource res;
		CALmem mem;
		CALmem dstMem;
		unsigned int pitch;
		CALresource tmpres;
		CALmem tmpmem;
		BufferProperties* conversionBuffer;
	};

	// Helper declarations; the implementations are not part of this header.
	// CALDGEMM_DIVBUFA is a macro defined elsewhere that is appended to the
	// parameter list as-is.
	int divideBuffer(BufferProperties* dst, double* src, int width, int height, int gpu_width, int gpu_height, int pitch, int numBuffers, bool transpose CALDGEMM_DIVBUFA);
	int mergeBuffers(double* dst, BufferProperties* src, int width, int height, int gpu_width, int gpu_height, int pitch, int numBuffers);
	void checkCalPatch();
	void cal_init_constant_data(BufferProperties* &data, double alpha);
	virtual int DGEMM_prepare_backend(size_t k, int j, unsigned int num_device, bool prepareM, bool prepareN, bool buffersSufficiant, bool buffersSufficiant0 CALDGEMM_DIVBUFA);

	struct CALVersion {unsigned int major, minor, imp;};

	virtual int Initialize(bool nocalinit);
	int SetupKernel(const char* ILKernel, CALmodule* module, CALcontext* ctx, unsigned int device_num, bool disassemble = false);
	int RunProgram(CALcontext* ctx, CALmodule* module, unsigned int Width, unsigned int Height, CALevent* event);
	int CleanupData(CALcontext* ctx, CALresource* &resourceHandler, BufferProperties* &data, unsigned int numHandles, int nContext, unsigned int num_device);
	int Cleanup(CALdevice* device, CALcontext* ctx, CALmodule* module, CALresource* &resourceHandler, BufferProperties* &data, unsigned int numHandles, int nContext, unsigned int num_device);
	int SetupData(CALmodule* module, CALresource* &_Res, BufferProperties* &data, CALdevice* device, CALcontext* ctx, unsigned int numInputs, unsigned int numOutputs, unsigned int numConstantBuffers, CALname** ctxProgNames, int nContext, unsigned int num_device);
	int CopyDataFromGPU(int nDevice, CALresource* _Res, BufferProperties* data, unsigned int num, int nContext, size_t lastm, size_t lastn, int mustlock = 0);
	int CopyDataToGPU(int nDevice, CALresource* _Res, BufferProperties* data, unsigned int num, int nContext, bool constants, BufferProperties* dest_data = NULL);
	int ValidateCALRuntime();

	/**
	 * Small container handing out CALevent slots.
	 *
	 * With CALDGEMM_QUERY_ALL_EVENTS it stores up to max_events events and a
	 * running counter; GetNextEvent() returns the next free slot and
	 * Reset() rewinds the counter. Without the macro there is exactly one
	 * slot, GetNextEvent() always returns it and Reset() does nothing.
	 */
	class eventCls
	{
	public:
#ifdef CALDGEMM_QUERY_ALL_EVENTS
		// Capacity of the event array; single source of truth for the array
		// bound and the overflow check.
		static const int max_events = 13;

		// Start with an empty event list so GetNextEvent() is well defined
		// even when Reset() has not been called yet.
		eventCls() : nEvents(0) {}

		CALevent events[max_events];
		volatile int nEvents;

		// Returns the next unused slot and advances the counter. When the
		// array is full, reports the overflow and terminates with status 1.
		inline CALevent* GetNextEvent()
		{
			if (nEvents >= max_events)
			{
				fprintf(STD_OUT, "Event buffer overflow\n");
				exit(1);
			}
			return(&events[nEvents++]);
		}

		// Marks all slots as unused again.
		inline void Reset() {nEvents = 0;}
#else
		CALevent events[1];
		static const int nEvents = 1;

		// Single-slot variant: always hands out the one and only event.
		inline CALevent* GetNextEvent() {return(&events[0]);}

		// Nothing to rewind in the single-slot variant.
		inline void Reset() {}
#endif
	};

	// Function pointer of type PFNCALCTXWAITFOREVENTS.
	// NOTE(review): presumably resolved at runtime as a CAL extension - confirm in the implementation file.
	PFNCALCTXWAITFOREVENTS calCtxWaitForEvents;

	// CAL state arrays, dimensioned by constants (max_devices, max_bbuffers,
	// kernel_count, obuffercount) that are declared outside this header.
	BufferProperties* datas[max_devices][max_bbuffers];
	CALdevice devices[max_devices];
	CALcontext ctxs[max_devices];
	CALresource* resourceHandlers[max_devices][max_bbuffers];
	CALmodule modules[max_devices][kernel_count];
	CALmodule modulesConvert[max_devices];
	CALmodule fakeModule;
	CALname *progNames[max_devices][kernel_count];
	CALname progNamesConvert[max_devices][2 * dwBuffersA];
	eventCls events[max_devices][obuffercount];
	unsigned int device_nums[max_devices];

	// Kernel source strings; defined outside this header.
	static const char *ILKernel, *ILKernelALPHA1, *ILKernelLinpack, *ILFakeKernel, *ILConvertKernel;

	virtual int ValidateRuntime();
	virtual int CheckDevices();
	virtual int InitDevices();
	virtual int ReinitDevices();
	virtual int InitConstantData(double alpha);
	virtual int ExecuteKernels(caldgemm::DGEMMPrepareAndExecuteTask& Task, int blockm, int blockn);
	virtual int ExitRuntime();
	virtual int ExitDevices();
	virtual int WaitForEvent(int, int, int lock = 0);
	virtual int FetchResult(int device, int j, int m, int n, int mustlock = 0);
	virtual int CheckDMAQueue(int device, int forcej = -1);
	virtual int RunMergeBuffers(double* dst, int device, int j, int width, int height, int gpu_width, int gpu_height, int pitch);
};
#endif