1 #include <sys/wait.h>
  2 #include <chrono>
  3 #include <hip/hip_runtime.h>
  4 #include <hip/hiprtc.h>
  5 #include "hip_backend.h"
  6 
  7 #define CHECK_RET_CODE(call, ret_code)                                                             \
  8   {                                                                                                \
  9     if ((call) != ret_code) {                                                                      \
 10       std::cout << "Failed in call: " << #call << std::endl;                                       \
 11       std::abort();                                                                                \
 12     }                                                                                              \
 13   }
 14 #define HIP_CHECK(call) CHECK_RET_CODE(call, hipSuccess)
 15 #define HIPRTC_CHECK(call) CHECK_RET_CODE(call, HIPRTC_SUCCESS)
 16 
 17 uint64_t timeSinceEpochMillisec() {
 18     using namespace std::chrono;
 19     return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
 20 }
 21 
 22 HIPBackend::HIPProgram::HIPKernel::HIPBuffer::HIPBuffer(Backend::Program::Kernel *kernel, Arg_s *arg)
 23         : Buffer(kernel, arg), devicePtr() {
 24     /*
 25      *   (void *) arg->value.buffer.memorySegment,
 26      *   (size_t) arg->value.buffer.sizeInBytes);
 27      */
 28 #ifdef VERBOSE
 29     std::cout << "hipMalloc()" << std::endl;
 30 #endif
 31     HIP_CHECK(hipMalloc(&devicePtr, (size_t) arg->value.buffer.sizeInBytes));
 32 #ifdef VERBOSE
 33     std::cout << "devptr " << std::hex<<  (long)devicePtr <<std::dec <<std::endl;
 34 #endif
 35     arg->value.buffer.vendorPtr = static_cast<void *>(this);
 36 }
 37 
 38 HIPBackend::HIPProgram::HIPKernel::HIPBuffer::~HIPBuffer() {
 39 
 40 #ifdef VERBOSE
 41     std::cout << "hipFree()"
 42               << "devptr " << std::hex<<  (long)devicePtr <<std::dec
 43               << std::endl;
 44 #endif
 45     HIP_CHECK(hipFree(devicePtr));
 46     arg->value.buffer.vendorPtr = nullptr;
 47 }
 48 
 49 void HIPBackend::HIPProgram::HIPKernel::HIPBuffer::copyToDevice() {
 50     auto hipKernel = dynamic_cast<HIPKernel*>(kernel);
 51 #ifdef VERBOSE
 52     std::cout << "copyToDevice() 0x"   << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " "
 53               << "devptr " << std::hex<<  (long)devicePtr <<std::dec
 54               << std::endl;
 55 #endif
 56     char *ptr = (char*)arg->value.buffer.memorySegment;
 57 
 58     unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
 59     unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
 60 
 61     if (ifacefacade1 != 0x1face00000facadeL && ifacefacade1 != ifacefacade2) {
 62         std::cerr<<"End of buf marker before HtoD"<< std::hex << ifacefacade1 << ifacefacade2<< " buffer corrupt !" <<std::endl
 63                 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
 64         exit(-1);
 65     }
 66 
 67     HIP_CHECK(hipMemcpyHtoDAsync(devicePtr, arg->value.buffer.memorySegment, arg->value.buffer.sizeInBytes, hipKernel->hipStream));
 68 }
 69 
 70 void HIPBackend::HIPProgram::HIPKernel::HIPBuffer::copyFromDevice() {
 71     auto hipKernel = dynamic_cast<HIPKernel*>(kernel);
 72 #ifdef VERBOSE
 73     std::cout << "copyFromDevice() 0x" << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " "
 74               << "devptr " << std::hex<<  (long)devicePtr <<std::dec
 75               << std::endl;
 76 #endif
 77     char *ptr = (char*)arg->value.buffer.memorySegment;
 78 
 79     unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
 80     unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
 81 
 82     if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) {
 83         std::cerr<<"end of buf marker before  DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl
 84                   <<" " << __FILE__ << " line " << __LINE__ << std::endl;
 85         exit(-1);
 86     }
 87     HIP_CHECK(hipMemcpyDtoHAsync(arg->value.buffer.memorySegment, devicePtr, arg->value.buffer.sizeInBytes, hipKernel->hipStream));
 88 
 89     ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
 90     ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
 91 
 92     if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) {
 93         std::cerr<<"end of buf marker after  DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl
 94                   <<" " << __FILE__ << " line " << __LINE__ << std::endl;
 95         exit(-1);
 96     }
 97 }
 98 
 99 HIPBackend::HIPProgram::HIPKernel::HIPKernel(Backend::Program *program, char * name, hipFunction_t kernel)
100         : Backend::Program::Kernel(program, name), kernel(kernel),hipStream() {
101 }
102 
103 HIPBackend::HIPProgram::HIPKernel::~HIPKernel() = default;
104 
105 long HIPBackend::HIPProgram::HIPKernel::ndrange(void *argArray) {
106 #ifdef VERBOSE
107     std::cout << "ndrange(" << range << ") " << name << std::endl;
108 #endif
109 
110     hipStreamCreate(&hipStream);
111     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
112     void *argslist[argSled.argc()];
113     NDRange *ndrange = nullptr;
114 #ifdef VERBOSE
115     std::cerr << "there are " << argSled.argc() << "args " << std::endl;
116 #endif
117     for (int i = 0; i < argSled.argc(); i++) {
118         Arg_s *arg = argSled.arg(i);
119         switch (arg->variant) {
120             case '&': {
121                 if (arg->idx == 0){
122                     ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
123                 }
124                 auto hipBuffer = new HIPBuffer(this, arg);
125                 hipBuffer->copyToDevice();
126                 argslist[arg->idx] = static_cast<void *>(&hipBuffer->devicePtr);
127                 break;
128             }
129             case 'I':
130             case 'F':
131             case 'J':
132             case 'D':
133             case 'C':
134             case 'S': {
135                 argslist[arg->idx] = static_cast<void *>(&arg->value);
136                 break;
137             }
138             default: {
139                 std::cerr << " unhandled variant " << (char) arg->variant << std::endl;
140                 break;
141             }
142         }
143     }
144 
145     int range = ndrange->maxX;
146     int rangediv1024 = range / 1024;
147     int rangemod1024 = range % 1024;
148     if (rangemod1024 > 0) {
149         rangediv1024++;
150     }
151 
152 #ifdef VERBOSE
153     std::cout << "Running the kernel..." << std::endl;
154     std::cout << "   Requested range   = " << range << std::endl;
155     std::cout << "   Range mod 1024    = " << rangemod1024 << std::endl;
156     std::cout << "   Actual range 1024 = " << (rangediv1024 * 1024) << std::endl;
157 #endif
158 
159     HIP_CHECK(hipModuleLaunchKernel(kernel, rangediv1024, 1, 1, 1024, 1, 1, 0, hipStream, argslist, 0));
160 
161 #ifdef VERBOSE
162     std::cout << "Kernel complete..."<<hipGetErrorString(t)<<std::endl;
163 #endif
164 
165     for (int i = 0; i < argSled.argc(); i++) {
166         Arg_s *arg = argSled.arg(i);
167         if (arg->variant == '&') {
168             static_cast<HIPBuffer *>(arg->value.buffer.vendorPtr)->copyFromDevice();
169 
170         }
171     }
172 
173     for (int i = 0; i < argSled.argc(); i++) {
174         Arg_s *arg = argSled.arg(i);
175         if (arg->variant == '&') {
176             delete static_cast<HIPBuffer *>(arg->value.buffer.vendorPtr);
177             arg->value.buffer.vendorPtr = nullptr;
178         }
179     }
180     HIP_CHECK(hipStreamSynchronize(hipStream));
181     HIP_CHECK(hipStreamDestroy(hipStream));
182 
183     return (long) 0;
184 }
185 
186 
187 HIPBackend::HIPProgram::HIPProgram(Backend *backend, BuildInfo *buildInfo, hipModule_t module)
188         : Backend::Program(backend, buildInfo), module(module) {
189 }
190 
191 HIPBackend::HIPProgram::~HIPProgram() = default;
192 
193 long HIPBackend::HIPProgram::getKernel(int nameLen, char *name) {
194 
195     hipFunction_t kernel;
196     HIP_CHECK(hipModuleGetFunction(&kernel, module, name));
197     long kernelHandle =  reinterpret_cast<long>(new HIPKernel(this, name, kernel));
198 
199     return kernelHandle;
200 }
201 
202 bool HIPBackend::HIPProgram::programOK() {
203     return true;
204 }
205 
206 HIPBackend::HIPBackend(HIPBackend::HIPConfig *hipConfig, int
207 configSchemaLen, char *configSchema)
208         : Backend((Backend::Config*) hipConfig, configSchemaLen, configSchema), device(),context()  {
209 #ifdef VERBOSE
210     std::cout << "HIPBackend constructor " << ((hipConfig == nullptr) ? "hipConfig== null" : "got hipConfig")
211               << std::endl;
212 #endif
213     int deviceCount = 0;
214     hipError_t err = hipInit(0);
215     if (err == HIP_SUCCESS) {
216         hipGetDeviceCount(&deviceCount);
217         std::cout << "HIPBackend device count" << std::endl;
218         hipDeviceGet(&device, 0);
219         std::cout << "HIPBackend device ok" << std::endl;
220         hipCtxCreate(&context, 0, device);
221         std::cout << "HIPBackend context created ok" << std::endl;
222     } else {
223         std::cout << "HIPBackend failed, we seem to have the runtime library but no device, no context, nada "
224                   << std::endl;
225         exit(1);
226     }
227 }
228 
229 HIPBackend::HIPBackend() : HIPBackend(nullptr, 0, nullptr) {
230 
231 }
232 
233 HIPBackend::~HIPBackend() {
234 #ifdef VERBOSE
235     std::cout << "freeing context" << std::endl;
236 #endif
237     auto status = hipCtxDestroy(context);
238     if (HIP_SUCCESS != status) {
239         std::cerr << "hipCtxDestroy(() HIP error = " << status
240                   <<" " << hipGetErrorString(static_cast<hipError_t>(status))
241                   <<" " << __FILE__ << " line " << __LINE__ << std::endl;
242         exit(-1);
243     }
244 }
245 
246 int HIPBackend::getMaxComputeUnits() {
247     std::cout << "getMaxComputeUnits()" << std::endl;
248     int value = 1;
249     return value;
250 }
251 
252 void HIPBackend::info() {
253     char name[100];
254     hipDeviceGetName(name, sizeof(name), device);
255     std::cout << "> Using device 0: " << name << std::endl;
256 
257     // get compute capabilities and the devicename
258     int major = 0, minor = 0;
259     hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, device);
260     hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, device);
261     std::cout << "> HIP Device has major=" << major << " minor=" << minor << " compute capability" << std::endl;
262 
263     int warpSize;
264     hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device);
265     std::cout << "> HIP Device has wave front size " << warpSize << std::endl;
266 
267     int threadsPerBlock;
268     hipDeviceGetAttribute(&threadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, device);
269     std::cout << "> HIP Device has threadsPerBlock " << threadsPerBlock << std::endl;
270 
271     int cores;
272     hipDeviceGetAttribute(&cores, hipDeviceAttributeMultiprocessorCount, device);
273     std::cout << "> HIP Cores " << cores << std::endl;
274 
275     size_t totalGlobalMem;
276     hipDeviceTotalMem(&totalGlobalMem, device);
277     std::cout << "  Total amount of global memory:   " << (unsigned long long) totalGlobalMem << std::endl;
278     std::cout << "  64-bit Memory Address:           " <<
279               ((totalGlobalMem > (unsigned long long) 4 * 1024 * 1024 * 1024L) ? "YES" : "NO") << std::endl;
280 
281 }
282 
283 long HIPBackend::compileProgram(int len, char *source) {
284 
285 #ifdef VERBOSE
286     std::cout << "inside compileProgram" << std::endl;
287     std::cout << "hip " << source << std::endl;
288 #endif
289     hiprtcProgram prog;
290     auto status = hiprtcCreateProgram(&prog,
291                     source,
292                     "hip_kernel.hip",
293                     0,
294                     nullptr,
295                     nullptr);
296     if (status != HIPRTC_SUCCESS){
297         size_t logSize;
298         hiprtcGetProgramLogSize(prog, &logSize);
299 
300         std::cerr << "hiprtcCreateProgram(() HIP error = " << std::endl;
301         if (logSize) {
302             std::string log(logSize, '\0');
303             hiprtcGetProgramLog(prog, &log[0]);
304             std::cerr <<" " << log
305                       <<" " << __FILE__ << " line " << __LINE__ << std::endl;
306         }
307         exit(-1);
308     }
309 
310     status = hiprtcCompileProgram(prog, 0, nullptr);
311     if (status != HIPRTC_SUCCESS){
312         size_t logSize;
313         hiprtcGetProgramLogSize(prog, &logSize);
314 
315         std::cerr << "hiprtcCompileProgram(() HIP error = " << std::endl;
316         if (logSize) {
317             std::string log(logSize, '\0');
318             hiprtcGetProgramLog(prog, &log[0]);
319             std::cerr <<" " << log
320                       <<" " << __FILE__ << " line " << __LINE__ << std::endl;
321         }
322         exit(-1);
323     }
324 
325     size_t codeSize;
326     hiprtcGetCodeSize(prog, &codeSize);
327 #ifdef VERBOSE
328     std::cerr << "HIP compiled code size " << codeSize << std::endl;
329 #endif
330 
331     std::vector<char> kernel_binary(codeSize);
332     hiprtcGetCode(prog, kernel_binary.data());
333 
334     hipModule_t module;
335     hipModuleLoadData(&module, kernel_binary.data());
336     hiprtcDestroyProgram(&prog);
337 
338     return reinterpret_cast<long>(new HIPProgram(this, nullptr, module));
339 }
340 
341 long getBackend(void *config, int configSchemaLen, char *configSchema) {
342     long backendHandle = reinterpret_cast<long>(
343             new HIPBackend(static_cast<HIPBackend::HIPConfig *>(config), configSchemaLen,
344                             configSchema));
345 #ifdef VERBOSE
346     std::cout << "getBackend() -> backendHandle=" << std::hex << backendHandle << std::dec << std::endl;
347 #endif
348     return backendHandle;
349 }
350 
351 
352