1 #include <sys/wait.h> 2 #include <chrono> 3 #include <hip/hip_runtime.h> 4 #include <hip/hiprtc.h> 5 #include "hip_backend.h" 6 7 #define CHECK_RET_CODE(call, ret_code) \ 8 { \ 9 if ((call) != ret_code) { \ 10 std::cout << "Failed in call: " << #call << std::endl; \ 11 std::abort(); \ 12 } \ 13 } 14 #define HIP_CHECK(call) CHECK_RET_CODE(call, hipSuccess) 15 #define HIPRTC_CHECK(call) CHECK_RET_CODE(call, HIPRTC_SUCCESS) 16 17 uint64_t timeSinceEpochMillisec() { 18 using namespace std::chrono; 19 return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); 20 } 21 22 HIPBackend::HIPProgram::HIPKernel::HIPBuffer::HIPBuffer(Backend::Program::Kernel *kernel, Arg_s *arg) 23 : Buffer(kernel, arg), devicePtr() { 24 /* 25 * (void *) arg->value.buffer.memorySegment, 26 * (size_t) arg->value.buffer.sizeInBytes); 27 */ 28 #ifdef VERBOSE 29 std::cout << "hipMalloc()" << std::endl; 30 #endif 31 HIP_CHECK(hipMalloc(&devicePtr, (size_t) arg->value.buffer.sizeInBytes)); 32 #ifdef VERBOSE 33 std::cout << "devptr " << std::hex<< (long)devicePtr <<std::dec <<std::endl; 34 #endif 35 arg->value.buffer.vendorPtr = static_cast<void *>(this); 36 } 37 38 HIPBackend::HIPProgram::HIPKernel::HIPBuffer::~HIPBuffer() { 39 40 #ifdef VERBOSE 41 std::cout << "hipFree()" 42 << "devptr " << std::hex<< (long)devicePtr <<std::dec 43 << std::endl; 44 #endif 45 HIP_CHECK(hipFree(devicePtr)); 46 arg->value.buffer.vendorPtr = nullptr; 47 } 48 49 void HIPBackend::HIPProgram::HIPKernel::HIPBuffer::copyToDevice() { 50 auto hipKernel = dynamic_cast<HIPKernel*>(kernel); 51 #ifdef VERBOSE 52 std::cout << "copyToDevice() 0x" << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " " 53 << "devptr " << std::hex<< (long)devicePtr <<std::dec 54 << std::endl; 55 #endif 56 char *ptr = (char*)arg->value.buffer.memorySegment; 57 58 unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16); 59 unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8); 60 61 if (ifacefacade1 != 0x1face00000facadeL && ifacefacade1 != ifacefacade2) { 62 std::cerr<<"End of buf marker before HtoD"<< std::hex << ifacefacade1 << ifacefacade2<< " buffer corrupt !" <<std::endl 63 <<" " << __FILE__ << " line " << __LINE__ << std::endl; 64 exit(-1); 65 } 66 67 HIP_CHECK(hipMemcpyHtoDAsync(devicePtr, arg->value.buffer.memorySegment, arg->value.buffer.sizeInBytes, hipKernel->hipStream)); 68 } 69 70 void HIPBackend::HIPProgram::HIPKernel::HIPBuffer::copyFromDevice() { 71 auto hipKernel = dynamic_cast<HIPKernel*>(kernel); 72 #ifdef VERBOSE 73 std::cout << "copyFromDevice() 0x" << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " " 74 << "devptr " << std::hex<< (long)devicePtr <<std::dec 75 << std::endl; 76 #endif 77 char *ptr = (char*)arg->value.buffer.memorySegment; 78 79 unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16); 80 unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8); 81 82 if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) { 83 std::cerr<<"end of buf marker before DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl 84 <<" " << __FILE__ << " line " << __LINE__ << std::endl; 85 exit(-1); 86 } 87 HIP_CHECK(hipMemcpyDtoHAsync(arg->value.buffer.memorySegment, devicePtr, arg->value.buffer.sizeInBytes, hipKernel->hipStream)); 88 89 ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16); 90 ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8); 91 92 if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) { 93 std::cerr<<"end of buf marker after DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl 94 <<" " << __FILE__ << " line " << __LINE__ << std::endl; 95 exit(-1); 96 } 97 } 98 99 HIPBackend::HIPProgram::HIPKernel::HIPKernel(Backend::Program *program, char * name, hipFunction_t kernel) 100 : Backend::Program::Kernel(program, name), kernel(kernel),hipStream() { 101 } 102 103 HIPBackend::HIPProgram::HIPKernel::~HIPKernel() = default; 104 105 long HIPBackend::HIPProgram::HIPKernel::ndrange(void *argArray) { 106 #ifdef VERBOSE 107 std::cout << "ndrange(" << range << ") " << name << std::endl; 108 #endif 109 110 hipStreamCreate(&hipStream); 111 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 112 void *argslist[argSled.argc()]; 113 NDRange *ndrange = nullptr; 114 #ifdef VERBOSE 115 std::cerr << "there are " << argSled.argc() << "args " << std::endl; 116 #endif 117 for (int i = 0; i < argSled.argc(); i++) { 118 Arg_s *arg = argSled.arg(i); 119 switch (arg->variant) { 120 case '&': { 121 if (arg->idx == 0){ 122 ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment); 123 } 124 auto hipBuffer = new HIPBuffer(this, arg); 125 hipBuffer->copyToDevice(); 126 argslist[arg->idx] = static_cast<void *>(&hipBuffer->devicePtr); 127 break; 128 } 129 case 'I': 130 case 'F': 131 case 'J': 132 case 'D': 133 case 'C': 134 case 'S': { 135 argslist[arg->idx] = static_cast<void *>(&arg->value); 136 break; 137 } 138 default: { 139 std::cerr << " unhandled variant " << (char) arg->variant << std::endl; 140 break; 141 } 142 } 143 } 144 145 int range = ndrange->maxX; 146 int rangediv1024 = range / 1024; 147 int rangemod1024 = range % 1024; 148 if (rangemod1024 > 0) { 149 rangediv1024++; 150 } 151 152 #ifdef VERBOSE 153 std::cout << "Running the kernel..." << std::endl; 154 std::cout << " Requested range = " << range << std::endl; 155 std::cout << " Range mod 1024 = " << rangemod1024 << std::endl; 156 std::cout << " Actual range 1024 = " << (rangediv1024 * 1024) << std::endl; 157 #endif 158 159 HIP_CHECK(hipModuleLaunchKernel(kernel, rangediv1024, 1, 1, 1024, 1, 1, 0, hipStream, argslist, 0)); 160 161 #ifdef VERBOSE 162 std::cout << "Kernel complete..."<<hipGetErrorString(t)<<std::endl; 163 #endif 164 165 for (int i = 0; i < argSled.argc(); i++) { 166 Arg_s *arg = argSled.arg(i); 167 if (arg->variant == '&') { 168 static_cast<HIPBuffer *>(arg->value.buffer.vendorPtr)->copyFromDevice(); 169 170 } 171 } 172 173 for (int i = 0; i < argSled.argc(); i++) { 174 Arg_s *arg = argSled.arg(i); 175 if (arg->variant == '&') { 176 delete static_cast<HIPBuffer *>(arg->value.buffer.vendorPtr); 177 arg->value.buffer.vendorPtr = nullptr; 178 } 179 } 180 HIP_CHECK(hipStreamSynchronize(hipStream)); 181 HIP_CHECK(hipStreamDestroy(hipStream)); 182 183 return (long) 0; 184 } 185 186 187 HIPBackend::HIPProgram::HIPProgram(Backend *backend, BuildInfo *buildInfo, hipModule_t module) 188 : Backend::Program(backend, buildInfo), module(module) { 189 } 190 191 HIPBackend::HIPProgram::~HIPProgram() = default; 192 193 long HIPBackend::HIPProgram::getKernel(int nameLen, char *name) { 194 195 hipFunction_t kernel; 196 HIP_CHECK(hipModuleGetFunction(&kernel, module, name)); 197 long kernelHandle = reinterpret_cast<long>(new HIPKernel(this, name, kernel)); 198 199 return kernelHandle; 200 } 201 202 bool HIPBackend::HIPProgram::programOK() { 203 return true; 204 } 205 206 HIPBackend::HIPBackend(HIPBackend::HIPConfig *hipConfig, int 207 configSchemaLen, char *configSchema) 208 : Backend((Backend::Config*) hipConfig, configSchemaLen, configSchema), device(),context() { 209 #ifdef VERBOSE 210 std::cout << "HIPBackend constructor " << ((hipConfig == nullptr) ? "hipConfig== null" : "got hipConfig") 211 << std::endl; 212 #endif 213 int deviceCount = 0; 214 hipError_t err = hipInit(0); 215 if (err == HIP_SUCCESS) { 216 hipGetDeviceCount(&deviceCount); 217 std::cout << "HIPBackend device count" << std::endl; 218 hipDeviceGet(&device, 0); 219 std::cout << "HIPBackend device ok" << std::endl; 220 hipCtxCreate(&context, 0, device); 221 std::cout << "HIPBackend context created ok" << std::endl; 222 } else { 223 std::cout << "HIPBackend failed, we seem to have the runtime library but no device, no context, nada " 224 << std::endl; 225 exit(1); 226 } 227 } 228 229 HIPBackend::HIPBackend() : HIPBackend(nullptr, 0, nullptr) { 230 231 } 232 233 HIPBackend::~HIPBackend() { 234 #ifdef VERBOSE 235 std::cout << "freeing context" << std::endl; 236 #endif 237 auto status = hipCtxDestroy(context); 238 if (HIP_SUCCESS != status) { 239 std::cerr << "hipCtxDestroy(() HIP error = " << status 240 <<" " << hipGetErrorString(static_cast<hipError_t>(status)) 241 <<" " << __FILE__ << " line " << __LINE__ << std::endl; 242 exit(-1); 243 } 244 } 245 246 int HIPBackend::getMaxComputeUnits() { 247 std::cout << "getMaxComputeUnits()" << std::endl; 248 int value = 1; 249 return value; 250 } 251 252 void HIPBackend::info() { 253 char name[100]; 254 hipDeviceGetName(name, sizeof(name), device); 255 std::cout << "> Using device 0: " << name << std::endl; 256 257 // get compute capabilities and the devicename 258 int major = 0, minor = 0; 259 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, device); 260 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, device); 261 std::cout << "> HIP Device has major=" << major << " minor=" << minor << " compute capability" << std::endl; 262 263 int warpSize; 264 hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device); 265 std::cout << "> HIP Device has wave front size " << warpSize << std::endl; 266 267 int threadsPerBlock; 268 hipDeviceGetAttribute(&threadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, device); 269 std::cout << "> HIP Device has threadsPerBlock " << threadsPerBlock << std::endl; 270 271 int cores; 272 hipDeviceGetAttribute(&cores, hipDeviceAttributeMultiprocessorCount, device); 273 std::cout << "> HIP Cores " << cores << std::endl; 274 275 size_t totalGlobalMem; 276 hipDeviceTotalMem(&totalGlobalMem, device); 277 std::cout << " Total amount of global memory: " << (unsigned long long) totalGlobalMem << std::endl; 278 std::cout << " 64-bit Memory Address: " << 279 ((totalGlobalMem > (unsigned long long) 4 * 1024 * 1024 * 1024L) ? "YES" : "NO") << std::endl; 280 281 } 282 283 long HIPBackend::compileProgram(int len, char *source) { 284 285 #ifdef VERBOSE 286 std::cout << "inside compileProgram" << std::endl; 287 std::cout << "hip " << source << std::endl; 288 #endif 289 hiprtcProgram prog; 290 auto status = hiprtcCreateProgram(&prog, 291 source, 292 "hip_kernel.hip", 293 0, 294 nullptr, 295 nullptr); 296 if (status != HIPRTC_SUCCESS){ 297 size_t logSize; 298 hiprtcGetProgramLogSize(prog, &logSize); 299 300 std::cerr << "hiprtcCreateProgram(() HIP error = " << std::endl; 301 if (logSize) { 302 std::string log(logSize, '\0'); 303 hiprtcGetProgramLog(prog, &log[0]); 304 std::cerr <<" " << log 305 <<" " << __FILE__ << " line " << __LINE__ << std::endl; 306 } 307 exit(-1); 308 } 309 310 status = hiprtcCompileProgram(prog, 0, nullptr); 311 if (status != HIPRTC_SUCCESS){ 312 size_t logSize; 313 hiprtcGetProgramLogSize(prog, &logSize); 314 315 std::cerr << "hiprtcCompileProgram(() HIP error = " << std::endl; 316 if (logSize) { 317 std::string log(logSize, '\0'); 318 hiprtcGetProgramLog(prog, &log[0]); 319 std::cerr <<" " << log 320 <<" " << __FILE__ << " line " << __LINE__ << std::endl; 321 } 322 exit(-1); 323 } 324 325 size_t codeSize; 326 hiprtcGetCodeSize(prog, &codeSize); 327 #ifdef VERBOSE 328 std::cerr << "HIP compiled code size " << codeSize << std::endl; 329 #endif 330 331 std::vector<char> kernel_binary(codeSize); 332 hiprtcGetCode(prog, kernel_binary.data()); 333 334 hipModule_t module; 335 hipModuleLoadData(&module, kernel_binary.data()); 336 hiprtcDestroyProgram(&prog); 337 338 return reinterpret_cast<long>(new HIPProgram(this, nullptr, module)); 339 } 340 341 long getBackend(void *config, int configSchemaLen, char *configSchema) { 342 long backendHandle = reinterpret_cast<long>( 343 new HIPBackend(static_cast<HIPBackend::HIPConfig *>(config), configSchemaLen, 344 configSchema)); 345 #ifdef VERBOSE 346 std::cout << "getBackend() -> backendHandle=" << std::hex << backendHandle << std::dec << std::endl; 347 #endif 348 return backendHandle; 349 } 350 351 352