diff --git a/hipamd/include/hcc_detail/hip_runtime_api.h b/hipamd/include/hcc_detail/hip_runtime_api.h index 0316f73b57..9917619039 100644 --- a/hipamd/include/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hcc_detail/hip_runtime_api.h @@ -52,9 +52,15 @@ typedef struct ihipDevice_t *hipDevice_t; typedef struct ihipStream_t *hipStream_t; -typedef uint64_t hipFunction; +typedef struct { +uint64_t kernel; +uint64_t symbol; +}hipFunction; -typedef uint64_t hipModule; +typedef struct{ +uint64_t executable; +uint64_t object; +} hipModule; typedef struct hipEvent_t { struct ihipEvent_t *_handle; diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 1421eb0329..48dc6574a0 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -20,6 +20,7 @@ THE SOFTWARE. #include "hip_runtime.h" #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" +#include "hsa/amd_hsa_kernel_code.h" #include "hcc_detail/hip_hcc.h" #include "hcc_detail/trace_helper.h" #include @@ -27,66 +28,71 @@ THE SOFTWARE. //TODO Use Pool APIs from HCC to get memory regions. namespace hipdrv{ -hsa_status_t findSystemRegions(hsa_region_t region, void *data){ - hsa_region_segment_t segment_id; - hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); - if(segment_id != HSA_REGION_SEGMENT_GLOBAL){ - return HSA_STATUS_SUCCESS; - } + hsa_status_t findSystemRegions(hsa_region_t region, void *data){ + hsa_region_segment_t segment_id; + hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); - hsa_region_global_flag_t flags; - hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + if(segment_id != HSA_REGION_SEGMENT_GLOBAL){ + return HSA_STATUS_SUCCESS; + } - hsa_region_t *reg = (hsa_region_t*)data; + hsa_region_global_flag_t flags; + hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); - if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){ - *reg = region; - } + hsa_region_t *reg = (hsa_region_t*)data; - return HSA_STATUS_SUCCESS; -} + if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){ + *reg = region; + } -hsa_status_t findKernArgRegions(hsa_region_t region, void *data){ - hsa_region_segment_t segment_id; - hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); + return HSA_STATUS_SUCCESS; + } - if(segment_id != HSA_REGION_SEGMENT_GLOBAL){ - return HSA_STATUS_SUCCESS; - } + hsa_status_t findKernArgRegions(hsa_region_t region, void *data){ + hsa_region_segment_t segment_id; + hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); - hsa_region_global_flag_t flags; - hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + if(segment_id != HSA_REGION_SEGMENT_GLOBAL){ + return HSA_STATUS_SUCCESS; + } - hsa_region_t *reg = (hsa_region_t*)data; + hsa_region_global_flag_t flags; + hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); - if(flags & HSA_REGION_GLOBAL_FLAG_KERNARG){ - *reg = region; - } + hsa_region_t *reg = (hsa_region_t*)data; - return HSA_STATUS_SUCCESS; -} + if(flags & HSA_REGION_GLOBAL_FLAG_KERNARG){ + *reg = region; + } + return HSA_STATUS_SUCCESS; + } -} +} // End namespace hipdrv hipError_t hipModuleLoad(hipModule *module, const char *fname){ HIP_INIT_API(fname); hipError_t ret = hipSuccess; + if(module == NULL){ - ret = hipErrorInvalidValue; + return hipErrorInvalidValue; } + auto ctx = ihipGetTlsDefaultCtx(); if(ctx == nullptr){ ret = hipErrorInvalidContext; + }else{ int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); std::ifstream in(fname, std::ios::binary | std::ios::ate); + if(!in){ return hipErrorFileNotFound; + }else{ size_t size = std::string::size_type(in.tellg()); void *p = NULL; @@ -97,22 +103,35 @@ hipError_t hipModuleLoad(hipModule *module, const char *fname){ if(status != HSA_STATUS_SUCCESS){ return hipErrorOutOfMemory; } + char *ptr = (char*)p; if(!ptr){ return hipErrorOutOfMemory; - std::cout<<"Error: failed to allocate memory for code object"<(in), std::istreambuf_iterator(), ptr); status = hsa_code_object_deserialize(ptr, size, NULL, &obj); + if(status != HSA_STATUS_SUCCESS){ return hipErrorSharedObjectInitFailed; } - *module = obj.handle; + + module->object = obj.handle; + + hsa_executable_t executable; + status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable); + if(status != HSA_STATUS_SUCCESS){ + return hipErrorNotInitialized; + } + + module->executable = executable.handle; + } } + return ret; } @@ -120,41 +139,52 @@ hipError_t hipModuleGetFunction(hipFunction *func, hipModule hmod, const char *n HIP_INIT_API(name); auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; - if(name == nullptr || hmod == 0){ + + if(name == nullptr){ return hipErrorInvalidValue; } + if(ctx == nullptr){ ret = hipErrorInvalidContext; + }else{ int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; hsa_status_t status; - hsa_executable_symbol_t kernel_symbol; hsa_executable_t executable; - status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable); + hsa_executable_symbol_t kernel_symbol; + executable.handle = hmod.executable; if(status != HSA_STATUS_SUCCESS){ return hipErrorNotInitialized; } + hsa_code_object_t obj; - obj.handle = hmod; + obj.handle = hmod.object; status = hsa_executable_load_code_object(executable, gpuAgent, obj, NULL); if(status != HSA_STATUS_SUCCESS){ return hipErrorNotInitialized; } + status = hsa_executable_freeze(executable, NULL); status = hsa_executable_get_symbol(executable, NULL, name, gpuAgent, 0, &kernel_symbol); if(status != HSA_STATUS_SUCCESS){ return hipErrorNotFound; } + status = hsa_executable_symbol_get_info(kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - func); + &func->kernel); + if(status != HSA_STATUS_SUCCESS){ return hipErrorNotFound; } + + func->symbol = kernel_symbol.handle; + } + return ret; } @@ -163,11 +193,13 @@ hipError_t hipLaunchModuleKernel(hipFunction f, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra){ - HIP_INIT_API(f); + HIP_INIT_API(f.kernel); auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; + if(ctx == nullptr){ ret = hipErrorInvalidDevice; + }else{ int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); @@ -194,12 +226,15 @@ Kernel argument preparation. hsa_status_t status = hsa_agent_iterate_regions(gpuAgent, hipdrv::findKernArgRegions, &kernArg); void *kern; status = hsa_memory_allocate(kernArg, kernSize, &kern); + if(status != HSA_STATUS_SUCCESS){ return hipErrorLaunchOutOfResources; } + memcpy(kern, config[1], kernSize); + /* Pre kernel launch @@ -215,6 +250,10 @@ Pre kernel launch hsa_signal_t signal; status = hsa_signal_create(1, 0, NULL, &signal); +// hsa_memory_pool_t pool = av->get_hsa_kernarg_region(); +// status = hsa_amd_memory_pool_allocate(pool, kernSize, 0, &kernArg); +// assert(status == HSA_STATUS_SUCCESS); + /* Creating the packets */ @@ -230,17 +269,15 @@ Creating the packets dispatch_packet->grid_size_x = blockDimX * gridDimX; dispatch_packet->grid_size_y = blockDimY * gridDimY; dispatch_packet->grid_size_z = blockDimZ * gridDimZ; - dispatch_packet->group_segment_size = 0; dispatch_packet->private_segment_size = sharedMemBytes; dispatch_packet->kernarg_address = kern; - dispatch_packet->kernel_object = f; + dispatch_packet->kernel_object = f.kernel; uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER) | (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); - uint16_t setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; uint32_t header32 = header | (setup << 16); @@ -250,5 +287,6 @@ Creating the packets hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index); hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); } + return ret; }