diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 4e5390a968..7abfffcc22 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -30,6 +30,7 @@ THE SOFTWARE. #include #include +#include #include #include @@ -72,14 +73,7 @@ typedef struct ihipIpcEventHandle_t *hipIpcEventHandle_t; typedef struct ihipModule_t *hipModule_t; -struct ihipModuleSymbol_t{ - uint64_t _object; // The kernel object. - uint32_t _groupSegmentSize; - uint32_t _privateSegmentSize; - char _name[64]; // TODO - review for performance cost. Name is just used for debug. -}; - -typedef struct ihipModuleSymbol_t hipFunction_t; +typedef struct ihipModuleSymbol_t *hipFunction_t; typedef void* hipDeviceptr_t; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 5eb3a6cf09..63a6bffa94 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -35,6 +35,25 @@ THE SOFTWARE. //TODO Use Pool APIs from HCC to get memory regions. + +struct ihipModuleSymbol_t{ + uint64_t _object; // The kernel object. + uint32_t _groupSegmentSize; + uint32_t _privateSegmentSize; + char _name[64]; // TODO - review for performance cost. Name is just used for debug. +}; + +std::list hipFuncTracker; + +template <> +std::string ToString(hipFunction_t v) +{ + std::ostringstream ss; + ss << "0x" << std::hex << v->_object; + return ss.str(); +}; + + #define CHECK_HSA(hsaStatus, hipStatus) \ if (hsaStatus != HSA_STATUS_SUCCESS) {\ return hipStatus;\ @@ -217,6 +236,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char ret = hipErrorInvalidContext; }else{ + ihipModuleSymbol_t *sym = new ihipModuleSymbol_t; int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; @@ -230,20 +250,22 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char status = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &func->_object); + &sym->_object); CHECK_HSA(status, hipErrorNotFound); status = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &func->_groupSegmentSize); + &sym->_groupSegmentSize); CHECK_HSA(status, hipErrorNotFound); status = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &func->_privateSegmentSize); + &sym->_privateSegmentSize); CHECK_HSA(status, hipErrorNotFound); - strncpy(func->_name, name, sizeof(func->_name)); + strncpy(sym->_name, name, sizeof(sym->_name)); + *func = sym; + hipFuncTracker.push_back(*func); } return ihipLogStatus(ret); } @@ -297,9 +319,9 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, /* Kernel argument preparation. */ - grid_launch_parm lp; + grid_launch_parm lp; lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel. - hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f._name); + hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f->_name); hsa_kernel_dispatch_packet_t aql; @@ -315,9 +337,9 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, aql.grid_size_x = blockDimX * gridDimX; aql.grid_size_y = blockDimY * gridDimY; aql.grid_size_z = blockDimZ * gridDimZ; - aql.group_segment_size = f._groupSegmentSize + sharedMemBytes; - aql.private_segment_size = f._privateSegmentSize; - aql.kernel_object = f._object; + aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes; + aql.private_segment_size = f->_privateSegmentSize; + aql.kernel_object = f->_object; aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; aql.header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER); // TODO - honor queue setting for execute_in_order @@ -333,7 +355,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize, nullptr/*completion_future*/); - ihipPostLaunchKernel(f._name, hStream, lp); + ihipPostLaunchKernel(f->_name, hStream, lp); } return ihipLogStatus(ret); @@ -355,7 +377,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipFunction_t func; ihipModuleGetSymbol(&func, hmod, name); *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); - *dptr = reinterpret_cast(func._object); + *dptr = reinterpret_cast(func->_object); return ihipLogStatus(ret); } } diff --git a/src/trace_helper.h b/src/trace_helper.h index f58f81fbff..3bf2857c3a 100644 --- a/src/trace_helper.h +++ b/src/trace_helper.h @@ -72,17 +72,6 @@ inline std::string ToString(hipEvent_t v) return ss.str(); }; -// hipEvent_t specialization. TODO - maybe add an event ID for debug? -template <> -inline std::string ToString(hipFunction_t v) -{ - std::ostringstream ss; - ss << "0x" << std::hex << v._object; - return ss.str(); -}; - - - // hipStream_t template <> inline std::string ToString(hipStream_t v) diff --git a/tests/src/runtimeApi/module/hipModule.cpp b/tests/src/runtimeApi/module/hipModule.cpp index d9193cd87f..d7552ee1e6 100644 --- a/tests/src/runtimeApi/module/hipModule.cpp +++ b/tests/src/runtimeApi/module/hipModule.cpp @@ -22,13 +22,15 @@ THE SOFTWARE. #include #include #include +#include +#include #include "test_common.h" #define LEN 64 #define SIZE LEN<<2 -#define fileName "vcpy_isa.co" +#define fileName "vcpy_kernel.code" #define kernel_name "hello_world" __global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){ @@ -59,11 +61,11 @@ int main(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); void *args[2] = {&Ad, &Bd}; + std::cout<argBuffer(2); - memcpy(&argBuffer[0], &Ad, sizeof(void*)); - memcpy(&argBuffer[1], &Bd, sizeof(void*)); + std::vectorargBuffer(5); + memcpy(&argBuffer[3], &Ad, sizeof(void*)); + memcpy(&argBuffer[4], &Bd, sizeof(void*)); size_t size = argBuffer.size()*sizeof(void*); @@ -73,7 +75,7 @@ int main(){ HIP_LAUNCH_PARAM_END }; - hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config); + hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config); HIPCHECK(hipStreamDestroy(stream)); @@ -82,7 +84,15 @@ int main(){ for(uint32_t i=0;i vec(1024*1024*64); + for(unsigned i=0;i<1024*1024*64;i++) { + hipFunction_t func; + hipModuleGetFunction(&func, Module, kernel_name); + vec[i] = func; + } + std::cout<<"Starting sleep"<