From 6fd3daed30a304f46f0a0822e968a0d7d52e36fe Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Thu, 9 Feb 2017 17:22:55 -0600 Subject: [PATCH] fixed hipFunction memory management Change-Id: I7ebb323419bcd220ebd6466a8eb38e7bfdb1520a --- include/hip/hcc_detail/hip_runtime_api.h | 10 +---- src/hip_module.cpp | 44 ++++++++++++++----- src/trace_helper.h | 11 ----- tests/src/runtimeApi/module/hipModule.cpp | 24 +++++++--- tests/src/runtimeApi/module/vcpy_kernel.code | Bin 0 -> 18811 bytes tests/src/runtimeApi/module/vcpy_kernel.cpp | 30 +++++++++++++ 6 files changed, 82 insertions(+), 37 deletions(-) create mode 100755 tests/src/runtimeApi/module/vcpy_kernel.code create mode 100644 tests/src/runtimeApi/module/vcpy_kernel.cpp diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 4e5390a968..7abfffcc22 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -30,6 +30,7 @@ THE SOFTWARE. #include #include +#include #include #include @@ -72,14 +73,7 @@ typedef struct ihipIpcEventHandle_t *hipIpcEventHandle_t; typedef struct ihipModule_t *hipModule_t; -struct ihipModuleSymbol_t{ - uint64_t _object; // The kernel object. - uint32_t _groupSegmentSize; - uint32_t _privateSegmentSize; - char _name[64]; // TODO - review for performance cost. Name is just used for debug. -}; - -typedef struct ihipModuleSymbol_t hipFunction_t; +typedef struct ihipModuleSymbol_t *hipFunction_t; typedef void* hipDeviceptr_t; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 5eb3a6cf09..63a6bffa94 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -35,6 +35,25 @@ THE SOFTWARE. //TODO Use Pool APIs from HCC to get memory regions. + +struct ihipModuleSymbol_t{ + uint64_t _object; // The kernel object. + uint32_t _groupSegmentSize; + uint32_t _privateSegmentSize; + char _name[64]; // TODO - review for performance cost. Name is just used for debug. +}; + +std::list hipFuncTracker; + +template <> +std::string ToString(hipFunction_t v) +{ + std::ostringstream ss; + ss << "0x" << std::hex << v->_object; + return ss.str(); +}; + + #define CHECK_HSA(hsaStatus, hipStatus) \ if (hsaStatus != HSA_STATUS_SUCCESS) {\ return hipStatus;\ @@ -217,6 +236,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char ret = hipErrorInvalidContext; }else{ + ihipModuleSymbol_t *sym = new ihipModuleSymbol_t; int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; @@ -230,20 +250,22 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char status = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &func->_object); + &sym->_object); CHECK_HSA(status, hipErrorNotFound); status = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &func->_groupSegmentSize); + &sym->_groupSegmentSize); CHECK_HSA(status, hipErrorNotFound); status = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &func->_privateSegmentSize); + &sym->_privateSegmentSize); CHECK_HSA(status, hipErrorNotFound); - strncpy(func->_name, name, sizeof(func->_name)); + strncpy(sym->_name, name, sizeof(sym->_name)); + *func = sym; + hipFuncTracker.push_back(*func); } return ihipLogStatus(ret); } @@ -297,9 +319,9 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, /* Kernel argument preparation. */ - grid_launch_parm lp; + grid_launch_parm lp; lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel. - hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f._name); + hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f->_name); hsa_kernel_dispatch_packet_t aql; @@ -315,9 +337,9 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, aql.grid_size_x = blockDimX * gridDimX; aql.grid_size_y = blockDimY * gridDimY; aql.grid_size_z = blockDimZ * gridDimZ; - aql.group_segment_size = f._groupSegmentSize + sharedMemBytes; - aql.private_segment_size = f._privateSegmentSize; - aql.kernel_object = f._object; + aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes; + aql.private_segment_size = f->_privateSegmentSize; + aql.kernel_object = f->_object; aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; aql.header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER); // TODO - honor queue setting for execute_in_order @@ -333,7 +355,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize, nullptr/*completion_future*/); - ihipPostLaunchKernel(f._name, hStream, lp); + ihipPostLaunchKernel(f->_name, hStream, lp); } return ihipLogStatus(ret); @@ -355,7 +377,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipFunction_t func; ihipModuleGetSymbol(&func, hmod, name); *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); - *dptr = reinterpret_cast(func._object); + *dptr = reinterpret_cast(func->_object); return ihipLogStatus(ret); } } diff --git a/src/trace_helper.h b/src/trace_helper.h index f58f81fbff..3bf2857c3a 100644 --- a/src/trace_helper.h +++ b/src/trace_helper.h @@ -72,17 +72,6 @@ inline std::string ToString(hipEvent_t v) return ss.str(); }; -// hipEvent_t specialization. TODO - maybe add an event ID for debug? -template <> -inline std::string ToString(hipFunction_t v) -{ - std::ostringstream ss; - ss << "0x" << std::hex << v._object; - return ss.str(); -}; - - - // hipStream_t template <> inline std::string ToString(hipStream_t v) diff --git a/tests/src/runtimeApi/module/hipModule.cpp b/tests/src/runtimeApi/module/hipModule.cpp index d9193cd87f..d7552ee1e6 100644 --- a/tests/src/runtimeApi/module/hipModule.cpp +++ b/tests/src/runtimeApi/module/hipModule.cpp @@ -22,13 +22,15 @@ THE SOFTWARE. #include #include #include +#include +#include #include "test_common.h" #define LEN 64 #define SIZE LEN<<2 -#define fileName "vcpy_isa.co" +#define fileName "vcpy_kernel.code" #define kernel_name "hello_world" __global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){ @@ -59,11 +61,11 @@ int main(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); void *args[2] = {&Ad, &Bd}; + std::cout<argBuffer(2); - memcpy(&argBuffer[0], &Ad, sizeof(void*)); - memcpy(&argBuffer[1], &Bd, sizeof(void*)); + std::vectorargBuffer(5); + memcpy(&argBuffer[3], &Ad, sizeof(void*)); + memcpy(&argBuffer[4], &Bd, sizeof(void*)); size_t size = argBuffer.size()*sizeof(void*); @@ -73,7 +75,7 @@ int main(){ HIP_LAUNCH_PARAM_END }; - hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config); + hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config); HIPCHECK(hipStreamDestroy(stream)); @@ -82,7 +84,15 @@ int main(){ for(uint32_t i=0;i vec(1024*1024*64); + for(unsigned i=0;i<1024*1024*64;i++) { + hipFunction_t func; + hipModuleGetFunction(&func, Module, kernel_name); + vec[i] = func; + } + std::cout<<"Starting sleep"<!|TyJEXSpxJp$K3RyD-7nQWZaI>aj^(hg-HsJefl%aEHtUM6q;d-2|D+t< z-^}lh$?vZyD)z})0h1JFVvj&M-kwOmR)KBt_OQbkeDYJFlO)@t#6DyDqm$#j7{(Fb z0srU^h{xGRo)6S62Kd40$K1S!p#F^_@4&!7?`9GeMzZ{G=gyt#p|LS_=CP@xhbIrH2WDoD%siP-tM_$O!%-`> zx&<02^;(n1%qN?B`qe$@Ro9SNbq(27*D$o|8gi?yA;0Px3ahI@eM&vun@lEq(brZA z=%d?lCJ^($v=~4p;9C*j<(5GQNZGtc<4#&&l{hgx}@j{h(u*CH!tz9~bxu!c`Zi{O1Y3SH`~}e5;G^67^gl z{0o$x#5{8tFy?&4Wd&o)r;9|O)SxA7QbHwcN{!FfwG!_gbzDjpkL zzX`5ec#JW0rd!~jrbPiuLNv|iH%thchxxMy($MX2j=UNEdC-qfrPBJri6i?*Cv<&E zPi1rZ@l>j6)$1+&`IcQbQ@J_2X6kigzFDs7ZNqL%S02@Ly#l(^vQxvlUOsu!XtZ_5 zhHDFFxC1pFrXQ|MSD3C>Yi+$+)@!z2O_!}&U58oJ54So2sCBZP4uC85R?7~+>#ez< z>~^^wklF4$XFCCSr`8NYgEKoeSYdf~!!!drQfU|gXk)$}fOh6f%TdglUDg_))!H(2 zJ~%VNLAg7I$*Um0h-74e$O4fCA`3(o_zzlu);v9YH3gsc2w0)7J?txX9?~}R71dHE zv3%yhCifG1H!&WM2Q8(#>DM!&u*4!RNk3x>tbE`7T9wo8R zvoTR%Z2BUsAL9yOF@-Jk^dt&xo~M0L!NnkZ8S<~MKy+U#|7->RaV<_E;ag?@xA|8V zvETcdvWOIN&XHS1S%41$@R4Huafz50`Y#M7`Y`)I zH2$wL{@H60|MK7mEqMInyEh7n?i(~0Sx?V@SpO~JT;^Q|C`x>Rv5P;mST8IQ!u34( zcg8<{YqS*B6aQY`SuDQqvJ&6NwrzmdzTx7X*jX$77}rq_+M%K$Jify{9jfQ zQvQp;e=(Z>iF=g6{Cj8o_4xjW zp8M==W#RH$@a|{Z26$%~&S$=t&9$2UL&N_^!2gF4|6U)F;~^jauK@q&BK~F0y;{im ze-rqBBjVr7xEv2T|1SgoXCwY)&b?a5`9B5xzZmiFWn7Mjoc}Yx|3bvS%(+(!IsYF3 z|L;fqdl{GGA?N=z@c&Z8zs$K;3pxMq0RQLzKm2={jpC67A`3(oh%69UAhN*!!vgff z{LjS~IJCb~Y}4%ilK&8H!h}GZa<_<$l=QvNCsdJ}ZRv`exyyBNxGr$l%~y z&8g0plI2!oaKPYL2xU?Rm!<D;`Q&W+`P^rcuhK$^~;clh0MnHa=pk=Y_%mj{1dIHrM@ z*L+#2{C&Oxq>{va)ZM;&>Uee3Ri5@>7^QLqqfWLb4CSS9SNlmMqL1JlL-$Yo zd`7r%OltRA!hPo>{6M(xe1zW!_nnV$g>c{b2wUO)xE*}wBj7m(99N+r1UesKAK~}P z_))@r=Oa8rxbJ*~vxNK3NBEd<;qY{x!qePX$&UR1 literal 0 HcmV?d00001 diff --git a/tests/src/runtimeApi/module/vcpy_kernel.cpp b/tests/src/runtimeApi/module/vcpy_kernel.cpp new file mode 100644 index 0000000000..0375eee342 --- /dev/null +++ b/tests/src/runtimeApi/module/vcpy_kernel.cpp @@ -0,0 +1,30 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip/hip_runtime.h" + +extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b) +{ + int tx = hipThreadIdx_x; + b[tx] = a[tx]; +} +