diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 3ad1efddeb..8b9f1db97b 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -32,7 +32,7 @@ THE SOFTWARE. // #define USE_MEMCPYTOSYMBOL // //Use the new HCC accelerator_view::copy instead of am_copy -#define USE_AV_COPY (__hcc_workweek__ >= 16351) +#define USE_AV_COPY (__hcc_workweek__ >= 16351) // Compile peer-to-peer support. // >= 2 : use HCC hc:accelerator::get_is_peer @@ -457,7 +457,10 @@ typedef uint64_t SeqNum_t ; // Use this if we already have the stream critical data mutex: void wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty=false); - void launchModuleKernel(hsa_signal_t signal, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel); + void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, + uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel); // Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function. SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; }; diff --git a/samples/0_Intro/module_api/Makefile b/samples/0_Intro/module_api/Makefile index db270beaa0..8981938e91 100644 --- a/samples/0_Intro/module_api/Makefile +++ b/samples/0_Intro/module_api/Makefile @@ -3,11 +3,14 @@ ifeq (,$(HIP_PATH)) HIP_PATH=../../.. 
endif HIPCC=$(HIP_PATH)/bin/hipcc +OPT= HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) ifeq (${HIP_PLATFORM}, hcc) GENCODEOBJECT_FLAGS=--target-isa=fiji +all: runKernel.hip.out + vcpy_isa.compile: vcpy_isa.cpp $(HIPCC) --genco $(GENCODEOBJECT_FLAGS) vcpy_isa.cpp -o vcpy_isa.co @@ -29,5 +32,5 @@ endif all: vcpy_isa.compile runKernel.hip.out runKernel.hip.out: runKernel.cpp - $(HIPCC) runKernel.cpp -o runKernel.hip.out + $(HIPCC) $(OPT) runKernel.cpp -o runKernel.hip.out diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index cb47dafc7d..1954b31c70 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -137,7 +137,7 @@ ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex) }; -static thread_local ihipCtx_t *tls_defaultCtx = nullptr; +static thread_local ihipCtx_t *tls_defaultCtx = nullptr; void ihipSetTlsDefaultCtx(ihipCtx_t *ctx) { tls_defaultCtx = ctx; @@ -512,7 +512,10 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t } -void ihipStream_t::launchModuleKernel(hsa_signal_t signal, +// Precondition: the stream is already locked, specifically so this routine can enqueue work into the specified av. +void ihipStream_t::launchModuleKernel( + hc::accelerator_view av, + hsa_signal_t signal, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, @@ -526,11 +529,6 @@ void ihipStream_t::launchModuleKernel(hsa_signal_t signal, hsa_status_t status; void *kern; - // Lock stream to prevent other threads from enqueueing kernels at same time. 
- LockedAccessor_StreamCrit_t crit (_criticalData); - - hc::accelerator_view av = crit->_av; - hsa_amd_memory_pool_t *pool = reinterpret_cast(av.get_hsa_kernarg_region()); status = hsa_amd_memory_pool_allocate(*pool, kernSize, 0, &kern); status = hsa_amd_agents_allow_access(1, (hsa_agent_t*)av.get_hsa_agent(), 0, kern); @@ -1344,11 +1342,11 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) } } -void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream) +void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream) { std::string streamString = ToString(stream); fprintf(stderr, KGRN "<grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, + kernelName, lp->grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, lp->dynamic_group_mem_bytes, streamString.c_str());\ } diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 456d3725b8..28c65b6669 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -221,6 +221,7 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, return ihipModuleGetFunction(hfunc, hmod, name); } + hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, @@ -268,7 +269,7 @@ Kernel argument preparation. /* Launch AQL packet */ - hStream->launchModuleKernel(signal, blockDimX, blockDimY, blockDimZ, + hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, gridDimX, gridDimY, gridDimZ, sharedMemBytes, config[1], kernSize, f->kernel); /* @@ -279,7 +280,7 @@ Kernel argument preparation. ihipPostLaunchKernel(hStream, lp); - + } return ihipLogStatus(ret);