diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 8b9f1db97b..e10b68695e 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -460,7 +460,8 @@ typedef uint64_t SeqNum_t ; void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel); + uint32_t groupSegmentSize, uint32_t sharedMemBytes, + void *kernarg, size_t kernSize, uint64_t kernel); // Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function. SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; }; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 1954b31c70..97911d08eb 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -522,7 +522,8 @@ void ihipStream_t::launchModuleKernel( uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t sharedMemBytes, + uint32_t groupSegmentSize, + uint32_t privateSegmentSize, void *kernarg, size_t kernSize, uint64_t kernel){ @@ -545,8 +546,8 @@ void ihipStream_t::launchModuleKernel( dispatch_packet->grid_size_x = blockDimX * gridDimX; dispatch_packet->grid_size_y = blockDimY * gridDimY; dispatch_packet->grid_size_z = blockDimZ * gridDimZ; - dispatch_packet->group_segment_size = 0; - dispatch_packet->private_segment_size = sharedMemBytes; + dispatch_packet->group_segment_size = groupSegmentSize; + dispatch_packet->private_segment_size = privateSegmentSize; dispatch_packet->kernarg_address = kern; dispatch_packet->kernel_object = kernel; uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 28c65b6669..640b2bb7c4 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -252,10 +252,23 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, }else{ return ihipLogStatus(hipErrorInvalidValue); } + + uint32_t groupSegmentSize; + hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &groupSegmentSize); + + uint32_t privateSegmentSize; + status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &privateSegmentSize); + + privateSegmentSize += sharedMemBytes; + + /* Kernel argument preparation. */ - hsa_status_t status; grid_launch_parm lp; hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp); @@ -270,7 +283,7 @@ Kernel argument preparation. Launch AQL packet */ hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, - gridDimX, gridDimY, gridDimZ, sharedMemBytes, config[1], kernSize, f->kernel); + gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->kernel); /* Wait for signal