From 097e4eb9d8a003f4dc00814b4c437cf9896df40e Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 4 Dec 2016 00:13:19 -0600 Subject: [PATCH] Enable USE_DISPATCH_HSA_KERNEL. Optimize hipLaunchModule dispatch latency. --- src/hip_hcc.cpp | 55 ---------------------------------------------- src/hip_hcc.h | 2 -- src/hip_module.cpp | 27 ----------------------- 3 files changed, 84 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index abd260762f..223e7a9243 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -389,61 +389,6 @@ void ihipStream_t::lockclose_postKernelCommand(const char * kernelName, hc::acce -#if USE_DISPATCH_HSA_KERNEL==0 -// Precursor: the stream is already locked,specifically so this routine can enqueue work into the specified av. -void ihipStream_t::launchModuleKernel( - hc::accelerator_view av, - hsa_signal_t signal, - uint32_t blockDimX, - uint32_t blockDimY, - uint32_t blockDimZ, - uint32_t gridDimX, - uint32_t gridDimY, - uint32_t gridDimZ, - uint32_t groupSegmentSize, - uint32_t privateSegmentSize, - void *kernarg, - size_t kernSize, - uint64_t kernel){ - hsa_status_t status; - void *kern; - - hsa_amd_memory_pool_t *pool = reinterpret_cast(av.get_hsa_kernarg_region()); - status = hsa_amd_memory_pool_allocate(*pool, kernSize, 0, &kern); - status = hsa_amd_agents_allow_access(1, (hsa_agent_t*)av.get_hsa_agent(), 0, kern); - memcpy(kern, kernarg, kernSize); - hsa_queue_t *Queue = (hsa_queue_t*)av.get_hsa_queue(); - const uint32_t queue_mask = Queue->size-1; - uint32_t packet_index = hsa_queue_load_write_index_relaxed(Queue); - hsa_kernel_dispatch_packet_t *dispatch_packet = &(((hsa_kernel_dispatch_packet_t*)(Queue->base_address))[packet_index & queue_mask]); - - dispatch_packet->completion_signal = signal; - dispatch_packet->workgroup_size_x = blockDimX; - dispatch_packet->workgroup_size_y = blockDimY; - dispatch_packet->workgroup_size_z = blockDimZ; - dispatch_packet->grid_size_x = blockDimX * gridDimX; - dispatch_packet->grid_size_y = blockDimY * gridDimY; - dispatch_packet->grid_size_z = blockDimZ * gridDimZ; - dispatch_packet->group_segment_size = groupSegmentSize; - dispatch_packet->private_segment_size = privateSegmentSize; - dispatch_packet->kernarg_address = kern; - dispatch_packet->kernel_object = kernel; - uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | - (1 << HSA_PACKET_HEADER_BARRIER) | - (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | - (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); - - uint16_t setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; - uint32_t header32 = header | (setup << 16); - - __atomic_store_n((uint32_t*)(dispatch_packet), header32, __ATOMIC_RELEASE); - - hsa_queue_store_write_index_relaxed(Queue, packet_index + 1); - hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index); -} -#endif - - //============================================================================= // Recompute the peercnt and the packed _peerAgents whenever a peer is added or deleted. // The packed _peerAgents can efficiently be used on each memory allocation. diff --git a/src/hip_hcc.h b/src/hip_hcc.h index b01d41be14..5ab51b5ea3 100644 --- a/src/hip_hcc.h +++ b/src/hip_hcc.h @@ -32,8 +32,6 @@ THE SOFTWARE. #error("This version of HIP requires a newer version of HCC."); #endif -#define USE_DISPATCH_HSA_KERNEL 0 -// //--- diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 606d99f2fd..2daa251004 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -282,8 +282,6 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, grid_launch_parm lp; hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName); -#if USE_DISPATCH_HSA_KERNEL - hsa_kernel_dispatch_packet_t aql; memset(&aql, 0, sizeof(aql)); @@ -307,31 +305,6 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize); -#else - - /* - Create signal - */ - - hsa_signal_t signal; - status = hsa_signal_create(1, 0, NULL, &signal); - - - /* - Launch AQL packet - */ - hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, - gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernArgSize, f->_kernel); - - - /* - Wait for signal - */ - - hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); - -#endif // USE_DISPATCH_HSA_KERNEL - ihipPostLaunchKernel(f->_kernelName, hStream, lp);