Revert "Enable USE_DISPATCH_HSA_KERNEL."
This reverts commit f8bcbe8680.
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -389,6 +389,61 @@ void ihipStream_t::lockclose_postKernelCommand(const char * kernelName, hc::acce
|
||||
|
||||
|
||||
|
||||
#if USE_DISPATCH_HSA_KERNEL==0
|
||||
// Precursor: the stream is already locked,specifically so this routine can enqueue work into the specified av.
|
||||
void ihipStream_t::launchModuleKernel(
|
||||
hc::accelerator_view av,
|
||||
hsa_signal_t signal,
|
||||
uint32_t blockDimX,
|
||||
uint32_t blockDimY,
|
||||
uint32_t blockDimZ,
|
||||
uint32_t gridDimX,
|
||||
uint32_t gridDimY,
|
||||
uint32_t gridDimZ,
|
||||
uint32_t groupSegmentSize,
|
||||
uint32_t privateSegmentSize,
|
||||
void *kernarg,
|
||||
size_t kernSize,
|
||||
uint64_t kernel){
|
||||
hsa_status_t status;
|
||||
void *kern;
|
||||
|
||||
hsa_amd_memory_pool_t *pool = reinterpret_cast<hsa_amd_memory_pool_t*>(av.get_hsa_kernarg_region());
|
||||
status = hsa_amd_memory_pool_allocate(*pool, kernSize, 0, &kern);
|
||||
status = hsa_amd_agents_allow_access(1, (hsa_agent_t*)av.get_hsa_agent(), 0, kern);
|
||||
memcpy(kern, kernarg, kernSize);
|
||||
hsa_queue_t *Queue = (hsa_queue_t*)av.get_hsa_queue();
|
||||
const uint32_t queue_mask = Queue->size-1;
|
||||
uint32_t packet_index = hsa_queue_load_write_index_relaxed(Queue);
|
||||
hsa_kernel_dispatch_packet_t *dispatch_packet = &(((hsa_kernel_dispatch_packet_t*)(Queue->base_address))[packet_index & queue_mask]);
|
||||
|
||||
dispatch_packet->completion_signal = signal;
|
||||
dispatch_packet->workgroup_size_x = blockDimX;
|
||||
dispatch_packet->workgroup_size_y = blockDimY;
|
||||
dispatch_packet->workgroup_size_z = blockDimZ;
|
||||
dispatch_packet->grid_size_x = blockDimX * gridDimX;
|
||||
dispatch_packet->grid_size_y = blockDimY * gridDimY;
|
||||
dispatch_packet->grid_size_z = blockDimZ * gridDimZ;
|
||||
dispatch_packet->group_segment_size = groupSegmentSize;
|
||||
dispatch_packet->private_segment_size = privateSegmentSize;
|
||||
dispatch_packet->kernarg_address = kern;
|
||||
dispatch_packet->kernel_object = kernel;
|
||||
uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
|
||||
uint16_t setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
uint32_t header32 = header | (setup << 16);
|
||||
|
||||
__atomic_store_n((uint32_t*)(dispatch_packet), header32, __ATOMIC_RELEASE);
|
||||
|
||||
hsa_queue_store_write_index_relaxed(Queue, packet_index + 1);
|
||||
hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//=============================================================================
|
||||
// Recompute the peercnt and the packed _peerAgents whenever a peer is added or deleted.
|
||||
// The packed _peerAgents can efficiently be used on each memory allocation.
|
||||
|
||||
@@ -32,6 +32,8 @@ THE SOFTWARE.
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
#define USE_DISPATCH_HSA_KERNEL 0
|
||||
//
|
||||
|
||||
|
||||
//---
|
||||
|
||||
@@ -282,6 +282,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
grid_launch_parm lp;
|
||||
hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName);
|
||||
|
||||
#if USE_DISPATCH_HSA_KERNEL
|
||||
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
@@ -305,6 +307,31 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
|
||||
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize);
|
||||
#else
|
||||
|
||||
/*
|
||||
Create signal
|
||||
*/
|
||||
|
||||
hsa_signal_t signal;
|
||||
status = hsa_signal_create(1, 0, NULL, &signal);
|
||||
|
||||
|
||||
/*
|
||||
Launch AQL packet
|
||||
*/
|
||||
hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ,
|
||||
gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernArgSize, f->_kernel);
|
||||
|
||||
|
||||
/*
|
||||
Wait for signal
|
||||
*/
|
||||
|
||||
hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
#endif // USE_DISPATCH_HSA_KERNEL
|
||||
|
||||
|
||||
ihipPostLaunchKernel(f->_kernelName, hStream, lp);
|
||||
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user