Merge pull request #949 from gargrahul/single_stream_concurrent_kernels
Add extension for kernel concurrency on same stream
[ROCm/hip commit: 352b17346c]
이 커밋은 다음에 포함됨:
@@ -130,7 +130,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent) {
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
hipError_t ret = hipSuccess;
|
||||
|
||||
@@ -202,8 +202,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
aql.kernel_object = f->_object;
|
||||
aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
aql.header =
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER); // TODO - honor queue setting for execute_in_order
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE);
|
||||
if((flags & 0x1)== 0 ) {
|
||||
//in_order
|
||||
aql.header |= (1 << HSA_PACKET_HEADER_BARRIER);
|
||||
}
|
||||
|
||||
if (HCC_OPT_FLUSH) {
|
||||
aql.header |= (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
@@ -250,9 +253,21 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gr
|
||||
hStream, kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY,
|
||||
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr));
|
||||
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0));
|
||||
}
|
||||
|
||||
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
|
||||
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
|
||||
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
@@ -264,7 +279,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0));
|
||||
}
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
새 이슈에서 참조
사용자 차단