P4 to Git Change 2014404 by gandryey@gera-win10 on 2019/10/16 11:13:37

SWDEV-184710 - Support hipLaunchCooperativeKernelMultiDevice()
	- Add support for multi grid launch in hip
	- Detect the new hidden argument and pass the required information for the kernel launch
	- Memory for synchronization is allocated as a single object and then the offset for each GPU is found

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#343 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#82 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#136 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#99 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#97 edit


[ROCm/hip commit: 70a52b9cd7]
Этот коммит содержится в:
foreman
2019-10-16 11:24:09 -04:00
родитель 185c445b9a
Коммит ac8109b1ac
+35 -13
Просмотреть файл
@@ -256,27 +256,27 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
uint32_t params = 0)
{
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) {
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params);
hip::Function* function = hip::Function::asFunction(f);
amd::Kernel* kernel = function->function_;
amd::Device* device = hip::getCurrentContext()->devices()[0];
amd::ScopedLock lock(function->lock_);
hip::Event* eStart = reinterpret_cast<hip::Event*>(startEvent);
hip::Event* eStop = reinterpret_cast<hip::Event*>(stopEvent);
amd::HostQueue* queue = hip::getQueue(hStream);
const amd::Device& device = queue->vdev()->device();
if ((params & amd::NDRangeKernelCommand::CooperativeGroups) &&
!device->info().cooperativeGroups_) {
!device.info().cooperativeGroups_) {
return hipErrorLaunchFailure;
}
if ((params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) &&
!device->info().cooperativeMultiDeviceGroups_) {
!device.info().cooperativeMultiDeviceGroups_) {
return hipErrorLaunchFailure;
}
if (!queue) {
@@ -323,7 +323,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
}
amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(
*queue, waitList, *kernel, ndrange, sharedMemBytes, params);
*queue, waitList, *kernel, ndrange, sharedMemBytes,
params, gridId, numGrids, prevGridSum, allGridSum, firstDevice);
if (!command) {
return hipErrorOutOfMemory;
}
@@ -436,33 +437,54 @@ hipError_t hipLaunchCooperativeKernel(const void* f,
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags, uint32_t extFlags)
{
int currentDevice = ihipGetDevice();
int numActiveGPUs = 0;
ihipDeviceGetCount(&numActiveGPUs);
if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) {
return hipErrorInvalidValue;
}
hipError_t result = hipErrorUnknown;
uint64_t allGridSize = 0;
for (int i = 0; i < numDevices; ++i) {
const hipLaunchParams& launch = launchParamsList[i];
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
}
uint64_t prevGridSize = 0;
uint32_t firstDevice = 0;
for (int i = 0; i < numDevices; ++i) {
hipSetDevice(i);
const hipLaunchParams& launch = launchParamsList[i];
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.stream)->asHostQueue();
hipFunction_t func = PlatformState::instance().getFunc(launch.func, i);
hipFunction_t func = nullptr;
// The order of devices in the launch may not match the order in the global array
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
// Find the matching device and request the kernel function
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
func = PlatformState::instance().getFunc(launch.func, dev);
// Save VDI index of the first device in the launch
if (i == 0) {
firstDevice = queue->vdev()->device().index();
}
break;
}
}
if (func == nullptr) {
HIP_RETURN(result);
}
result = ihipModuleLaunchKernel(func,
launch.gridDim.x * launch.blockDim.x,
launch.gridDim.y * launch.blockDim.y,
launch.gridDim.z * launch.blockDim.z,
launch.blockDim.x, launch.blockDim.y, launch.blockDim.z,
launch.sharedMem, launch.stream,
launch.args, nullptr, nullptr, nullptr, flags, extFlags);
launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
if (result != hipSuccess) {
break;
}
prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
}
hipSetDevice(currentDevice);
return result;
}