Support hipLaunchCooperativeKernelMultiDevice()

- Add validation logic for multi-GPU (MGPU) launches to pass a CUDA test

Change-Id: Iccca7fde43493fc3bc6685512d39202271ae3e92
Этот коммит содержится в:
German Andryeyev
2020-04-06 13:54:03 -04:00
родитель 382d5ce77f
Коммит 5fe91ccb1b
4 изменённых файлов: 54 добавлений и 1 удалений
+17 -1
Просмотреть файл
@@ -128,6 +128,14 @@ typedef struct hipDeviceProp_t {
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
int ECCEnabled; ///<Device has ECC support enabled
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
///devices with unmatched functions
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
///devices with unmatched grid dimensions
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
///devices with unmatched block dimensions
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
///devices with unmatched shared memories
} hipDeviceProp_t;
@@ -329,8 +337,16 @@ typedef enum hipDeviceAttribute_t {
hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory;
hipDeviceAttributeKernelExecTimeout, ///<Run time limit for kernels executed on the device
hipDeviceAttributeCanMapHostMemory, ///<Device can map host memory into device address space
hipDeviceAttributeEccEnabled ///<Device has ECC support enabled
hipDeviceAttributeEccEnabled, ///<Device has ECC support enabled
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
///devices with unmatched functions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
///devices with unmatched grid dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
///devices with unmatched block dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem ///< Supports cooperative launch on multiple
///devices with unmatched shared memories
} hipDeviceAttribute_t;
enum hipComputeMode {
+5
Просмотреть файл
@@ -197,6 +197,11 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device )
deviceProps.cooperativeLaunch = info.cooperativeGroups_;
deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_;
deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_;
deviceProps.maxTexture1D = info.imageMaxBufferSize_;
deviceProps.maxTexture2D[0] = info.image2DMaxWidth_;
deviceProps.maxTexture2D[1] = info.image2DMaxHeight_;
+12
Просмотреть файл
@@ -281,6 +281,18 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
case hipDeviceAttributeEccEnabled:
*pi = prop.ECCEnabled;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
*pi = prop.cooperativeMultiDeviceUnmatchedFunc;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
*pi = prop.cooperativeMultiDeviceUnmatchedGridDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
*pi = prop.cooperativeMultiDeviceUnmatchedBlockDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
*pi = prop.cooperativeMultiDeviceUnmatchedSharedMem;
break;
default:
HIP_RETURN(hipErrorInvalidValue);
}
+20
Просмотреть файл
@@ -481,9 +481,29 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
hipError_t result = hipErrorUnknown;
uint64_t allGridSize = 0;
std::vector<const amd::Device*> mgpu_list(numDevices);
for (int i = 0; i < numDevices; ++i) {
const hipLaunchParams& launch = launchParamsList[i];
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
// Make sure block dimensions are valid
if (0 == launch.blockDim.x * launch.blockDim.y * launch.blockDim.z) {
return hipErrorInvalidConfiguration;
}
if (launch.stream != nullptr) {
// Validate devices to make sure the launch list doesn't contain duplicates
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.stream)->asHostQueue();
auto device = &queue->vdev()->device();
for (int j = 0; j < numDevices; ++j) {
if (mgpu_list[j] == device) {
return hipErrorInvalidDevice;
}
}
mgpu_list[i] = device;
} else {
return hipErrorInvalidResourceHandle;
}
}
uint64_t prevGridSize = 0;
uint32_t firstDevice = 0;