SWDEV-184710
Support hipLaunchCooperativeKernelMultiDevice(): add validation logic for multi-GPU (MGPU) launches so that a CUDA test passes. Change-Id: Iccca7fde43493fc3bc6685512d39202271ae3e92
Этот коммит содержится в:
@@ -128,6 +128,14 @@ typedef struct hipDeviceProp_t {
|
||||
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
|
||||
int ECCEnabled; ///<Device has ECC support enabled
|
||||
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
||||
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
|
||||
///devices with unmatched functions
|
||||
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
|
||||
///devices with unmatched grid dimensions
|
||||
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
|
||||
///devices with unmatched block dimensions
|
||||
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
|
||||
///devices with unmatched shared memory sizes
|
||||
|
||||
} hipDeviceProp_t;
|
||||
|
||||
@@ -329,8 +337,16 @@ typedef enum hipDeviceAttribute_t {
|
||||
hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory;
|
||||
hipDeviceAttributeKernelExecTimeout, ///<Run time limit for kernels executed on the device
|
||||
hipDeviceAttributeCanMapHostMemory, ///<Device can map host memory into device address space
|
||||
hipDeviceAttributeEccEnabled ///<Device has ECC support enabled
|
||||
hipDeviceAttributeEccEnabled, ///<Device has ECC support enabled
|
||||
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched functions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched grid dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched block dimensions
|
||||
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem ///< Supports cooperative launch on multiple
|
||||
///devices with unmatched shared memory sizes
|
||||
} hipDeviceAttribute_t;
|
||||
|
||||
enum hipComputeMode {
|
||||
|
||||
@@ -197,6 +197,11 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device )
|
||||
deviceProps.cooperativeLaunch = info.cooperativeGroups_;
|
||||
deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;
|
||||
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_;
|
||||
deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_;
|
||||
|
||||
deviceProps.maxTexture1D = info.imageMaxBufferSize_;
|
||||
deviceProps.maxTexture2D[0] = info.image2DMaxWidth_;
|
||||
deviceProps.maxTexture2D[1] = info.image2DMaxHeight_;
|
||||
|
||||
@@ -281,6 +281,18 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
case hipDeviceAttributeEccEnabled:
|
||||
*pi = prop.ECCEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedFunc;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedGridDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedBlockDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
|
||||
*pi = prop.cooperativeMultiDeviceUnmatchedSharedMem;
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -481,9 +481,29 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
|
||||
hipError_t result = hipErrorUnknown;
|
||||
uint64_t allGridSize = 0;
|
||||
std::vector<const amd::Device*> mgpu_list(numDevices);
|
||||
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
const hipLaunchParams& launch = launchParamsList[i];
|
||||
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
|
||||
|
||||
// Make sure block dimensions are valid
|
||||
if (0 == launch.blockDim.x * launch.blockDim.y * launch.blockDim.z) {
|
||||
return hipErrorInvalidConfiguration;
|
||||
}
|
||||
if (launch.stream != nullptr) {
|
||||
// Validate the devices to make sure the launch list doesn't contain duplicates
|
||||
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.stream)->asHostQueue();
|
||||
auto device = &queue->vdev()->device();
|
||||
for (int j = 0; j < numDevices; ++j) {
|
||||
if (mgpu_list[j] == device) {
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
}
|
||||
mgpu_list[i] = device;
|
||||
} else {
|
||||
return hipErrorInvalidResourceHandle;
|
||||
}
|
||||
}
|
||||
uint64_t prevGridSize = 0;
|
||||
uint32_t firstDevice = 0;
|
||||
|
||||
Ссылка в новой задаче
Block a user