Add the device property concurrentKernels to the hipDeviceProp_t struct.
On the HCC path, concurrentKernels is set to 1 (true) because all ROCR hardware supports this feature.
On the NVCC path, concurrentKernels is taken from CUDA's device property cudaDeviceProp::concurrentKernels.
[ROCm/clr commit: 4d4ca3ef3f]
This commit is contained in:
@@ -93,6 +93,7 @@ typedef struct hipDeviceProp_t {
|
||||
int clockInstructionRate ; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
|
||||
|
||||
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
||||
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently
|
||||
} hipDeviceProp_t;
|
||||
|
||||
|
||||
|
||||
@@ -207,6 +207,7 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev
|
||||
p_prop->arch.has3dGrid = (ccVers >= 200);
|
||||
p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
|
||||
|
||||
p_prop->concurrentKernels = cdprop.concurrentKernels;
|
||||
|
||||
return hipCUDAErrorTohipError(cerror);
|
||||
}
|
||||
|
||||
@@ -383,6 +383,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
prop->arch.has3dGrid = 1;
|
||||
prop->arch.hasDynamicParallelism = 0;
|
||||
|
||||
prop->concurrentKernels = 1; // All ROCR hardware supports executing multiple kernels concurrently
|
||||
return e;
|
||||
}
|
||||
|
||||
|
||||
Reference in new issue
Block a user