The device property concurrentKernels is added to the hipDeviceProp_t struct.

For the HCC path, concurrentKernels is set to 1 (true), since all ROCR hardware supports this feature.
For the NVCC path, concurrentKernels is obtained from CUDA's device property cudaDeviceProp::concurrentKernels.


[ROCm/clr commit: 4d4ca3ef3f]
Этот коммит содержится в:
Evgeny Mankov
2016-02-09 17:10:35 +03:00
родитель 4df8743f84
Коммит 9f596e0aab
3 изменённых файлов: 3 добавлений и 0 удалений
+1
Просмотреть файл
@@ -93,6 +93,7 @@ typedef struct hipDeviceProp_t {
int clockInstructionRate ; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently
} hipDeviceProp_t;
+1
Просмотреть файл
@@ -207,6 +207,7 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev
p_prop->arch.has3dGrid = (ccVers >= 200);
p_prop->arch.hasDynamicParallelism = (ccVers >= 350);
p_prop->concurrentKernels = cdprop.concurrentKernels;
return hipCUDAErrorTohipError(cerror);
}
+1
Просмотреть файл
@@ -383,6 +383,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
prop->arch.has3dGrid = 1;
prop->arch.hasDynamicParallelism = 0;
prop->concurrentKernels = 1; // All ROCR hardware supports executing multiple kernels concurrently
return e;
}