P4 to Git Change 1496286 by gandryey@gera-w8 on 2017/12/19 13:24:13

SWDEV-140401 - [CQE OCL][PAL][Vega10] Observed ~ 80 % performance drop while running GE_HealthCare - PenalityTest
	- Port the logic to consider cache line size for the local workgroup detection.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#41 edit
Этот коммит содержится в:
foreman
2017-12-19 13:29:53 -05:00
родитель 1682b022fc
Коммит 5994573b3f
+28 -9
Просмотреть файл
@@ -813,10 +813,15 @@ void HSAILKernel::findLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkS
tmp /= div;
}
// Assuming DWORD access
const uint cacheLineMatch = dev().settings().cacheLineSize_ >> 2;
// Check if partial dispatch is enabled and
if (dev().settings().partialDispatch_ &&
// we couldn't find optimal workload
(lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) {
// we couldn't find optimal workload
(((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
// or size is too small for the cache line
(lclWorkSize[0] < cacheLineMatch))) {
size_t maxSize = 0;
size_t maxDim = 0;
for (uint d = 0; d < workDim; ++d) {
@@ -825,14 +830,28 @@ void HSAILKernel::findLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkS
maxDim = d;
}
}
// Check if a local workgroup has the most optimal size
if (thrPerGrp > maxSize) {
thrPerGrp = maxSize;
// Use X dimension as high priority. Runtime will assume that
// X dimension is more important for the address calculation
if ((maxDim != 0) && (gblWorkSize[0] >= (cacheLineMatch / 2))) {
lclWorkSize[0] = cacheLineMatch;
thrPerGrp /= cacheLineMatch;
lclWorkSize[maxDim] = thrPerGrp;
for (uint d = 1; d < workDim; ++d) {
if (d != maxDim) {
lclWorkSize[d] = 1;
}
}
}
lclWorkSize[maxDim] = thrPerGrp;
for (uint d = 0; d < workDim; ++d) {
if (d != maxDim) {
lclWorkSize[d] = 1;
else {
// Check if a local workgroup has the most optimal size
if (thrPerGrp > maxSize) {
thrPerGrp = maxSize;
}
lclWorkSize[maxDim] = thrPerGrp;
for (uint d = 0; d < workDim; ++d) {
if (d != maxDim) {
lclWorkSize[d] = 1;
}
}
}
}