P4 to Git Change 1496286 by gandryey@gera-w8 on 2017/12/19 13:24:13
SWDEV-140401 - [CQE OCL][PAL][Vega10] Observed ~80% performance drop while running GE_HealthCare - PenalityTest. Port the logic to consider the cache line size for the local workgroup detection. Affected files: //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#41 (edit)
Этот коммит содержится в:
@@ -813,10 +813,15 @@ void HSAILKernel::findLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkS
|
||||
tmp /= div;
|
||||
}
|
||||
|
||||
// Assuming DWORD access
|
||||
const uint cacheLineMatch = dev().settings().cacheLineSize_ >> 2;
|
||||
|
||||
// Check if partial dispatch is enabled and
|
||||
if (dev().settings().partialDispatch_ &&
|
||||
// we couldn't find optimal workload
|
||||
(lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) {
|
||||
// we couldn't find optimal workload
|
||||
(((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
|
||||
// or size is too small for the cache line
|
||||
(lclWorkSize[0] < cacheLineMatch))) {
|
||||
size_t maxSize = 0;
|
||||
size_t maxDim = 0;
|
||||
for (uint d = 0; d < workDim; ++d) {
|
||||
@@ -825,14 +830,28 @@ void HSAILKernel::findLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkS
|
||||
maxDim = d;
|
||||
}
|
||||
}
|
||||
// Check if a local workgroup has the most optimal size
|
||||
if (thrPerGrp > maxSize) {
|
||||
thrPerGrp = maxSize;
|
||||
// Use X dimension as high priority. Runtime will assume that
|
||||
// X dimension is more important for the address calculation
|
||||
if ((maxDim != 0) && (gblWorkSize[0] >= (cacheLineMatch / 2))) {
|
||||
lclWorkSize[0] = cacheLineMatch;
|
||||
thrPerGrp /= cacheLineMatch;
|
||||
lclWorkSize[maxDim] = thrPerGrp;
|
||||
for (uint d = 1; d < workDim; ++d) {
|
||||
if (d != maxDim) {
|
||||
lclWorkSize[d] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
lclWorkSize[maxDim] = thrPerGrp;
|
||||
for (uint d = 0; d < workDim; ++d) {
|
||||
if (d != maxDim) {
|
||||
lclWorkSize[d] = 1;
|
||||
else {
|
||||
// Check if a local workgroup has the most optimal size
|
||||
if (thrPerGrp > maxSize) {
|
||||
thrPerGrp = maxSize;
|
||||
}
|
||||
lclWorkSize[maxDim] = thrPerGrp;
|
||||
for (uint d = 0; d < workDim; ++d) {
|
||||
if (d != maxDim) {
|
||||
lclWorkSize[d] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user