From af0bc54257a019817eaa4679466aaef73a822619 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 8 Jul 2014 19:45:31 -0400 Subject: [PATCH] P4 to Git Change 1053379 by xcui@merged_opencl_jxcwin on 2014/07/08 19:38:01 EPR #304775 - fixed bug 9838. The SVM pointer in the argument list needs to be tracked to make sure all operations on the resource have finished before we dispatch the kernel. code review: http://ocltc.amd.com/reviews/r/5200/ precheckin: http://ocltc.amd.com:8111/viewModification.html?modId=35125&personal=true&buildTypeId=&tab=vcsModificationTests Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#257 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#322 edit [ROCm/clr commit: bf32cddc031cb32728ec3ed37d061a0d7e6011d9] --- .../clr/rocclr/runtime/device/gpu/gpukernel.cpp | 9 +++++---- .../clr/rocclr/runtime/device/gpu/gpuvirtual.cpp | 13 ++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp index 46a11f5a01..780d458ba9 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp @@ -3850,6 +3850,7 @@ HSAILKernel::loadArguments( mem = amd::SvmManager::FindSvmBuffer(*reinterpret_cast(paramaddr)); if (mem != NULL) { gpuMem = dev().getGpuMemory(mem); + gpuMem->wait(gpu, WaitOnBusyEngine); memList.push_back(gpuMem); } else { @@ -3861,10 +3862,10 @@ HSAILKernel::loadArguments( gpuMem = *reinterpret_cast(paramaddr); } else { - mem = *reinterpret_cast(paramaddr); - if (mem != NULL) { - gpuMem = dev().getGpuMemory(mem); - } + mem = *reinterpret_cast(paramaddr); + if (mem != NULL) { + gpuMem = dev().getGpuMemory(mem); + } } if (gpuMem == NULL) { WriteAqlArg(&aqlArgBuf, &gpuMem, sizeof(void*)); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp 
b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index 9a4497be2c..dcb73b18a8 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -3280,13 +3280,18 @@ VirtualGPU::processMemObjectsHSA( const HSAILKernel::Argument* arg = hsaKernel.argument(i); Memory* memory = NULL; bool readOnly = false; + amd::Memory* svmMem = NULL; // Find if current argument is a buffer if ((desc.type_ == T_POINTER) && (arg->addrQual_ != HSAIL_ADDRESS_LOCAL)) { if (kernelParams.boundToSvmPointer(dev(), params, i)) { - //!\todo Do we have to sync cache coherency or wait for SDMA? - flushL1Cache(); - break; + svmMem = amd::SvmManager::FindSvmBuffer( + *reinterpret_cast(params + desc.offset_)); + if (!svmMem) { + //!\todo Do we have to sync cache coherency or wait for SDMA? + flushL1Cache(); + break; + } } if (nativeMem) { @@ -3294,8 +3299,6 @@ VirtualGPU::processMemObjectsHSA( } else if (*reinterpret_cast (params + desc.offset_) != NULL) { - amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer( - *reinterpret_cast(params + desc.offset_)); if (NULL == svmMem) { memory = dev().getGpuMemory(*reinterpret_cast (params + desc.offset_));