P4 to Git Change 1053379 by xcui@merged_opencl_jxcwin on 2014/07/08 19:38:01

EPR #304775 - fixed bug 9838. The SVM pointer in the argument list needs to be tracked to make sure all operations on the resource have finished before we dispatch the kernel. Code review: http://ocltc.amd.com/reviews/r/5200/ Precheckin: http://ocltc.amd.com:8111/viewModification.html?modId=35125&personal=true&buildTypeId=&tab=vcsModificationTests Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#257 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#322 edit
2014-07-08 19:45:31 -04:00
@@ -3850,6 +3850,7 @@ HSAILKernel::loadArguments(
                    mem = amd::SvmManager::FindSvmBuffer(*reinterpret_cast<void* const*>(paramaddr));
                    if (mem != NULL) {
                        gpuMem = dev().getGpuMemory(mem);
+                        gpuMem->wait(gpu, WaitOnBusyEngine);
                        memList.push_back(gpuMem);
                    }
                    else {
@@ -3861,10 +3862,10 @@ HSAILKernel::loadArguments(
                    gpuMem = *reinterpret_cast<Memory* const*>(paramaddr);
                }
                else {
-                    mem = *reinterpret_cast<amd::Memory* const*>(paramaddr);
-                    if (mem != NULL) {
-                        gpuMem = dev().getGpuMemory(mem);
-                    }
+                        mem = *reinterpret_cast<amd::Memory* const*>(paramaddr);
+                        if (mem != NULL) {
+                             gpuMem = dev().getGpuMemory(mem);
+                        }
                }
                if (gpuMem == NULL) {
                    WriteAqlArg(&aqlArgBuf, &gpuMem, sizeof(void*));
@@ -3280,13 +3280,18 @@ VirtualGPU::processMemObjectsHSA(
        const HSAILKernel::Argument*  arg = hsaKernel.argument(i);
        Memory* memory = NULL;
        bool    readOnly = false;
+        amd::Memory* svmMem = NULL;

        // Find if current argument is a buffer
        if ((desc.type_ == T_POINTER) && (arg->addrQual_ != HSAIL_ADDRESS_LOCAL)) {
            if (kernelParams.boundToSvmPointer(dev(), params, i)) {
-                //!\todo Do we have to sync cache coherency or wait for SDMA?
-                flushL1Cache();
-                break;
+                svmMem = amd::SvmManager::FindSvmBuffer(
+                    *reinterpret_cast<void* const*>(params + desc.offset_));
+                if (!svmMem) {
+                    //!\todo Do we have to sync cache coherency or wait for SDMA?
+                    flushL1Cache();
+                    break;
+                }
            }

            if (nativeMem) {
@@ -3294,8 +3299,6 @@ VirtualGPU::processMemObjectsHSA(
            }
            else if (*reinterpret_cast<amd::Memory* const*>
                    (params + desc.offset_) != NULL) {
-                amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(
-                    *reinterpret_cast<void* const*>(params + desc.offset_));
                if (NULL == svmMem) {
                    memory = dev().getGpuMemory(*reinterpret_cast<amd::Memory* const*>
                            (params + desc.offset_));