From bf32cddc031cb32728ec3ed37d061a0d7e6011d9 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 8 Jul 2014 19:45:31 -0400
Subject: [PATCH] P4 to Git Change 1053379 by xcui@merged_opencl_jxcwin on
2014/07/08 19:38:01
EPR #304775 - fixed bug 9838. SVM pointers in the argument list need to be tracked to make sure all pending operations on the resource have finished before we dispatch the kernel.
code review:
http://ocltc.amd.com/reviews/r/5200/
precheckin:
http://ocltc.amd.com:8111/viewModification.html?modId=35125&personal=true&buildTypeId=&tab=vcsModificationTests
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#257 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#322 edit
---
rocclr/runtime/device/gpu/gpukernel.cpp | 9 +++++----
rocclr/runtime/device/gpu/gpuvirtual.cpp | 13 ++++++++-----
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/rocclr/runtime/device/gpu/gpukernel.cpp b/rocclr/runtime/device/gpu/gpukernel.cpp
index 46a11f5a01..780d458ba9 100644
--- a/rocclr/runtime/device/gpu/gpukernel.cpp
+++ b/rocclr/runtime/device/gpu/gpukernel.cpp
@@ -3850,6 +3850,7 @@ HSAILKernel::loadArguments(
mem = amd::SvmManager::FindSvmBuffer(*reinterpret_cast(paramaddr));
if (mem != NULL) {
gpuMem = dev().getGpuMemory(mem);
+ gpuMem->wait(gpu, WaitOnBusyEngine);
memList.push_back(gpuMem);
}
else {
@@ -3861,10 +3862,10 @@ HSAILKernel::loadArguments(
gpuMem = *reinterpret_cast(paramaddr);
}
else {
- mem = *reinterpret_cast(paramaddr);
- if (mem != NULL) {
- gpuMem = dev().getGpuMemory(mem);
- }
+ mem = *reinterpret_cast(paramaddr);
+ if (mem != NULL) {
+ gpuMem = dev().getGpuMemory(mem);
+ }
}
if (gpuMem == NULL) {
WriteAqlArg(&aqlArgBuf, &gpuMem, sizeof(void*));
diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 9a4497be2c..dcb73b18a8 100644
--- a/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -3280,13 +3280,18 @@ VirtualGPU::processMemObjectsHSA(
const HSAILKernel::Argument* arg = hsaKernel.argument(i);
Memory* memory = NULL;
bool readOnly = false;
+ amd::Memory* svmMem = NULL;
// Find if current argument is a buffer
if ((desc.type_ == T_POINTER) && (arg->addrQual_ != HSAIL_ADDRESS_LOCAL)) {
if (kernelParams.boundToSvmPointer(dev(), params, i)) {
- //!\todo Do we have to sync cache coherency or wait for SDMA?
- flushL1Cache();
- break;
+ svmMem = amd::SvmManager::FindSvmBuffer(
+ *reinterpret_cast(params + desc.offset_));
+ if (!svmMem) {
+ //!\todo Do we have to sync cache coherency or wait for SDMA?
+ flushL1Cache();
+ break;
+ }
}
if (nativeMem) {
@@ -3294,8 +3299,6 @@ VirtualGPU::processMemObjectsHSA(
}
else if (*reinterpret_cast
(params + desc.offset_) != NULL) {
- amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(
- *reinterpret_cast(params + desc.offset_));
if (NULL == svmMem) {
memory = dev().getGpuMemory(*reinterpret_cast
(params + desc.offset_));