From 42be07afceb119c4440bb67e36618538832ee3ee Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 22 Sep 2015 18:59:36 -0400
Subject: [PATCH] P4 to Git Change 1193228 by xcui@merged_opencl_jxcwin on
2015/09/22 18:52:47
SWDEV-59579 - Resubmit changelist 1193161. Refactor the coarse-grained SVM and fine-grained buffer SVM code paths so that, if the device the SVM is running on supports fine-grained system SVM, the SVM API operations act directly on system memory, with no need to go through the GPU backend. In addition, add support for PX systems with CZ on Windows 10, which support fine-grained system SVM.
code review:
http://ocltc.amd.com/reviews/r/8530/
precheckin:
http://ocltc.amd.com:8111/viewModification.html?modId=58913&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#527 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#152 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#382 edit
[ROCm/clr commit: a3074a2a8fc88e04a05d7a77d8cc1be39843d0c1]
---
.../rocclr/runtime/device/gpu/gpudevice.cpp | 37 ++++-
.../rocclr/runtime/device/gpu/gpudevice.hpp | 1 +
.../rocclr/runtime/device/gpu/gpuvirtual.cpp | 154 ++++++++++--------
3 files changed, 117 insertions(+), 75 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
index f7181fcee0..724d350d57 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -2198,7 +2198,13 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
size = amd::alignUp(size, alignment);
amd::Memory* mem = NULL;
+ freeCPUMem_ = false;
if (NULL == svmPtr) {
+ if (isFineGrainedSystem()) {
+ freeCPUMem_ = true;
+ return amd::Os::alignedMalloc(size, alignment);
+ }
+
//create a hidden buffer, which will allocated on the device later
mem = new (context)amd::Buffer(context, flags, size, reinterpret_cast(1));
if (mem == NULL) {
@@ -2211,10 +2217,12 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
mem->release();
return NULL;
}
+ //if the device supports SVM FGS, return the committed CPU address directly.
gpu::Memory* gpuMem = getGpuMemory(mem);
+
//add the information to context so that we can use it later.
amd::SvmManager::AddSvmBuffer(mem->getSvmPtr(), mem);
-
+ svmPtr = mem->getSvmPtr();
}
else {
//find the existing amd::mem object
@@ -2222,20 +2230,31 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
if (NULL == mem) {
return NULL;
}
- gpu::Memory* gpuMem = getGpuMemory(mem);
+ //commit the CPU memory for FGS device.
+ if (isFineGrainedSystem()) {
+ mem->commitSvmMemory();
+ }
+ else {
+ gpu::Memory* gpuMem = getGpuMemory(mem);
+ }
+ svmPtr = mem->getSvmPtr();
}
-
- return mem->getSvmPtr();
+ return svmPtr;
}
void
Device::svmFree(void *ptr) const
{
- amd::Memory * svmMem = NULL;
- svmMem = amd::SvmManager::FindSvmBuffer(ptr);
- if (NULL != svmMem) {
- svmMem->release();
- amd::SvmManager::RemoveSvmBuffer(ptr);
+ if (freeCPUMem_) {
+ amd::Os::alignedFree(ptr);
+ }
+ else {
+ amd::Memory * svmMem = NULL;
+ svmMem = amd::SvmManager::FindSvmBuffer(ptr);
+ if (NULL != svmMem) {
+ svmMem->release();
+ amd::SvmManager::RemoveSvmBuffer(ptr);
+ }
}
}
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
index 9a7c43853a..d4ac52cd5a 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -623,6 +623,7 @@ private:
SrdManager* srdManager_; //!< SRD manager object
static AppProfile appProfile_; //!< application profile
+ mutable bool freeCPUMem_; //!< flag to mark GPU free SVM CPU mem
};
/*@}*/} // namespace gpu
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 6cc3eae985..f4e9012945 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -1013,35 +1013,42 @@ VirtualGPU::submitSvmCopyMemory(amd::SvmCopyMemoryCommand& vcmd)
profilingBegin(vcmd);
cl_command_type type = vcmd.type();
- amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src());
- amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst());
- if (NULL == srcMem || NULL == dstMem) {
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
+ //no op for FGS supported device
+ if (!dev().isFineGrainedSystem()) {
+
+ amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src());
+ amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst());
+ if (NULL == srcMem || NULL == dstMem) {
+ vcmd.setStatus(CL_INVALID_OPERATION);
+ return;
+ }
+
+ amd::Coord3D srcOrigin(0, 0, 0);
+ amd::Coord3D dstOrigin(0, 0, 0);
+ amd::Coord3D size(vcmd.srcSize(), 1, 1);
+ amd::BufferRect srcRect;
+ amd::BufferRect dstRect;
+
+ srcOrigin.c[0] = static_cast(vcmd.src()) - static_cast(srcMem->getSvmPtr());
+ dstOrigin.c[0] = static_cast(vcmd.dst()) - static_cast(dstMem->getSvmPtr());
+
+ if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) {
+ vcmd.setStatus(CL_INVALID_OPERATION);
+ return;
+ }
+
+ bool entire = srcMem->isEntirelyCovered(srcOrigin, size) &&
+ dstMem->isEntirelyCovered(dstOrigin, size);
+
+ if (!copyMemory(type, *srcMem, *dstMem, entire,
+ srcOrigin, dstOrigin, size, srcRect, dstRect)) {
+ vcmd.setStatus(CL_INVALID_OPERATION);
+ }
}
-
- amd::Coord3D srcOrigin(0, 0, 0);
- amd::Coord3D dstOrigin(0, 0, 0);
- amd::Coord3D size(vcmd.srcSize(), 1, 1);
- amd::BufferRect srcRect;
- amd::BufferRect dstRect;
-
- srcOrigin.c[0] = static_cast(vcmd.src()) - static_cast(srcMem->getSvmPtr());
- dstOrigin.c[0] = static_cast(vcmd.dst()) - static_cast(dstMem->getSvmPtr());
-
- if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) {
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
+ else {
+ //direct memcpy for FGS enabled system
+ amd::SvmBuffer::memFill(vcmd.dst(), vcmd.src(), vcmd.srcSize(), 1);
}
-
- bool entire = srcMem->isEntirelyCovered(srcOrigin, size) &&
- dstMem->isEntirelyCovered(dstOrigin, size);
-
- if (!copyMemory(type, *srcMem, *dstMem, entire,
- srcOrigin, dstOrigin, size, srcRect, dstRect)) {
- vcmd.setStatus(CL_INVALID_OPERATION);
- }
-
profilingEnd(vcmd);
}
@@ -1353,25 +1360,28 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)
profilingBegin(vcmd, true);
- // Make sure we have memory for the command execution
- gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+ //no op for FGS supported device
+ if (!dev().isFineGrainedSystem()) {
+ // Make sure we have memory for the command execution
+ gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
- memory->saveMapInfo(vcmd.origin(), vcmd.size(),
- vcmd.mapFlags(), vcmd.isEntireMemory());
+ memory->saveMapInfo(vcmd.origin(), vcmd.size(),
+ vcmd.mapFlags(), vcmd.isEntireMemory());
- if (memory->mapMemory() != NULL) {
- if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
- amd::Coord3D dstOrigin(0, 0, 0);
- assert(memory->cal()->buffer_ && "SVM memory can't be an image");
- if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
- vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
- LogError("submitSVMMapMemory() - copy failed");
- vcmd.setStatus(CL_MAP_FAILURE);
+ if (memory->mapMemory() != NULL) {
+ if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
+ amd::Coord3D dstOrigin(0, 0, 0);
+ assert(memory->cal()->buffer_ && "SVM memory can't be an image");
+ if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
+ vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
+ LogError("submitSVMMapMemory() - copy failed");
+ vcmd.setStatus(CL_MAP_FAILURE);
+ }
}
}
- }
- else {
- LogError("Unhandled svm map!");
+ else {
+ LogError("Unhandled svm map!");
+ }
}
profilingEnd(vcmd);
@@ -1384,18 +1394,21 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
amd::ScopedLock lock(execution());
profilingBegin(vcmd, true);
- gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+ //no op for FGS supported device
+ if (!dev().isFineGrainedSystem()) {
- if (memory->mapMemory() != NULL) {
- if (memory->isUnmapWrite()) {
- amd::Coord3D srcOrigin(0, 0, 0);
- // Target is a remote resource, so copy
- assert(memory->cal()->buffer_ && "SVM memory can't be an image");
- if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
- memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
- memory->writeMapInfo()->entire_)) {
- LogError("submitSvmUnmapMemory() - copy failed");
- vcmd.setStatus(CL_OUT_OF_RESOURCES);
+ gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+ if (memory->mapMemory() != NULL) {
+ if (memory->isUnmapWrite()) {
+ amd::Coord3D srcOrigin(0, 0, 0);
+ // Target is a remote resource, so copy
+ assert(memory->cal()->buffer_ && "SVM memory can't be an image");
+ if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
+ memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
+ memory->writeMapInfo()->entire_)) {
+ LogError("submitSvmUnmapMemory() - copy failed");
+ vcmd.setStatus(CL_OUT_OF_RESOURCES);
+ }
}
}
}
@@ -1411,23 +1424,32 @@ VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& vcmd)
profilingBegin(vcmd, true);
- amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst());
- assert(dstMemory&&"No svm Buffer to fill with!");
- size_t offset = reinterpret_cast(vcmd.dst())
- - reinterpret_cast(dstMemory->getSvmPtr());
- assert((offset >= 0)&&"wrong svm ptr to fill with!");
+ if (!dev().isFineGrainedSystem()) {
+ size_t patternSize = vcmd.patternSize();
+ size_t fillSize = patternSize * vcmd.times();
+ size_t offset = 0;
+ amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst());
+ assert(dstMemory&&"No svm Buffer to fill with!");
+ offset = reinterpret_cast(vcmd.dst())
+ - reinterpret_cast(dstMemory->getSvmPtr());
+ assert((offset >= 0) && "wrong svm ptr to fill with!");
- gpu::Memory* memory = dev().getGpuMemory(dstMemory);
- size_t fillSize = vcmd.patternSize() * vcmd.times();
+ gpu::Memory* memory = dev().getGpuMemory(dstMemory);
- amd::Coord3D origin(offset, 0, 0);
- amd::Coord3D size(fillSize, 1, 1);
- assert((dstMemory->validateRegion(origin, size))&&"The incorrect fill size!");
+ amd::Coord3D origin(offset, 0, 0);
+ amd::Coord3D size(fillSize, 1, 1);
+ assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");
- if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(),
- vcmd.patternSize(), origin, size)) {
- vcmd.setStatus(CL_INVALID_OPERATION);
+ if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(),
+ vcmd.patternSize(), origin, size)) {
+ vcmd.setStatus(CL_INVALID_OPERATION);
+ }
}
+ else {
+ // for FGS capable device, fill CPU memory directly
+ amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(), vcmd.times());
+ }
+
profilingEnd(vcmd);
}