P4 to Git Change 1193228 by xcui@merged_opencl_jxcwin on 2015/09/22 18:52:47

SWDEV-59579 - Resubmit changelist 1193161. Refactor the coarse-grained SVM and fine-grained buffer SVM code paths so that, if the device the SVM is running on supports fine-grained system SVM, the SVM API operations work directly on system memory, with no need to go through the GPU backend. In addition, added support for PX systems with CZ on Windows 10, which supports SVM fine-grained system.

	code review:
	http://ocltc.amd.com/reviews/r/8530/
	precheckin:
	http://ocltc.amd.com:8111/viewModification.html?modId=58913&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#527 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#152 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#382 edit


[ROCm/clr commit: a3074a2a8f]
This commit is contained in:
foreman
2015-09-22 18:59:36 -04:00
szülő cf3a9d00c7
commit 42be07afce
3 fájl változott, egészen pontosan 117 új sor hozzáadva és 75 régi sor törölve
@@ -2198,7 +2198,13 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
size = amd::alignUp(size, alignment);
amd::Memory* mem = NULL;
freeCPUMem_ = false;
if (NULL == svmPtr) {
if (isFineGrainedSystem()) {
freeCPUMem_ = true;
return amd::Os::alignedMalloc(size, alignment);
}
//create a hidden buffer, which will be allocated on the device later
mem = new (context)amd::Buffer(context, flags, size, reinterpret_cast<void*>(1));
if (mem == NULL) {
@@ -2211,10 +2217,12 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
mem->release();
return NULL;
}
//if the device supports SVM FGS, return the committed CPU address directly.
gpu::Memory* gpuMem = getGpuMemory(mem);
//add the information to context so that we can use it later.
amd::SvmManager::AddSvmBuffer(mem->getSvmPtr(), mem);
svmPtr = mem->getSvmPtr();
}
else {
//find the existing amd::mem object
@@ -2222,20 +2230,31 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
if (NULL == mem) {
return NULL;
}
gpu::Memory* gpuMem = getGpuMemory(mem);
//commit the CPU memory for FGS device.
if (isFineGrainedSystem()) {
mem->commitSvmMemory();
}
else {
gpu::Memory* gpuMem = getGpuMemory(mem);
}
svmPtr = mem->getSvmPtr();
}
return mem->getSvmPtr();
return svmPtr;
}
void
Device::svmFree(void *ptr) const
{
amd::Memory * svmMem = NULL;
svmMem = amd::SvmManager::FindSvmBuffer(ptr);
if (NULL != svmMem) {
svmMem->release();
amd::SvmManager::RemoveSvmBuffer(ptr);
if (freeCPUMem_) {
amd::Os::alignedFree(ptr);
}
else {
amd::Memory * svmMem = NULL;
svmMem = amd::SvmManager::FindSvmBuffer(ptr);
if (NULL != svmMem) {
svmMem->release();
amd::SvmManager::RemoveSvmBuffer(ptr);
}
}
}
@@ -623,6 +623,7 @@ private:
SrdManager* srdManager_; //!< SRD manager object
static AppProfile appProfile_; //!< application profile
mutable bool freeCPUMem_; //!< flag to mark GPU free SVM CPU mem
};
/*@}*/} // namespace gpu
@@ -1013,35 +1013,42 @@ VirtualGPU::submitSvmCopyMemory(amd::SvmCopyMemoryCommand& vcmd)
profilingBegin(vcmd);
cl_command_type type = vcmd.type();
amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src());
amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst());
if (NULL == srcMem || NULL == dstMem) {
vcmd.setStatus(CL_INVALID_OPERATION);
return;
//no op for FGS supported device
if (!dev().isFineGrainedSystem()) {
amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src());
amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst());
if (NULL == srcMem || NULL == dstMem) {
vcmd.setStatus(CL_INVALID_OPERATION);
return;
}
amd::Coord3D srcOrigin(0, 0, 0);
amd::Coord3D dstOrigin(0, 0, 0);
amd::Coord3D size(vcmd.srcSize(), 1, 1);
amd::BufferRect srcRect;
amd::BufferRect dstRect;
srcOrigin.c[0] = static_cast<const_address>(vcmd.src()) - static_cast<address>(srcMem->getSvmPtr());
dstOrigin.c[0] = static_cast<const_address>(vcmd.dst()) - static_cast<address>(dstMem->getSvmPtr());
if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) {
vcmd.setStatus(CL_INVALID_OPERATION);
return;
}
bool entire = srcMem->isEntirelyCovered(srcOrigin, size) &&
dstMem->isEntirelyCovered(dstOrigin, size);
if (!copyMemory(type, *srcMem, *dstMem, entire,
srcOrigin, dstOrigin, size, srcRect, dstRect)) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
}
amd::Coord3D srcOrigin(0, 0, 0);
amd::Coord3D dstOrigin(0, 0, 0);
amd::Coord3D size(vcmd.srcSize(), 1, 1);
amd::BufferRect srcRect;
amd::BufferRect dstRect;
srcOrigin.c[0] = static_cast<const_address>(vcmd.src()) - static_cast<address>(srcMem->getSvmPtr());
dstOrigin.c[0] = static_cast<const_address>(vcmd.dst()) - static_cast<address>(dstMem->getSvmPtr());
if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) {
vcmd.setStatus(CL_INVALID_OPERATION);
return;
else {
//direct memcpy for FGS enabled system
amd::SvmBuffer::memFill(vcmd.dst(), vcmd.src(), vcmd.srcSize(), 1);
}
bool entire = srcMem->isEntirelyCovered(srcOrigin, size) &&
dstMem->isEntirelyCovered(dstOrigin, size);
if (!copyMemory(type, *srcMem, *dstMem, entire,
srcOrigin, dstOrigin, size, srcRect, dstRect)) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
profilingEnd(vcmd);
}
@@ -1353,25 +1360,28 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)
profilingBegin(vcmd, true);
// Make sure we have memory for the command execution
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
//no op for FGS supported device
if (!dev().isFineGrainedSystem()) {
// Make sure we have memory for the command execution
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
memory->saveMapInfo(vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
memory->saveMapInfo(vcmd.origin(), vcmd.size(),
vcmd.mapFlags(), vcmd.isEntireMemory());
if (memory->mapMemory() != NULL) {
if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
amd::Coord3D dstOrigin(0, 0, 0);
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
LogError("submitSVMMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
if (memory->mapMemory() != NULL) {
if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
amd::Coord3D dstOrigin(0, 0, 0);
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
LogError("submitSVMMapMemory() - copy failed");
vcmd.setStatus(CL_MAP_FAILURE);
}
}
}
}
else {
LogError("Unhandled svm map!");
else {
LogError("Unhandled svm map!");
}
}
profilingEnd(vcmd);
@@ -1384,18 +1394,21 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
amd::ScopedLock lock(execution());
profilingBegin(vcmd, true);
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
//no op for FGS supported device
if (!dev().isFineGrainedSystem()) {
if (memory->mapMemory() != NULL) {
if (memory->isUnmapWrite()) {
amd::Coord3D srcOrigin(0, 0, 0);
// Target is a remote resource, so copy
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
memory->writeMapInfo()->entire_)) {
LogError("submitSvmUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
if (memory->mapMemory() != NULL) {
if (memory->isUnmapWrite()) {
amd::Coord3D srcOrigin(0, 0, 0);
// Target is a remote resource, so copy
assert(memory->cal()->buffer_ && "SVM memory can't be an image");
if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
memory->writeMapInfo()->entire_)) {
LogError("submitSvmUnmapMemory() - copy failed");
vcmd.setStatus(CL_OUT_OF_RESOURCES);
}
}
}
}
@@ -1411,23 +1424,32 @@ VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& vcmd)
profilingBegin(vcmd, true);
amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst());
assert(dstMemory&&"No svm Buffer to fill with!");
size_t offset = reinterpret_cast<uintptr_t>(vcmd.dst())
- reinterpret_cast<uintptr_t>(dstMemory->getSvmPtr());
assert((offset >= 0)&&"wrong svm ptr to fill with!");
if (!dev().isFineGrainedSystem()) {
size_t patternSize = vcmd.patternSize();
size_t fillSize = patternSize * vcmd.times();
size_t offset = 0;
amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst());
assert(dstMemory&&"No svm Buffer to fill with!");
offset = reinterpret_cast<uintptr_t>(vcmd.dst())
- reinterpret_cast<uintptr_t>(dstMemory->getSvmPtr());
assert((offset >= 0) && "wrong svm ptr to fill with!");
gpu::Memory* memory = dev().getGpuMemory(dstMemory);
size_t fillSize = vcmd.patternSize() * vcmd.times();
gpu::Memory* memory = dev().getGpuMemory(dstMemory);
amd::Coord3D origin(offset, 0, 0);
amd::Coord3D size(fillSize, 1, 1);
assert((dstMemory->validateRegion(origin, size))&&"The incorrect fill size!");
amd::Coord3D origin(offset, 0, 0);
amd::Coord3D size(fillSize, 1, 1);
assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");
if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(),
vcmd.patternSize(), origin, size)) {
vcmd.setStatus(CL_INVALID_OPERATION);
if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(),
vcmd.patternSize(), origin, size)) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
}
else {
// for FGS capable device, fill CPU memory directly
amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(), vcmd.times());
}
profilingEnd(vcmd);
}