// Review notes. This is leading text placed before the first "diff --git" header;
// it is not part of any hunk and no hunk content below has been altered.
//
// Purpose: the patch adds a fine-grained-system (FGS) SVM path to three files:
//   gpudevice.cpp  - Device::svmAlloc, Device::svmFree
//   gpudevice.hpp  - new private member `mutable bool freeCPUMem_`
//   gpuvirtual.cpp - VirtualGPU::submitSvmCopyMemory, submitSvmMapMemory,
//                    submitSvmUnmapMemory, submitSvmFillMemory
//
// Device::svmAlloc after the patch (only the hunks are visible; the start of the
// function and the code between hunks are not shown here):
//   * sets freeCPUMem_ = false before the `NULL == svmPtr` test;
//   * svmPtr == NULL and isFineGrainedSystem(): sets freeCPUMem_ = true and returns
//     amd::Os::alignedMalloc(size, alignment) directly; no amd::Buffer is created and
//     amd::SvmManager::AddSvmBuffer is not called for that pointer;
//   * svmPtr == NULL otherwise: creates the hidden amd::Buffer, calls getGpuMemory(mem),
//     registers it with AddSvmBuffer and stores mem->getSvmPtr() in svmPtr;
//   * svmPtr != NULL: returns NULL when `mem` is NULL, else calls mem->commitSvmMemory()
//     on FGS devices or getGpuMemory(mem) otherwise, then stores mem->getSvmPtr();
//   * the final statement becomes `return svmPtr;`.
//
// Device::svmFree after the patch:
//   * freeCPUMem_ set   -> amd::Os::alignedFree(ptr);
//   * freeCPUMem_ clear -> amd::SvmManager::FindSvmBuffer(ptr); when a buffer is found it
//     is release()d and removed with RemoveSvmBuffer(ptr).
//
// gpuvirtual.cpp after the patch:
//   * submitSvmCopyMemory: the previous lookup / validateRegion / copyMemory sequence now
//     runs only when !dev().isFineGrainedSystem(); the FGS branch calls
//     amd::SvmBuffer::memFill(vcmd.dst(), vcmd.src(), vcmd.srcSize(), 1).
//   * submitSvmMapMemory / submitSvmUnmapMemory: the previous bodies run only when
//     !dev().isFineGrainedSystem(); on FGS nothing is executed between profilingBegin
//     and profilingEnd.
//   * submitSvmFillMemory: the non-FGS branch keeps the fillMemory path; the FGS branch
//     calls amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(),
//     vcmd.times()).
//
// NOTE(review): freeCPUMem_ is a single per-Device flag rewritten by each svmAlloc call,
//   and svmFree branches on it. svmFree therefore follows the most recent svmAlloc, not
//   the way `ptr` itself was obtained. Example from the visible code: an svmAlloc call
//   with a non-NULL svmPtr leaves the flag false, after which svmFree on a pointer that
//   came from alignedMalloc takes the FindSvmBuffer branch and never reaches alignedFree.
//   Consider keying the decision on isFineGrainedSystem() or on the FindSvmBuffer result
//   instead of stored state. No locking around the flag is visible in these hunks -
//   TODO confirm whether callers serialise svmAlloc/svmFree.
// NOTE(review): the added comment "no op for FGS supported device" in
//   submitSvmCopyMemory sits above a branch whose FGS side performs a memFill copy, so
//   the wording does not match that function.
// NOTE(review): both `return` statements inside submitSvmCopyMemory's non-FGS branch exit
//   before profilingEnd(vcmd); the removed lines show the same behaviour before the patch.
// NOTE(review): `gpu::Memory* gpuMem = getGpuMemory(mem);` is assigned in svmAlloc and not
//   read anywhere in the visible hunks - presumably called for its side effect; verify.
// NOTE(review): `assert((offset >= 0) && ...)` in submitSvmFillMemory tests a size_t, so
//   the condition cannot fail as written.
// NOTE(review): line breaks look collapsed and casts such as `reinterpret_cast(1)` and
//   `static_cast(vcmd.src())` carry no template argument list - presumably lost during
//   extraction; TODO confirm against the original patch before applying.
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index f7181fcee0..724d350d57 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -2198,7 +2198,13 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me size = amd::alignUp(size, alignment); amd::Memory* mem = NULL; + freeCPUMem_ = false; if (NULL == svmPtr) { + if (isFineGrainedSystem()) { + freeCPUMem_ = true; + return amd::Os::alignedMalloc(size, alignment); + } + //create a hidden buffer, which will allocated on the device later mem = new (context)amd::Buffer(context, flags, size, reinterpret_cast(1)); if (mem == NULL) { @@ -2211,10 +2217,12 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me mem->release(); return NULL; } + //if the device supports SVM FGS, return the committed CPU address directly. gpu::Memory* gpuMem = getGpuMemory(mem); + //add the information to context so that we can use it later. amd::SvmManager::AddSvmBuffer(mem->getSvmPtr(), mem); - + svmPtr = mem->getSvmPtr(); } else { //find the existing amd::mem object @@ -2222,20 +2230,31 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me if (NULL == mem) { return NULL; } - gpu::Memory* gpuMem = getGpuMemory(mem); + //commit the CPU memory for FGS device. 
+ if (isFineGrainedSystem()) { + mem->commitSvmMemory(); + } + else { + gpu::Memory* gpuMem = getGpuMemory(mem); + } + svmPtr = mem->getSvmPtr(); } - - return mem->getSvmPtr(); + return svmPtr; } void Device::svmFree(void *ptr) const { - amd::Memory * svmMem = NULL; - svmMem = amd::SvmManager::FindSvmBuffer(ptr); - if (NULL != svmMem) { - svmMem->release(); - amd::SvmManager::RemoveSvmBuffer(ptr); + if (freeCPUMem_) { + amd::Os::alignedFree(ptr); + } + else { + amd::Memory * svmMem = NULL; + svmMem = amd::SvmManager::FindSvmBuffer(ptr); + if (NULL != svmMem) { + svmMem->release(); + amd::SvmManager::RemoveSvmBuffer(ptr); + } } } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp index 9a7c43853a..d4ac52cd5a 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp @@ -623,6 +623,7 @@ private: SrdManager* srdManager_; //!< SRD manager object static AppProfile appProfile_; //!< application profile + mutable bool freeCPUMem_; //!< flag to mark GPU free SVM CPU mem }; /*@}*/} // namespace gpu diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index 6cc3eae985..f4e9012945 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -1013,35 +1013,42 @@ VirtualGPU::submitSvmCopyMemory(amd::SvmCopyMemoryCommand& vcmd) profilingBegin(vcmd); cl_command_type type = vcmd.type(); - amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src()); - amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst()); - if (NULL == srcMem || NULL == dstMem) { - vcmd.setStatus(CL_INVALID_OPERATION); - return; + //no op for FGS supported device + if (!dev().isFineGrainedSystem()) { + + amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src()); + amd::Memory* dstMem = 
amd::SvmManager::FindSvmBuffer(vcmd.dst()); + if (NULL == srcMem || NULL == dstMem) { + vcmd.setStatus(CL_INVALID_OPERATION); + return; + } + + amd::Coord3D srcOrigin(0, 0, 0); + amd::Coord3D dstOrigin(0, 0, 0); + amd::Coord3D size(vcmd.srcSize(), 1, 1); + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + srcOrigin.c[0] = static_cast(vcmd.src()) - static_cast
(srcMem->getSvmPtr()); + dstOrigin.c[0] = static_cast(vcmd.dst()) - static_cast
(dstMem->getSvmPtr()); + + if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) { + vcmd.setStatus(CL_INVALID_OPERATION); + return; + } + + bool entire = srcMem->isEntirelyCovered(srcOrigin, size) && + dstMem->isEntirelyCovered(dstOrigin, size); + + if (!copyMemory(type, *srcMem, *dstMem, entire, + srcOrigin, dstOrigin, size, srcRect, dstRect)) { + vcmd.setStatus(CL_INVALID_OPERATION); + } } - - amd::Coord3D srcOrigin(0, 0, 0); - amd::Coord3D dstOrigin(0, 0, 0); - amd::Coord3D size(vcmd.srcSize(), 1, 1); - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - srcOrigin.c[0] = static_cast(vcmd.src()) - static_cast
(srcMem->getSvmPtr()); - dstOrigin.c[0] = static_cast(vcmd.dst()) - static_cast
(dstMem->getSvmPtr()); - - if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) { - vcmd.setStatus(CL_INVALID_OPERATION); - return; + else { + //direct memcpy for FGS enabled system + amd::SvmBuffer::memFill(vcmd.dst(), vcmd.src(), vcmd.srcSize(), 1); } - - bool entire = srcMem->isEntirelyCovered(srcOrigin, size) && - dstMem->isEntirelyCovered(dstOrigin, size); - - if (!copyMemory(type, *srcMem, *dstMem, entire, - srcOrigin, dstOrigin, size, srcRect, dstRect)) { - vcmd.setStatus(CL_INVALID_OPERATION); - } - profilingEnd(vcmd); } @@ -1353,25 +1360,28 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd) profilingBegin(vcmd, true); - // Make sure we have memory for the command execution - gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); + //no op for FGS supported device + if (!dev().isFineGrainedSystem()) { + // Make sure we have memory for the command execution + gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); - memory->saveMapInfo(vcmd.origin(), vcmd.size(), - vcmd.mapFlags(), vcmd.isEntireMemory()); + memory->saveMapInfo(vcmd.origin(), vcmd.size(), + vcmd.mapFlags(), vcmd.isEntireMemory()); - if (memory->mapMemory() != NULL) { - if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) { - amd::Coord3D dstOrigin(0, 0, 0); - assert(memory->cal()->buffer_ && "SVM memory can't be an image"); - if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(), - vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) { - LogError("submitSVMMapMemory() - copy failed"); - vcmd.setStatus(CL_MAP_FAILURE); + if (memory->mapMemory() != NULL) { + if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) { + amd::Coord3D dstOrigin(0, 0, 0); + assert(memory->cal()->buffer_ && "SVM memory can't be an image"); + if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(), + vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) { + LogError("submitSVMMapMemory() - copy failed"); + vcmd.setStatus(CL_MAP_FAILURE); + 
} } } - } - else { - LogError("Unhandled svm map!"); + else { + LogError("Unhandled svm map!"); + } } profilingEnd(vcmd); @@ -1384,18 +1394,21 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd) amd::ScopedLock lock(execution()); profilingBegin(vcmd, true); - gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); + //no op for FGS supported device + if (!dev().isFineGrainedSystem()) { - if (memory->mapMemory() != NULL) { - if (memory->isUnmapWrite()) { - amd::Coord3D srcOrigin(0, 0, 0); - // Target is a remote resource, so copy - assert(memory->cal()->buffer_ && "SVM memory can't be an image"); - if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin, - memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_, - memory->writeMapInfo()->entire_)) { - LogError("submitSvmUnmapMemory() - copy failed"); - vcmd.setStatus(CL_OUT_OF_RESOURCES); + gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem()); + if (memory->mapMemory() != NULL) { + if (memory->isUnmapWrite()) { + amd::Coord3D srcOrigin(0, 0, 0); + // Target is a remote resource, so copy + assert(memory->cal()->buffer_ && "SVM memory can't be an image"); + if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin, + memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_, + memory->writeMapInfo()->entire_)) { + LogError("submitSvmUnmapMemory() - copy failed"); + vcmd.setStatus(CL_OUT_OF_RESOURCES); + } } } } @@ -1411,23 +1424,32 @@ VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& vcmd) profilingBegin(vcmd, true); - amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst()); - assert(dstMemory&&"No svm Buffer to fill with!"); - size_t offset = reinterpret_cast(vcmd.dst()) - - reinterpret_cast(dstMemory->getSvmPtr()); - assert((offset >= 0)&&"wrong svm ptr to fill with!"); + if (!dev().isFineGrainedSystem()) { + size_t patternSize = vcmd.patternSize(); + size_t fillSize = patternSize * vcmd.times(); + size_t offset = 0; + amd::Memory* 
dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst()); + assert(dstMemory&&"No svm Buffer to fill with!"); + offset = reinterpret_cast(vcmd.dst()) + - reinterpret_cast(dstMemory->getSvmPtr()); + assert((offset >= 0) && "wrong svm ptr to fill with!"); - gpu::Memory* memory = dev().getGpuMemory(dstMemory); - size_t fillSize = vcmd.patternSize() * vcmd.times(); + gpu::Memory* memory = dev().getGpuMemory(dstMemory); - amd::Coord3D origin(offset, 0, 0); - amd::Coord3D size(fillSize, 1, 1); - assert((dstMemory->validateRegion(origin, size))&&"The incorrect fill size!"); + amd::Coord3D origin(offset, 0, 0); + amd::Coord3D size(fillSize, 1, 1); + assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!"); - if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(), - vcmd.patternSize(), origin, size)) { - vcmd.setStatus(CL_INVALID_OPERATION); + if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(), + vcmd.patternSize(), origin, size)) { + vcmd.setStatus(CL_INVALID_OPERATION); + } } + else { + // for FGS capable device, fill CPU memory directly + amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(), vcmd.times()); + } + profilingEnd(vcmd); }