P4 to Git Change 1193228 by xcui@merged_opencl_jxcwin on 2015/09/22 18:52:47

SWDEV-59579 - Resubmit changelist 1193161. Refactor the coarse-grained SVM and fine-grained buffer SVM code paths so that, if the device the SVM is running on supports fine-grained system SVM, the SVM API operations act directly on system memory, with no need to go through the GPU backend. In addition, add support for PX systems with CZ on Windows 10, which supports fine-grained system SVM. Code review: http://ocltc.amd.com/reviews/r/8530/ Precheckin: http://ocltc.amd.com:8111/viewModification.html?modId=58913&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#527 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#152 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#382 edit [ROCm/clr commit: a3074a2a8f]
2015-09-22 18:59:36 -04:00
commit 42be07afce
@@ -2198,7 +2198,13 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me

    size = amd::alignUp(size, alignment);
    amd::Memory* mem = NULL;
+    freeCPUMem_ = false;
    if (NULL == svmPtr) {
+        if (isFineGrainedSystem()) {
+            freeCPUMem_ = true;
+            return amd::Os::alignedMalloc(size, alignment);
+        }
+
        //create a hidden buffer, which will allocated on the device later
        mem = new (context)amd::Buffer(context, flags, size, reinterpret_cast<void*>(1));
        if (mem == NULL) {
@@ -2211,10 +2217,12 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
            mem->release();
            return NULL;
        }
+        //if the device supports SVM FGS, return the committed CPU address directly.
        gpu::Memory* gpuMem = getGpuMemory(mem);
+
        //add the information to context so that we can use it later.
        amd::SvmManager::AddSvmBuffer(mem->getSvmPtr(), mem);
-
+        svmPtr = mem->getSvmPtr();
    }
    else {
        //find the existing amd::mem object
@@ -2222,20 +2230,31 @@ Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_me
        if (NULL == mem) {
            return NULL;
        }
-        gpu::Memory* gpuMem = getGpuMemory(mem);
+        //commit the CPU memory for FGS device.
+        if (isFineGrainedSystem()) {
+            mem->commitSvmMemory();
+        }
+        else {
+            gpu::Memory* gpuMem = getGpuMemory(mem);
+        }
+        svmPtr = mem->getSvmPtr();
    }
-
-    return mem->getSvmPtr();
+    return svmPtr;
 }

 void
 Device::svmFree(void *ptr) const
 {
-    amd::Memory * svmMem = NULL;
-    svmMem = amd::SvmManager::FindSvmBuffer(ptr);
-    if (NULL != svmMem) {
-        svmMem->release();
-        amd::SvmManager::RemoveSvmBuffer(ptr);
+    if (freeCPUMem_) {
+        amd::Os::alignedFree(ptr);
+    }
+    else {
+        amd::Memory * svmMem = NULL;
+        svmMem = amd::SvmManager::FindSvmBuffer(ptr);
+        if (NULL != svmMem) {
+            svmMem->release();
+            amd::SvmManager::RemoveSvmBuffer(ptr);
+        }
    }
 }

@@ -623,6 +623,7 @@ private:
    SrdManager*     srdManager_;    //!< SRD manager object

    static AppProfile appProfile_; //!< application profile
+    mutable bool freeCPUMem_;              //!< flag to mark GPU free SVM CPU mem
 };

 /*@}*/} // namespace gpu
@@ -1013,35 +1013,42 @@ VirtualGPU::submitSvmCopyMemory(amd::SvmCopyMemoryCommand& vcmd)
    profilingBegin(vcmd);

    cl_command_type type = vcmd.type();
-    amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src());
-    amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst());
-    if (NULL == srcMem || NULL == dstMem) {
-        vcmd.setStatus(CL_INVALID_OPERATION);
-        return;
+    //no op for FGS supported device
+    if (!dev().isFineGrainedSystem()) {
+
+        amd::Memory* srcMem = amd::SvmManager::FindSvmBuffer(vcmd.src());
+        amd::Memory* dstMem = amd::SvmManager::FindSvmBuffer(vcmd.dst());
+        if (NULL == srcMem || NULL == dstMem) {
+            vcmd.setStatus(CL_INVALID_OPERATION);
+            return;
+        }
+
+        amd::Coord3D srcOrigin(0, 0, 0);
+        amd::Coord3D dstOrigin(0, 0, 0);
+        amd::Coord3D size(vcmd.srcSize(), 1, 1);
+        amd::BufferRect srcRect;
+        amd::BufferRect dstRect;
+
+        srcOrigin.c[0] = static_cast<const_address>(vcmd.src()) - static_cast<address>(srcMem->getSvmPtr());
+        dstOrigin.c[0] = static_cast<const_address>(vcmd.dst()) - static_cast<address>(dstMem->getSvmPtr());
+
+        if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) {
+            vcmd.setStatus(CL_INVALID_OPERATION);
+            return;
+        }
+
+        bool entire = srcMem->isEntirelyCovered(srcOrigin, size) &&
+            dstMem->isEntirelyCovered(dstOrigin, size);
+
+        if (!copyMemory(type, *srcMem, *dstMem, entire,
+            srcOrigin, dstOrigin, size, srcRect, dstRect)) {
+            vcmd.setStatus(CL_INVALID_OPERATION);
+        }
    }
-
-    amd::Coord3D srcOrigin(0, 0, 0);
-    amd::Coord3D dstOrigin(0, 0, 0);
-    amd::Coord3D size(vcmd.srcSize(), 1, 1);
-    amd::BufferRect srcRect;
-    amd::BufferRect dstRect;
-
-    srcOrigin.c[0] = static_cast<const_address>(vcmd.src()) - static_cast<address>(srcMem->getSvmPtr());
-    dstOrigin.c[0] = static_cast<const_address>(vcmd.dst()) - static_cast<address>(dstMem->getSvmPtr());
-
-    if (!(srcMem->validateRegion(srcOrigin, size)) || !(dstMem->validateRegion(dstOrigin, size))) {
-        vcmd.setStatus(CL_INVALID_OPERATION);
-        return;
+    else {
+        //direct memcpy for FGS enabled system
+        amd::SvmBuffer::memFill(vcmd.dst(), vcmd.src(), vcmd.srcSize(), 1);
    }
-
-    bool entire  = srcMem->isEntirelyCovered(srcOrigin, size) &&
-                   dstMem->isEntirelyCovered(dstOrigin, size);
-
-    if (!copyMemory(type, *srcMem, *dstMem, entire,
-        srcOrigin, dstOrigin, size, srcRect, dstRect)) {
-        vcmd.setStatus(CL_INVALID_OPERATION);
-    }
-
    profilingEnd(vcmd);
 }

@@ -1353,25 +1360,28 @@ VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& vcmd)

    profilingBegin(vcmd, true);

-    // Make sure we have memory for the command execution
-    gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+    //no op for FGS supported device
+    if (!dev().isFineGrainedSystem()) {
+        // Make sure we have memory for the command execution
+        gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());

-    memory->saveMapInfo(vcmd.origin(), vcmd.size(),
-        vcmd.mapFlags(), vcmd.isEntireMemory());
+        memory->saveMapInfo(vcmd.origin(), vcmd.size(),
+            vcmd.mapFlags(), vcmd.isEntireMemory());

-    if (memory->mapMemory() != NULL) {
-        if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
-            amd::Coord3D dstOrigin(0, 0, 0);
-            assert(memory->cal()->buffer_ && "SVM memory can't be an image");
-            if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
-                vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
-                LogError("submitSVMMapMemory() - copy failed");
-                vcmd.setStatus(CL_MAP_FAILURE);
+        if (memory->mapMemory() != NULL) {
+            if (vcmd.mapFlags() & (CL_MAP_READ | CL_MAP_WRITE)) {
+                amd::Coord3D dstOrigin(0, 0, 0);
+                assert(memory->cal()->buffer_ && "SVM memory can't be an image");
+                if (!blitMgr().copyBuffer(*memory, *memory->mapMemory(),
+                    vcmd.origin(), dstOrigin, vcmd.size(), vcmd.isEntireMemory())) {
+                    LogError("submitSVMMapMemory() - copy failed");
+                    vcmd.setStatus(CL_MAP_FAILURE);
+                }
            }
        }
-    }
-    else {
-        LogError("Unhandled svm map!");
+        else {
+            LogError("Unhandled svm map!");
+        }
    }

    profilingEnd(vcmd);
@@ -1384,18 +1394,21 @@ VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& vcmd)
    amd::ScopedLock lock(execution());
    profilingBegin(vcmd, true);

-    gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+    //no op for FGS supported device
+    if (!dev().isFineGrainedSystem()) {

-    if (memory->mapMemory() != NULL) {
-        if (memory->isUnmapWrite()) {
-            amd::Coord3D srcOrigin(0, 0, 0);
-            // Target is a remote resource, so copy
-            assert(memory->cal()->buffer_ && "SVM memory can't be an image");
-            if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
-                memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
-                memory->writeMapInfo()->entire_)) {
-                LogError("submitSvmUnmapMemory() - copy failed");
-                vcmd.setStatus(CL_OUT_OF_RESOURCES);
+        gpu::Memory* memory = dev().getGpuMemory(vcmd.getSvmMem());
+        if (memory->mapMemory() != NULL) {
+            if (memory->isUnmapWrite()) {
+                amd::Coord3D srcOrigin(0, 0, 0);
+                // Target is a remote resource, so copy
+                assert(memory->cal()->buffer_ && "SVM memory can't be an image");
+                if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, srcOrigin,
+                    memory->writeMapInfo()->origin_, memory->writeMapInfo()->region_,
+                    memory->writeMapInfo()->entire_)) {
+                    LogError("submitSvmUnmapMemory() - copy failed");
+                    vcmd.setStatus(CL_OUT_OF_RESOURCES);
+                }
            }
        }
    }
@@ -1411,23 +1424,32 @@ VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& vcmd)

    profilingBegin(vcmd, true);

-    amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst());
-    assert(dstMemory&&"No svm Buffer to fill with!");
-    size_t offset = reinterpret_cast<uintptr_t>(vcmd.dst())
-                    - reinterpret_cast<uintptr_t>(dstMemory->getSvmPtr());
-    assert((offset >= 0)&&"wrong svm ptr to fill with!");
+    if (!dev().isFineGrainedSystem()) {
+        size_t patternSize = vcmd.patternSize();
+        size_t fillSize = patternSize * vcmd.times();
+        size_t offset = 0;
+        amd::Memory* dstMemory = amd::SvmManager::FindSvmBuffer(vcmd.dst());
+        assert(dstMemory&&"No svm Buffer to fill with!");
+        offset = reinterpret_cast<uintptr_t>(vcmd.dst())
+            - reinterpret_cast<uintptr_t>(dstMemory->getSvmPtr());
+        assert((offset >= 0) && "wrong svm ptr to fill with!");

-    gpu::Memory* memory = dev().getGpuMemory(dstMemory);
-    size_t fillSize = vcmd.patternSize() * vcmd.times();
+        gpu::Memory* memory = dev().getGpuMemory(dstMemory);

-    amd::Coord3D    origin(offset, 0, 0);
-    amd::Coord3D    size(fillSize, 1, 1);
-    assert((dstMemory->validateRegion(origin, size))&&"The incorrect fill size!");
+        amd::Coord3D    origin(offset, 0, 0);
+        amd::Coord3D    size(fillSize, 1, 1);
+        assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");

-    if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(),
-                    vcmd.patternSize(), origin, size)) {
-        vcmd.setStatus(CL_INVALID_OPERATION);
+        if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(),
+            vcmd.patternSize(), origin, size)) {
+            vcmd.setStatus(CL_INVALID_OPERATION);
+        }
    }
+    else {
+        // for FGS capable device, fill CPU memory directly
+        amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(), vcmd.times());
+    }
+
    profilingEnd(vcmd);
 }