P4 to Git Change 1254144 by gandryey@gera-rcf-lnx on 2016/04/04 11:14:17

SWDEV-79445 - OCL generic changes and code clean-up - Move prepinned logic to the abstraction layer Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#193 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#270 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#543 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#158 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#398 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#61 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#62 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#3 edit [ROCm/clr commit: 8756fa14cb]
2016-04-04 11:25:36 -04:00
parent 841cb089da
commit 5b5b3b8cdc
8 changed files with 109 additions and 222 deletions
@@ -225,7 +225,13 @@ Device::tearDown()
 }

 Device::Device(Device* parent)
-  : settings_(NULL), online_(true), blitProgram_(NULL), hwDebugMgr_(NULL), parent_(parent)
+    : settings_(NULL)
+    , online_(true)
+    , blitProgram_(NULL)
+    , hwDebugMgr_(NULL)
+    , parent_(parent)
+    , vaCacheAccess_(nullptr)
+    , vaCacheMap_(nullptr)
 {
    memset(&info_, '\0', sizeof(info_));
    if (parent_ != NULL) {
@@ -235,6 +241,11 @@ Device::Device(Device* parent)

 Device::~Device()
 {
+    CondLog((vaCacheMap_ != nullptr) &&
+        (vaCacheMap_->size() != 0), "Application didn't unmap all host memory!");
+    delete vaCacheMap_;
+    delete vaCacheAccess_;
+
    // Destroy device settings
    if (settings_ != NULL) {
        delete settings_;
@@ -255,6 +266,20 @@ Device::~Device()
    }
 }

+bool
+Device::create()
+{
+    // Allocate the lock that serialises all VA cache operations
+    vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
+    if (vaCacheAccess_ == nullptr) {
+        return false;
+    }
+
+    // Allocate the map from host start address to device memory;
+    // the result of this last allocation decides success
+    vaCacheMap_ = new std::map<uintptr_t, device::Memory*>();
+    return (vaCacheMap_ != nullptr);
+}
+
 bool
 Device::isAncestor(const Device* sub) const
 {
@@ -319,6 +344,62 @@ Device::registerDevice()
    devices_->push_back(this);
 }

+void
+Device::addVACache(device::Memory* memory) const
+{
+    // Track only memory with direct host access and a valid owner; the
+    // owner is dereferenced below to obtain the host pointer
+    if (!memory->isHostMemDirectAccess() || !memory->owner()) {
+        return;
+    }
+
+    // VA cache access must be serialised
+    amd::ScopedLock lk(*vaCacheAccess_);
+    void* start = memory->owner()->getHostMem();
+    size_t offset = 0;
+
+    // Reject a start address that lies inside an already cached range
+    if (findMemoryFromVA(start, &offset) != nullptr) {
+        LogError("Unexpected double map() call from the app!");
+        return;
+    }
+    vaCacheMap_->insert(std::pair<uintptr_t, device::Memory*>
+        (reinterpret_cast<uintptr_t>(start), memory));
+}
+
+void
+Device::removeVACache(const device::Memory* memory) const
+{
+    // Skip memory without direct host access or without an owner
+    if (!memory->isHostMemDirectAccess() || !memory->owner()) {
+        return;
+    }
+    amd::ScopedLock lk(*vaCacheAccess_);  // VA cache access must be serialised
+    void* hostPtr = memory->owner()->getHostMem();
+    vaCacheMap_->erase(reinterpret_cast<uintptr_t>(hostPtr));
+}
+
+device::Memory*
+Device::findMemoryFromVA(const void* ptr, size_t* offset) const
+{
+    // VA cache access must be serialised
+    amd::ScopedLock lk(*vaCacheAccess_);
+
+    // The entry just before the first start address above ptr is the candidate
+    const uintptr_t key = reinterpret_cast<uintptr_t>(ptr);
+    auto it = vaCacheMap_->upper_bound(key);
+    if (it == vaCacheMap_->begin()) {
+        return nullptr;
+    }
+    --it;
+
+    // it->first <= key here, so the distance can't wrap, unlike start + size
+    const uintptr_t distance = key - it->first;
+    if (distance >= it->second->size()) {
+        return nullptr;
+    }
+    *offset = distance;
+    return it->second;
+}

 bool IsHsaRequested(cl_device_type requestedType) {
 // Depending on HSA_RUNTIME and hint flags CL_HSA_XXXXX_AMD,
@@ -1519,6 +1519,9 @@ public:
    Device(Device* parent = NULL);
    virtual ~Device();

+    //! Initializes abstraction layer device object
+    bool create();
+
    //! Increment the reference count
    uint retain() {
        // Only increment the reference count of sub-devices
@@ -1733,6 +1736,15 @@ public:
    //! Remove the Hardware Debug Manager
    virtual void hwDebugManagerRemove() {}

+    //! Adds GPU memory to the VA cache list
+    void addVACache(device::Memory* memory) const;
+
+    //! Removes GPU memory from the VA cache list
+    void removeVACache(const device::Memory* memory) const;
+
+    //! Finds GPU memory from virtual address
+    device::Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
+
 protected:
    //! Enable the specified extension
    char* getExtensionString();
@@ -1757,7 +1769,9 @@ private:
    typedef std::vector<Device*>::iterator device_iterator;
    static std::vector<Device*>* devices_; //!< All known devices

-    Device*         parent_;   //!< This device's parent
+    Device*     parent_;        //!< This device's parent
+    Monitor*    vaCacheAccess_; //!< Lock to serialize VA caching access
+    std::map<uintptr_t, device::Memory*>* vaCacheMap_;  //!< VA cache map
 };

 struct KernelParameterDescriptor
@@ -735,8 +735,6 @@ Device::Device()
    , mapCacheOps_(NULL)
    , xferRead_(NULL)
    , xferWrite_(NULL)
-    , vaCacheAccess_(NULL)
-    , vaCacheList_(NULL)
    , mapCache_(NULL)
    , resourceCache_(NULL)
    , heapInitComplete_(false)
@@ -752,9 +750,6 @@ Device::~Device()
    delete hwDebugMgr_;
    hwDebugMgr_ = NULL;

-    CondLog(vaCacheList_ == NULL ||
-        (vaCacheList_->size() != 0), "Application didn't unmap all host memory!");
-
    delete srdManager_;

    for (uint s = 0; s < scratch_.size(); ++s) {
@@ -795,8 +790,6 @@ Device::~Device()
    delete vgpusAccess_;
    delete scratchAlloc_;
    delete mapCacheOps_;
-    delete vaCacheAccess_;
-    delete vaCacheList_;

    if (context_ != NULL) {
        context_->release();
@@ -811,6 +804,10 @@ extern const char* SchedulerSourceCode;
 bool
 Device::create(CALuint ordinal, CALuint numOfDevices)
 {
+    if (!amd::Device::create()) {
+        return false;
+    }
+
    appProfile_.init();

    bool smallMemSystem = false;
@@ -875,15 +872,6 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
        return false;
    }

-    vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
-    if (NULL == vaCacheAccess_) {
-        return false;
-    }
-    vaCacheList_ = new std::list<VACacheEntry*>();
-    if (NULL == vaCacheList_) {
-        return false;
-    }
-
    mapCache_ = new std::vector<amd::Memory*>();
    if (mapCache_ == NULL) {
        return false;
@@ -1895,68 +1883,6 @@ Device::globalFreeMemory(size_t* freeMemory) const
    return true;
 }

-void
-Device::addVACache(Memory* memory) const
-{
-    // Make sure system memory has direct access
-    if (memory->isHostMemDirectAccess()) {
-        // VA cache access must be serialised
-        amd::ScopedLock lk(*vaCacheAccess_);
-        void*   start = memory->owner()->getHostMem();
-        void*   end = reinterpret_cast<address>(start) + memory->owner()->getSize();
-        size_t  offset;
-        Memory*   doubleMap = findMemoryFromVA(start, &offset);
-
-        if (doubleMap == NULL) {
-            // Allocate a new entry
-            VACacheEntry*   entry = new VACacheEntry(start, end, memory);
-            if (entry != NULL) {
-                vaCacheList_->push_back(entry);
-            }
-        }
-        else {
-            LogError("Unexpected double map() call from the app!");
-        }
-    }
-}
-
-void
-Device::removeVACache(const Memory* memory) const
-{
-    // Make sure system memory has direct access
-    if (memory->isHostMemDirectAccess() && memory->owner()) {
-        // VA cache access must be serialised
-        amd::ScopedLock lk(*vaCacheAccess_);
-        void*   start = memory->owner()->getHostMem();
-        void*   end = reinterpret_cast<address>(start) + memory->owner()->getSize();
-
-        // Find VA cache entry for the specified memory
-        for (const auto& entry : *vaCacheList_) {
-            if (entry->startAddress_ == start) {
-                CondLog((entry->endAddress_ != end), "Incorrect VA range");
-                delete entry;
-                vaCacheList_->remove(entry);
-                break;
-            }
-        }
-    }
-}
-
-Memory*
-Device::findMemoryFromVA(const void* ptr, size_t* offset) const
-{
-    // VA cache access must be serialised
-    amd::ScopedLock lk(*vaCacheAccess_);
-    for (const auto& entry : *vaCacheList_) {
-        if ((entry->startAddress_ <= ptr) && (entry->endAddress_ > ptr)) {
-            *offset = static_cast<size_t>(reinterpret_cast<const char*>(ptr) -
-                reinterpret_cast<char*>(entry->startAddress_));
-            return entry->memory_;
-        }
-    }
-    return NULL;
-}
-
 amd::Memory*
 Device::findMapTarget(size_t size) const
 {
@@ -310,25 +310,6 @@ public:
        const Device&           gpuDevice_;     //!< GPU device object
    };

-    //! Virtual address cache entry
-    struct VACacheEntry : public amd::HeapObject
-    {
-        void*   startAddress_;  //!< Start virtual address
-        void*   endAddress_;    //!< End virtual address
-        Memory* memory_;        //!< GPU memory, associated with the range
-
-        //! Constructor
-        VACacheEntry(
-            void*   startAddress,   //!< Start virtual address
-            void*   endAddress,     //!< End virtual address
-            Memory* memory          //!< GPU memory object
-            ): startAddress_(startAddress), endAddress_(endAddress), memory_(memory) {}
-
-    private:
-        //! Disable default constructor
-        VACacheEntry();
-    };
-
    struct ScratchBuffer : public amd::HeapObject
    {
        uint    regNum_;    //!< The number of used scratch registers
@@ -502,15 +483,6 @@ public:
    //! Returns transfer buffer object
    XferBuffers& xferRead() const { return *xferRead_; }

-    //! Adds GPU memory to the VA cache list
-    void addVACache(Memory* memory) const;
-
-    //! Removes GPU memory from the VA cache list
-    void removeVACache(const Memory* memory) const;
-
-    //! Finds GPU memory from virtual address
-    Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
-
    //! Finds an appropriate map target
    amd::Memory* findMapTarget(size_t size) const;

@@ -618,8 +590,6 @@ private:
    XferBuffers*    xferRead_;      //!< Transfer buffers read
    XferBuffers*    xferWrite_;     //!< Transfer buffers write

-    amd::Monitor*   vaCacheAccess_; //!< Lock to serialize VA caching access
-    std::list<VACacheEntry*>*   vaCacheList_; //!< VA cache list
    std::vector<amd::Memory*>*  mapCache_;  //!< Map cache info structure
    ResourceCache*  resourceCache_; //!< Resource cache
    Engines         engines_;       //!< Available engines on device
@@ -660,7 +660,7 @@ VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd)

    size_t offset = 0;
    // Find if virtual address is a CL allocation
-    gpu::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
+    device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);

    profilingBegin(vcmd, true);

@@ -765,7 +765,7 @@ VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd)
    gpu::Memory* memory = dev().getGpuMemory(&vcmd.destination());
    size_t offset = 0;
    // Find if virtual address is a CL allocation
-    gpu::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
+    device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);

    profilingBegin(vcmd, true);

@@ -559,8 +559,6 @@ Device::Device()
    , mapCacheOps_(nullptr)
    , xferRead_(nullptr)
    , xferWrite_(nullptr)
-    , vaCacheAccess_(nullptr)
-    , vaCacheList_(nullptr)
    , mapCache_(nullptr)
    , resourceCache_(nullptr)
    , numComputeEngines_(0)
@@ -578,9 +576,6 @@ Device::~Device()
    delete hwDebugMgr_;
    hwDebugMgr_ = nullptr;

-    CondLog(vaCacheList_ == nullptr ||
-        (vaCacheList_->size() != 0), "Application didn't unmap all host memory!");
-
    delete srdManager_;

    for (uint s = 0; s < scratch_.size(); ++s) {
@@ -618,8 +613,6 @@ Device::~Device()
    delete vgpusAccess_;
    delete scratchAlloc_;
    delete mapCacheOps_;
-    delete vaCacheAccess_;
-    delete vaCacheList_;

    if (context_ != nullptr) {
        context_->release();
@@ -633,6 +626,10 @@ extern const char* SchedulerSourceCode;
 bool
 Device::create(Pal::IDevice* device)
 {
+    if (!amd::Device::create()) {
+        return false;
+    }
+
    appProfile_.init();
    device_ = device;
    Pal::Result result;
@@ -721,15 +718,6 @@ Device::create(Pal::IDevice* device)
        return false;
    }

-    vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
-    if (nullptr == vaCacheAccess_) {
-        return false;
-    }
-    vaCacheList_ = new std::list<VACacheEntry*>();
-    if (nullptr == vaCacheList_) {
-        return false;
-    }
-
    mapCache_ = new std::vector<amd::Memory*>();
    if (mapCache_ == nullptr) {
        return false;
@@ -1630,68 +1618,6 @@ Device::globalFreeMemory(size_t* freeMemory) const
    return true;
 }

-void
-Device::addVACache(Memory* memory) const
-{
-    // Make sure system memory has direct access
-    if (memory->isHostMemDirectAccess()) {
-        // VA cache access must be serialised
-        amd::ScopedLock lk(*vaCacheAccess_);
-        void*   start = memory->owner()->getHostMem();
-        void*   end = reinterpret_cast<address>(start) + memory->owner()->getSize();
-        size_t  offset;
-        Memory*   doubleMap = findMemoryFromVA(start, &offset);
-
-        if (doubleMap == nullptr) {
-            // Allocate a new entry
-            VACacheEntry*   entry = new VACacheEntry(start, end, memory);
-            if (entry != nullptr) {
-                vaCacheList_->push_back(entry);
-            }
-        }
-        else {
-            LogError("Unexpected double map() call from the app!");
-        }
-    }
-}
-
-void
-Device::removeVACache(const Memory* memory) const
-{
-    // Make sure system memory has direct access
-    if (memory->isHostMemDirectAccess() && memory->owner()) {
-        // VA cache access must be serialised
-        amd::ScopedLock lk(*vaCacheAccess_);
-        void*   start = memory->owner()->getHostMem();
-        void*   end = reinterpret_cast<address>(start) + memory->owner()->getSize();
-
-        // Find VA cache entry for the specified memory
-        for (const auto& entry : *vaCacheList_) {
-            if (entry->startAddress_ == start) {
-                CondLog((entry->endAddress_ != end), "Incorrect VA range");
-                delete entry;
-                vaCacheList_->remove(entry);
-                break;
-            }
-        }
-    }
-}
-
-Memory*
-Device::findMemoryFromVA(const void* ptr, size_t* offset) const
-{
-    // VA cache access must be serialised
-    amd::ScopedLock lk(*vaCacheAccess_);
-    for (const auto& entry : *vaCacheList_) {
-        if ((entry->startAddress_ <= ptr) && (entry->endAddress_ > ptr)) {
-            *offset = static_cast<size_t>(reinterpret_cast<const char*>(ptr) -
-                reinterpret_cast<char*>(entry->startAddress_));
-            return entry->memory_;
-        }
-    }
-    return nullptr;
-}
-
 amd::Memory*
 Device::findMapTarget(size_t size) const
 {
@@ -242,25 +242,6 @@ public:
        const Device&           gpuDevice_;     //!< GPU device object
    };

-    //! Virtual address cache entry
-    struct VACacheEntry : public amd::HeapObject
-    {
-        void*   startAddress_;  //!< Start virtual address
-        void*   endAddress_;    //!< End virtual address
-        Memory* memory_;        //!< GPU memory, associated with the range
-
-        //! Constructor
-        VACacheEntry(
-            void*   startAddress,   //!< Start virtual address
-            void*   endAddress,     //!< End virtual address
-            Memory* memory          //!< GPU memory object
-            ): startAddress_(startAddress), endAddress_(endAddress), memory_(memory) {}
-
-    private:
-        //! Disable default constructor
-        VACacheEntry();
-    };
-
    struct ScratchBuffer : public amd::HeapObject
    {
        uint    regNum_;    //!< The number of used scratch registers
@@ -418,15 +399,6 @@ public:
    //! Returns transfer buffer object
    XferBuffers& xferRead() const { return *xferRead_; }

-    //! Adds GPU memory to the VA cache list
-    void addVACache(Memory* memory) const;
-
-    //! Removes GPU memory from the VA cache list
-    void removeVACache(const Memory* memory) const;
-
-    //! Finds GPU memory from virtual address
-    Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
-
    //! Finds an appropriate map target
    amd::Memory* findMapTarget(size_t size) const;

@@ -569,8 +541,6 @@ private:
    amd::Monitor*   mapCacheOps_;   //!< Lock to serialise cache for the map resources
    XferBuffers*    xferRead_;      //!< Transfer buffers read
    XferBuffers*    xferWrite_;     //!< Transfer buffers write
-    amd::Monitor*   vaCacheAccess_; //!< Lock to serialize VA caching access
-    std::list<VACacheEntry*>*   vaCacheList_; //!< VA cache list
    std::vector<amd::Memory*>*  mapCache_;  //!< Map cache info structure
    ResourceCache*  resourceCache_; //!< Resource cache
    uint            numComputeEngines_; //!< The number of available compute engines
@@ -922,7 +922,7 @@ VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd)

    size_t offset = 0;
    // Find if virtual address is a CL allocation
-    pal::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
+    device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);

    profilingBegin(vcmd, true);

@@ -1027,7 +1027,7 @@ VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd)
    pal::Memory* memory = dev().getGpuMemory(&vcmd.destination());
    size_t offset = 0;
    // Find if virtual address is a CL allocation
-    pal::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
+    device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);

    profilingBegin(vcmd, true);