From 5b5b3b8cdcbe77c2d6fbf44d69d8ce123cec835d Mon Sep 17 00:00:00 2001
From: foreman
Date: Mon, 4 Apr 2016 11:25:36 -0400
Subject: [PATCH] P4 to Git Change 1254144 by gandryey@gera-rcf-lnx on
2016/04/04 11:14:17
SWDEV-79445 - OCL generic changes and code clean-up
- Move prepinned logic to the abstraction layer
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#193 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#270 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#543 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#158 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#398 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#3 edit
[ROCm/clr commit: 8756fa14cbc6b533e562056e61446c4454006b3b]
---
projects/clr/rocclr/runtime/device/device.cpp | 83 ++++++++++++++++++-
projects/clr/rocclr/runtime/device/device.hpp | 16 +++-
.../rocclr/runtime/device/gpu/gpudevice.cpp | 82 +-----------------
.../rocclr/runtime/device/gpu/gpudevice.hpp | 30 -------
.../rocclr/runtime/device/gpu/gpuvirtual.cpp | 4 +-
.../rocclr/runtime/device/pal/paldevice.cpp | 82 +-----------------
.../rocclr/runtime/device/pal/paldevice.hpp | 30 -------
.../rocclr/runtime/device/pal/palvirtual.cpp | 4 +-
8 files changed, 109 insertions(+), 222 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp
index e085459449..449eb52202 100644
--- a/projects/clr/rocclr/runtime/device/device.cpp
+++ b/projects/clr/rocclr/runtime/device/device.cpp
@@ -225,7 +225,13 @@ Device::tearDown()
}
Device::Device(Device* parent)
- : settings_(NULL), online_(true), blitProgram_(NULL), hwDebugMgr_(NULL), parent_(parent)
+ : settings_(NULL)
+ , online_(true)
+ , blitProgram_(NULL)
+ , hwDebugMgr_(NULL)
+ , parent_(parent)
+ , vaCacheAccess_(nullptr)
+ , vaCacheMap_(nullptr)
{
memset(&info_, '\0', sizeof(info_));
if (parent_ != NULL) {
@@ -235,6 +241,11 @@ Device::Device(Device* parent)
Device::~Device()
{
+ CondLog((vaCacheMap_ != nullptr) &&
+ (vaCacheMap_->size() != 0), "Application didn't unmap all host memory!");
+ delete vaCacheMap_;
+ delete vaCacheAccess_;
+
// Destroy device settings
if (settings_ != NULL) {
delete settings_;
@@ -255,6 +266,20 @@ Device::~Device()
}
}
+bool
+Device::create()
+{
+ vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
+ if (NULL == vaCacheAccess_) {
+ return false;
+ }
+ vaCacheMap_ = new std::map();
+ if (NULL == vaCacheMap_) {
+ return false;
+ }
+ return true;
+}
+
bool
Device::isAncestor(const Device* sub) const
{
@@ -319,6 +344,62 @@ Device::registerDevice()
devices_->push_back(this);
}
+void
+Device::addVACache(device::Memory* memory) const
+{
+ // Make sure system memory has direct access
+ if (memory->isHostMemDirectAccess()) {
+ // VA cache access must be serialised
+ amd::ScopedLock lk(*vaCacheAccess_);
+ void* start = memory->owner()->getHostMem();
+ size_t offset;
+ device::Memory* doubleMap = findMemoryFromVA(start, &offset);
+
+ if (doubleMap == nullptr) {
+ // Insert the new entry
+ vaCacheMap_->insert(std::pair
+ (reinterpret_cast(start), memory));
+ }
+ else {
+ LogError("Unexpected double map() call from the app!");
+ }
+ }
+}
+
+void
+Device::removeVACache(const device::Memory* memory) const
+{
+ // Make sure system memory has direct access
+ if (memory->isHostMemDirectAccess() && memory->owner()) {
+ // VA cache access must be serialised
+ amd::ScopedLock lk(*vaCacheAccess_);
+ void* start = memory->owner()->getHostMem();
+ vaCacheMap_->erase(reinterpret_cast(start));
+ }
+}
+
+device::Memory*
+Device::findMemoryFromVA(const void* ptr, size_t* offset) const
+{
+ // VA cache access must be serialised
+ amd::ScopedLock lk(*vaCacheAccess_);
+
+ uintptr_t key = reinterpret_cast(ptr);
+ std::map::iterator it = vaCacheMap_->upper_bound(
+ reinterpret_cast(ptr));
+ if (it == vaCacheMap_->begin()) {
+ return nullptr;
+ }
+
+ --it;
+ device::Memory* mem = it->second;
+ if (key >= it->first && key < (it->first + mem->size())) {
+ // ptr is in the range
+ *offset = key - it->first;
+ return mem;
+ }
+ return nullptr;
+}
bool IsHsaRequested(cl_device_type requestedType) {
// Depending on HSA_RUNTIME and hint flags CL_HSA_XXXXX_AMD,
diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp
index a94ada3787..71e1bb0493 100644
--- a/projects/clr/rocclr/runtime/device/device.hpp
+++ b/projects/clr/rocclr/runtime/device/device.hpp
@@ -1519,6 +1519,9 @@ public:
Device(Device* parent = NULL);
virtual ~Device();
+ //! Initializes abstraction layer device object
+ bool create();
+
//! Increment the reference count
uint retain() {
// Only increment the reference count of sub-devices
@@ -1733,6 +1736,15 @@ public:
//! Remove the Hardware Debug Manager
virtual void hwDebugManagerRemove() {}
+ //! Adds GPU memory to the VA cache list
+ void addVACache(device::Memory* memory) const;
+
+ //! Removes GPU memory from the VA cache list
+ void removeVACache(const device::Memory* memory) const;
+
+ //! Finds GPU memory from virtual address
+ device::Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
+
protected:
//! Enable the specified extension
char* getExtensionString();
@@ -1757,7 +1769,9 @@ private:
typedef std::vector::iterator device_iterator;
static std::vector* devices_; //!< All known devices
- Device* parent_; //!< This device's parent
+ Device* parent_; //!< This device's parent
+ Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
+ std::map* vaCacheMap_; //!< VA cache map
};
struct KernelParameterDescriptor
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
index 792b96a4e4..d15cea66be 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -735,8 +735,6 @@ Device::Device()
, mapCacheOps_(NULL)
, xferRead_(NULL)
, xferWrite_(NULL)
- , vaCacheAccess_(NULL)
- , vaCacheList_(NULL)
, mapCache_(NULL)
, resourceCache_(NULL)
, heapInitComplete_(false)
@@ -752,9 +750,6 @@ Device::~Device()
delete hwDebugMgr_;
hwDebugMgr_ = NULL;
- CondLog(vaCacheList_ == NULL ||
- (vaCacheList_->size() != 0), "Application didn't unmap all host memory!");
-
delete srdManager_;
for (uint s = 0; s < scratch_.size(); ++s) {
@@ -795,8 +790,6 @@ Device::~Device()
delete vgpusAccess_;
delete scratchAlloc_;
delete mapCacheOps_;
- delete vaCacheAccess_;
- delete vaCacheList_;
if (context_ != NULL) {
context_->release();
@@ -811,6 +804,10 @@ extern const char* SchedulerSourceCode;
bool
Device::create(CALuint ordinal, CALuint numOfDevices)
{
+ if (!amd::Device::create()) {
+ return false;
+ }
+
appProfile_.init();
bool smallMemSystem = false;
@@ -875,15 +872,6 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
return false;
}
- vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
- if (NULL == vaCacheAccess_) {
- return false;
- }
- vaCacheList_ = new std::list();
- if (NULL == vaCacheList_) {
- return false;
- }
-
mapCache_ = new std::vector();
if (mapCache_ == NULL) {
return false;
@@ -1895,68 +1883,6 @@ Device::globalFreeMemory(size_t* freeMemory) const
return true;
}
-void
-Device::addVACache(Memory* memory) const
-{
- // Make sure system memory has direct access
- if (memory->isHostMemDirectAccess()) {
- // VA cache access must be serialised
- amd::ScopedLock lk(*vaCacheAccess_);
- void* start = memory->owner()->getHostMem();
- void* end = reinterpret_cast(start) + memory->owner()->getSize();
- size_t offset;
- Memory* doubleMap = findMemoryFromVA(start, &offset);
-
- if (doubleMap == NULL) {
- // Allocate a new entry
- VACacheEntry* entry = new VACacheEntry(start, end, memory);
- if (entry != NULL) {
- vaCacheList_->push_back(entry);
- }
- }
- else {
- LogError("Unexpected double map() call from the app!");
- }
- }
-}
-
-void
-Device::removeVACache(const Memory* memory) const
-{
- // Make sure system memory has direct access
- if (memory->isHostMemDirectAccess() && memory->owner()) {
- // VA cache access must be serialised
- amd::ScopedLock lk(*vaCacheAccess_);
- void* start = memory->owner()->getHostMem();
- void* end = reinterpret_cast(start) + memory->owner()->getSize();
-
- // Find VA cache entry for the specified memory
- for (const auto& entry : *vaCacheList_) {
- if (entry->startAddress_ == start) {
- CondLog((entry->endAddress_ != end), "Incorrect VA range");
- delete entry;
- vaCacheList_->remove(entry);
- break;
- }
- }
- }
-}
-
-Memory*
-Device::findMemoryFromVA(const void* ptr, size_t* offset) const
-{
- // VA cache access must be serialised
- amd::ScopedLock lk(*vaCacheAccess_);
- for (const auto& entry : *vaCacheList_) {
- if ((entry->startAddress_ <= ptr) && (entry->endAddress_ > ptr)) {
- *offset = static_cast(reinterpret_cast(ptr) -
- reinterpret_cast(entry->startAddress_));
- return entry->memory_;
- }
- }
- return NULL;
-}
-
amd::Memory*
Device::findMapTarget(size_t size) const
{
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
index 02957051a2..55328d48ae 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -310,25 +310,6 @@ public:
const Device& gpuDevice_; //!< GPU device object
};
- //! Virtual address cache entry
- struct VACacheEntry : public amd::HeapObject
- {
- void* startAddress_; //!< Start virtual address
- void* endAddress_; //!< End virtual address
- Memory* memory_; //!< GPU memory, associated with the range
-
- //! Constructor
- VACacheEntry(
- void* startAddress, //!< Start virtual address
- void* endAddress, //!< End virtual address
- Memory* memory //!< GPU memory object
- ): startAddress_(startAddress), endAddress_(endAddress), memory_(memory) {}
-
- private:
- //! Disable default constructor
- VACacheEntry();
- };
-
struct ScratchBuffer : public amd::HeapObject
{
uint regNum_; //!< The number of used scratch registers
@@ -502,15 +483,6 @@ public:
//! Returns transfer buffer object
XferBuffers& xferRead() const { return *xferRead_; }
- //! Adds GPU memory to the VA cache list
- void addVACache(Memory* memory) const;
-
- //! Removes GPU memory from the VA cache list
- void removeVACache(const Memory* memory) const;
-
- //! Finds GPU memory from virtual address
- Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
-
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
@@ -618,8 +590,6 @@ private:
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
- amd::Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
- std::list* vaCacheList_; //!< VA cache list
std::vector* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
Engines engines_; //!< Available engines on device
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 843c7adf1c..30c268429c 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -660,7 +660,7 @@ VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd)
size_t offset = 0;
// Find if virtual address is a CL allocation
- gpu::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
+ device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
profilingBegin(vcmd, true);
@@ -765,7 +765,7 @@ VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd)
gpu::Memory* memory = dev().getGpuMemory(&vcmd.destination());
size_t offset = 0;
// Find if virtual address is a CL allocation
- gpu::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
+ device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
profilingBegin(vcmd, true);
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index f8b754fda3..2406795ae8 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -559,8 +559,6 @@ Device::Device()
, mapCacheOps_(nullptr)
, xferRead_(nullptr)
, xferWrite_(nullptr)
- , vaCacheAccess_(nullptr)
- , vaCacheList_(nullptr)
, mapCache_(nullptr)
, resourceCache_(nullptr)
, numComputeEngines_(0)
@@ -578,9 +576,6 @@ Device::~Device()
delete hwDebugMgr_;
hwDebugMgr_ = nullptr;
- CondLog(vaCacheList_ == nullptr ||
- (vaCacheList_->size() != 0), "Application didn't unmap all host memory!");
-
delete srdManager_;
for (uint s = 0; s < scratch_.size(); ++s) {
@@ -618,8 +613,6 @@ Device::~Device()
delete vgpusAccess_;
delete scratchAlloc_;
delete mapCacheOps_;
- delete vaCacheAccess_;
- delete vaCacheList_;
if (context_ != nullptr) {
context_->release();
@@ -633,6 +626,10 @@ extern const char* SchedulerSourceCode;
bool
Device::create(Pal::IDevice* device)
{
+ if (!amd::Device::create()) {
+ return false;
+ }
+
appProfile_.init();
device_ = device;
Pal::Result result;
@@ -721,15 +718,6 @@ Device::create(Pal::IDevice* device)
return false;
}
- vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
- if (nullptr == vaCacheAccess_) {
- return false;
- }
- vaCacheList_ = new std::list();
- if (nullptr == vaCacheList_) {
- return false;
- }
-
mapCache_ = new std::vector();
if (mapCache_ == nullptr) {
return false;
@@ -1630,68 +1618,6 @@ Device::globalFreeMemory(size_t* freeMemory) const
return true;
}
-void
-Device::addVACache(Memory* memory) const
-{
- // Make sure system memory has direct access
- if (memory->isHostMemDirectAccess()) {
- // VA cache access must be serialised
- amd::ScopedLock lk(*vaCacheAccess_);
- void* start = memory->owner()->getHostMem();
- void* end = reinterpret_cast(start) + memory->owner()->getSize();
- size_t offset;
- Memory* doubleMap = findMemoryFromVA(start, &offset);
-
- if (doubleMap == nullptr) {
- // Allocate a new entry
- VACacheEntry* entry = new VACacheEntry(start, end, memory);
- if (entry != nullptr) {
- vaCacheList_->push_back(entry);
- }
- }
- else {
- LogError("Unexpected double map() call from the app!");
- }
- }
-}
-
-void
-Device::removeVACache(const Memory* memory) const
-{
- // Make sure system memory has direct access
- if (memory->isHostMemDirectAccess() && memory->owner()) {
- // VA cache access must be serialised
- amd::ScopedLock lk(*vaCacheAccess_);
- void* start = memory->owner()->getHostMem();
- void* end = reinterpret_cast(start) + memory->owner()->getSize();
-
- // Find VA cache entry for the specified memory
- for (const auto& entry : *vaCacheList_) {
- if (entry->startAddress_ == start) {
- CondLog((entry->endAddress_ != end), "Incorrect VA range");
- delete entry;
- vaCacheList_->remove(entry);
- break;
- }
- }
- }
-}
-
-Memory*
-Device::findMemoryFromVA(const void* ptr, size_t* offset) const
-{
- // VA cache access must be serialised
- amd::ScopedLock lk(*vaCacheAccess_);
- for (const auto& entry : *vaCacheList_) {
- if ((entry->startAddress_ <= ptr) && (entry->endAddress_ > ptr)) {
- *offset = static_cast(reinterpret_cast(ptr) -
- reinterpret_cast(entry->startAddress_));
- return entry->memory_;
- }
- }
- return nullptr;
-}
-
amd::Memory*
Device::findMapTarget(size_t size) const
{
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
index 7439682df1..a1c32b595d 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
@@ -242,25 +242,6 @@ public:
const Device& gpuDevice_; //!< GPU device object
};
- //! Virtual address cache entry
- struct VACacheEntry : public amd::HeapObject
- {
- void* startAddress_; //!< Start virtual address
- void* endAddress_; //!< End virtual address
- Memory* memory_; //!< GPU memory, associated with the range
-
- //! Constructor
- VACacheEntry(
- void* startAddress, //!< Start virtual address
- void* endAddress, //!< End virtual address
- Memory* memory //!< GPU memory object
- ): startAddress_(startAddress), endAddress_(endAddress), memory_(memory) {}
-
- private:
- //! Disable default constructor
- VACacheEntry();
- };
-
struct ScratchBuffer : public amd::HeapObject
{
uint regNum_; //!< The number of used scratch registers
@@ -418,15 +399,6 @@ public:
//! Returns transfer buffer object
XferBuffers& xferRead() const { return *xferRead_; }
- //! Adds GPU memory to the VA cache list
- void addVACache(Memory* memory) const;
-
- //! Removes GPU memory from the VA cache list
- void removeVACache(const Memory* memory) const;
-
- //! Finds GPU memory from virtual address
- Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
-
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
@@ -569,8 +541,6 @@ private:
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
- amd::Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
- std::list* vaCacheList_; //!< VA cache list
std::vector* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
uint numComputeEngines_; //!< The number of available compute engines
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 1cc00ef49f..1b1c80c93b 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -922,7 +922,7 @@ VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd)
size_t offset = 0;
// Find if virtual address is a CL allocation
- pal::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
+ device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
profilingBegin(vcmd, true);
@@ -1027,7 +1027,7 @@ VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd)
pal::Memory* memory = dev().getGpuMemory(&vcmd.destination());
size_t offset = 0;
// Find if virtual address is a CL allocation
- pal::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
+ device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
profilingBegin(vcmd, true);