P4 to Git Change 1254144 by gandryey@gera-rcf-lnx on 2016/04/04 11:14:17

SWDEV-79445 - OCL generic changes and code clean-up
	- Move prepinned logic to the abstraction layer

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#193 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#270 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#543 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#158 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#398 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsamemory.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#3 edit


[ROCm/clr commit: 8756fa14cb]
This commit is contained in:
foreman
2016-04-04 11:25:36 -04:00
parent 841cb089da
commit 5b5b3b8cdc
8 changed files with 109 additions and 222 deletions
+82 -1
View File
@@ -225,7 +225,13 @@ Device::tearDown()
}
Device::Device(Device* parent)
: settings_(NULL), online_(true), blitProgram_(NULL), hwDebugMgr_(NULL), parent_(parent)
: settings_(NULL)
, online_(true)
, blitProgram_(NULL)
, hwDebugMgr_(NULL)
, parent_(parent)
, vaCacheAccess_(nullptr)
, vaCacheMap_(nullptr)
{
memset(&info_, '\0', sizeof(info_));
if (parent_ != NULL) {
@@ -235,6 +241,11 @@ Device::Device(Device* parent)
Device::~Device()
{
CondLog((vaCacheMap_ != nullptr) &&
(vaCacheMap_->size() != 0), "Application didn't unmap all host memory!");
delete vaCacheMap_;
delete vaCacheAccess_;
// Destroy device settings
if (settings_ != NULL) {
delete settings_;
@@ -255,6 +266,20 @@ Device::~Device()
}
}
//! Allocates the lock and the map that back the host virtual address (VA)
//! cache of the abstraction layer device.
//!
//! \return true only when both the lock and the map were allocated.
//!
//! NOTE(review): a plain new-expression normally reports failure by throwing
//! rather than by returning null, so the null tests below only matter if the
//! allocation is non-throwing in this build - confirm.
bool
Device::create()
{
    // The map is attempted only once the lock exists
    vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
    if (vaCacheAccess_ != nullptr) {
        vaCacheMap_ = new std::map<uintptr_t, device::Memory*>();
    }

    return (vaCacheAccess_ != nullptr) && (vaCacheMap_ != nullptr);
}
bool
Device::isAncestor(const Device* sub) const
{
@@ -319,6 +344,62 @@ Device::registerDevice()
devices_->push_back(this);
}
//! Adds device memory to the VA cache, keyed by the host address of its
//! owner, so that findMemoryFromVA() can resolve host pointers that fall
//! inside the allocation.
//!
//! Only memory that reports isHostMemDirectAccess() and has an owner is
//! cached. When the start address already resolves to a cached object the
//! cache is left untouched and an error is logged.
//!
//! \param memory  Device memory object to register
void
Device::addVACache(device::Memory* memory) const
{
    // Without direct host access, or without an owner to take the host
    // pointer from, there is nothing to cache (removeVACache() applies the
    // same guard)
    if (!memory->isHostMemDirectAccess() || (memory->owner() == nullptr)) {
        return;
    }

    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);
    void* start = memory->owner()->getHostMem();

    // The lookup needs somewhere to store the offset; the value is unused here
    size_t offset;
    if (findMemoryFromVA(start, &offset) != nullptr) {
        LogError("Unexpected double map() call from the app!");
        return;
    }

    // Insert the new entry
    vaCacheMap_->insert(std::pair<uintptr_t, device::Memory*>(
        reinterpret_cast<uintptr_t>(start), memory));
}
//! Removes device memory from the VA cache.
//!
//! Nothing happens unless the memory reports isHostMemDirectAccess() and has
//! an owner. The entry keyed by the owner's host address is erased only when
//! it refers to \a memory itself.
//!
//! \param memory  Device memory object to unregister
void
Device::removeVACache(const device::Memory* memory) const
{
    // Make sure system memory has direct access
    if (!memory->isHostMemDirectAccess() || (memory->owner() == nullptr)) {
        return;
    }

    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);
    void* start = memory->owner()->getHostMem();
    std::map<uintptr_t, device::Memory*>::iterator it =
        vaCacheMap_->find(reinterpret_cast<uintptr_t>(start));

    // addVACache() refuses to register a second object for an address that is
    // already cached, so the entry found here may belong to a different
    // object; erasing it unconditionally would drop that object's mapping
    if ((it != vaCacheMap_->end()) && (it->second == memory)) {
        vaCacheMap_->erase(it);
    }
}
//! Finds the cached device memory whose host range contains a virtual address.
//!
//! Only the cached entry with the greatest start address not above \a ptr is
//! examined; the address matches when it lies less than size() bytes past
//! that start.
//!
//! \param ptr     Host virtual address to look up
//! \param offset  Receives the byte distance of \a ptr from the start of the
//!                matching range; written only on success, may be null
//! \return The matching device memory, or nullptr when no cached range
//!         contains \a ptr
device::Memory*
Device::findMemoryFromVA(const void* ptr, size_t* offset) const
{
    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);

    const uintptr_t key = reinterpret_cast<uintptr_t>(ptr);

    // First entry that starts strictly above the address; the candidate
    // range, if any, is the entry right before it
    std::map<uintptr_t, device::Memory*>::iterator it = vaCacheMap_->upper_bound(key);
    if (it == vaCacheMap_->begin()) {
        return nullptr;
    }
    --it;

    device::Memory* mem = it->second;
    // upper_bound() guarantees it->first <= key, so the subtraction cannot
    // underflow; comparing the distance avoids the wrap-around that
    // "start + size" could hit near the top of the address space
    const uintptr_t distance = key - it->first;
    if (distance < mem->size()) {
        // ptr is in the range
        if (offset != nullptr) {
            *offset = distance;
        }
        return mem;
    }
    return nullptr;
}
bool IsHsaRequested(cl_device_type requestedType) {
// Depending on HSA_RUNTIME and hint flags CL_HSA_XXXXX_AMD,
+15 -1
View File
@@ -1519,6 +1519,9 @@ public:
Device(Device* parent = NULL);
virtual ~Device();
//! Initializes abstraction layer device object
bool create();
//! Increment the reference count
uint retain() {
// Only increment the reference count of sub-devices
@@ -1733,6 +1736,15 @@ public:
//! Remove the Hardware Debug Manager (this implementation does nothing)
virtual void hwDebugManagerRemove()
{
}
//! Adds GPU memory to the VA cache list
void addVACache(device::Memory* memory) const;
//! Removes GPU memory from the VA cache list
void removeVACache(const device::Memory* memory) const;
//! Finds GPU memory from virtual address
device::Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
protected:
//! Enable the specified extension
char* getExtensionString();
@@ -1757,7 +1769,9 @@ private:
typedef std::vector<Device*>::iterator device_iterator;
static std::vector<Device*>* devices_; //!< All known devices
Device* parent_; //!< This device's parent
Device* parent_; //!< This device's parent
Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
std::map<uintptr_t, device::Memory*>* vaCacheMap_; //!< VA cache map
};
struct KernelParameterDescriptor
@@ -735,8 +735,6 @@ Device::Device()
, mapCacheOps_(NULL)
, xferRead_(NULL)
, xferWrite_(NULL)
, vaCacheAccess_(NULL)
, vaCacheList_(NULL)
, mapCache_(NULL)
, resourceCache_(NULL)
, heapInitComplete_(false)
@@ -752,9 +750,6 @@ Device::~Device()
delete hwDebugMgr_;
hwDebugMgr_ = NULL;
CondLog(vaCacheList_ == NULL ||
(vaCacheList_->size() != 0), "Application didn't unmap all host memory!");
delete srdManager_;
for (uint s = 0; s < scratch_.size(); ++s) {
@@ -795,8 +790,6 @@ Device::~Device()
delete vgpusAccess_;
delete scratchAlloc_;
delete mapCacheOps_;
delete vaCacheAccess_;
delete vaCacheList_;
if (context_ != NULL) {
context_->release();
@@ -811,6 +804,10 @@ extern const char* SchedulerSourceCode;
bool
Device::create(CALuint ordinal, CALuint numOfDevices)
{
if (!amd::Device::create()) {
return false;
}
appProfile_.init();
bool smallMemSystem = false;
@@ -875,15 +872,6 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
return false;
}
vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
if (NULL == vaCacheAccess_) {
return false;
}
vaCacheList_ = new std::list<VACacheEntry*>();
if (NULL == vaCacheList_) {
return false;
}
mapCache_ = new std::vector<amd::Memory*>();
if (mapCache_ == NULL) {
return false;
@@ -1895,68 +1883,6 @@ Device::globalFreeMemory(size_t* freeMemory) const
return true;
}
//! Adds GPU memory to the VA cache list.
//!
//! Only memory that reports isHostMemDirectAccess() and has an owner is
//! cached. The cached range starts at the owner's host pointer and spans the
//! owner's size. When the start address already resolves to a cached object
//! the list is left untouched and an error is logged.
//!
//! \param memory  GPU memory object to register
void
Device::addVACache(Memory* memory) const
{
    // Without direct host access, or without an owner to take the host
    // pointer from, there is nothing to cache (removeVACache() applies the
    // same guard)
    if (!memory->isHostMemDirectAccess() || (memory->owner() == NULL)) {
        return;
    }

    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);
    void* start = memory->owner()->getHostMem();

    // The lookup needs somewhere to store the offset; the value is unused here
    size_t offset;
    if (findMemoryFromVA(start, &offset) != NULL) {
        LogError("Unexpected double map() call from the app!");
        return;
    }

    // Allocate a new entry
    void* end = reinterpret_cast<address>(start) + memory->owner()->getSize();
    VACacheEntry* entry = new VACacheEntry(start, end, memory);
    if (entry != NULL) {
        vaCacheList_->push_back(entry);
    }
}
void
Device::removeVACache(const Memory* memory) const
{
// Make sure system memory has direct access
if (memory->isHostMemDirectAccess() && memory->owner()) {
// VA cache access must be serialised
amd::ScopedLock lk(*vaCacheAccess_);
void* start = memory->owner()->getHostMem();
void* end = reinterpret_cast<address>(start) + memory->owner()->getSize();
// Find VA cache entry for the specified memory
for (const auto& entry : *vaCacheList_) {
if (entry->startAddress_ == start) {
CondLog((entry->endAddress_ != end), "Incorrect VA range");
delete entry;
vaCacheList_->remove(entry);
break;
}
}
}
}
//! Finds GPU memory from a virtual address.
//!
//! Walks the VA cache list in order and returns the memory of the first entry
//! whose range contains \a ptr (start inclusive, end exclusive).
//!
//! \param ptr     Host virtual address to look up
//! \param offset  Receives the byte distance of \a ptr from the start of the
//!                matching range; written only on success
//! \return The matching GPU memory, or NULL when no entry contains \a ptr
Memory*
Device::findMemoryFromVA(const void* ptr, size_t* offset) const
{
    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);

    std::list<VACacheEntry*>::iterator pos = vaCacheList_->begin();
    for (; pos != vaCacheList_->end(); ++pos) {
        VACacheEntry* range = *pos;
        // Skip ranges that do not contain the address
        if ((ptr < range->startAddress_) || (ptr >= range->endAddress_)) {
            continue;
        }
        const char* hit = reinterpret_cast<const char*>(ptr);
        const char* base = reinterpret_cast<char*>(range->startAddress_);
        *offset = static_cast<size_t>(hit - base);
        return range->memory_;
    }

    return NULL;
}
amd::Memory*
Device::findMapTarget(size_t size) const
{
@@ -310,25 +310,6 @@ public:
const Device& gpuDevice_; //!< GPU device object
};
//! Virtual address cache entry: associates a host virtual address range
//! with a GPU memory object
struct VACacheEntry : public amd::HeapObject
{
    void*   startAddress_;  //!< Start virtual address
    void*   endAddress_;    //!< End virtual address
    Memory* memory_;        //!< GPU memory, associated with the range

    //! Constructor, stores the range boundaries and the memory object as given
    VACacheEntry(void* startAddress, void* endAddress, Memory* memory)
        : startAddress_(startAddress)
        , endAddress_(endAddress)
        , memory_(memory)
    {
    }

private:
    //! Disable default constructor
    VACacheEntry();
};
struct ScratchBuffer : public amd::HeapObject
{
uint regNum_; //!< The number of used scratch registers
@@ -502,15 +483,6 @@ public:
//! Returns transfer buffer object (xferRead_ is dereferenced without a null check)
XferBuffers& xferRead() const
{
    return *xferRead_;
}
//! Adds GPU memory to the VA cache list
void addVACache(Memory* memory) const;
//! Removes GPU memory from the VA cache list
void removeVACache(const Memory* memory) const;
//! Finds GPU memory from virtual address
Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
@@ -618,8 +590,6 @@ private:
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
amd::Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
std::list<VACacheEntry*>* vaCacheList_; //!< VA cache list
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
Engines engines_; //!< Available engines on device
@@ -660,7 +660,7 @@ VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd)
size_t offset = 0;
// Find if virtual address is a CL allocation
gpu::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
profilingBegin(vcmd, true);
@@ -765,7 +765,7 @@ VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd)
gpu::Memory* memory = dev().getGpuMemory(&vcmd.destination());
size_t offset = 0;
// Find if virtual address is a CL allocation
gpu::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
profilingBegin(vcmd, true);
@@ -559,8 +559,6 @@ Device::Device()
, mapCacheOps_(nullptr)
, xferRead_(nullptr)
, xferWrite_(nullptr)
, vaCacheAccess_(nullptr)
, vaCacheList_(nullptr)
, mapCache_(nullptr)
, resourceCache_(nullptr)
, numComputeEngines_(0)
@@ -578,9 +576,6 @@ Device::~Device()
delete hwDebugMgr_;
hwDebugMgr_ = nullptr;
CondLog(vaCacheList_ == nullptr ||
(vaCacheList_->size() != 0), "Application didn't unmap all host memory!");
delete srdManager_;
for (uint s = 0; s < scratch_.size(); ++s) {
@@ -618,8 +613,6 @@ Device::~Device()
delete vgpusAccess_;
delete scratchAlloc_;
delete mapCacheOps_;
delete vaCacheAccess_;
delete vaCacheList_;
if (context_ != nullptr) {
context_->release();
@@ -633,6 +626,10 @@ extern const char* SchedulerSourceCode;
bool
Device::create(Pal::IDevice* device)
{
if (!amd::Device::create()) {
return false;
}
appProfile_.init();
device_ = device;
Pal::Result result;
@@ -721,15 +718,6 @@ Device::create(Pal::IDevice* device)
return false;
}
vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
if (nullptr == vaCacheAccess_) {
return false;
}
vaCacheList_ = new std::list<VACacheEntry*>();
if (nullptr == vaCacheList_) {
return false;
}
mapCache_ = new std::vector<amd::Memory*>();
if (mapCache_ == nullptr) {
return false;
@@ -1630,68 +1618,6 @@ Device::globalFreeMemory(size_t* freeMemory) const
return true;
}
//! Adds GPU memory to the VA cache list.
//!
//! Only memory that reports isHostMemDirectAccess() and has an owner is
//! cached. The cached range starts at the owner's host pointer and spans the
//! owner's size. When the start address already resolves to a cached object
//! the list is left untouched and an error is logged.
//!
//! \param memory  GPU memory object to register
void
Device::addVACache(Memory* memory) const
{
    // Without direct host access, or without an owner to take the host
    // pointer from, there is nothing to cache (removeVACache() applies the
    // same guard)
    if (!memory->isHostMemDirectAccess() || (memory->owner() == nullptr)) {
        return;
    }

    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);
    void* start = memory->owner()->getHostMem();

    // The lookup needs somewhere to store the offset; the value is unused here
    size_t offset;
    if (findMemoryFromVA(start, &offset) != nullptr) {
        LogError("Unexpected double map() call from the app!");
        return;
    }

    // Allocate a new entry
    void* end = reinterpret_cast<address>(start) + memory->owner()->getSize();
    VACacheEntry* entry = new VACacheEntry(start, end, memory);
    if (entry != nullptr) {
        vaCacheList_->push_back(entry);
    }
}
void
Device::removeVACache(const Memory* memory) const
{
// Make sure system memory has direct access
if (memory->isHostMemDirectAccess() && memory->owner()) {
// VA cache access must be serialised
amd::ScopedLock lk(*vaCacheAccess_);
void* start = memory->owner()->getHostMem();
void* end = reinterpret_cast<address>(start) + memory->owner()->getSize();
// Find VA cache entry for the specified memory
for (const auto& entry : *vaCacheList_) {
if (entry->startAddress_ == start) {
CondLog((entry->endAddress_ != end), "Incorrect VA range");
delete entry;
vaCacheList_->remove(entry);
break;
}
}
}
}
//! Finds GPU memory from a virtual address.
//!
//! Walks the VA cache list in order and returns the memory of the first entry
//! whose range contains \a ptr (start inclusive, end exclusive).
//!
//! \param ptr     Host virtual address to look up
//! \param offset  Receives the byte distance of \a ptr from the start of the
//!                matching range; written only on success
//! \return The matching GPU memory, or nullptr when no entry contains \a ptr
Memory*
Device::findMemoryFromVA(const void* ptr, size_t* offset) const
{
    // VA cache access must be serialised
    amd::ScopedLock lk(*vaCacheAccess_);

    std::list<VACacheEntry*>::iterator pos = vaCacheList_->begin();
    for (; pos != vaCacheList_->end(); ++pos) {
        VACacheEntry* range = *pos;
        // Skip ranges that do not contain the address
        if ((ptr < range->startAddress_) || (ptr >= range->endAddress_)) {
            continue;
        }
        const char* hit = reinterpret_cast<const char*>(ptr);
        const char* base = reinterpret_cast<char*>(range->startAddress_);
        *offset = static_cast<size_t>(hit - base);
        return range->memory_;
    }

    return nullptr;
}
amd::Memory*
Device::findMapTarget(size_t size) const
{
@@ -242,25 +242,6 @@ public:
const Device& gpuDevice_; //!< GPU device object
};
//! Virtual address cache entry: associates a host virtual address range
//! with a GPU memory object
struct VACacheEntry : public amd::HeapObject
{
    void*   startAddress_;  //!< Start virtual address
    void*   endAddress_;    //!< End virtual address
    Memory* memory_;        //!< GPU memory, associated with the range

    //! Constructor, stores the range boundaries and the memory object as given
    VACacheEntry(void* startAddress, void* endAddress, Memory* memory)
        : startAddress_(startAddress)
        , endAddress_(endAddress)
        , memory_(memory)
    {
    }

private:
    //! Disable default constructor
    VACacheEntry();
};
struct ScratchBuffer : public amd::HeapObject
{
uint regNum_; //!< The number of used scratch registers
@@ -418,15 +399,6 @@ public:
//! Returns transfer buffer object (xferRead_ is dereferenced without a null check)
XferBuffers& xferRead() const
{
    return *xferRead_;
}
//! Adds GPU memory to the VA cache list
void addVACache(Memory* memory) const;
//! Removes GPU memory from the VA cache list
void removeVACache(const Memory* memory) const;
//! Finds GPU memory from virtual address
Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
@@ -569,8 +541,6 @@ private:
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
amd::Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
std::list<VACacheEntry*>* vaCacheList_; //!< VA cache list
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
uint numComputeEngines_; //!< The number of available compute engines
@@ -922,7 +922,7 @@ VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd)
size_t offset = 0;
// Find if virtual address is a CL allocation
pal::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.destination(), &offset);
profilingBegin(vcmd, true);
@@ -1027,7 +1027,7 @@ VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd)
pal::Memory* memory = dev().getGpuMemory(&vcmd.destination());
size_t offset = 0;
// Find if virtual address is a CL allocation
pal::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
device::Memory* hostMemory = dev().findMemoryFromVA(vcmd.source(), &offset);
profilingBegin(vcmd, true);