From be77b7fac10a164e276abc817e471487749ede10 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 24 May 2018 18:49:01 -0400
Subject: [PATCH] P4 to Git Change 1559383 by gandryey@gera-w8 on 2018/05/24
18:38:36
SWDEV-79445 - OCL generic changes and code clean-up
- Remove the memory reallocation interfaces. They were used by the AMDIL path for HW constant buffer support, but it seems AMDIL on SI+ doesn't enable HW constant buffers.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#303 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#589 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#166 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#324 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#132 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#52 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#105 edit
---
rocclr/runtime/device/device.hpp | 10 ----
rocclr/runtime/device/gpu/gpudevice.cpp | 61 ------------------------
rocclr/runtime/device/gpu/gpudevice.hpp | 7 ---
rocclr/runtime/device/gpu/gpukernel.cpp | 15 ------
rocclr/runtime/device/gpu/gpukernel.hpp | 8 ----
rocclr/runtime/device/gpu/gpumemory.cpp | 46 ------------------
rocclr/runtime/device/gpu/gpumemory.hpp | 3 --
rocclr/runtime/device/pal/paldevice.cpp | 7 ---
rocclr/runtime/device/pal/paldevice.hpp | 7 ---
rocclr/runtime/device/pal/palkernel.cpp | 5 --
rocclr/runtime/device/pal/palkernel.hpp | 3 --
rocclr/runtime/device/pal/palmemory.cpp | 46 ------------------
rocclr/runtime/device/pal/palmemory.hpp | 3 --
rocclr/runtime/device/rocm/rocdevice.hpp | 9 ----
rocclr/runtime/platform/memory.hpp | 5 --
15 files changed, 235 deletions(-)
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index f6fc76532e..b3683bed81 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -842,13 +842,6 @@ class Kernel : public amd::HeapObject {
//! Default destructor
virtual ~Kernel();
- //! Validates memory argument
- virtual bool validateMemory(uint idx, //!< Argument's index
- amd::Memory* amdMem //!< memory object for validation
- ) const {
- return true;
- }
-
//! Returns the kernel info structure
const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; }
@@ -1498,9 +1491,6 @@ class Device : public RuntimeObject {
const device::Memory& parent //!< Parent device memory object for the view
) const = 0;
- //! Reallocates device memory object
- virtual bool reallocMemory(Memory& owner) const = 0;
-
//! Return true if initialized external API interop, otherwise false
virtual bool bindExternalDevice(
uint flags, //!< Enum val. for ext.API type: GL, D3D10, etc.
diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp
index 21779155e3..76b9aec392 100644
--- a/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -1633,67 +1633,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
return true;
}
-//! \note reallocMemory() must be called only from outside of
-//! VirtualGPU submit commands methods.
-//! Otherwise a deadlock in lockVgpus() is possible
-
-bool Device::reallocMemory(amd::Memory& owner) const {
- bool directAccess = false;
-
- // For now we have to serialize reallocation code
- amd::ScopedLock lk(*lockAsyncOps_);
-
- // Read device memory after the lock,
- // since realloc from another thread can replace the pointer
- gpu::Memory* gpuMemory = getGpuMemory(&owner);
- if (gpuMemory == NULL) {
- return false;
- }
-
- if (gpuMemory->pinOffset() == 0) {
- return true;
- } else if (NULL != owner.parent()) {
- if (!reallocMemory(*owner.parent())) {
- return false;
- }
- }
-
- if (owner.asBuffer()) {
- gpuMemory = createBuffer(owner, directAccess);
- } else if (owner.asImage()) {
- return true;
- } else {
- LogError("Unknown memory type!");
- }
-
- if (gpuMemory != NULL) {
- gpu::Memory* newMemory = gpuMemory;
- gpu::Memory* oldMemory = getGpuMemory(&owner);
-
- // Transfer the object
- if (oldMemory != NULL) {
- if (!oldMemory->moveTo(*newMemory)) {
- delete newMemory;
- return false;
- }
- }
-
- // Attempt to pin system memory
- if ((newMemory->memoryType() != Resource::Pinned) &&
- ((owner.getHostMem() != NULL) ||
- ((NULL != owner.parent()) && (owner.getHostMem() != NULL)))) {
- bool ok = newMemory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size())
- ? owner.getHostMemRef()->size()
- : owner.getSize());
- //! \note: Ignore the pinning result for now
- }
-
- return true;
- }
-
- return false;
-}
-
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
size_t size = owner.getSize();
assert((owner.asImage() != NULL) && "View supports images only");
diff --git a/rocclr/runtime/device/gpu/gpudevice.hpp b/rocclr/runtime/device/gpu/gpudevice.hpp
index 7db18a5404..e24e60f863 100644
--- a/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -79,9 +79,6 @@ class NullDevice : public amd::Device {
return NULL;
}
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner) const { return true; }
-
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
@@ -386,10 +383,6 @@ class Device : public NullDevice, public CALGSLDevice {
device::Sampler** sampler //!< device sampler object
) const;
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation
- ) const;
-
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
diff --git a/rocclr/runtime/device/gpu/gpukernel.cpp b/rocclr/runtime/device/gpu/gpukernel.cpp
index 9ca9a11573..e49fe8b63b 100644
--- a/rocclr/runtime/device/gpu/gpukernel.cpp
+++ b/rocclr/runtime/device/gpu/gpukernel.cpp
@@ -2642,13 +2642,6 @@ bool NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount)
return true;
}
-bool Kernel::validateMemory(uint idx, amd::Memory* amdMem) const {
- // Check if memory doesn't require reallocation
- bool noRealloc = (!argument(idx)->memory_.realloc_ || amdMem->reallocedDeviceMemory(&dev()));
-
- return noRealloc;
-}
-
inline static HSAIL_ARG_TYPE GetHSAILArgType(const aclArgData* argInfo) {
switch (argInfo->type) {
case ARG_TYPE_POINTER:
@@ -3314,14 +3307,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
return true;
}
-bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
- // Check if memory doesn't require reallocation
- bool noRealloc = true;
- // amdMem->reallocedDeviceMemory(&dev()));
-
- return noRealloc;
-}
-
const Device& HSAILKernel::dev() const { return reinterpret_cast(dev_); }
const HSAILProgram& HSAILKernel::prog() const {
diff --git a/rocclr/runtime/device/gpu/gpukernel.hpp b/rocclr/runtime/device/gpu/gpukernel.hpp
index f7ca398777..029ecb1946 100644
--- a/rocclr/runtime/device/gpu/gpukernel.hpp
+++ b/rocclr/runtime/device/gpu/gpukernel.hpp
@@ -534,11 +534,6 @@ class Kernel : public NullKernel {
size_t binarySize = 0 //!< the machine code size
);
- //! Validates memory argument
- virtual bool validateMemory(uint idx, //!< Argument's index
- amd::Memory* amdMem //!< AMD memory object for validation
- ) const;
-
//! Initializes the CAL program grid for the kernel execution
void setupProgramGrid(VirtualGPU& gpu, //!< virtual GPU device object
size_t workDim, //!< work dimension
@@ -789,9 +784,6 @@ class HSAILKernel : public device::Kernel {
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
- //! Returns true if memory is valid for execution
- virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
-
//! Returns a pointer to the hsail argument
const Argument* argument(size_t i) const { return arguments_[i]; }
diff --git a/rocclr/runtime/device/gpu/gpumemory.cpp b/rocclr/runtime/device/gpu/gpumemory.cpp
index ba6e76b1de..7676a00a19 100644
--- a/rocclr/runtime/device/gpu/gpumemory.cpp
+++ b/rocclr/runtime/device/gpu/gpumemory.cpp
@@ -918,52 +918,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
void Memory::cpuUnmap(device::VirtualDevice& vDev) { unmap(&static_cast(vDev)); }
-//! \note moveTo() must be called only from outside of
-//! VirtualGPU submit command methods.
-//! Otherwise a deadlock in lockVgpus() is possible.
-//! Also the logic in this function is very specific to
-//! the zero-copy functionality.
-
-bool Memory::moveTo(Memory& dst) {
- bool result = false;
-
- // Make sure that all virtual devices don't process any commands
- Device::ScopedLockVgpus lock(dev());
-
- // Wait for idle on all virtual GPUs
- //!@note It's enough to wait on the active queue only
- for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
- wait(*(dev().vgpus()[idx]));
- }
-
- static const bool Entire = true;
- amd::Coord3D origin(0, 0, 0);
- amd::Coord3D region(size());
-
- // Transfer the data from old location to a new one
- if (dev().xferMgr().copyBuffer(*this, dst, origin, origin, region, Entire)) {
- // Move all properties to the new object
- dst.mapMemory_ = mapMemory_;
- mapMemory_ = NULL;
-
- dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
- flags_ &= HostMemoryDirectAccess;
-
- dst.indirectMapCount_ = indirectMapCount_;
- indirectMapCount_ = 0;
-
- dst.pinnedMemory_ = pinnedMemory_;
- pinnedMemory_ = NULL;
-
- // Replace the device memory object
- //! @note: current object will be destroyed
- owner()->replaceDeviceMemory(&dev(), &dst);
- result = true;
- }
-
- return result;
-}
-
Memory* Memory::mapMemory() const {
Memory* map = NULL;
if (NULL != mapMemory_) {
diff --git a/rocclr/runtime/device/gpu/gpumemory.hpp b/rocclr/runtime/device/gpu/gpumemory.hpp
index e45a1e5709..095a1e3b55 100644
--- a/rocclr/runtime/device/gpu/gpumemory.hpp
+++ b/rocclr/runtime/device/gpu/gpumemory.hpp
@@ -142,9 +142,6 @@ class Memory : public device::Memory, public Resource {
//! Allocates host memory for synchronization with MGPU context
void mgpuCacheWriteBack();
- //! Transfers objects data to the destination object
- bool moveTo(Memory& dst);
-
//! Accessors for indirect map memory object
Memory* mapMemory() const;
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index 6947884915..e0663d7cdc 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -1541,13 +1541,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
return true;
}
-//! \note reallocMemory() must be called only from outside of VirtualGPU submit
-//! commands methods. Otherwise a deadlock in lockVgpus() is possible
-bool Device::reallocMemory(amd::Memory& owner) const {
- // Empty body, since HSAIL path doesn't require memory reallocations
- return true;
-}
-
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
assert((owner.asImage() != nullptr) && "View supports images only");
const amd::Image& image = *owner.asImage();
diff --git a/rocclr/runtime/device/pal/paldevice.hpp b/rocclr/runtime/device/pal/paldevice.hpp
index a129bd413c..e5a441562f 100644
--- a/rocclr/runtime/device/pal/paldevice.hpp
+++ b/rocclr/runtime/device/pal/paldevice.hpp
@@ -78,9 +78,6 @@ class NullDevice : public amd::Device {
return NULL;
}
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner) const { return true; }
-
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
@@ -334,10 +331,6 @@ class Device : public NullDevice {
device::Sampler** sampler //!< device sampler object
) const;
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation
- ) const;
-
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp
index 612d6d52e5..6c4b779ce2 100644
--- a/rocclr/runtime/device/pal/palkernel.cpp
+++ b/rocclr/runtime/device/pal/palkernel.cpp
@@ -748,11 +748,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
return true;
}
-bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
- // HSAIL path doesn't require memory reallocations
- return true;
-}
-
const Device& HSAILKernel::dev() const { return reinterpret_cast(dev_); }
const HSAILProgram& HSAILKernel::prog() const {
diff --git a/rocclr/runtime/device/pal/palkernel.hpp b/rocclr/runtime/device/pal/palkernel.hpp
index 680306277b..66e4132055 100644
--- a/rocclr/runtime/device/pal/palkernel.hpp
+++ b/rocclr/runtime/device/pal/palkernel.hpp
@@ -128,9 +128,6 @@ class HSAILKernel : public device::Kernel {
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
- //! Returns true if memory is valid for execution
- virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
-
//! Returns the kernel argument list
const std::vector& arguments() const { return arguments_; }
diff --git a/rocclr/runtime/device/pal/palmemory.cpp b/rocclr/runtime/device/pal/palmemory.cpp
index 48dc958813..fcf2650b64 100644
--- a/rocclr/runtime/device/pal/palmemory.cpp
+++ b/rocclr/runtime/device/pal/palmemory.cpp
@@ -892,52 +892,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
void Memory::cpuUnmap(device::VirtualDevice& vDev) { unmap(&static_cast(vDev)); }
-//! \note moveTo() must be called only from outside of
-//! VirtualGPU submit command methods.
-//! Otherwise a deadlock in lockVgpus() is possible.
-//! Also the logic in this function is very specific to
-//! the zero-copy functionality.
-
-bool Memory::moveTo(Memory& dst) {
- bool result = false;
-
- // Make sure that all virtual devices don't process any commands
- Device::ScopedLockVgpus lock(dev());
-
- // Wait for idle on all virtual GPUs
- //!@note It's enough to wait on the active queue only
- for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
- wait(*(dev().vgpus()[idx]));
- }
-
- static const bool Entire = true;
- amd::Coord3D origin(0, 0, 0);
- amd::Coord3D region(size());
-
- // Transfer the data from old location to a new one
- if (dev().xferMgr().copyBuffer(*this, dst, origin, origin, region, Entire)) {
- // Move all properties to the new object
- dst.mapMemory_ = mapMemory_;
- mapMemory_ = nullptr;
-
- dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
- flags_ &= HostMemoryDirectAccess;
-
- dst.indirectMapCount_ = indirectMapCount_;
- indirectMapCount_ = 0;
-
- dst.pinnedMemory_ = pinnedMemory_;
- pinnedMemory_ = nullptr;
-
- // Replace the device memory object
- //! @note: current object will be destroyed
- owner()->replaceDeviceMemory(&dev(), &dst);
- result = true;
- }
-
- return result;
-}
-
Memory* Memory::mapMemory() const {
Memory* map = nullptr;
if (nullptr != mapMemory_) {
diff --git a/rocclr/runtime/device/pal/palmemory.hpp b/rocclr/runtime/device/pal/palmemory.hpp
index 32ab00b919..3f47b22e12 100644
--- a/rocclr/runtime/device/pal/palmemory.hpp
+++ b/rocclr/runtime/device/pal/palmemory.hpp
@@ -122,9 +122,6 @@ class Memory : public device::Memory, public Resource {
//! Allocates host memory for synchronization with MGPU context
void mgpuCacheWriteBack();
- //! Transfers objects data to the destination object
- bool moveTo(Memory& dst);
-
//! Accessors for indirect map memory object
Memory* mapMemory() const;
diff --git a/rocclr/runtime/device/rocm/rocdevice.hpp b/rocclr/runtime/device/rocm/rocdevice.hpp
index 7efb53424d..3d934fcb88 100644
--- a/rocclr/runtime/device/rocm/rocdevice.hpp
+++ b/rocclr/runtime/device/rocm/rocdevice.hpp
@@ -147,12 +147,6 @@ class NullDevice : public amd::Device {
return;
}
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner) const {
- ShouldNotReachHere();
- return false;
- }
-
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
@@ -308,9 +302,6 @@ class Device : public NullDevice {
return nullptr;
}
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner) const { return true; }
-
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
diff --git a/rocclr/runtime/platform/memory.hpp b/rocclr/runtime/platform/memory.hpp
index ec31e4eebe..7648cd6e38 100644
--- a/rocclr/runtime/platform/memory.hpp
+++ b/rocclr/runtime/platform/memory.hpp
@@ -252,11 +252,6 @@ class Memory : public amd::RuntimeObject {
bool forceCopy = false //!< Force system memory allocation
);
- //! Checks if memory was reallocated
- bool reallocedDeviceMemory(const Device* dev) {
- return (AllocRealloced == deviceAlloced_[dev]) ? true : false;
- }
-
// Accessors
Memory* parent() const { return parent_; }
bool isParent() const { return isParent_; }