From be77b7fac10a164e276abc817e471487749ede10 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 24 May 2018 18:49:01 -0400 Subject: [PATCH] P4 to Git Change 1559383 by gandryey@gera-w8 on 2018/05/24 18:38:36 SWDEV-79445 - OCL generic changes and code clean-up - Remove memory reallocation interfaces. They were used for the AMDIL path with HW constant buffer support. It seems AMDIL with SI+ doesn't enable HW const buffers. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#303 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#589 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#166 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#324 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#129 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#132 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#52 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#90 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#29 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#105 edit --- rocclr/runtime/device/device.hpp | 10 ---- rocclr/runtime/device/gpu/gpudevice.cpp | 61 ------------------------ rocclr/runtime/device/gpu/gpudevice.hpp | 7 --- rocclr/runtime/device/gpu/gpukernel.cpp | 15 ------ rocclr/runtime/device/gpu/gpukernel.hpp | 8 ---- rocclr/runtime/device/gpu/gpumemory.cpp | 46 ------------------ rocclr/runtime/device/gpu/gpumemory.hpp | 3 -- rocclr/runtime/device/pal/paldevice.cpp | 7 --- rocclr/runtime/device/pal/paldevice.hpp | 7 --- rocclr/runtime/device/pal/palkernel.cpp | 5 -- rocclr/runtime/device/pal/palkernel.hpp | 3 -- rocclr/runtime/device/pal/palmemory.cpp | 46 ------------------ rocclr/runtime/device/pal/palmemory.hpp | 3 -- rocclr/runtime/device/rocm/rocdevice.hpp | 9 ---- rocclr/runtime/platform/memory.hpp | 5 -- 15 files changed, 235 deletions(-) diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp index f6fc76532e..b3683bed81 100644 --- a/rocclr/runtime/device/device.hpp +++ b/rocclr/runtime/device/device.hpp @@ -842,13 +842,6 @@ class Kernel : public amd::HeapObject { //! Default destructor virtual ~Kernel(); - //! Validates memory argument - virtual bool validateMemory(uint idx, //!< Argument's index - amd::Memory* amdMem //!< memory object for validation - ) const { - return true; - } - //! Returns the kernel info structure const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; } @@ -1498,9 +1491,6 @@ class Device : public RuntimeObject { const device::Memory& parent //!< Parent device memory object for the view ) const = 0; - //! Reallocates device memory object - virtual bool reallocMemory(Memory& owner) const = 0; - //! Return true if initialized external API interop, otherwise false virtual bool bindExternalDevice( uint flags, //!< Enum val. for ext.API type: GL, D3D10, etc. 
diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp index 21779155e3..76b9aec392 100644 --- a/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/rocclr/runtime/device/gpu/gpudevice.cpp @@ -1633,67 +1633,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler) return true; } -//! \note reallocMemory() must be called only from outside of -//! VirtualGPU submit commands methods. -//! Otherwise a deadlock in lockVgpus() is possible - -bool Device::reallocMemory(amd::Memory& owner) const { - bool directAccess = false; - - // For now we have to serialize reallocation code - amd::ScopedLock lk(*lockAsyncOps_); - - // Read device memory after the lock, - // since realloc from another thread can replace the pointer - gpu::Memory* gpuMemory = getGpuMemory(&owner); - if (gpuMemory == NULL) { - return false; - } - - if (gpuMemory->pinOffset() == 0) { - return true; - } else if (NULL != owner.parent()) { - if (!reallocMemory(*owner.parent())) { - return false; - } - } - - if (owner.asBuffer()) { - gpuMemory = createBuffer(owner, directAccess); - } else if (owner.asImage()) { - return true; - } else { - LogError("Unknown memory type!"); - } - - if (gpuMemory != NULL) { - gpu::Memory* newMemory = gpuMemory; - gpu::Memory* oldMemory = getGpuMemory(&owner); - - // Transfer the object - if (oldMemory != NULL) { - if (!oldMemory->moveTo(*newMemory)) { - delete newMemory; - return false; - } - } - - // Attempt to pin system memory - if ((newMemory->memoryType() != Resource::Pinned) && - ((owner.getHostMem() != NULL) || - ((NULL != owner.parent()) && (owner.getHostMem() != NULL)))) { - bool ok = newMemory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size()) - ? owner.getHostMemRef()->size() - : owner.getSize()); - //! 
\note: Ignore the pinning result for now - } - - return true; - } - - return false; -} - device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const { size_t size = owner.getSize(); assert((owner.asImage() != NULL) && "View supports images only"); diff --git a/rocclr/runtime/device/gpu/gpudevice.hpp b/rocclr/runtime/device/gpu/gpudevice.hpp index 7db18a5404..e24e60f863 100644 --- a/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/rocclr/runtime/device/gpu/gpudevice.hpp @@ -79,9 +79,6 @@ class NullDevice : public amd::Device { return NULL; } - //! Reallocates the provided buffer object - virtual bool reallocMemory(amd::Memory& owner) const { return true; } - //! Acquire external graphics API object in the host thread //! Needed for OpenGL objects on CPU device @@ -386,10 +383,6 @@ class Device : public NullDevice, public CALGSLDevice { device::Sampler** sampler //!< device sampler object ) const; - //! Reallocates the provided buffer object - virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation - ) const; - //! 
Allocates a view object from the device memory virtual device::Memory* createView( amd::Memory& owner, //!< Owner memory object diff --git a/rocclr/runtime/device/gpu/gpukernel.cpp b/rocclr/runtime/device/gpu/gpukernel.cpp index 9ca9a11573..e49fe8b63b 100644 --- a/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/rocclr/runtime/device/gpu/gpukernel.cpp @@ -2642,13 +2642,6 @@ bool NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount) return true; } -bool Kernel::validateMemory(uint idx, amd::Memory* amdMem) const { - // Check if memory doesn't require reallocation - bool noRealloc = (!argument(idx)->memory_.realloc_ || amdMem->reallocedDeviceMemory(&dev())); - - return noRealloc; -} - inline static HSAIL_ARG_TYPE GetHSAILArgType(const aclArgData* argInfo) { switch (argInfo->type) { case ARG_TYPE_POINTER: @@ -3314,14 +3307,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { return true; } -bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const { - // Check if memory doesn't require reallocation - bool noRealloc = true; - // amdMem->reallocedDeviceMemory(&dev())); - - return noRealloc; -} - const Device& HSAILKernel::dev() const { return reinterpret_cast(dev_); } const HSAILProgram& HSAILKernel::prog() const { diff --git a/rocclr/runtime/device/gpu/gpukernel.hpp b/rocclr/runtime/device/gpu/gpukernel.hpp index f7ca398777..029ecb1946 100644 --- a/rocclr/runtime/device/gpu/gpukernel.hpp +++ b/rocclr/runtime/device/gpu/gpukernel.hpp @@ -534,11 +534,6 @@ class Kernel : public NullKernel { size_t binarySize = 0 //!< the machine code size ); - //! Validates memory argument - virtual bool validateMemory(uint idx, //!< Argument's index - amd::Memory* amdMem //!< AMD memory object for validation - ) const; - //! 
Initializes the CAL program grid for the kernel execution void setupProgramGrid(VirtualGPU& gpu, //!< virtual GPU device object size_t workDim, //!< work dimension @@ -789,9 +784,6 @@ class HSAILKernel : public device::Kernel { //! finalizes the kernel if needed bool init(amd::hsa::loader::Symbol* sym, bool finalize = false); - //! Returns true if memory is valid for execution - virtual bool validateMemory(uint idx, amd::Memory* amdMem) const; - //! Returns a pointer to the hsail argument const Argument* argument(size_t i) const { return arguments_[i]; } diff --git a/rocclr/runtime/device/gpu/gpumemory.cpp b/rocclr/runtime/device/gpu/gpumemory.cpp index ba6e76b1de..7676a00a19 100644 --- a/rocclr/runtime/device/gpu/gpumemory.cpp +++ b/rocclr/runtime/device/gpu/gpumemory.cpp @@ -918,52 +918,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u void Memory::cpuUnmap(device::VirtualDevice& vDev) { unmap(&static_cast(vDev)); } -//! \note moveTo() must be called only from outside of -//! VirtualGPU submit command methods. -//! Otherwise a deadlock in lockVgpus() is possible. -//! Also the logic in this function is very specific to -//! the zero-copy functionality. 
- -bool Memory::moveTo(Memory& dst) { - bool result = false; - - // Make sure that all virtual devices don't process any commands - Device::ScopedLockVgpus lock(dev()); - - // Wait for idle on all virtual GPUs - //!@note It's enough to wait on the active queue only - for (uint idx = 0; idx < dev().vgpus().size(); ++idx) { - wait(*(dev().vgpus()[idx])); - } - - static const bool Entire = true; - amd::Coord3D origin(0, 0, 0); - amd::Coord3D region(size()); - - // Transfer the data from old location to a new one - if (dev().xferMgr().copyBuffer(*this, dst, origin, origin, region, Entire)) { - // Move all properties to the new object - dst.mapMemory_ = mapMemory_; - mapMemory_ = NULL; - - dst.flags_ |= flags_ & ~HostMemoryDirectAccess; - flags_ &= HostMemoryDirectAccess; - - dst.indirectMapCount_ = indirectMapCount_; - indirectMapCount_ = 0; - - dst.pinnedMemory_ = pinnedMemory_; - pinnedMemory_ = NULL; - - // Replace the device memory object - //! @note: current object will be destroyed - owner()->replaceDeviceMemory(&dev(), &dst); - result = true; - } - - return result; -} - Memory* Memory::mapMemory() const { Memory* map = NULL; if (NULL != mapMemory_) { diff --git a/rocclr/runtime/device/gpu/gpumemory.hpp b/rocclr/runtime/device/gpu/gpumemory.hpp index e45a1e5709..095a1e3b55 100644 --- a/rocclr/runtime/device/gpu/gpumemory.hpp +++ b/rocclr/runtime/device/gpu/gpumemory.hpp @@ -142,9 +142,6 @@ class Memory : public device::Memory, public Resource { //! Allocates host memory for synchronization with MGPU context void mgpuCacheWriteBack(); - //! Transfers objects data to the destination object - bool moveTo(Memory& dst); - //! 
Accessors for indirect map memory object Memory* mapMemory() const; diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp index 6947884915..e0663d7cdc 100644 --- a/rocclr/runtime/device/pal/paldevice.cpp +++ b/rocclr/runtime/device/pal/paldevice.cpp @@ -1541,13 +1541,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler) return true; } -//! \note reallocMemory() must be called only from outside of VirtualGPU submit -//! commands methods. Otherwise a deadlock in lockVgpus() is possible -bool Device::reallocMemory(amd::Memory& owner) const { - // Empty body, since HSAIL path doesn't require memory reallocations - return true; -} - device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const { assert((owner.asImage() != nullptr) && "View supports images only"); const amd::Image& image = *owner.asImage(); diff --git a/rocclr/runtime/device/pal/paldevice.hpp b/rocclr/runtime/device/pal/paldevice.hpp index a129bd413c..e5a441562f 100644 --- a/rocclr/runtime/device/pal/paldevice.hpp +++ b/rocclr/runtime/device/pal/paldevice.hpp @@ -78,9 +78,6 @@ class NullDevice : public amd::Device { return NULL; } - //! Reallocates the provided buffer object - virtual bool reallocMemory(amd::Memory& owner) const { return true; } - //! Acquire external graphics API object in the host thread //! Needed for OpenGL objects on CPU device @@ -334,10 +331,6 @@ class Device : public NullDevice { device::Sampler** sampler //!< device sampler object ) const; - //! Reallocates the provided buffer object - virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation - ) const; - //! 
Allocates a view object from the device memory virtual device::Memory* createView( amd::Memory& owner, //!< Owner memory object diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp index 612d6d52e5..6c4b779ce2 100644 --- a/rocclr/runtime/device/pal/palkernel.cpp +++ b/rocclr/runtime/device/pal/palkernel.cpp @@ -748,11 +748,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { return true; } -bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const { - // HSAIL path doesn't require memory reallocations - return true; -} - const Device& HSAILKernel::dev() const { return reinterpret_cast(dev_); } const HSAILProgram& HSAILKernel::prog() const { diff --git a/rocclr/runtime/device/pal/palkernel.hpp b/rocclr/runtime/device/pal/palkernel.hpp index 680306277b..66e4132055 100644 --- a/rocclr/runtime/device/pal/palkernel.hpp +++ b/rocclr/runtime/device/pal/palkernel.hpp @@ -128,9 +128,6 @@ class HSAILKernel : public device::Kernel { //! finalizes the kernel if needed bool init(amd::hsa::loader::Symbol* sym, bool finalize = false); - //! Returns true if memory is valid for execution - virtual bool validateMemory(uint idx, amd::Memory* amdMem) const; - //! Returns the kernel argument list const std::vector& arguments() const { return arguments_; } diff --git a/rocclr/runtime/device/pal/palmemory.cpp b/rocclr/runtime/device/pal/palmemory.cpp index 48dc958813..fcf2650b64 100644 --- a/rocclr/runtime/device/pal/palmemory.cpp +++ b/rocclr/runtime/device/pal/palmemory.cpp @@ -892,52 +892,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u void Memory::cpuUnmap(device::VirtualDevice& vDev) { unmap(&static_cast(vDev)); } -//! \note moveTo() must be called only from outside of -//! VirtualGPU submit command methods. -//! Otherwise a deadlock in lockVgpus() is possible. -//! Also the logic in this function is very specific to -//! the zero-copy functionality. 
- -bool Memory::moveTo(Memory& dst) { - bool result = false; - - // Make sure that all virtual devices don't process any commands - Device::ScopedLockVgpus lock(dev()); - - // Wait for idle on all virtual GPUs - //!@note It's enough to wait on the active queue only - for (uint idx = 0; idx < dev().vgpus().size(); ++idx) { - wait(*(dev().vgpus()[idx])); - } - - static const bool Entire = true; - amd::Coord3D origin(0, 0, 0); - amd::Coord3D region(size()); - - // Transfer the data from old location to a new one - if (dev().xferMgr().copyBuffer(*this, dst, origin, origin, region, Entire)) { - // Move all properties to the new object - dst.mapMemory_ = mapMemory_; - mapMemory_ = nullptr; - - dst.flags_ |= flags_ & ~HostMemoryDirectAccess; - flags_ &= HostMemoryDirectAccess; - - dst.indirectMapCount_ = indirectMapCount_; - indirectMapCount_ = 0; - - dst.pinnedMemory_ = pinnedMemory_; - pinnedMemory_ = nullptr; - - // Replace the device memory object - //! @note: current object will be destroyed - owner()->replaceDeviceMemory(&dev(), &dst); - result = true; - } - - return result; -} - Memory* Memory::mapMemory() const { Memory* map = nullptr; if (nullptr != mapMemory_) { diff --git a/rocclr/runtime/device/pal/palmemory.hpp b/rocclr/runtime/device/pal/palmemory.hpp index 32ab00b919..3f47b22e12 100644 --- a/rocclr/runtime/device/pal/palmemory.hpp +++ b/rocclr/runtime/device/pal/palmemory.hpp @@ -122,9 +122,6 @@ class Memory : public device::Memory, public Resource { //! Allocates host memory for synchronization with MGPU context void mgpuCacheWriteBack(); - //! Transfers objects data to the destination object - bool moveTo(Memory& dst); - //! 
Accessors for indirect map memory object Memory* mapMemory() const; diff --git a/rocclr/runtime/device/rocm/rocdevice.hpp b/rocclr/runtime/device/rocm/rocdevice.hpp index 7efb53424d..3d934fcb88 100644 --- a/rocclr/runtime/device/rocm/rocdevice.hpp +++ b/rocclr/runtime/device/rocm/rocdevice.hpp @@ -147,12 +147,6 @@ class NullDevice : public amd::Device { return; } - //! Reallocates the provided buffer object - virtual bool reallocMemory(amd::Memory& owner) const { - ShouldNotReachHere(); - return false; - } - //! Acquire external graphics API object in the host thread //! Needed for OpenGL objects on CPU device @@ -308,9 +302,6 @@ class Device : public NullDevice { return nullptr; } - //! Reallocates the provided buffer object - virtual bool reallocMemory(amd::Memory& owner) const { return true; } - //! Acquire external graphics API object in the host thread //! Needed for OpenGL objects on CPU device virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext, diff --git a/rocclr/runtime/platform/memory.hpp b/rocclr/runtime/platform/memory.hpp index ec31e4eebe..7648cd6e38 100644 --- a/rocclr/runtime/platform/memory.hpp +++ b/rocclr/runtime/platform/memory.hpp @@ -252,11 +252,6 @@ class Memory : public amd::RuntimeObject { bool forceCopy = false //!< Force system memory allocation ); - //! Checks if memory was reallocated - bool reallocedDeviceMemory(const Device* dev) { - return (AllocRealloced == deviceAlloced_[dev]) ? true : false; - } - // Accessors Memory* parent() const { return parent_; } bool isParent() const { return isParent_; }