P4 to Git Change 1559383 by gandryey@gera-w8 on 2018/05/24 18:38:36
SWDEV-79445 - OCL generic changes and code clean-up - Remove the memory reallocation interfaces. They were used for the AMDIL path with HW constant buffer support. It seems AMDIL with SI+ doesn't enable HW constant buffers. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#303 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#589 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#166 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#324 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#129 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#132 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#52 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#90 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#105 edit
This commit is contained in:
@@ -842,13 +842,6 @@ class Kernel : public amd::HeapObject {
|
||||
//! Default destructor
|
||||
virtual ~Kernel();
|
||||
|
||||
//! Validates memory argument
|
||||
//! Base validation hook: accepts every memory argument.
//! \param idx    Argument's index
//! \param amdMem Memory object for validation
//! \return Always true in this base implementation
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const { return true; }
|
||||
|
||||
//! Returns the kernel info structure
|
||||
const WorkGroupInfo* workGroupInfo() const {
  // Hand out the address of the embedded info structure; no copy is made
  return &workGroupInfo_;
}
|
||||
|
||||
@@ -1498,9 +1491,6 @@ class Device : public RuntimeObject {
|
||||
const device::Memory& parent //!< Parent device memory object for the view
|
||||
) const = 0;
|
||||
|
||||
//! Reallocates device memory object
|
||||
virtual bool reallocMemory(Memory& owner) const = 0;
|
||||
|
||||
//! Return true if initialized external API interop, otherwise false
|
||||
virtual bool bindExternalDevice(
|
||||
uint flags, //!< Enum val. for ext.API type: GL, D3D10, etc.
|
||||
|
||||
@@ -1633,67 +1633,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
|
||||
return true;
|
||||
}
|
||||
|
||||
//! \note reallocMemory() must be called only from outside of
|
||||
//! VirtualGPU submit commands methods.
|
||||
//! Otherwise a deadlock in lockVgpus() is possible
|
||||
|
||||
//! Reallocates the device memory backing \p owner and moves the contents of
//! the old allocation into the new one.
//!
//! \param owner Memory object whose device allocation is replaced
//! \return true if no reallocation was needed (zero pin offset or an image)
//!         or the buffer was recreated and its data transferred; false if the
//!         device memory is missing, the parent reallocation fails, the memory
//!         type is unknown, the new buffer can't be created or the transfer fails
bool Device::reallocMemory(amd::Memory& owner) const {
  // Passed through to createBuffer()
  bool directAccess = false;

  // For now we have to serialize reallocation code
  amd::ScopedLock lk(*lockAsyncOps_);

  // Read device memory after the lock,
  // since realloc from another thread can replace the pointer
  gpu::Memory* gpuMemory = getGpuMemory(&owner);
  if (gpuMemory == NULL) {
    return false;
  }

  // A zero pin offset means there is nothing to reallocate
  if (gpuMemory->pinOffset() == 0) {
    return true;
  }

  // Reallocate the parent object first, when there is one
  if ((NULL != owner.parent()) && !reallocMemory(*owner.parent())) {
    return false;
  }

  gpu::Memory* newMemory = NULL;
  if (owner.asBuffer()) {
    newMemory = createBuffer(owner, directAccess);
  } else if (owner.asImage()) {
    return true;
  } else {
    // Bail out here: falling through would treat the still-live allocation as
    // the "new" one and attempt to move the object onto itself
    LogError("Unknown memory type!");
    return false;
  }

  if (newMemory == NULL) {
    return false;
  }

  // Re-read the current device memory and transfer the object
  gpu::Memory* oldMemory = getGpuMemory(&owner);
  if ((oldMemory != NULL) && !oldMemory->moveTo(*newMemory)) {
    delete newMemory;
    return false;
  }

  // Attempt to pin system memory
  if ((newMemory->memoryType() != Resource::Pinned) && (owner.getHostMem() != NULL)) {
    //! \note: Ignore the pinning result for now
    newMemory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size())
                                                       ? owner.getHostMemRef()->size()
                                                       : owner.getSize());
  }

  return true;
}
|
||||
|
||||
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
|
||||
size_t size = owner.getSize();
|
||||
assert((owner.asImage() != NULL) && "View supports images only");
|
||||
|
||||
@@ -79,9 +79,6 @@ class NullDevice : public amd::Device {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner) const {
  // No work is done for this device type; success is reported unconditionally
  return true;
}
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
|
||||
@@ -386,10 +383,6 @@ class Device : public NullDevice, public CALGSLDevice {
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
) const;
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation
|
||||
) const;
|
||||
|
||||
//! Allocates a view object from the device memory
|
||||
virtual device::Memory* createView(
|
||||
amd::Memory& owner, //!< Owner memory object
|
||||
|
||||
@@ -2642,13 +2642,6 @@ bool NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount)
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Checks that the memory object bound to argument \p idx doesn't still
//! require a reallocation.
//! \return true if the argument isn't flagged for reallocation, or if
//!         \p amdMem reports that it was already reallocated for this device
bool Kernel::validateMemory(uint idx, amd::Memory* amdMem) const {
  // Arguments without the realloc flag are valid as they are
  if (!argument(idx)->memory_.realloc_) {
    return true;
  }

  // Flagged arguments are valid only once reallocated for this device
  return amdMem->reallocedDeviceMemory(&dev());
}
|
||||
|
||||
inline static HSAIL_ARG_TYPE GetHSAILArgType(const aclArgData* argInfo) {
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER:
|
||||
@@ -3314,14 +3307,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Memory validation for HSAIL kernels.
//! \return Always true; the reallocation check
//!         (amdMem->reallocedDeviceMemory(&dev())) is disabled in this path
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
  return true;
}
|
||||
|
||||
const Device& HSAILKernel::dev() const {
  // Reinterpret the stored dev_ member as this back-end's Device type
  return reinterpret_cast<const Device&>(dev_);
}
|
||||
|
||||
const HSAILProgram& HSAILKernel::prog() const {
|
||||
|
||||
@@ -534,11 +534,6 @@ class Kernel : public NullKernel {
|
||||
size_t binarySize = 0 //!< the machine code size
|
||||
);
|
||||
|
||||
//! Validates memory argument
|
||||
virtual bool validateMemory(uint idx, //!< Argument's index
|
||||
amd::Memory* amdMem //!< AMD memory object for validation
|
||||
) const;
|
||||
|
||||
//! Initializes the CAL program grid for the kernel execution
|
||||
void setupProgramGrid(VirtualGPU& gpu, //!< virtual GPU device object
|
||||
size_t workDim, //!< work dimension
|
||||
@@ -789,9 +784,6 @@ class HSAILKernel : public device::Kernel {
|
||||
//! finalizes the kernel if needed
|
||||
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
|
||||
|
||||
//! Returns true if memory is valid for execution
|
||||
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
|
||||
|
||||
//! Returns a pointer to the hsail argument
|
||||
const Argument* argument(size_t i) const {
  // Plain indexed access into arguments_; the index is not range-checked here
  return arguments_[i];
}
|
||||
|
||||
|
||||
@@ -918,52 +918,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
|
||||
|
||||
void Memory::cpuUnmap(device::VirtualDevice& vDev) {
  // The generic virtual device is downcast to VirtualGPU for the unmap call
  VirtualGPU& gpu = static_cast<VirtualGPU&>(vDev);
  unmap(&gpu);
}
|
||||
|
||||
//! \note moveTo() must be called only from outside of
|
||||
//! VirtualGPU submit command methods.
|
||||
//! Otherwise a deadlock in lockVgpus() is possible.
|
||||
//! Also the logic in this function is very specific to
|
||||
//! the zero-copy functionality.
|
||||
|
||||
//! Copies the contents of this object into \p dst and, on success, hands the
//! map memory, flags, indirect map count and pinned memory over to \p dst
//! before replacing the owner's device memory with it.
//! \return true if the copy succeeded and \p dst took over, false otherwise
bool Memory::moveTo(Memory& dst) {
  // Make sure that all virtual devices don't process any commands
  Device::ScopedLockVgpus vgpusGuard(dev());

  // Wait for idle on all virtual GPUs
  //!@note It's enough to wait on the active queue only
  uint queue = 0;
  while (queue < dev().vgpus().size()) {
    wait(*(dev().vgpus()[queue]));
    ++queue;
  }

  const bool copyEntire = true;
  amd::Coord3D start(0, 0, 0);
  amd::Coord3D extent(size());

  // Transfer the data from old location to a new one
  if (!dev().xferMgr().copyBuffer(*this, dst, start, start, extent, copyEntire)) {
    return false;
  }

  // Move all properties to the new object
  dst.mapMemory_ = mapMemory_;
  mapMemory_ = NULL;

  // Everything except the direct-access bit is added to the destination;
  // this object keeps only its own direct-access bit
  dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
  flags_ &= HostMemoryDirectAccess;

  dst.indirectMapCount_ = indirectMapCount_;
  indirectMapCount_ = 0;

  dst.pinnedMemory_ = pinnedMemory_;
  pinnedMemory_ = NULL;

  // Replace the device memory object
  //! @note: current object will be destroyed
  owner()->replaceDeviceMemory(&dev(), &dst);
  return true;
}
|
||||
|
||||
Memory* Memory::mapMemory() const {
|
||||
Memory* map = NULL;
|
||||
if (NULL != mapMemory_) {
|
||||
|
||||
@@ -142,9 +142,6 @@ class Memory : public device::Memory, public Resource {
|
||||
//! Allocates host memory for synchronization with MGPU context
|
||||
void mgpuCacheWriteBack();
|
||||
|
||||
//! Transfers objects data to the destination object
|
||||
bool moveTo(Memory& dst);
|
||||
|
||||
//! Accessors for indirect map memory object
|
||||
Memory* mapMemory() const;
|
||||
|
||||
|
||||
@@ -1541,13 +1541,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
|
||||
return true;
|
||||
}
|
||||
|
||||
//! \note reallocMemory() must be called only from outside of VirtualGPU submit
|
||||
//! commands methods. Otherwise a deadlock in lockVgpus() is possible
|
||||
//! Memory reallocation entry point for this device.
//! \return Always true; the HSAIL path doesn't require memory reallocations,
//!         so the body is intentionally empty
bool Device::reallocMemory(amd::Memory& owner) const { return true; }
|
||||
|
||||
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
|
||||
assert((owner.asImage() != nullptr) && "View supports images only");
|
||||
const amd::Image& image = *owner.asImage();
|
||||
|
||||
@@ -78,9 +78,6 @@ class NullDevice : public amd::Device {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner) const {
  // No work is done for this device type; success is reported unconditionally
  return true;
}
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
|
||||
@@ -334,10 +331,6 @@ class Device : public NullDevice {
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
) const;
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation
|
||||
) const;
|
||||
|
||||
//! Allocates a view object from the device memory
|
||||
virtual device::Memory* createView(
|
||||
amd::Memory& owner, //!< Owner memory object
|
||||
|
||||
@@ -748,11 +748,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Memory validation for HSAIL kernels.
//! \return Always true; the HSAIL path doesn't require memory reallocations
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const { return true; }
|
||||
|
||||
const Device& HSAILKernel::dev() const {
  // Reinterpret the stored dev_ member as this back-end's Device type
  return reinterpret_cast<const Device&>(dev_);
}
|
||||
|
||||
const HSAILProgram& HSAILKernel::prog() const {
|
||||
|
||||
@@ -128,9 +128,6 @@ class HSAILKernel : public device::Kernel {
|
||||
//! finalizes the kernel if needed
|
||||
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
|
||||
|
||||
//! Returns true if memory is valid for execution
|
||||
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
|
||||
|
||||
//! Returns the kernel argument list
|
||||
const std::vector<Argument*>& arguments() const {
  // Read-only view of the argument list; the vector itself is not copied
  return arguments_;
}
|
||||
|
||||
|
||||
@@ -892,52 +892,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
|
||||
|
||||
void Memory::cpuUnmap(device::VirtualDevice& vDev) {
  // The generic virtual device is downcast to VirtualGPU for the unmap call
  VirtualGPU& gpu = static_cast<VirtualGPU&>(vDev);
  unmap(&gpu);
}
|
||||
|
||||
//! \note moveTo() must be called only from outside of
|
||||
//! VirtualGPU submit command methods.
|
||||
//! Otherwise a deadlock in lockVgpus() is possible.
|
||||
//! Also the logic in this function is very specific to
|
||||
//! the zero-copy functionality.
|
||||
|
||||
//! Copies the contents of this object into \p dst and, on success, hands the
//! map memory, flags, indirect map count and pinned memory over to \p dst
//! before replacing the owner's device memory with it.
//! \return true if the copy succeeded and \p dst took over, false otherwise
bool Memory::moveTo(Memory& dst) {
  // Make sure that all virtual devices don't process any commands
  Device::ScopedLockVgpus vgpusGuard(dev());

  // Wait for idle on all virtual GPUs
  //!@note It's enough to wait on the active queue only
  uint queue = 0;
  while (queue < dev().vgpus().size()) {
    wait(*(dev().vgpus()[queue]));
    ++queue;
  }

  const bool copyEntire = true;
  amd::Coord3D start(0, 0, 0);
  amd::Coord3D extent(size());

  // Transfer the data from old location to a new one
  if (!dev().xferMgr().copyBuffer(*this, dst, start, start, extent, copyEntire)) {
    return false;
  }

  // Move all properties to the new object
  dst.mapMemory_ = mapMemory_;
  mapMemory_ = nullptr;

  // Everything except the direct-access bit is added to the destination;
  // this object keeps only its own direct-access bit
  dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
  flags_ &= HostMemoryDirectAccess;

  dst.indirectMapCount_ = indirectMapCount_;
  indirectMapCount_ = 0;

  dst.pinnedMemory_ = pinnedMemory_;
  pinnedMemory_ = nullptr;

  // Replace the device memory object
  //! @note: current object will be destroyed
  owner()->replaceDeviceMemory(&dev(), &dst);
  return true;
}
|
||||
|
||||
Memory* Memory::mapMemory() const {
|
||||
Memory* map = nullptr;
|
||||
if (nullptr != mapMemory_) {
|
||||
|
||||
@@ -122,9 +122,6 @@ class Memory : public device::Memory, public Resource {
|
||||
//! Allocates host memory for synchronization with MGPU context
|
||||
void mgpuCacheWriteBack();
|
||||
|
||||
//! Transfers objects data to the destination object
|
||||
bool moveTo(Memory& dst);
|
||||
|
||||
//! Accessors for indirect map memory object
|
||||
Memory* mapMemory() const;
|
||||
|
||||
|
||||
@@ -147,12 +147,6 @@ class NullDevice : public amd::Device {
|
||||
return;
|
||||
}
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
//! Not expected to be called on this device: flags the call through
//! ShouldNotReachHere() and reports failure.
//! \return Always false
virtual bool reallocMemory(amd::Memory& owner) const {
  ShouldNotReachHere();
  return false;
}
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
|
||||
@@ -308,9 +302,6 @@ class Device : public NullDevice {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner) const {
  // No work is done for this device type; success is reported unconditionally
  return true;
}
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
|
||||
|
||||
@@ -252,11 +252,6 @@ class Memory : public amd::RuntimeObject {
|
||||
bool forceCopy = false //!< Force system memory allocation
|
||||
);
|
||||
|
||||
//! Checks if memory was reallocated
|
||||
bool reallocedDeviceMemory(const Device* dev) {
|
||||
return (AllocRealloced == deviceAlloced_[dev]) ? true : false;
|
||||
}
|
||||
|
||||
// Accessors
|
||||
Memory* parent() const {
  // Returns the stored parent_ pointer as-is
  return parent_;
}
|
||||
bool isParent() const {
  // Returns the stored isParent_ flag as-is
  return isParent_;
}
|
||||
|
||||
Reference in New Issue
Block a user