P4 to Git Change 1559383 by gandryey@gera-w8 on 2018/05/24 18:38:36

SWDEV-79445 - OCL generic changes and code clean-up
	- Remove the memory reallocation interfaces. They were used for the AMDIL path with HW constant buffer support, but it seems AMDIL on SI+ doesn't enable HW constant buffers.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#303 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#589 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#166 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#324 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#129 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#132 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#52 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#105 edit
This commit is contained in:
foreman
2018-05-24 18:49:01 -04:00
rodzic 73dd22dba8
commit be77b7fac1
15 zmienionych plików z 0 dodań i 235 usunięć
-10
Wyświetl plik
@@ -842,13 +842,6 @@ class Kernel : public amd::HeapObject {
//! Default destructor
virtual ~Kernel();
//! Validates memory argument.
//! This default implementation performs no check and accepts every
//! argument; it is virtual so that a derived kernel can override it.
//! \return Always true
virtual bool validateMemory(uint idx, //!< Argument's index
amd::Memory* amdMem //!< memory object for validation
) const {
return true;
}
//! Returns the kernel info structure
const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; }
@@ -1498,9 +1491,6 @@ class Device : public RuntimeObject {
const device::Memory& parent //!< Parent device memory object for the view
) const = 0;
//! Reallocates device memory object.
//! Pure virtual: each concrete device has to provide the implementation.
//! \param owner Memory object passed to the implementation
//! \return bool status reported by the implementation
virtual bool reallocMemory(Memory& owner) const = 0;
//! Return true if initialized external API interop, otherwise false
virtual bool bindExternalDevice(
uint flags, //!< Enum val. for ext.API type: GL, D3D10, etc.
@@ -1633,67 +1633,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
return true;
}
//! Reallocates the device memory object that backs \p owner.
//!
//! Nothing is done when the current device memory has a zero pin offset.
//! Otherwise the parent (if any) is reallocated first, a new buffer is
//! created for \p owner, the old device memory is moved into it and an
//! attempt is made to pin the host memory. Images are reported as success
//! without any reallocation.
//!
//! \note reallocMemory() must be called only from outside of
//! VirtualGPU submit commands methods.
//! Otherwise a deadlock in lockVgpus() is possible
//!
//! \param owner Memory object whose device memory has to be reallocated
//! \return true on success (or when no reallocation is necessary);
//!         false if no device memory exists, the parent reallocation
//!         failed, the memory type is unknown, the new buffer could not
//!         be created or the data transfer failed
bool Device::reallocMemory(amd::Memory& owner) const {
  bool directAccess = false;

  // For now we have to serialize reallocation code
  amd::ScopedLock lk(*lockAsyncOps_);

  // Read device memory after the lock,
  // since realloc from another thread can replace the pointer
  gpu::Memory* currentMemory = getGpuMemory(&owner);
  if (currentMemory == NULL) {
    return false;
  }

  // A zero pin offset means the object doesn't have to be reallocated
  if (currentMemory->pinOffset() == 0) {
    return true;
  }

  // The parent object has to be reallocated before its child
  if (NULL != owner.parent()) {
    if (!reallocMemory(*owner.parent())) {
      return false;
    }
  }

  gpu::Memory* newMemory = NULL;
  if (owner.asBuffer()) {
    newMemory = createBuffer(owner, directAccess);
  } else if (owner.asImage()) {
    return true;
  } else {
    // Fail explicitly. Falling through would keep the existing device
    // memory as the "new" object and move the memory onto itself.
    LogError("Unknown memory type!");
    return false;
  }

  if (newMemory == NULL) {
    return false;
  }

  // Re-read the current device memory object right before the transfer
  gpu::Memory* oldMemory = getGpuMemory(&owner);

  // Transfer the object
  if (oldMemory != NULL) {
    if (!oldMemory->moveTo(*newMemory)) {
      delete newMemory;
      return false;
    }
  }

  // Attempt to pin system memory.
  // The former condition "host || (parent && host)" reduces to "host".
  if ((newMemory->memoryType() != Resource::Pinned) && (owner.getHostMem() != NULL)) {
    //! \note: Ignore the pinning result for now
    static_cast<void>(newMemory->pinSystemMemory(
        owner.getHostMem(),
        (owner.getHostMemRef()->size()) ? owner.getHostMemRef()->size() : owner.getSize()));
  }

  return true;
}
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
size_t size = owner.getSize();
assert((owner.asImage() != NULL) && "View supports images only");
@@ -79,9 +79,6 @@ class NullDevice : public amd::Device {
return NULL;
}
//! Reallocates the provided buffer object.
//! This implementation performs no work and always reports success.
//! \return Always true
virtual bool reallocMemory(amd::Memory& owner) const { return true; }
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
@@ -386,10 +383,6 @@ class Device : public NullDevice, public CALGSLDevice {
device::Sampler** sampler //!< device sampler object
) const;
//! Reallocates the provided buffer object.
//! Declaration only; the definition lives outside of the class body.
//! \return bool status of the reallocation
virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation
) const;
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
@@ -2642,13 +2642,6 @@ bool NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount)
return true;
}
//! Checks that the memory object bound to argument \p idx can be used
//! without a reallocation.
//! \param idx    Argument's index
//! \param amdMem Memory object for validation
//! \return true if the argument isn't flagged for reallocation or if
//!         \p amdMem reports that it was already reallocated for this device
bool Kernel::validateMemory(uint idx, amd::Memory* amdMem) const {
  // Arguments that don't request a reallocation are always valid
  if (!argument(idx)->memory_.realloc_) {
    return true;
  }

  // Otherwise the memory must have been reallocated for this device
  return amdMem->reallocedDeviceMemory(&dev());
}
inline static HSAIL_ARG_TYPE GetHSAILArgType(const aclArgData* argInfo) {
switch (argInfo->type) {
case ARG_TYPE_POINTER:
@@ -3314,14 +3307,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
return true;
}
//! Validates the memory object bound to argument \p idx.
//! No reallocation check is performed here (the former call to
//! amdMem->reallocedDeviceMemory(&dev()) is disabled), so every
//! argument is accepted.
//! \return Always true
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
  return true;
}
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
const HSAILProgram& HSAILKernel::prog() const {
@@ -534,11 +534,6 @@ class Kernel : public NullKernel {
size_t binarySize = 0 //!< the machine code size
);
//! Validates memory argument.
//! Declaration only; the definition lives outside of the class body.
//! \return bool result of the validation
virtual bool validateMemory(uint idx, //!< Argument's index
amd::Memory* amdMem //!< AMD memory object for validation
) const;
//! Initializes the CAL program grid for the kernel execution
void setupProgramGrid(VirtualGPU& gpu, //!< virtual GPU device object
size_t workDim, //!< work dimension
@@ -789,9 +784,6 @@ class HSAILKernel : public device::Kernel {
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
//! Returns true if memory is valid for execution
//! \param idx    Argument's index
//! \param amdMem Memory object for validation
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
//! Returns a pointer to the hsail argument
const Argument* argument(size_t i) const { return arguments_[i]; }
@@ -918,52 +918,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
void Memory::cpuUnmap(device::VirtualDevice& vDev) { unmap(&static_cast<VirtualGPU&>(vDev)); }
//! Transfers the content and the bookkeeping state of this object to \p dst.
//!
//! The data is copied with the device transfer manager. On success the map
//! memory, the flags (except HostMemoryDirectAccess), the indirect map count
//! and the pinned memory are handed over to \p dst, and the owner is asked
//! to replace its device memory with \p dst.
//!
//! \note moveTo() must be called only from outside of
//! VirtualGPU submit command methods.
//! Otherwise a deadlock in lockVgpus() is possible.
//! Also the logic in this function is very specific to
//! the zero-copy functionality.
//!
//! \param dst Destination memory object
//! \return true if the object was transferred, false if the copy failed
bool Memory::moveTo(Memory& dst) {
  // Make sure that all virtual devices don't process any commands
  Device::ScopedLockVgpus lock(dev());

  // Wait for idle on all virtual GPUs
  //!@note It's enough to wait on the active queue only
  for (uint queue = 0; queue < dev().vgpus().size(); ++queue) {
    wait(*(dev().vgpus()[queue]));
  }

  static const bool Entire = true;
  amd::Coord3D start(0, 0, 0);
  amd::Coord3D extent(size());

  // Transfer the data from old location to a new one
  if (!dev().xferMgr().copyBuffer(*this, dst, start, start, extent, Entire)) {
    return false;
  }

  // Hand the map memory over to the new object
  dst.mapMemory_ = mapMemory_;
  mapMemory_ = NULL;

  // Everything but the direct access bit migrates to the destination
  dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
  flags_ &= HostMemoryDirectAccess;

  dst.indirectMapCount_ = indirectMapCount_;
  indirectMapCount_ = 0;

  dst.pinnedMemory_ = pinnedMemory_;
  pinnedMemory_ = NULL;

  // Replace the device memory object
  //! @note: current object will be destroyed
  owner()->replaceDeviceMemory(&dev(), &dst);

  return true;
}
Memory* Memory::mapMemory() const {
Memory* map = NULL;
if (NULL != mapMemory_) {
@@ -142,9 +142,6 @@ class Memory : public device::Memory, public Resource {
//! Allocates host memory for synchronization with MGPU context
void mgpuCacheWriteBack();
//! Transfers the object's data to the destination object
//! \param dst Destination memory object
//! \return bool status of the transfer
bool moveTo(Memory& dst);
//! Accessors for indirect map memory object
Memory* mapMemory() const;
@@ -1541,13 +1541,6 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
return true;
}
//! Reallocates the device memory of \p owner; a no-op in this implementation.
//! \note reallocMemory() must be called only from outside of VirtualGPU submit
//! commands methods. Otherwise a deadlock in lockVgpus() is possible
//! \return Always true
bool Device::reallocMemory(amd::Memory& owner) const {
// Empty body, since HSAIL path doesn't require memory reallocations
return true;
}
device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
assert((owner.asImage() != nullptr) && "View supports images only");
const amd::Image& image = *owner.asImage();
@@ -78,9 +78,6 @@ class NullDevice : public amd::Device {
return NULL;
}
//! Reallocates the provided buffer object.
//! This implementation performs no work and always reports success.
//! \return Always true
virtual bool reallocMemory(amd::Memory& owner) const { return true; }
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
@@ -334,10 +331,6 @@ class Device : public NullDevice {
device::Sampler** sampler //!< device sampler object
) const;
//! Reallocates the provided buffer object.
//! Declaration only; the definition lives outside of the class body.
//! \return bool status of the reallocation
virtual bool reallocMemory(amd::Memory& owner //!< Buffer for reallocation
) const;
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
@@ -748,11 +748,6 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
return true;
}
//! Validates the memory object bound to argument \p idx.
//! Neither parameter is inspected; every argument is accepted.
//! \return Always true
bool HSAILKernel::validateMemory(uint idx, amd::Memory* amdMem) const {
// HSAIL path doesn't require memory reallocations
return true;
}
const Device& HSAILKernel::dev() const { return reinterpret_cast<const Device&>(dev_); }
const HSAILProgram& HSAILKernel::prog() const {
@@ -128,9 +128,6 @@ class HSAILKernel : public device::Kernel {
//! finalizes the kernel if needed
bool init(amd::hsa::loader::Symbol* sym, bool finalize = false);
//! Returns true if memory is valid for execution
//! \param idx    Argument's index
//! \param amdMem Memory object for validation
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
//! Returns the kernel argument list
const std::vector<Argument*>& arguments() const { return arguments_; }
@@ -892,52 +892,6 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
void Memory::cpuUnmap(device::VirtualDevice& vDev) { unmap(&static_cast<VirtualGPU&>(vDev)); }
//! Transfers the content and the bookkeeping state of this object to \p dst.
//!
//! The data is copied with the device transfer manager. On success the map
//! memory, the flags (except HostMemoryDirectAccess), the indirect map count
//! and the pinned memory are handed over to \p dst, and the owner is asked
//! to replace its device memory with \p dst.
//!
//! \note moveTo() must be called only from outside of
//! VirtualGPU submit command methods.
//! Otherwise a deadlock in lockVgpus() is possible.
//! Also the logic in this function is very specific to
//! the zero-copy functionality.
//!
//! \param dst Destination memory object
//! \return true if the object was transferred, false if the copy failed
bool Memory::moveTo(Memory& dst) {
  // Make sure that all virtual devices don't process any commands
  Device::ScopedLockVgpus lock(dev());

  // Wait for idle on all virtual GPUs
  //!@note It's enough to wait on the active queue only
  for (uint queue = 0; queue < dev().vgpus().size(); ++queue) {
    wait(*(dev().vgpus()[queue]));
  }

  static const bool Entire = true;
  amd::Coord3D start(0, 0, 0);
  amd::Coord3D extent(size());

  // Transfer the data from old location to a new one
  if (!dev().xferMgr().copyBuffer(*this, dst, start, start, extent, Entire)) {
    return false;
  }

  // Hand the map memory over to the new object
  dst.mapMemory_ = mapMemory_;
  mapMemory_ = nullptr;

  // Everything but the direct access bit migrates to the destination
  dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
  flags_ &= HostMemoryDirectAccess;

  dst.indirectMapCount_ = indirectMapCount_;
  indirectMapCount_ = 0;

  dst.pinnedMemory_ = pinnedMemory_;
  pinnedMemory_ = nullptr;

  // Replace the device memory object
  //! @note: current object will be destroyed
  owner()->replaceDeviceMemory(&dev(), &dst);

  return true;
}
Memory* Memory::mapMemory() const {
Memory* map = nullptr;
if (nullptr != mapMemory_) {
@@ -122,9 +122,6 @@ class Memory : public device::Memory, public Resource {
//! Allocates host memory for synchronization with MGPU context
void mgpuCacheWriteBack();
//! Transfers the object's data to the destination object
//! \param dst Destination memory object
//! \return bool status of the transfer
bool moveTo(Memory& dst);
//! Accessors for indirect map memory object
Memory* mapMemory() const;
@@ -147,12 +147,6 @@ class NullDevice : public amd::Device {
return;
}
//! Reallocates the provided buffer object.
//! Not expected to be called on this device: the body invokes
//! ShouldNotReachHere() and then reports failure.
//! \return Always false
virtual bool reallocMemory(amd::Memory& owner) const {
ShouldNotReachHere();
return false;
}
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
@@ -308,9 +302,6 @@ class Device : public NullDevice {
return nullptr;
}
//! Reallocates the provided buffer object.
//! This implementation performs no work and always reports success.
//! \return Always true
virtual bool reallocMemory(amd::Memory& owner) const { return true; }
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
-5
Wyświetl plik
@@ -252,11 +252,6 @@ class Memory : public amd::RuntimeObject {
bool forceCopy = false //!< Force system memory allocation
);
//! Checks if memory was reallocated
//! \param dev Device whose allocation state is looked up in deviceAlloced_
//! \return true if the state recorded for \p dev equals AllocRealloced
bool reallocedDeviceMemory(const Device* dev) {
  const bool realloced = (AllocRealloced == deviceAlloced_[dev]);
  return realloced;
}
// Accessors
Memory* parent() const { return parent_; }
bool isParent() const { return isParent_; }