Files
rocm-systems/rocclr/runtime/device/pal/palmemory.hpp
T
foreman 1ef3eda775 P4 to Git Change 1979444 by gandryey@gera-win10 on 2019/08/07 10:43:18
SWDEV-197488 - [Navi10][Navi14][19.30][CopySurfaceRegion] CopySurfaceRegion Failing Multiple Tests
	- Try to optimize the condition for image buffer workaround. The new logic will attempt to validate the custom pitch with the HW requirement and use the backing store only if it doesn't match

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#29 edit
2019-08-07 10:48:18 -04:00

257 строки
9.5 KiB
C++

//
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
//
#pragma once
#include "top.hpp"
#include "thread/atomic.hpp"
#include "device/pal/palresource.hpp"
#include <map>
/*! \addtogroup GPU
* @{
*/
namespace device {
class Memory;
}
//! PAL Device Implementation
namespace pal {
class Device;
class Heap;
class Resource;
class Memory;
class VirtualGPU;
//! GPU memory object.
// Wrapper that can contain a heap block or an interop buffer/image.
class Memory : public device::Memory, public Resource {
public:
//! Constructor (with owner)
Memory(const Device& gpuDev, //!< GPU device object
amd::Memory& owner, //!< Abstraction layer memory object
size_t size //!< Memory size for allocation
);
//! Constructor (nonfat version for local scratch mem use without heap block)
Memory(const Device& gpuDev, //!< GPU device object
size_t size //!< Memory size for allocation
);
//! Constructor memory for images (without global heap allocation)
Memory(const Device& gpuDev, //!< GPU device object
amd::Memory& owner, //!< Abstraction layer memory object
size_t width, //!< Allocated memory width
size_t height, //!< Allocated memory height
size_t depth, //!< Allocated memory depth
cl_image_format format, //!< Memory format
cl_mem_object_type imageType, //!< CL image type
uint mipLevels //!< The number of mip levels
);
//! Constructor memory for images (without global heap allocation)
Memory(const Device& gpuDev, //!< GPU device object
size_t size, //!< Memory object size
size_t width, //!< Allocated memory width
size_t height, //!< Allocated memory height
size_t depth, //!< Allocated memory depth
cl_image_format format, //!< Memory format
cl_mem_object_type imageType, //!< CL image type
uint mipLevels //!< The number of mip levels
);
//! Default destructor
virtual ~Memory();
//! Creates the interop memory
bool createInterop();
//! Overloads the resource create method
virtual bool create(Resource::MemoryType memType, //!< Memory type
Resource::CreateParams* params = nullptr, //!< Prameters for create
bool forceLinear = false //!< Forces linear tiling for images
);
//! Allocate memory for API-level maps
virtual void* allocMapTarget(const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
//! Pins system memory associated with this memory object
virtual bool pinSystemMemory(void* hostPtr, //!< System memory address
size_t size //!< Size of allocated system memory
);
//! Releases indirect map surface
virtual void releaseIndirectMap() { decIndMapCount(); }
//! Map the device memory to CPU visible
virtual void* cpuMap(device::VirtualDevice& vDev, //!< Virtual device for map operaiton
uint flags = 0, //!< flags for the map operation
// Optimization for multilayer map/unmap
uint startLayer = 0, //!< Start layer for multilayer map
uint numLayers = 0, //!< End layer for multilayer map
size_t* rowPitch = NULL, //!< Row pitch for the device memory
size_t* slicePitch = NULL //!< Slice pitch for the device memory
);
//! Unmap the device memory
virtual void cpuUnmap(device::VirtualDevice& vDev //!< Virtual device for unmap operaiton
);
//! Updates device memory from the owner's host allocation
void syncCacheFromHost(VirtualGPU& gpu, //!< Virtual GPU device object
//! Synchronization flags
device::Memory::SyncFlags syncFlags = device::Memory::SyncFlags());
//! Updates the owner's host allocation from device memory
virtual void syncHostFromCache(
//! Synchronization flags
device::Memory::SyncFlags syncFlags = device::Memory::SyncFlags());
//! Creates a view from current resource
virtual Memory* createBufferView(
amd::Memory& subBufferOwner //!< The abstraction layer subbuf owner
);
virtual uint64_t virtualAddress() const override { return vmAddress(); }
virtual const address cpuSrd() const {
return reinterpret_cast<const address>(const_cast<void*>(hwState()));
}
//! Allocates host memory for synchronization with MGPU context
void mgpuCacheWriteBack();
//! Accessors for indirect map memory object
Memory* mapMemory() const;
// Decompress GL depth-stencil/MSAA resources for CL access
// Invalidates any FBOs the resource may be bound to, otherwise the GL driver may crash.
virtual bool processGLResource(GLResourceOP operation);
//! Returns the interop resource for this memory object
const Memory* parent() const { return parent_; }
//! Returns TRUE if direct map is acceaptable. The method detects
//! forced USWC memory on APU and will cause a switch to
//! indirect map for allocations with a possibility of host read
bool isDirectMap() {
return (isCacheable() || !isHostMemDirectAccess() ||
(owner()->getMemFlags() &
(CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY)));
}
//! Quick view update for managed buffers. It should avoid expensive object allocations
void updateView(Resource* view, size_t offset, size_t size) {
size_ = size;
flags_ |= HostMemoryDirectAccess;
Resource::updateView(view, offset, size);
}
//! Check if memory object requires cache coherency sync
bool isChacheCoherencySync() const {
return (!isHostMemDirectAccess() && (memoryType() != P2PAccess));
}
protected:
//! Decrement map count
void decIndMapCount();
//! Validates allocated memory for possible workarounds
virtual bool ValidateMemory(Resource::MemoryType memType) { return true; }
private:
//! Disable copy constructor
Memory(const Memory&);
//! Disable operator=
Memory& operator=(const Memory&);
Memory* pinnedMemory_; //!< Memory used as pinned system memory
const Memory* parent_; //!< Parent memory object
};
class Buffer : public pal::Memory {
public:
//! Buffer constructor
Buffer(const Device& gpuDev, //!< GPU device object
amd::Memory& owner, //!< Abstraction layer memory object
size_t size //!< Buffer size
)
: pal::Memory(gpuDev, owner, size) {}
//! Creates a view from current resource
virtual Memory* createBufferView(
amd::Memory& subBufferOwner //!< The abstraction layer subbuf owner
) const;
private:
//! Disable copy constructor
Buffer(const Buffer&);
//! Disable operator=
Buffer& operator=(const Buffer&);
};
class Image : public pal::Memory {
public:
//! Image constructor
Image(const Device& gpuDev, //!< GPU device object
amd::Memory& owner, //!< Abstraction layer memory object
size_t width, //!< Allocated memory width
size_t height, //!< Allocated memory height
size_t depth, //!< Allocated memory depth
cl_image_format format, //!< Memory format
cl_mem_object_type imageType, //!< CL image type
uint mipLevels //!< The number of mip levels
)
: pal::Memory(gpuDev, owner, width, height, depth, format, imageType, mipLevels),
copyImageBuffer_(nullptr) {}
//! Image constructor
Image(const Device& gpuDev, //!< GPU device object
size_t size, //!< Memory size
size_t width, //!< Allocated memory width
size_t height, //!< Allocated memory height
size_t depth, //!< Allocated memory depth
cl_image_format format, //!< Memory format
cl_mem_object_type imageType, //!< CL image type
uint mipLevels //!< The number of mip levels
)
: pal::Memory(gpuDev, size, width, height, depth, format, imageType, mipLevels),
copyImageBuffer_(nullptr) {}
virtual ~Image() { delete copyImageBuffer_; }
//! Allocate memory for API-level maps
virtual void* allocMapTarget(const amd::Coord3D& origin, //!< The map location in memory
const amd::Coord3D& region, //!< The map region in memory
uint mapFlags, //!< Map flags
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
size_t* slicePitch = NULL //!< Slice for the mapped memory
);
virtual uint64_t virtualAddress() const override { return hwSrd(); }
Image* CopyImageBuffer() const { return copyImageBuffer_; }
//! Validates allocated memory for possible workarounds
bool ValidateMemory(Resource::MemoryType memType) final;
private:
//! Disable copy constructor
Image(const Image&);
//! Disable operator=
Image& operator=(const Image&);
Image* copyImageBuffer_;
};
} // namespace pal