SWDEV-508632 - Align address to 2 MB for hidden heap allocation. (#29)
[ROCm/clr commit: b5c9cbc236]
This commit is contained in:
committed by
GitHub
parent
29be7230eb
commit
b217d3a4e6
@@ -102,7 +102,8 @@ static void handlePayload(MessageHandler& messages, uint32_t service, uint64_t*
|
||||
}
|
||||
} else {
|
||||
amd::Context& ctx = dev.context();
|
||||
amd::Buffer* buf = new(ctx) amd::Buffer(ctx, CL_MEM_READ_WRITE, payload[1]);
|
||||
amd::Buffer* buf = new(ctx) amd::Buffer(ctx, CL_MEM_READ_WRITE, payload[1], NULL,
|
||||
(payload[1] == 2 * Mi) ? 2 * Mi : 0);
|
||||
uint64_t va = 0;
|
||||
if (buf) {
|
||||
if (buf->create()) {
|
||||
|
||||
@@ -1750,8 +1750,8 @@ class Device : public RuntimeObject {
|
||||
//! Allocate a chunk of device memory as a cache for a CL memory object
|
||||
virtual device::Memory* createMemory(Memory& owner) const = 0;
|
||||
|
||||
//! Allocate a chunk of device memory without owner class
|
||||
virtual device::Memory* createMemory(size_t size) const = 0;
|
||||
//! Allocate a chunk of device memory with address alignment
|
||||
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const = 0;
|
||||
|
||||
//! Allocate a device sampler object
|
||||
virtual bool createSampler(const Sampler&, device::Sampler**) const = 0;
|
||||
|
||||
@@ -1687,6 +1687,7 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const {
|
||||
if (owner.ipcShared()) {
|
||||
type = Resource::IpcMemory;
|
||||
}
|
||||
params.alignment_ = owner.getAlignment();
|
||||
// Create memory object
|
||||
result = gpuMemory->create(type, &params);
|
||||
|
||||
@@ -1887,9 +1888,11 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
device::Memory* Device::createMemory(size_t size) const {
|
||||
device::Memory* Device::createMemory(size_t size, size_t alignment) const {
|
||||
auto buffer = new pal::Memory(*this, size);
|
||||
if ((buffer == nullptr) || !buffer->create(Resource::Local)) {
|
||||
Resource::CreateParams params {};
|
||||
params.alignment_ = alignment;
|
||||
if ((buffer == nullptr) || !buffer->create(Resource::Local, &params)) {
|
||||
LogError("Couldn't allocate memory on device!");
|
||||
return nullptr;
|
||||
}
|
||||
@@ -2607,7 +2610,7 @@ void Device::HiddenHeapAlloc(const VirtualGPU& gpu) {
|
||||
heap_buffer_ = createMemory(HeapBufferSize);
|
||||
if (initial_heap_size_ != 0) {
|
||||
initial_heap_size_ = amd::alignUp(initial_heap_size_, 2 * Mi);
|
||||
initial_heap_buffer_ = createMemory(initial_heap_size_);
|
||||
initial_heap_buffer_ = createMemory(initial_heap_size_, 2 * Mi);
|
||||
}
|
||||
if (heap_buffer_ == nullptr) {
|
||||
LogError("Heap buffer allocation failed!");
|
||||
|
||||
@@ -92,8 +92,7 @@ class NullDevice : public amd::Device {
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory* createMemory(amd::Memory& owner) const { return nullptr; }
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory* createMemory(size_t size) const { return nullptr; }
|
||||
|
||||
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const { return nullptr; }
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
@@ -404,9 +403,7 @@ class Device : public NullDevice {
|
||||
//! Memory allocation
|
||||
virtual device::Memory* createMemory(amd::Memory& owner //!< abstraction layer memory object
|
||||
) const;
|
||||
virtual device::Memory* createMemory(size_t size //!< Size of memory allocation
|
||||
) const;
|
||||
|
||||
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const;
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
|
||||
@@ -1323,7 +1323,9 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
|
||||
Pal::GpuMemoryCreateInfo createInfo = {};
|
||||
createInfo.size = desc().width_ * elementSize_;
|
||||
createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
|
||||
createInfo.alignment = desc().scratch_ ? 64 * Ki : MaxGpuAlignment;
|
||||
createInfo.alignment = (params && params->alignment_ != 0)
|
||||
? params->alignment_
|
||||
: (desc().scratch_ ? 64 * Ki : MaxGpuAlignment);
|
||||
createInfo.vaRange = Pal::VaRange::Default;
|
||||
createInfo.priority = Pal::GpuMemPriority::Normal;
|
||||
|
||||
|
||||
@@ -105,7 +105,9 @@ class Resource : public amd::HeapObject {
|
||||
VirtualGPU* gpu_; //!< Resource won't be shared between multiple queues
|
||||
const Resource* svmBase_; //!< SVM base for MGPU allocations
|
||||
bool interprocess_; //!< Resource can be used for interprocess communication
|
||||
CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr), interprocess_(false) {}
|
||||
size_t alignment_; //!< allocation address alignment
|
||||
CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr), interprocess_(false),
|
||||
alignment_(0) {}
|
||||
};
|
||||
|
||||
struct PinnedParams : public CreateParams {
|
||||
|
||||
@@ -2044,7 +2044,7 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
device::Memory* Device::createMemory(size_t size) const {
|
||||
device::Memory* Device::createMemory(size_t size, size_t alignment) const {
|
||||
auto buffer = new roc::Buffer(*this, size);
|
||||
static constexpr bool LocalAlloc = true;
|
||||
if ((buffer == nullptr) || !buffer->create(LocalAlloc)) {
|
||||
|
||||
@@ -178,11 +178,10 @@ class NullDevice : public amd::Device {
|
||||
ShouldNotReachHere();
|
||||
return nullptr;
|
||||
}
|
||||
device::Memory* createMemory(size_t size) const override {
|
||||
device::Memory* createMemory(size_t size, size_t alignment = 0) const override {
|
||||
ShouldNotReachHere();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//! Sampler object allocation
|
||||
bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
@@ -369,8 +368,7 @@ class Device : public NullDevice {
|
||||
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr);
|
||||
|
||||
virtual device::Memory* createMemory(amd::Memory& owner) const;
|
||||
virtual device::Memory* createMemory(size_t size) const;
|
||||
|
||||
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const;
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
|
||||
@@ -77,7 +77,8 @@ void HostMemoryReference::deallocateMemory(const Context& context) {
|
||||
}
|
||||
}
|
||||
|
||||
Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmPtr)
|
||||
Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmPtr,
|
||||
size_t alignment)
|
||||
: numDevices_(0),
|
||||
deviceMemories_(NULL),
|
||||
destructorCallbacks_(NULL),
|
||||
@@ -96,7 +97,8 @@ Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmP
|
||||
svmHostAddress_(svmPtr),
|
||||
resOffset_(0),
|
||||
flagsEx_(0),
|
||||
lockMemoryOps_(true) /* Memory Ops Lock */ {
|
||||
lockMemoryOps_(true), /* Memory Ops Lock */
|
||||
alignment_(alignment) {
|
||||
svmPtrCommited_ = (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) ? true : false;
|
||||
canBeCached_ = true;
|
||||
}
|
||||
|
||||
@@ -216,7 +216,6 @@ class Memory : public amd::RuntimeObject {
|
||||
uint32_t uniqueId_ = 0;
|
||||
//! used to save the user data during memory allocation.
|
||||
UserData userData_;
|
||||
|
||||
private:
|
||||
//! Disable default assignment operator
|
||||
Memory& operator=(const Memory&);
|
||||
@@ -227,6 +226,7 @@ class Memory : public amd::RuntimeObject {
|
||||
Monitor lockMemoryOps_; //!< Lock to serialize memory operations
|
||||
std::set<Memory*> subBuffers_; //!< List of all subbuffers for this memory object
|
||||
device::Memory* svmBase_; //!< svmBase allocation for MGPU case
|
||||
size_t alignment_ = 0; //!< alignment for allocation address
|
||||
|
||||
protected:
|
||||
//! The constructor creates a memory object but does not allocate either host memory
|
||||
@@ -235,7 +235,8 @@ class Memory : public amd::RuntimeObject {
|
||||
Type type, //!< Memory type
|
||||
Flags flags, //!< Object's flags
|
||||
size_t size, //!< Memory size
|
||||
void* svmPtr = NULL //!< svm host memory address, NULL if no SVM mem object
|
||||
void* svmPtr = NULL, //!< svm host memory address, NULL if no SVM mem object
|
||||
size_t alignment = 0 //!< allocation addr alignment
|
||||
);
|
||||
Memory(Memory& parent, //!< Context object
|
||||
Flags flags, //!< Object's flags
|
||||
@@ -419,6 +420,9 @@ class Memory : public amd::RuntimeObject {
|
||||
|
||||
//! Validate memory access for vmm memory
|
||||
bool ValidateMemAccess(const Device& dev, bool read_write);
|
||||
|
||||
//! Returns the allocation address alignment (alignment_)
|
||||
size_t getAlignment() const { return alignment_; }
|
||||
};
|
||||
|
||||
//! Buffers are a specialization of memory. Just a wrapper, really,
|
||||
@@ -436,8 +440,8 @@ class Buffer : public Memory {
|
||||
: Memory(context, type, flags, size) {}
|
||||
|
||||
public:
|
||||
Buffer(Context& context, Flags flags, size_t size, void* svmPtr = NULL)
|
||||
: Memory(context, CL_MEM_OBJECT_BUFFER, flags, size, svmPtr) {}
|
||||
Buffer(Context& context, Flags flags, size_t size, void* svmPtr = NULL, size_t alignment = 0)
|
||||
: Memory(context, CL_MEM_OBJECT_BUFFER, flags, size, svmPtr, alignment) {}
|
||||
Buffer(Memory& parent, Flags flags, size_t origin, size_t size)
|
||||
: Memory(parent, flags, origin, size) {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user