SWDEV-508632 - Align address to 2 MBs for hidden heap allocation. (#29)

[ROCm/clr commit: b5c9cbc236]
This commit is contained in:
Patel, Jaydeepkumar
2025-04-02 04:03:29 -07:00
committed by GitHub
parent 29be7230eb
commit b217d3a4e6
10 changed files with 33 additions and 24 deletions
+2 -1
View File
@@ -102,7 +102,8 @@ static void handlePayload(MessageHandler& messages, uint32_t service, uint64_t*
}
} else {
amd::Context& ctx = dev.context();
amd::Buffer* buf = new(ctx) amd::Buffer(ctx, CL_MEM_READ_WRITE, payload[1]);
amd::Buffer* buf = new(ctx) amd::Buffer(ctx, CL_MEM_READ_WRITE, payload[1], NULL,
(payload[1] == 2 * Mi) ? 2 * Mi : 0);
uint64_t va = 0;
if (buf) {
if (buf->create()) {
+2 -2
View File
@@ -1750,8 +1750,8 @@ class Device : public RuntimeObject {
//! Allocate a chunk of device memory as a cache for a CL memory object
virtual device::Memory* createMemory(Memory& owner) const = 0;
//! Allocate a chunk of device memory without owner class
virtual device::Memory* createMemory(size_t size) const = 0;
//! Allocate a chunk of device memory with address alignment
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const = 0;
//! Allocate a device sampler object
virtual bool createSampler(const Sampler&, device::Sampler**) const = 0;
+6 -3
View File
@@ -1687,6 +1687,7 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const {
if (owner.ipcShared()) {
type = Resource::IpcMemory;
}
params.alignment_ = owner.getAlignment();
// Create memory object
result = gpuMemory->create(type, &params);
@@ -1887,9 +1888,11 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
}
// ================================================================================================
device::Memory* Device::createMemory(size_t size) const {
device::Memory* Device::createMemory(size_t size, size_t alignment) const {
// Allocates a pal::Memory of `size` bytes without an owner object. `alignment` is handed to
// create() through Resource::CreateParams::alignment_.
auto buffer = new pal::Memory(*this, size);
if ((buffer == nullptr) || !buffer->create(Resource::Local)) {
Resource::CreateParams params {};
params.alignment_ = alignment;
// NOTE(review): when create() fails, `buffer` is not freed in the lines visible here --
// confirm this path does not leak the pal::Memory object.
if ((buffer == nullptr) || !buffer->create(Resource::Local, &params)) {
LogError("Couldn't allocate memory on device!");
return nullptr;
}
@@ -2607,7 +2610,7 @@ void Device::HiddenHeapAlloc(const VirtualGPU& gpu) {
heap_buffer_ = createMemory(HeapBufferSize);
if (initial_heap_size_ != 0) {
initial_heap_size_ = amd::alignUp(initial_heap_size_, 2 * Mi);
initial_heap_buffer_ = createMemory(initial_heap_size_);
initial_heap_buffer_ = createMemory(initial_heap_size_, 2 * Mi);
}
if (heap_buffer_ == nullptr) {
LogError("Heap buffer allocation failed!");
+2 -5
View File
@@ -92,8 +92,7 @@ class NullDevice : public amd::Device {
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(amd::Memory& owner) const { return nullptr; }
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(size_t size) const { return nullptr; }
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const { return nullptr; }
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
@@ -404,9 +403,7 @@ class Device : public NullDevice {
//! Memory allocation
virtual device::Memory* createMemory(amd::Memory& owner //!< abstraction layer memory object
) const;
virtual device::Memory* createMemory(size_t size //!< Size of memory allocation
) const;
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const;
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
@@ -1323,7 +1323,9 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
Pal::GpuMemoryCreateInfo createInfo = {};
createInfo.size = desc().width_ * elementSize_;
createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
createInfo.alignment = desc().scratch_ ? 64 * Ki : MaxGpuAlignment;
createInfo.alignment = (params && params->alignment_ != 0)
? params->alignment_
: (desc().scratch_ ? 64 * Ki : MaxGpuAlignment);
createInfo.vaRange = Pal::VaRange::Default;
createInfo.priority = Pal::GpuMemPriority::Normal;
@@ -105,7 +105,9 @@ class Resource : public amd::HeapObject {
VirtualGPU* gpu_; //!< Resource won't be shared between multiple queues
const Resource* svmBase_; //!< SVM base for MGPU allocations
bool interprocess_; //!< Resource can be used in the interprocess communication
CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr), interprocess_(false) {}
size_t alignment_; //!< allocation address alignment
CreateParams() : owner_(nullptr), gpu_(nullptr), svmBase_(nullptr), interprocess_(false),
alignment_(0) {}
};
struct PinnedParams : public CreateParams {
@@ -2044,7 +2044,7 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
}
// ================================================================================================
device::Memory* Device::createMemory(size_t size) const {
device::Memory* Device::createMemory(size_t size, size_t alignment) const {
// NOTE(review): `alignment` is not used in the lines visible here -- create() is called with
// LocalAlloc only. Confirm whether this backend is expected to honor the requested alignment.
auto buffer = new roc::Buffer(*this, size);
static constexpr bool LocalAlloc = true;
if ((buffer == nullptr) || !buffer->create(LocalAlloc)) {
@@ -178,11 +178,10 @@ class NullDevice : public amd::Device {
ShouldNotReachHere();
return nullptr;
}
device::Memory* createMemory(size_t size) const override {
device::Memory* createMemory(size_t size, size_t alignment = 0) const override {
ShouldNotReachHere();
return nullptr;
}
//! Sampler object allocation
bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
@@ -369,8 +368,7 @@ class Device : public NullDevice {
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr);
virtual device::Memory* createMemory(amd::Memory& owner) const;
virtual device::Memory* createMemory(size_t size) const;
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const;
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
+4 -2
View File
@@ -77,7 +77,8 @@ void HostMemoryReference::deallocateMemory(const Context& context) {
}
}
Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmPtr)
Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmPtr,
size_t alignment)
: numDevices_(0),
deviceMemories_(NULL),
destructorCallbacks_(NULL),
@@ -96,7 +97,8 @@ Memory::Memory(Context& context, Type type, Flags flags, size_t size, void* svmP
svmHostAddress_(svmPtr),
resOffset_(0),
flagsEx_(0),
lockMemoryOps_(true) /* Memory Ops Lock */ {
lockMemoryOps_(true), /* Memory Ops Lock */
alignment_(alignment) /* Allocation address alignment */ {
svmPtrCommited_ = (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) ? true : false;
canBeCached_ = true;
}
+8 -4
View File
@@ -216,7 +216,6 @@ class Memory : public amd::RuntimeObject {
uint32_t uniqueId_ = 0;
//! used to save the user data during memory allocation.
UserData userData_;
private:
//! Disable default assignment operator
Memory& operator=(const Memory&);
@@ -227,6 +226,7 @@ class Memory : public amd::RuntimeObject {
Monitor lockMemoryOps_; //!< Lock to serialize memory operations
std::set<Memory*> subBuffers_; //!< List of all subbuffers for this memory object
device::Memory* svmBase_; //!< svmBase allocation for MGPU case
size_t alignment_ = 0; //!< alignment for allocation address
protected:
//! The constructor creates a memory object but does not allocate either host memory
@@ -235,7 +235,8 @@ class Memory : public amd::RuntimeObject {
Type type, //!< Memory type
Flags flags, //!< Object's flags
size_t size, //!< Memory size
void* svmPtr = NULL //!< svm host memory address, NULL if no SVM mem object
void* svmPtr = NULL, //!< svm host memory address, NULL if no SVM mem object
size_t alignment = 0 //!< allocation addr alignment
);
Memory(Memory& parent, //!< Context object
Flags flags, //!< Object's flags
@@ -419,6 +420,9 @@ class Memory : public amd::RuntimeObject {
//! Validate memory access for vmm memory
bool ValidateMemAccess(const Device& dev, bool read_write);
//! Returns the allocation address alignment stored in alignment_
//! (0 unless an alignment was passed to the constructor)
size_t getAlignment() const { return alignment_; }
};
//! Buffers are a specialization of memory. Just a wrapper, really,
@@ -436,8 +440,8 @@ class Buffer : public Memory {
: Memory(context, type, flags, size) {}
public:
Buffer(Context& context, Flags flags, size_t size, void* svmPtr = NULL)
: Memory(context, CL_MEM_OBJECT_BUFFER, flags, size, svmPtr) {}
Buffer(Context& context, Flags flags, size_t size, void* svmPtr = NULL, size_t alignment = 0)
: Memory(context, CL_MEM_OBJECT_BUFFER, flags, size, svmPtr, alignment) {}
Buffer(Memory& parent, Flags flags, size_t origin, size_t size)
: Memory(parent, flags, origin, size) {}