SWDEV-322620 - Virtual Memory Management

Adding virtual memory management APIs to rocclr.
The HIP layer will handle virtual allocs on devices.

Change-Id: Ia978f105c2c3fed3959c77580ba228e845105754


[ROCm/clr commit: b5f555f9ec]
This commit is contained in:
Christophe Paquot
2022-04-13 15:46:26 -07:00
committed by Christophe Paquot
parent 4b4137ae63
commit 1024cb58a7
8 changed files with 142 additions and 12 deletions
+33
View File
@@ -1718,6 +1718,39 @@ class Device : public RuntimeObject {
*/
virtual void svmFree(void* ptr) const = 0;
/**
* Reserve a virtual address (VA) range with no backing store
*
* @param addr Start address requested
* @param size Size of the range in bytes
* @param alignment Alignment in bytes
*
* @return Pointer identifying the reserved range. The implementations shown
* alongside this declaration return nullptr when the reservation fails or is
* not implemented.
*/
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) = 0;
/**
* Free a VA range
*
* @param addr Start address of the range
* (presumably a value previously returned by virtualAlloc() - TODO confirm)
*/
virtual void virtualFree(void* addr) = 0;
/**
* Map a memory object to a VA range
*
* @param addr Start address of the range
* @param mem Memory object used as the backing store
* @param size Size to be mapped in bytes
*/
virtual void virtualMap(void* addr, Memory& mem, size_t size) = 0;
/**
* Unmap memory from a VA range
*
* @param addr Address of the VA range
* @param size Size of the range to be unmapped in bytes
*/
virtual void virtualUnmap(void* addr, size_t size) = 0;
/**
* @return True if the device successfully applied the SVM attributes in HMM for device memory
*/
@@ -1439,6 +1439,8 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const {
type = Resource::BusAddressable;
} else if (owner.getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD) {
type = Resource::ExternalPhysical;
} else if (owner.getMemFlags() & CL_MEM_VA_RANGE_AMD) {
type = Resource::VaRange;
}
// Use direct access if it's possible
@@ -2233,6 +2235,43 @@ void Device::svmFree(void* ptr) const {
}
}
//! Reserves a virtual address range by creating a hidden amd::Buffer flagged with
//! CL_MEM_VA_RANGE_AMD and registering it in amd::MemObjMap.
//!
//! @param addr       Requested start address; forwarded to the amd::Buffer constructor
//! @param size       Size of the range in bytes
//! @param alignment  Alignment in bytes (not used by this implementation)
//!
//! @return The pointer reported by the buffer's getSvmPtr(), or nullptr on failure
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
  // Create a hidden buffer that represents the VA range
  amd::Memory* mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, addr);
  if (mem == nullptr) {
    LogError("failed to new a va range mem object!");
    return nullptr;
  }

  if (!mem->create(nullptr, false)) {
    LogError("failed to create a va range mem object");
    mem->release();
    return nullptr;
  }

  // Fetch the device-side memory for the hidden buffer. If it is missing, do not
  // publish the object: release it and report failure instead of returning a
  // pointer that has nothing behind it.
  pal::Memory* gpuMem = getGpuMemory(mem);
  if (gpuMem == nullptr) {
    LogError("failed to create device memory for a va range mem object");
    mem->release();
    return nullptr;
  }

  // Register the range so the memory object can be found from its address later
  void* svmPtr = mem->getSvmPtr();
  amd::MemObjMap::AddMemObj(svmPtr, mem);
  return svmPtr;
}
//! Frees a VA range (see amd::Device::virtualFree).
//! The body is currently empty, so the call has no effect in this backend.
void Device::virtualFree(void* addr) {}
//! Maps a memory object to a VA range (see amd::Device::virtualMap).
//! The body is currently empty, so the call has no effect in this backend.
void Device::virtualMap(void* addr, amd::Memory& mem, size_t size) {}
//! Unmaps memory from a VA range (see amd::Device::virtualUnmap).
//! The body is currently empty, so the call has no effect in this backend.
void Device::virtualUnmap(void* addr, size_t size) {}
bool Device::AcquireExclusiveGpuAccess() {
// Lock the virtual GPU list
vgpusAccess().lock();
@@ -143,6 +143,11 @@ class NullDevice : public amd::Device {
return NULL;
}
//! SVM free is a no-op on the null device
virtual void svmFree(void* ptr) const {}
//! Virtual memory management is a no-op on the null device; nothing is ever reserved
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) { return nullptr; }
virtual void virtualFree(void* addr) {}
virtual void virtualMap(void* addr, amd::Memory& mem, size_t size) {}
virtual void virtualUnmap(void* addr, size_t size) {}
//! External semaphore import always reports failure on the null device
virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle) {
  return false;
}
//! Nothing to destroy on the null device
virtual void DestroyExtSemaphore(void* extSemaphore) {}
@@ -517,6 +522,11 @@ class Device : public NullDevice {
//! SVM free
virtual void svmFree(void* ptr) const;
//! Reserve a VA range with no backing store
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
//! Free a VA range
virtual void virtualFree(void* addr);
//! Map a memory object to a VA range
virtual void virtualMap(void* addr, amd::Memory& mem, size_t size);
//! Unmap memory from a VA range
virtual void virtualUnmap(void* addr, size_t size);
//! Returns SRD manager object
SrdManager& srds() const { return *srdManager_; }
+15 -11
View File
@@ -1220,18 +1220,20 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
return CreateImage(params, forceLinear);
}
Pal::gpusize svmPtr = 0;
if ((nullptr != params) && (nullptr != params->owner_) &&
(nullptr != params->owner_->getSvmPtr())) {
svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
desc_.SVMRes_ = true;
svmPtr = (svmPtr == 1) ? 0 : svmPtr;
if (params->owner_->getMemFlags() & CL_MEM_SVM_ATOMICS) {
desc_.gl2CacheDisabled_ = true;
if (memoryType() != Resource::VaRange) {
Pal::gpusize svmPtr = 0;
if ((nullptr != params) && (nullptr != params->owner_) &&
(nullptr != params->owner_->getSvmPtr())) {
svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
desc_.SVMRes_ = true;
svmPtr = (svmPtr == 1) ? 0 : svmPtr;
if (params->owner_->getMemFlags() & CL_MEM_SVM_ATOMICS) {
desc_.gl2CacheDisabled_ = true;
}
}
if (desc_.SVMRes_) {
return CreateSvm(params, svmPtr);
}
}
if (desc_.SVMRes_) {
return CreateSvm(params, svmPtr);
}
Pal::GpuMemoryCreateInfo createInfo = {};
@@ -1248,6 +1250,8 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
createInfo.flags.sdiExternal = true;
} else if (memoryType() == BusAddressable) {
createInfo.flags.busAddressable = true;
} else if (memoryType() == VaRange) {
createInfo.flags.virtualAlloc = true;
}
memTypeToHeap(&createInfo);
@@ -172,7 +172,8 @@ class Resource : public amd::HeapObject {
Scratch, //!< resource is scratch memory
Shader, //!< resource is a shader
P2PAccess, //!< resource is a shared resource for P2P access
VkInterop //!< resource is a Vulkan memory object
VkInterop, //!< resource is a Vulkan memory object
VaRange //!< resource is a virtual address range
};
//! Resource map flags
@@ -2266,6 +2266,24 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_
return svmPtr;
}
//! Reserves a VA range (see amd::Device::virtualAlloc).
//! No reservation is performed in this backend yet; nullptr is always returned.
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) { return nullptr; }
//! Frees a VA range (see amd::Device::virtualFree).
//! The body is currently empty, so the call has no effect in this backend.
void Device::virtualFree(void* addr) {}
//! Maps a memory object to a VA range (see amd::Device::virtualMap).
//! The body is currently empty, so the call has no effect in this backend.
void Device::virtualMap(void* addr, amd::Memory& mem, size_t size) {}
//! Unmaps memory from a VA range (see amd::Device::virtualUnmap).
//! The body is currently empty, so the call has no effect in this backend.
void Device::virtualUnmap(void* addr, size_t size) {}
// ================================================================================================
bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count,
amd::MemoryAdvice advice, bool first_alloc, bool use_cpu) const {
@@ -207,6 +207,25 @@ class NullDevice : public amd::Device {
ShouldNotReachHere();
return;
}
//! Must never be called on NullDevice: flags the call via ShouldNotReachHere()
//! and hands back nullptr.
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) {
  ShouldNotReachHere();
  void* const noRange = nullptr;
  return noRange;
}
//! Must never be called on NullDevice: flags the call via ShouldNotReachHere()
virtual void virtualFree(void* addr) { ShouldNotReachHere(); }
//! Must never be called on NullDevice: flags the call via ShouldNotReachHere()
virtual void virtualMap(void* addr, amd::Memory& mem, size_t size) { ShouldNotReachHere(); }
//! Must never be called on NullDevice: flags the call via ShouldNotReachHere()
virtual void virtualUnmap(void* addr, size_t size) { ShouldNotReachHere(); }
//! Determine if we can use device memory for SVM
const bool forceFineGrain(amd::Memory* memory) const {
@@ -437,6 +456,11 @@ class Device : public NullDevice {
virtual bool GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
size_t num_attributes, const void* dev_ptr, size_t count) const;
//! Reserve a VA range with no backing store
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
//! Free a VA range
virtual void virtualFree(void* addr);
//! Map a memory object to a VA range
virtual void virtualMap(void* addr, amd::Memory& mem, size_t size);
//! Unmap memory from a VA range
virtual void virtualUnmap(void* addr, size_t size);
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
+1
View File
@@ -40,6 +40,7 @@
// Memory flags occupying the top bits (31 down to 28) of the flags word
#define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31)
#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30)
#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29)
// Marks a memory object as a virtual address range reservation; the PAL backend
// maps this flag to Resource::VaRange when it creates the buffer
#define CL_MEM_VA_RANGE_AMD (1u << 28)
namespace device {
class Memory;