SWDEV-413997 - Initial VMM changes for ROCm path.

Change-Id: I4405fd7b53182eb4c4622835c811c0dc08461537
Этот коммит содержится в:
kjayapra-amd
2023-07-21 18:46:33 -04:00
коммит произвёл Karthik Jayaprakash
родитель 8fe1d9dda1
Коммит 3ef829939a
13 изменённых файлов: 366 добавлений и 84 удалений
+103 -68
Просмотреть файл
@@ -22,6 +22,16 @@
#include "hip_internal.hpp"
#include "hip_vm.hpp"
// hipMemGetAccess/hipMemSetAccess convert between hipMemAccessFlags and
// amd::Device::VmmAccess with a plain static_cast, so matching enumerators must have
// identical numeric values. Break the build if either side is ever renumbered.
static_assert(static_cast<uint32_t>(hipMemAccessFlagsProtNone)
== static_cast<uint32_t>(amd::Device::VmmAccess::kNone),
"Mem Access Flag None mismatch with ROCclr!");
static_assert(static_cast<uint32_t>(hipMemAccessFlagsProtRead)
== static_cast<uint32_t>(amd::Device::VmmAccess::kReadOnly),
"Mem Access Flag Read mismatch with ROCclr!");
static_assert(static_cast<uint32_t>(hipMemAccessFlagsProtReadWrite)
== static_cast<uint32_t>(amd::Device::VmmAccess::kReadWrite),
"Mem Access Flag Read Write mismatch with ROCclr!");
hipError_t hipMemAddressFree(void* devPtr, size_t size) {
HIP_INIT_API(hipMemAddressFree, devPtr, size);
@@ -29,52 +39,47 @@ hipError_t hipMemAddressFree(void* devPtr, size_t size) {
HIP_RETURN(hipErrorInvalidValue);
}
for (auto& dev: g_devices) {
dev->devices()[0]->virtualFree(devPtr);
}
// Single call frees address range for all devices.
g_devices[0]->devices()[0]->virtualFree(devPtr);
HIP_RETURN(hipSuccess);
}
hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr, unsigned long long flags) {
hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr,
unsigned long long flags) {
HIP_INIT_API(hipMemAddressReserve, ptr, size, alignment, addr, flags);
if (ptr == nullptr ||
flags !=0) {
if (ptr == nullptr || flags != 0) {
HIP_RETURN(hipErrorInvalidValue);
}
const auto& dev_info = g_devices[0]->devices()[0]->info();
if (size == 0 || ((size % dev_info.virtualMemAllocGranularity_) != 0)) {
HIP_RETURN(hipErrorMemoryAllocation);
}
// Initialize the ptr, single virtual alloc call would reserve va range for all devices.
*ptr = nullptr;
*ptr = g_devices[0]->devices()[0]->virtualAlloc(addr, size, alignment);
if (*ptr == nullptr) {
HIP_RETURN(hipErrorOutOfMemory);
}
void* startAddress = addr;
for (auto& dev : g_devices) {
*ptr = dev->devices()[0]->virtualAlloc(startAddress, size, alignment);
// if addr==0 we generate the va and use it for other devices
if (startAddress == nullptr) {
startAddress = *ptr;
} else if (*ptr != startAddress) {
// if we cannot reserve the same VA on other devices, just fail
for (auto& d : g_devices) {
if (d == dev) HIP_RETURN(hipErrorOutOfMemory);
d->devices()[0]->virtualFree(startAddress);
}
}
// The requested address is only a hint. Report when the reservation landed elsewhere.
// (The previous check compared with == and therefore logged only when the hint WAS honoured,
// and it supplied one argument for two format specifiers.)
if (addr != nullptr && addr != *ptr) {
  LogPrintfError("Requested address : %p was not allocated. Allocated address : %p ", addr, *ptr);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, const hipMemAllocationProp* prop, unsigned long long flags) {
hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
const hipMemAllocationProp* prop, unsigned long long flags) {
HIP_INIT_API(hipMemCreate, handle, size, prop, flags);
if (handle == nullptr ||
size == 0 ||
flags != 0 ||
prop == nullptr ||
prop->type != hipMemAllocationTypePinned ||
prop->location.type != hipMemLocationTypeDevice ||
// Currently we do not support Pinned memory
if (handle == nullptr || size == 0 || flags != 0 || prop == nullptr ||
prop->type != hipMemAllocationTypePinned || prop->location.type != hipMemLocationTypeDevice ||
prop->location.id >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -84,6 +89,7 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, co
HIP_RETURN(hipErrorNotSupported);
}
// Device info validation
const auto& dev_info = g_devices[prop->location.id]->devices()[0]->info();
if (dev_info.maxPhysicalMemAllocSize_ < size) {
@@ -95,34 +101,39 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, co
amd::Context* amdContext = g_devices[prop->location.id]->asContext();
void* ptr = amd::SvmBuffer::malloc(*amdContext, 0, size, dev_info.memBaseAddrAlign_,
nullptr);
// When ROCCLR_MEM_PHYMEM is set, ROCr impl gets and stores unique hsa handle. Flag no-op on PAL.
void* ptr = amd::SvmBuffer::malloc(*amdContext, ROCCLR_MEM_PHYMEM, size,
dev_info.memBaseAddrAlign_, nullptr);
// Handle out of memory cases,
if (ptr == nullptr) {
size_t free = 0, total =0;
hipError_t err = hipMemGetInfo(&free, &total);
if (err == hipSuccess) {
LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu | total :%zu \n", size, free, total);
hipError_t hip_error = hipMemGetInfo(&free, &total);
if (hip_error == hipSuccess) {
LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu"
"| total :%zu \n", size, free, total);
}
HIP_RETURN(hipErrorOutOfMemory);
}
// Add this to amd::Memory object, so this ptr is accesible for other hipmemory operations.
size_t offset = 0; //this is ignored
amd::Memory* memObj = getMemoryObject(ptr, offset);
//saves the current device id so that it can be accessed later
memObj->getUserData().deviceId = prop->location.id;
memObj->getUserData().data = new hip::GenericAllocation(ptr, size, *prop);
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(memObj->getUserData().data);
HIP_RETURN(hipSuccess);
}
hipError_t hipMemExportToShareableHandle(void* shareableHandle, hipMemGenericAllocationHandle_t handle, hipMemAllocationHandleType handleType, unsigned long long flags) {
hipError_t hipMemExportToShareableHandle(void* shareableHandle,
hipMemGenericAllocationHandle_t handle,
hipMemAllocationHandleType handleType,
unsigned long long flags) {
HIP_INIT_API(hipMemExportToShareableHandle, shareableHandle, handle, handleType, flags);
if (flags != 0 ||
handle == nullptr ||
shareableHandle == nullptr) {
if (flags != 0 || handle == nullptr || shareableHandle == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -132,23 +143,30 @@ hipError_t hipMemExportToShareableHandle(void* shareableHandle, hipMemGenericAll
hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* location, void* ptr) {
HIP_INIT_API(hipMemGetAccess, flags, location, ptr);
if (flags == nullptr ||
location == nullptr ||
ptr == nullptr) {
if (flags == nullptr || location == nullptr || ptr == nullptr
|| location->type != hipMemLocationTypeDevice || location->id >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidValue)
}
// Convert the access flags to amd::Device access flag
auto& dev = g_devices[location->id];
amd::Device::VmmAccess access_flags = static_cast<amd::Device::VmmAccess>(0);
if (!dev->devices()[0]->GetMemAccess(ptr, &access_flags)) {
HIP_RETURN(hipErrorInvalidValue);
}
*flags = static_cast<unsigned long long>(access_flags);
HIP_RETURN(hipSuccess);
}
hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop, hipMemAllocationGranularity_flags option) {
hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop,
hipMemAllocationGranularity_flags option) {
HIP_INIT_API(hipMemGetAllocationGranularity, granularity, prop, option);
if (granularity == nullptr ||
prop == nullptr ||
prop->type != hipMemAllocationTypePinned ||
prop->location.type != hipMemLocationTypeDevice ||
prop->location.id >= g_devices.size()) {
if (granularity == nullptr || prop == nullptr || prop->type != hipMemAllocationTypePinned ||
prop->location.type != hipMemLocationTypeDevice || prop->location.id >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -171,7 +189,8 @@ hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, h
HIP_RETURN(hipSuccess);
}
hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle, hipMemAllocationHandleType shHandleType) {
hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle,
hipMemAllocationHandleType shHandleType) {
HIP_INIT_API(hipMemImportFromShareableHandle, handle, osHandle, shHandleType);
if (handle == nullptr || osHandle == nullptr) {
@@ -181,22 +200,23 @@ hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* hand
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle, unsigned long long flags) {
hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle,
unsigned long long flags) {
HIP_INIT_API(hipMemMap, ptr, size, offset, handle, flags);
if (ptr == nullptr ||
handle == nullptr ||
size == 0 ||
offset != 0 ||
flags != 0) {
if (ptr == nullptr || handle == nullptr || size == 0 || offset != 0 || flags != 0) {
HIP_RETURN(hipErrorInvalidValue);
}
// Re-interpret the ga handle and set the mapped flag
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(handle);
ga->retain();
auto& queue = *g_devices[ga->GetProperties().location.id]->NullStream();
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size, &ga->asAmdMemory());
// Map the physical address to virtual address
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
&ga->asAmdMemory());
cmd->enqueue();
cmd->awaitCompletion();
cmd->release();
@@ -220,11 +240,13 @@ hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, unsigned int count
hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) {
HIP_INIT_API(hipMemRelease, handle);
if (handle == nullptr) HIP_RETURN(hipErrorInvalidValue);
if (handle == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
// Re-interpret the ga handle and make sure it is not already released.
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(handle);
delete ga;
ga->release();
HIP_RETURN(hipSuccess);
}
@@ -232,7 +254,9 @@ hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) {
hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr) {
HIP_INIT_API(hipMemRetainAllocationHandle, handle, addr);
if (handle == nullptr || addr == nullptr) HIP_RETURN(hipErrorInvalidValue);
if (handle == nullptr || addr == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Memory* mem = amd::MemObjMap::FindMemObj(addr);
@@ -252,10 +276,18 @@ hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle,
hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc, size_t count) {
HIP_INIT_API(hipMemSetAccess, ptr, size, desc, count);
if (ptr == nullptr ||
size == 0 ||
desc == nullptr ||
count == 0) {
if (ptr == nullptr || size == 0 || desc == nullptr || count == 0) {
HIP_RETURN(hipErrorInvalidValue);
}
if (desc->location.id >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidValue)
}
auto& dev = g_devices[desc->location.id];
amd::Device::VmmAccess access_flags = static_cast<amd::Device::VmmAccess>(desc->flags);
if (!dev->devices()[0]->SetMemAccess(ptr, size, access_flags, count)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -265,13 +297,15 @@ hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc,
hipError_t hipMemUnmap(void* ptr, size_t size) {
HIP_INIT_API(hipMemUnmap, ptr, size);
if (ptr == nullptr) HIP_RETURN(hipErrorInvalidValue);
if (ptr == nullptr || size == 0) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Memory* va = amd::MemObjMap::FindMemObj(ptr);
auto& queue = *g_devices[va->getUserData().deviceId]->NullStream();
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size, nullptr);
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
nullptr);
cmd->enqueue();
cmd->awaitCompletion();
cmd->release();
@@ -280,6 +314,7 @@ hipError_t hipMemUnmap(void* ptr, size_t size) {
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(va->getUserData().data);
va->setSvmPtr(ga->genericAddress());
HIP_RETURN(hipSuccess);
}
ga->release();
HIP_RETURN(hipSuccess);
}
+16 -7
Просмотреть файл
@@ -24,6 +24,8 @@
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
#include "platform/object.hpp"
hipError_t ihipFree(void* ptr);
namespace hip {
@@ -36,22 +38,29 @@ struct MemMapAllocUserData {
MemMapAllocUserData(void* ptr, size_t size, amd::Memory* va) : ptr_(ptr), size_(size), va_(va) {}
};
class GenericAllocation {
void* ptr_;
size_t size_;
hipMemAllocationProp properties_;
class GenericAllocation : public amd::RuntimeObject {
void* ptr_; //<! Device ptr
size_t size_; //<! Allocated size
hipMemAllocationProp properties_; //<! Allocation Properties
public:
GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop): ptr_(ptr), size_(size), properties_(prop) {}
~GenericAllocation() { hipError_t err = ihipFree(ptr_); }
GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop)
: ptr_(ptr), size_(size), properties_(prop) {}
~GenericAllocation() {
hipError_t err = ihipFree(ptr_);
}
const hipMemAllocationProp& GetProperties() const { return properties_; }
hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() { return reinterpret_cast<hipMemGenericAllocationHandle_t>(this); }
hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() {
return reinterpret_cast<hipMemGenericAllocationHandle_t>(this);
}
amd::Memory& asAmdMemory() {
size_t discardOffset;
return *getMemoryObject(genericAddress(), discardOffset);
}
void* genericAddress() const { return ptr_; }
virtual ObjectType objectType() const { return ObjectTypeVMMAlloc; }
};
};
+28
Просмотреть файл
@@ -1584,6 +1584,14 @@ class Device : public RuntimeObject {
kCacheStateSystem = 2
} CacheState;
//! Access permissions that can be applied to a virtual memory range.
//! The values are cast directly to and from the HIP access flags and the HSA access
//! permissions (static_asserts in those layers check the mapping), so do not renumber them.
enum class VmmAccess {
kNone = 0x0,  //!< No access
kReadOnly = 0x1,  //!< Read access only
kWriteOnly = 0x2,  //!< Write access only
kReadWrite = 0x3  //!< Read and write access
};
typedef std::pair<LinkAttribute, int32_t /* value */> LinkAttrType;
static constexpr size_t kP2PStagingSize = 4 * Mi;
@@ -1784,6 +1792,25 @@ class Device : public RuntimeObject {
*/
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) = 0;
/**
 * Set access permissions for a virtual memory object.
 *
 * @param va_addr Virtual Address ptr
 * @param va_size Virtual Address Size
 * @param access_flags Access permissions
 * @param count Number of access permissions
 *
 * @return true on success, false on failure
 */
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
size_t count) = 0;
/**
 * Get access permissions for a virtual memory object.
 *
 * @param va_addr Virtual Address ptr
 * @param access_flags_ptr Access permissions to be filled
 *
 * @return true on success, false on failure
 */
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) = 0;
/**
* Free a VA range
*
@@ -1966,6 +1993,7 @@ class Device : public RuntimeObject {
virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset, size_t size = 0) {
return nullptr;
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
virtual device::UriLocator* createUriLocator() const = 0;
+5 -1
Просмотреть файл
@@ -2385,7 +2385,11 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
return nullptr;
}
if (!mem->create(nullptr, false)) {
constexpr bool kSysMemAlloc = false;
constexpr bool kSkipAlloc = false;
constexpr bool kForceAlloc = true;
// Force the alloc now for VA_Range reservation.
if (!mem->create(nullptr, kSysMemAlloc, kSkipAlloc, kForceAlloc)) {
LogError("failed to create a va range mem object");
mem->release();
return nullptr;
+16
Просмотреть файл
@@ -147,6 +147,14 @@ class NullDevice : public amd::Device {
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) { return nullptr; };
virtual void virtualFree(void* addr) { };
//! Stub implementation: changes nothing and always reports success.
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count) {
return true;
}
//! Stub implementation: always reports success and never writes to *access_flags_ptr,
//! so the caller keeps whatever value it initialised the output with.
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) {
return true;
}
virtual bool importExtSemaphore(void** extSemaphore,const amd::Os::FileDesc& handle,
amd::ExternalSemaphoreHandleType sem_handle_type) override {
return false;
@@ -535,6 +543,14 @@ class Device : public NullDevice {
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
virtual void virtualFree(void* addr);
//! Stub implementation: changes nothing and always reports success.
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count) {
return true;
}
//! Stub implementation: always reports success and never writes to *access_flags_ptr,
//! so the caller keeps whatever value it initialised the output with.
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) {
return true;
}
//! Returns SRD manger object
SrdManager& srds() const { return *srdManager_; }
+120 -5
Просмотреть файл
@@ -64,6 +64,20 @@
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
#define OPENCL_C_VERSION_STR XSTR(OPENCL_C_MAJOR) "." XSTR(OPENCL_C_MINOR)
// Device::SetMemAccess/GetMemAccess cast between amd::Device::VmmAccess and
// hsa_access_permission_t, so matching enumerators must share the same numeric value.
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kNone)
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_NONE),
"Vmm Access Flag None mismatch with ROC-runtime!");
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kReadOnly)
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_RO),
"Vmm Access Flag Read mismatch with ROCr-runtime!");
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kWriteOnly)
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_WO),
"Vmm Access Flag Write mismatch with ROC-runtime!");
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kReadWrite)
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_RW),
"Vmm Access Flag Read Write mismatch with ROC-runtime!");
#ifndef WITHOUT_HSA_BACKEND
namespace {
@@ -935,6 +949,14 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
} else {
dev->info_.largeBar_ = ROC_ENABLE_LARGE_BAR;
}
// Query the recommended granularity for this pool.
stat = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
&(dev->info_.virtualMemAllocGranularity_));
if (stat != HSA_STATUS_SUCCESS) {
LogPrintfError("Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE info"
"failed with hsa_status: %d \n", stat);
}
}
if (dev->gpuvm_segment_.handle == 0) {
@@ -1720,7 +1742,17 @@ bool Device::populateOCLDeviceConstants() {
maxSdmaReadMask_, maxSdmaWriteMask_);
info_.globalCUMask_ = {};
// Virtual memory Management Support, if set to true then the HW and SW Stack supports VMM.
info_.virtualMemoryManagement_ = false;
if (HIP_VMEM_MANAGE_SUPPORT) {
if (HSA_STATUS_SUCCESS != hsa_system_get_info(
static_cast<hsa_system_info_t>(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED),
&info_.virtualMemoryManagement_)) {
LogError("HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED query failed ");
}
}
switch (isa().versionMajor()) {
case (11):
if (isa().versionMinor() == 0) {
@@ -2212,6 +2244,19 @@ bool Device::allowPeerAccess(device::Memory* memory) const {
return true;
}
uint64_t Device::deviceVmemAlloc(size_t size, uint64_t flags) const {
hsa_amd_vmem_alloc_handle_t hsa_vmem_handle {};
// We only allow pinned memory at this time.
hsa_status_t hsa_status = hsa_amd_vmem_handle_create(gpuvm_segment_, size, MEMORY_TYPE_PINNED,
flags, &hsa_vmem_handle);
if (hsa_status != HSA_STATUS_SUCCESS) {
LogPrintfError("Failed hsa_amd_vmem_handle_create! Failed with hsa status: %d \n", hsa_status);
}
return hsa_vmem_handle.handle;
}
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
: (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
@@ -2311,13 +2356,83 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_
return svmPtr;
}
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
{
return nullptr;
void* Device::virtualAlloc(void* req_addr, size_t size, size_t alignment) {
void* vptr = nullptr;
// Reserves the address using HSA APIs, with requested address.
// There is no guarantee that we will get the requested address.
hsa_status_t hsa_status = hsa_amd_vmem_address_reserve(&vptr, size,
reinterpret_cast<uint64_t>(req_addr), 0);
if (hsa_status != HSA_STATUS_SUCCESS) {
LogPrintfError("Failed hsa_amd_vmem_address_reserve. Failed with status: %d \n", hsa_status);
return nullptr;
}
// This mem->create() does not create an actual memory but stores the memory info with given vptr.
auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, vptr);
if (mem == nullptr) {
LogError("failed to new a va range mem object!");
return nullptr;
}
if (!mem->create(nullptr, false)) {
LogError("failed to create a va range mem object");
mem->release();
return nullptr;
}
// Assert to make sure that amd::Memory object has set the right ptr.
guarantee(vptr == mem->getSvmPtr(), "amd::Memory object does not have the right ptr");
return mem->getSvmPtr();
}
void Device::virtualFree(void* addr)
{
void Device::virtualFree(void* addr) {
amd::Memory* memObj = amd::MemObjMap::FindVirtualMemObj(addr);
if (memObj == nullptr) {
LogPrintfError("Cannot find the Virtual MemObj entry for this addr 0x%x", addr);
}
hsa_status_t hsa_status = hsa_amd_vmem_address_free(memObj->getSvmPtr(), memObj->getSize());
if (hsa_status != HSA_STATUS_SUCCESS) {
LogPrintfError("Failed hsa_amd_vmem_address_free. Failed with status:%d \n", hsa_status);
}
}
bool Device::SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count) {
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
hsa_amd_memory_access_desc_t desc;
desc.permissions = static_cast<hsa_access_permission_t>(access_flags);
desc.agent_handle = getBackendDevice();
if ((hsa_status = hsa_amd_vmem_set_access(va_addr, va_size, &desc, count))
!= HSA_STATUS_SUCCESS) {
LogPrintfError("Failed hsa_amd_vmem_set_access. Failed with status:%d \n", hsa_status);
return false;
}
return true;
}
bool Device::GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) {
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
hsa_access_permission_t perms;
size_t discard_offset = 0;
amd::Memory* va_mem_obj = amd::MemObjMap::FindMemObj(va_addr, &discard_offset);
if (va_mem_obj == nullptr) {
LogPrintfError("Failed to get Memory Object for va_addr: 0x%x", va_addr);
return false;
}
if ((hsa_status = hsa_amd_vmem_get_access(va_mem_obj->getSvmPtr(), &perms, getBackendDevice()))
!= HSA_STATUS_SUCCESS) {
LogPrintfError("Failed hsa_amd_vmem_get_access. Failed with status:%d \n", hsa_status);
return false;
}
*access_flags_ptr = static_cast<VmmAccess>(perms);
return true;
}
// ================================================================================================
+17 -3
Просмотреть файл
@@ -221,7 +221,7 @@ class NullDevice : public amd::Device {
ShouldNotReachHere();
return;
}
void* virtualAlloc(void* addr, size_t size, size_t alignment) override {
void* virtualAlloc(void* req_addr, size_t size, size_t alignment) override {
ShouldNotReachHere();
return nullptr;
}
@@ -231,6 +231,17 @@ class NullDevice : public amd::Device {
return;
}
//! Not expected to be called on NullDevice: trips ShouldNotReachHere() and returns false.
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count)
override {
ShouldNotReachHere();
return false;
}
//! Not expected to be called on NullDevice: trips ShouldNotReachHere() and returns false
//! without writing to *access_flags_ptr.
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) override {
ShouldNotReachHere();
return false;
}
//! Determine if we can use device memory for SVM
const bool forceFineGrain(amd::Memory* memory) const {
return (memory->getContext().devices().size() > 1);
@@ -439,7 +450,7 @@ class Device : public NullDevice {
bool deviceAllowAccess(void* dst) const;
bool allowPeerAccess(device::Memory* memory) const;
uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
void memFree(void* ptr, size_t size) const;
@@ -454,9 +465,12 @@ class Device : public NullDevice {
virtual bool GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
size_t num_attributes, const void* dev_ptr, size_t count) const;
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
virtual void* virtualAlloc(void* req_addr, size_t size, size_t alignment);
virtual void virtualFree(void* addr);
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count);
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr);
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
+10
Просмотреть файл
@@ -748,9 +748,19 @@ bool Buffer::create(bool alloc_local) {
owner()->setSvmPtr(orig_dev_ptr);
}
// Allocate backing storage in device local memory unless UHP or AHP are set
cl_mem_flags memFlags = owner()->getMemFlags();
if (memFlags & ROCCLR_MEM_PHYMEM) {
// If this is physical memory request, then get an handle and store it in user data
owner()->getUserData().hsa_handle = dev().deviceVmemAlloc(owner()->getSize(), 0);
if (owner()->getUserData().hsa_handle == 0) {
LogError("HSA Opaque Handle returned was null");
}
}
if ((owner()->parent() == nullptr) &&
(owner()->getSvmPtr() != nullptr)) {
if (dev().forceFineGrain(owner()) || dev().isFineGrainedSystem(true)) {
+45
Просмотреть файл
@@ -2539,6 +2539,51 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
profilingEnd(cmd);
}
// ================================================================================================
//! Maps a physical allocation into a reserved virtual address range, or unmaps the range.
//! The direction is chosen by vcmd.memory(): a non-null memory object means map, nullptr
//! means unmap. HSA failures are only logged here; the function returns nothing either way.
void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
profilingBegin(vcmd);
// Find the amd::Memory object for virtual ptr.
amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
// Nothing to do if the pointer is not a known VA-range object; no error is logged here.
if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
profilingEnd(vcmd);
return;
}
// Get the amd::Memory object for the physical address
amd::Memory* pa = vcmd.memory();
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
// If the physical memory object is set, this is a map command. If it is not set, it is an
// unmap command.
if (pa != nullptr) {
// Map the physical allocation to the virtual address range through the HSA API, using the
// opaque handle stored in the physical memory object's user data.
// NOTE(review): the mapped size is the size of the whole VA-range object rather than a size
// taken from the command - confirm this is intended.
hsa_amd_vmem_alloc_handle_t opaque_hsa_handle;
opaque_hsa_handle.handle = pa->getUserData().hsa_handle;
if ((hsa_status = hsa_amd_vmem_map(va->getSvmPtr(), va->getSize(), va->getOffset(),
opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) {
assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
// Now that we have mapped physical addr to virtual addr, make an entry in the MemObjMap.
amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
} else {
LogError("HSA Command: hsa_amd_vmem_map failed!");
}
} else {
// No physical memory object was supplied, so unmap the virtual range.
if ((hsa_status = hsa_amd_vmem_unmap(va->getSvmPtr(), va->getSize())) == HSA_STATUS_SUCCESS) {
// assert the va is mapped and needs to be removed
assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
amd::MemObjMap::RemoveMemObj(vcmd.ptr());
} else {
LogError("HSA Command: hsa_amd_vmem_unmap failed");
}
}
profilingEnd(vcmd);
}
// ================================================================================================
void VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd) {
// Make sure VirtualGPU has an exclusive access to the resources
+1
Просмотреть файл
@@ -336,6 +336,7 @@ class VirtualGPU : public device::VirtualDevice {
void flush(amd::Command* list = nullptr, bool wait = false);
void submitFillMemory(amd::FillMemoryCommand& cmd);
void submitStreamOperation(amd::StreamOperationCommand& cmd);
void submitVirtualMap(amd::VirtualMapCommand& cmd);
void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
void submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd);
+2
Просмотреть файл
@@ -43,6 +43,7 @@
#define CL_MEM_VA_RANGE_AMD (1u << 28)
#define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
#define ROCCLR_MEM_INTERPROCESS (1u << 26)
#define ROCCLR_MEM_PHYMEM (1u << 25)
namespace device {
class Memory;
@@ -148,6 +149,7 @@ class Memory : public amd::RuntimeObject {
{
int deviceId = 0; //!< Device ID memory is allocated on
void* data = nullptr; //!< Opaque user data from CL or HIP or etc.
uint64_t hsa_handle = 0; //!<Opaque hsa handle saved for Virtual memories
unsigned int flags = 0; //!< HIP memory flags
//! hipMallocPitch allocates buffer using width & height and returns pitch & device pointer.
//! Since device pointer is void*, It looses the values of width & height used for allocation.
+1
Просмотреть файл
@@ -142,6 +142,7 @@ class RuntimeObject : public ReferenceCountedObject, public ICDDispatchedObject
ObjectTypeQueue = 8,
ObjectTypeSampler = 9,
ObjectTypeThreadTrace = 10,
ObjectTypeVMMAlloc = 11
};
virtual ObjectType objectType() const = 0;
+2
Просмотреть файл
@@ -239,6 +239,8 @@ release(cstring, HIPRTC_COMPILE_OPTIONS_APPEND, "", \
"Set compile options needed for hiprtc compilation") \
release(cstring, HIPRTC_LINK_OPTIONS_APPEND, "", \
"Set link options needed for hiprtc compilation") \
release(bool, HIP_VMEM_MANAGE_SUPPORT, false, \
"Virtual Memory Management Support") \
namespace amd {