SWDEV-413997 - Initial VMM changes for ROCm path.
Change-Id: I4405fd7b53182eb4c4622835c811c0dc08461537
Этот коммит содержится в:
коммит произвёл
Karthik Jayaprakash
родитель
8fe1d9dda1
Коммит
3ef829939a
+103
-68
@@ -22,6 +22,16 @@
|
||||
#include "hip_internal.hpp"
|
||||
#include "hip_vm.hpp"
|
||||
|
||||
static_assert(static_cast<uint32_t>(hipMemAccessFlagsProtNone)
|
||||
== static_cast<uint32_t>(amd::Device::VmmAccess::kNone),
|
||||
"Mem Access Flag None mismatch with ROCclr!");
|
||||
static_assert(static_cast<uint32_t>(hipMemAccessFlagsProtRead)
|
||||
== static_cast<uint32_t>(amd::Device::VmmAccess::kReadOnly),
|
||||
"Mem Access Flag Read mismatch with ROCclr!");
|
||||
static_assert(static_cast<uint32_t>(hipMemAccessFlagsProtReadWrite)
|
||||
== static_cast<uint32_t>(amd::Device::VmmAccess::kReadWrite),
|
||||
"Mem Access Flag Read Write mismatch with ROCclr!");
|
||||
|
||||
hipError_t hipMemAddressFree(void* devPtr, size_t size) {
|
||||
HIP_INIT_API(hipMemAddressFree, devPtr, size);
|
||||
|
||||
@@ -29,52 +39,47 @@ hipError_t hipMemAddressFree(void* devPtr, size_t size) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
for (auto& dev: g_devices) {
|
||||
dev->devices()[0]->virtualFree(devPtr);
|
||||
}
|
||||
// Single call frees address range for all devices.
|
||||
g_devices[0]->devices()[0]->virtualFree(devPtr);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
/**
 * Reserves a virtual address range through device 0; per the runtime design a single
 * virtualAlloc() call reserves the range for all devices.
 *
 * @param ptr        [out] Receives the start address of the reserved range.
 * @param size       Size of the range in bytes. Must be non-zero and a multiple of device 0's
 *                   virtualMemAllocGranularity_.
 * @param alignment  Requested alignment, forwarded to virtualAlloc().
 * @param addr       Optional requested start address; nullptr lets the runtime pick one.
 * @param flags      Must be 0.
 *
 * @return hipSuccess when a range was reserved (even if it is not at the requested address),
 *         hipErrorInvalidValue for a null ptr or non-zero flags,
 *         hipErrorMemoryAllocation for a zero or misaligned size,
 *         hipErrorOutOfMemory when the reservation itself fails.
 */
hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr,
                                unsigned long long flags) {
  HIP_INIT_API(hipMemAddressReserve, ptr, size, alignment, addr, flags);

  if (ptr == nullptr || flags != 0) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  const auto& dev_info = g_devices[0]->devices()[0]->info();
  if (size == 0 || ((size % dev_info.virtualMemAllocGranularity_) != 0)) {
    HIP_RETURN(hipErrorMemoryAllocation);
  }

  // Initialize the ptr, single virtual alloc call would reserve va range for all devices.
  *ptr = nullptr;
  *ptr = g_devices[0]->devices()[0]->virtualAlloc(addr, size, alignment);
  if (*ptr == nullptr) {
    HIP_RETURN(hipErrorOutOfMemory);
  }

  // The requested address is only a hint. Report when a different address was handed back;
  // the reservation is still valid, so this is not treated as a failure.
  if (addr != nullptr && addr != *ptr) {
    LogPrintfError("Requested address : %p was not allocated. Allocated address : %p ", addr,
                   *ptr);
  }

  HIP_RETURN(hipSuccess);
}
|
||||
|
||||
hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, const hipMemAllocationProp* prop, unsigned long long flags) {
|
||||
hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
|
||||
const hipMemAllocationProp* prop, unsigned long long flags) {
|
||||
HIP_INIT_API(hipMemCreate, handle, size, prop, flags);
|
||||
|
||||
if (handle == nullptr ||
|
||||
size == 0 ||
|
||||
flags != 0 ||
|
||||
prop == nullptr ||
|
||||
prop->type != hipMemAllocationTypePinned ||
|
||||
prop->location.type != hipMemLocationTypeDevice ||
|
||||
// Currently we do not support Pinned memory
|
||||
if (handle == nullptr || size == 0 || flags != 0 || prop == nullptr ||
|
||||
prop->type != hipMemAllocationTypePinned || prop->location.type != hipMemLocationTypeDevice ||
|
||||
prop->location.id >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -84,6 +89,7 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, co
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
// Device info validation
|
||||
const auto& dev_info = g_devices[prop->location.id]->devices()[0]->info();
|
||||
|
||||
if (dev_info.maxPhysicalMemAllocSize_ < size) {
|
||||
@@ -95,34 +101,39 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, co
|
||||
|
||||
amd::Context* amdContext = g_devices[prop->location.id]->asContext();
|
||||
|
||||
void* ptr = amd::SvmBuffer::malloc(*amdContext, 0, size, dev_info.memBaseAddrAlign_,
|
||||
nullptr);
|
||||
// When ROCCLR_MEM_PHYMEM is set, ROCr impl gets and stores unique hsa handle. Flag no-op on PAL.
|
||||
void* ptr = amd::SvmBuffer::malloc(*amdContext, ROCCLR_MEM_PHYMEM, size,
|
||||
dev_info.memBaseAddrAlign_, nullptr);
|
||||
|
||||
// Handle out of memory cases,
|
||||
if (ptr == nullptr) {
|
||||
size_t free = 0, total =0;
|
||||
hipError_t err = hipMemGetInfo(&free, &total);
|
||||
if (err == hipSuccess) {
|
||||
LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu | total :%zu \n", size, free, total);
|
||||
hipError_t hip_error = hipMemGetInfo(&free, &total);
|
||||
if (hip_error == hipSuccess) {
|
||||
LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu"
|
||||
"| total :%zu \n", size, free, total);
|
||||
}
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
|
||||
// Add this to amd::Memory object, so this ptr is accessible for other HIP memory operations.
|
||||
size_t offset = 0; //this is ignored
|
||||
amd::Memory* memObj = getMemoryObject(ptr, offset);
|
||||
//saves the current device id so that it can be accessed later
|
||||
memObj->getUserData().deviceId = prop->location.id;
|
||||
memObj->getUserData().data = new hip::GenericAllocation(ptr, size, *prop);
|
||||
|
||||
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(memObj->getUserData().data);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipMemExportToShareableHandle(void* shareableHandle, hipMemGenericAllocationHandle_t handle, hipMemAllocationHandleType handleType, unsigned long long flags) {
|
||||
hipError_t hipMemExportToShareableHandle(void* shareableHandle,
|
||||
hipMemGenericAllocationHandle_t handle,
|
||||
hipMemAllocationHandleType handleType,
|
||||
unsigned long long flags) {
|
||||
HIP_INIT_API(hipMemExportToShareableHandle, shareableHandle, handle, handleType, flags);
|
||||
|
||||
if (flags != 0 ||
|
||||
handle == nullptr ||
|
||||
shareableHandle == nullptr) {
|
||||
if (flags != 0 || handle == nullptr || shareableHandle == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -132,23 +143,30 @@ hipError_t hipMemExportToShareableHandle(void* shareableHandle, hipMemGenericAll
|
||||
hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* location, void* ptr) {
|
||||
HIP_INIT_API(hipMemGetAccess, flags, location, ptr);
|
||||
|
||||
if (flags == nullptr ||
|
||||
location == nullptr ||
|
||||
ptr == nullptr) {
|
||||
if (flags == nullptr || location == nullptr || ptr == nullptr
|
||||
|| location->type != hipMemLocationTypeDevice || location->id >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidValue)
|
||||
}
|
||||
|
||||
// Convert the access flags to amd::Device access flag
|
||||
auto& dev = g_devices[location->id];
|
||||
amd::Device::VmmAccess access_flags = static_cast<amd::Device::VmmAccess>(0);
|
||||
|
||||
if (!dev->devices()[0]->GetMemAccess(ptr, &access_flags)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*flags = static_cast<unsigned long long>(access_flags);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop, hipMemAllocationGranularity_flags option) {
|
||||
hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop,
|
||||
hipMemAllocationGranularity_flags option) {
|
||||
HIP_INIT_API(hipMemGetAllocationGranularity, granularity, prop, option);
|
||||
|
||||
if (granularity == nullptr ||
|
||||
prop == nullptr ||
|
||||
prop->type != hipMemAllocationTypePinned ||
|
||||
prop->location.type != hipMemLocationTypeDevice ||
|
||||
prop->location.id >= g_devices.size()) {
|
||||
if (granularity == nullptr || prop == nullptr || prop->type != hipMemAllocationTypePinned ||
|
||||
prop->location.type != hipMemLocationTypeDevice || prop->location.id >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -171,7 +189,8 @@ hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, h
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle, hipMemAllocationHandleType shHandleType) {
|
||||
hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle,
|
||||
hipMemAllocationHandleType shHandleType) {
|
||||
HIP_INIT_API(hipMemImportFromShareableHandle, handle, osHandle, shHandleType);
|
||||
|
||||
if (handle == nullptr || osHandle == nullptr) {
|
||||
@@ -181,22 +200,23 @@ hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* hand
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle, unsigned long long flags) {
|
||||
hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle,
|
||||
unsigned long long flags) {
|
||||
HIP_INIT_API(hipMemMap, ptr, size, offset, handle, flags);
|
||||
|
||||
if (ptr == nullptr ||
|
||||
handle == nullptr ||
|
||||
size == 0 ||
|
||||
offset != 0 ||
|
||||
flags != 0) {
|
||||
if (ptr == nullptr || handle == nullptr || size == 0 || offset != 0 || flags != 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// Re-interpret the ga handle and set the mapped flag
|
||||
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(handle);
|
||||
ga->retain();
|
||||
|
||||
auto& queue = *g_devices[ga->GetProperties().location.id]->NullStream();
|
||||
|
||||
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size, &ga->asAmdMemory());
|
||||
// Map the physical address to virtual address
|
||||
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
|
||||
&ga->asAmdMemory());
|
||||
cmd->enqueue();
|
||||
cmd->awaitCompletion();
|
||||
cmd->release();
|
||||
@@ -220,11 +240,13 @@ hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, unsigned int count
|
||||
hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) {
|
||||
HIP_INIT_API(hipMemRelease, handle);
|
||||
|
||||
if (handle == nullptr) HIP_RETURN(hipErrorInvalidValue);
|
||||
if (handle == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// Re-interpret the ga handle and make sure it is not already released.
|
||||
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(handle);
|
||||
|
||||
delete ga;
|
||||
ga->release();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
@@ -232,7 +254,9 @@ hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) {
|
||||
hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr) {
|
||||
HIP_INIT_API(hipMemRetainAllocationHandle, handle, addr);
|
||||
|
||||
if (handle == nullptr || addr == nullptr) HIP_RETURN(hipErrorInvalidValue);
|
||||
if (handle == nullptr || addr == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Memory* mem = amd::MemObjMap::FindMemObj(addr);
|
||||
|
||||
@@ -252,10 +276,18 @@ hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle,
|
||||
hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc, size_t count) {
|
||||
HIP_INIT_API(hipMemSetAccess, ptr, size, desc, count);
|
||||
|
||||
if (ptr == nullptr ||
|
||||
size == 0 ||
|
||||
desc == nullptr ||
|
||||
count == 0) {
|
||||
if (ptr == nullptr || size == 0 || desc == nullptr || count == 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
if (desc->location.id >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidValue)
|
||||
}
|
||||
|
||||
auto& dev = g_devices[desc->location.id];
|
||||
amd::Device::VmmAccess access_flags = static_cast<amd::Device::VmmAccess>(desc->flags);
|
||||
|
||||
if (!dev->devices()[0]->SetMemAccess(ptr, size, access_flags, count)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -265,13 +297,15 @@ hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc,
|
||||
hipError_t hipMemUnmap(void* ptr, size_t size) {
|
||||
HIP_INIT_API(hipMemUnmap, ptr, size);
|
||||
|
||||
if (ptr == nullptr) HIP_RETURN(hipErrorInvalidValue);
|
||||
if (ptr == nullptr || size == 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Memory* va = amd::MemObjMap::FindMemObj(ptr);
|
||||
|
||||
auto& queue = *g_devices[va->getUserData().deviceId]->NullStream();
|
||||
|
||||
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size, nullptr);
|
||||
amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
|
||||
nullptr);
|
||||
cmd->enqueue();
|
||||
cmd->awaitCompletion();
|
||||
cmd->release();
|
||||
@@ -280,6 +314,7 @@ hipError_t hipMemUnmap(void* ptr, size_t size) {
|
||||
hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(va->getUserData().data);
|
||||
va->setSvmPtr(ga->genericAddress());
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
ga->release();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
@@ -24,6 +24,8 @@
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
#include "platform/object.hpp"
|
||||
|
||||
hipError_t ihipFree(void* ptr);
|
||||
|
||||
namespace hip {
|
||||
@@ -36,22 +38,29 @@ struct MemMapAllocUserData {
|
||||
MemMapAllocUserData(void* ptr, size_t size, amd::Memory* va) : ptr_(ptr), size_(size), va_(va) {}
|
||||
};
|
||||
|
||||
class GenericAllocation {
|
||||
void* ptr_;
|
||||
size_t size_;
|
||||
hipMemAllocationProp properties_;
|
||||
class GenericAllocation : public amd::RuntimeObject {
|
||||
void* ptr_; //<! Device ptr
|
||||
size_t size_; //<! Allocated size
|
||||
hipMemAllocationProp properties_; //<! Allocation Properties
|
||||
|
||||
public:
|
||||
GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop): ptr_(ptr), size_(size), properties_(prop) {}
|
||||
~GenericAllocation() { hipError_t err = ihipFree(ptr_); }
|
||||
GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop)
|
||||
: ptr_(ptr), size_(size), properties_(prop) {}
|
||||
~GenericAllocation() {
|
||||
hipError_t err = ihipFree(ptr_);
|
||||
}
|
||||
|
||||
const hipMemAllocationProp& GetProperties() const { return properties_; }
|
||||
hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() { return reinterpret_cast<hipMemGenericAllocationHandle_t>(this); }
|
||||
hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() {
|
||||
return reinterpret_cast<hipMemGenericAllocationHandle_t>(this);
|
||||
}
|
||||
amd::Memory& asAmdMemory() {
|
||||
size_t discardOffset;
|
||||
return *getMemoryObject(genericAddress(), discardOffset);
|
||||
}
|
||||
void* genericAddress() const { return ptr_; }
|
||||
|
||||
virtual ObjectType objectType() const { return ObjectTypeVMMAlloc; }
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -1584,6 +1584,14 @@ class Device : public RuntimeObject {
|
||||
kCacheStateSystem = 2
|
||||
} CacheState;
|
||||
|
||||
//! Enum describing the access permissions of virtual memory
|
||||
enum class VmmAccess {
|
||||
kNone = 0x0,
|
||||
kReadOnly = 0x1,
|
||||
kWriteOnly = 0x2,
|
||||
kReadWrite = 0x3
|
||||
};
|
||||
|
||||
typedef std::pair<LinkAttribute, int32_t /* value */> LinkAttrType;
|
||||
|
||||
static constexpr size_t kP2PStagingSize = 4 * Mi;
|
||||
@@ -1784,6 +1792,25 @@ class Device : public RuntimeObject {
|
||||
*/
|
||||
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) = 0;
|
||||
|
||||
/**
|
||||
* Set access permissions for a virtual memory object.
|
||||
*
|
||||
* @param va_addr Virtual Address ptr
|
||||
* @param va_size Virtual Address Size
|
||||
* @param access_flags Access permissions
|
||||
* @param count Number of access permissions
|
||||
*/
|
||||
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
|
||||
size_t count) = 0;
|
||||
|
||||
/**
|
||||
* Get access permissions for a virtual memory object.
|
||||
*
|
||||
* @param va_addr Virtual Address ptr
|
||||
* @param access_flags_ptr Access permissions to be filled
|
||||
*/
|
||||
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) = 0;
|
||||
|
||||
/**
|
||||
* Free a VA range
|
||||
*
|
||||
@@ -1966,6 +1993,7 @@ class Device : public RuntimeObject {
|
||||
virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset, size_t size = 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
virtual device::UriLocator* createUriLocator() const = 0;
|
||||
|
||||
@@ -2385,7 +2385,11 @@ void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!mem->create(nullptr, false)) {
|
||||
constexpr bool kSysMemAlloc = false;
|
||||
constexpr bool kSkipAlloc = false;
|
||||
constexpr bool kForceAlloc = true;
|
||||
// Force the alloc now for VA_Range reservation.
|
||||
if (!mem->create(nullptr, kSysMemAlloc, kSkipAlloc, kForceAlloc)) {
|
||||
LogError("failed to create a va range mem object");
|
||||
mem->release();
|
||||
return nullptr;
|
||||
|
||||
@@ -147,6 +147,14 @@ class NullDevice : public amd::Device {
|
||||
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) { return nullptr; };
|
||||
virtual void virtualFree(void* addr) { };
|
||||
|
||||
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count) {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool importExtSemaphore(void** extSemaphore,const amd::Os::FileDesc& handle,
|
||||
amd::ExternalSemaphoreHandleType sem_handle_type) override {
|
||||
return false;
|
||||
@@ -535,6 +543,14 @@ class Device : public NullDevice {
|
||||
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
|
||||
virtual void virtualFree(void* addr);
|
||||
|
||||
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count) {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Returns SRD manager object
|
||||
SrdManager& srds() const { return *srdManager_; }
|
||||
|
||||
|
||||
@@ -64,6 +64,20 @@
|
||||
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
|
||||
#define OPENCL_C_VERSION_STR XSTR(OPENCL_C_MAJOR) "." XSTR(OPENCL_C_MINOR)
|
||||
|
||||
|
||||
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kNone)
|
||||
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_NONE),
|
||||
"Vmm Access Flag None mismatch with ROC-runtime!");
|
||||
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kReadOnly)
|
||||
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_RO),
|
||||
"Vmm Access Flag Read mismatch with ROCr-runtime!");
|
||||
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kWriteOnly)
|
||||
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_WO),
|
||||
"Vmm Access Flag Write mismatch with ROC-runtime!");
|
||||
static_assert(static_cast<uint32_t>(amd::Device::VmmAccess::kReadWrite)
|
||||
== static_cast<uint32_t>(HSA_ACCESS_PERMISSION_RW),
|
||||
"Vmm Access Flag Read Write mismatch with ROC-runtime!");
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace {
|
||||
@@ -935,6 +949,14 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
|
||||
} else {
|
||||
dev->info_.largeBar_ = ROC_ENABLE_LARGE_BAR;
|
||||
}
|
||||
|
||||
// Query the recommended granularity for this pool.
|
||||
stat = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
||||
&(dev->info_.virtualMemAllocGranularity_));
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE info"
|
||||
"failed with hsa_status: %d \n", stat);
|
||||
}
|
||||
}
|
||||
|
||||
if (dev->gpuvm_segment_.handle == 0) {
|
||||
@@ -1720,7 +1742,17 @@ bool Device::populateOCLDeviceConstants() {
|
||||
maxSdmaReadMask_, maxSdmaWriteMask_);
|
||||
|
||||
info_.globalCUMask_ = {};
|
||||
|
||||
// Virtual memory Management Support, if set to true then the HW and SW Stack supports VMM.
|
||||
info_.virtualMemoryManagement_ = false;
|
||||
if (HIP_VMEM_MANAGE_SUPPORT) {
|
||||
if (HSA_STATUS_SUCCESS != hsa_system_get_info(
|
||||
static_cast<hsa_system_info_t>(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED),
|
||||
&info_.virtualMemoryManagement_)) {
|
||||
LogError("HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED query failed ");
|
||||
}
|
||||
}
|
||||
|
||||
switch (isa().versionMajor()) {
|
||||
case (11):
|
||||
if (isa().versionMinor() == 0) {
|
||||
@@ -2212,6 +2244,19 @@ bool Device::allowPeerAccess(device::Memory* memory) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t Device::deviceVmemAlloc(size_t size, uint64_t flags) const {
|
||||
hsa_amd_vmem_alloc_handle_t hsa_vmem_handle {};
|
||||
|
||||
// We only allow pinned memory at this time.
|
||||
hsa_status_t hsa_status = hsa_amd_vmem_handle_create(gpuvm_segment_, size, MEMORY_TYPE_PINNED,
|
||||
flags, &hsa_vmem_handle);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Failed hsa_amd_vmem_handle_create! Failed with hsa status: %d \n", hsa_status);
|
||||
}
|
||||
|
||||
return hsa_vmem_handle.handle;
|
||||
}
|
||||
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
|
||||
const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
|
||||
: (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
@@ -2311,13 +2356,83 @@ void* Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_
|
||||
return svmPtr;
|
||||
}
|
||||
|
||||
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment)
|
||||
{
|
||||
return nullptr;
|
||||
void* Device::virtualAlloc(void* req_addr, size_t size, size_t alignment) {
|
||||
void* vptr = nullptr;
|
||||
// Reserves the address using HSA APIs, with requested address.
|
||||
// There is no guarantee that we will get the requested address.
|
||||
hsa_status_t hsa_status = hsa_amd_vmem_address_reserve(&vptr, size,
|
||||
reinterpret_cast<uint64_t>(req_addr), 0);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Failed hsa_amd_vmem_address_reserve. Failed with status: %d \n", hsa_status);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// This mem->create() does not create an actual memory but stores the memory info with given vptr.
|
||||
auto mem = new (context()) amd::Buffer(context(), CL_MEM_VA_RANGE_AMD, size, vptr);
|
||||
if (mem == nullptr) {
|
||||
LogError("failed to new a va range mem object!");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!mem->create(nullptr, false)) {
|
||||
LogError("failed to create a va range mem object");
|
||||
mem->release();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Assert to make sure that amd::Memory object has set the right ptr.
|
||||
guarantee(vptr == mem->getSvmPtr(), "amd::Memory object does not have the right ptr");
|
||||
|
||||
return mem->getSvmPtr();
|
||||
}
|
||||
|
||||
void Device::virtualFree(void* addr)
|
||||
{
|
||||
void Device::virtualFree(void* addr) {
|
||||
amd::Memory* memObj = amd::MemObjMap::FindVirtualMemObj(addr);
|
||||
if (memObj == nullptr) {
|
||||
LogPrintfError("Cannot find the Virtual MemObj entry for this addr 0x%x", addr);
|
||||
}
|
||||
|
||||
hsa_status_t hsa_status = hsa_amd_vmem_address_free(memObj->getSvmPtr(), memObj->getSize());
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Failed hsa_amd_vmem_address_free. Failed with status:%d \n", hsa_status);
|
||||
}
|
||||
}
|
||||
|
||||
bool Device::SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count) {
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
hsa_amd_memory_access_desc_t desc;
|
||||
desc.permissions = static_cast<hsa_access_permission_t>(access_flags);
|
||||
desc.agent_handle = getBackendDevice();
|
||||
|
||||
if ((hsa_status = hsa_amd_vmem_set_access(va_addr, va_size, &desc, count))
|
||||
!= HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Failed hsa_amd_vmem_set_access. Failed with status:%d \n", hsa_status);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Device::GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) {
|
||||
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
|
||||
hsa_access_permission_t perms;
|
||||
|
||||
size_t discard_offset = 0;
|
||||
amd::Memory* va_mem_obj = amd::MemObjMap::FindMemObj(va_addr, &discard_offset);
|
||||
if (va_mem_obj == nullptr) {
|
||||
LogPrintfError("Failed to get Memory Object for va_addr: 0x%x", va_addr);
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((hsa_status = hsa_amd_vmem_get_access(va_mem_obj->getSvmPtr(), &perms, getBackendDevice()))
|
||||
!= HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError("Failed hsa_amd_vmem_get_access. Failed with status:%d \n", hsa_status);
|
||||
return false;
|
||||
}
|
||||
|
||||
*access_flags_ptr = static_cast<VmmAccess>(perms);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
@@ -221,7 +221,7 @@ class NullDevice : public amd::Device {
|
||||
ShouldNotReachHere();
|
||||
return;
|
||||
}
|
||||
void* virtualAlloc(void* addr, size_t size, size_t alignment) override {
|
||||
void* virtualAlloc(void* req_addr, size_t size, size_t alignment) override {
|
||||
ShouldNotReachHere();
|
||||
return nullptr;
|
||||
}
|
||||
@@ -231,6 +231,17 @@ class NullDevice : public amd::Device {
|
||||
return;
|
||||
}
|
||||
|
||||
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count)
|
||||
override {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) override {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Determine if we can use device memory for SVM
|
||||
const bool forceFineGrain(amd::Memory* memory) const {
|
||||
return (memory->getContext().devices().size() > 1);
|
||||
@@ -439,7 +450,7 @@ class Device : public NullDevice {
|
||||
bool deviceAllowAccess(void* dst) const;
|
||||
|
||||
bool allowPeerAccess(device::Memory* memory) const;
|
||||
|
||||
uint64_t deviceVmemAlloc(size_t size, uint64_t flags) const;
|
||||
void* deviceLocalAlloc(size_t size, bool atomics = false, bool pseudo_fine_grain=false) const;
|
||||
|
||||
void memFree(void* ptr, size_t size) const;
|
||||
@@ -454,9 +465,12 @@ class Device : public NullDevice {
|
||||
virtual bool GetSvmAttributes(void** data, size_t* data_sizes, int* attributes,
|
||||
size_t num_attributes, const void* dev_ptr, size_t count) const;
|
||||
|
||||
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
|
||||
virtual void* virtualAlloc(void* req_addr, size_t size, size_t alignment);
|
||||
virtual void virtualFree(void* addr);
|
||||
|
||||
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags, size_t count);
|
||||
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr);
|
||||
|
||||
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
|
||||
|
||||
|
||||
@@ -748,9 +748,19 @@ bool Buffer::create(bool alloc_local) {
|
||||
owner()->setSvmPtr(orig_dev_ptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Allocate backing storage in device local memory unless UHP or AHP are set
|
||||
cl_mem_flags memFlags = owner()->getMemFlags();
|
||||
|
||||
if (memFlags & ROCCLR_MEM_PHYMEM) {
|
||||
// If this is physical memory request, then get an handle and store it in user data
|
||||
owner()->getUserData().hsa_handle = dev().deviceVmemAlloc(owner()->getSize(), 0);
|
||||
if (owner()->getUserData().hsa_handle == 0) {
|
||||
LogError("HSA Opaque Handle returned was null");
|
||||
}
|
||||
}
|
||||
|
||||
if ((owner()->parent() == nullptr) &&
|
||||
(owner()->getSvmPtr() != nullptr)) {
|
||||
if (dev().forceFineGrain(owner()) || dev().isFineGrainedSystem(true)) {
|
||||
|
||||
@@ -2539,6 +2539,51 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
|
||||
profilingEnd(cmd);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Executes a VirtualMapCommand: maps the command's physical memory object onto a reserved
// virtual address range, or unmaps the range when the command carries no memory object.
// On a successful map the pointer is added to MemObjMap; on a successful unmap it is removed.
// HSA failures are logged with their status and the MemObjMap is left unchanged.
void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
  // Make sure VirtualGPU has an exclusive access to the resources
  amd::ScopedLock lock(execution());

  profilingBegin(vcmd);

  // Find the amd::Memory object for virtual ptr.
  amd::Memory* va = amd::MemObjMap::FindVirtualMemObj(vcmd.ptr());
  if (va == nullptr || !(va->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
    LogPrintfError("No reserved virtual address range found for ptr: %p", vcmd.ptr());
    profilingEnd(vcmd);
    return;
  }

  // Get the amd::Memory object for the physical address
  amd::Memory* pa = vcmd.memory();
  hsa_status_t hsa_status = HSA_STATUS_SUCCESS;

  // If the physical memory object is set, this is a map command. If it is not set, it is an
  // unmap command.
  if (pa != nullptr) {
    // Map the physical to virtual address with the hsa api
    hsa_amd_vmem_alloc_handle_t opaque_hsa_handle;
    opaque_hsa_handle.handle = pa->getUserData().hsa_handle;
    if ((hsa_status = hsa_amd_vmem_map(va->getSvmPtr(), va->getSize(), va->getOffset(),
                                       opaque_hsa_handle, 0)) == HSA_STATUS_SUCCESS) {
      assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) == nullptr);
      // Now that we have mapped physical addr to virtual addr, make an entry in the MemObjMap.
      amd::MemObjMap::AddMemObj(vcmd.ptr(), vcmd.memory());
    } else {
      LogPrintfError("HSA Command: hsa_amd_vmem_map failed! Failed with status:%d \n",
                     hsa_status);
    }
  } else {
    // Unmap the range, since no physical memory object was supplied.
    if ((hsa_status = hsa_amd_vmem_unmap(va->getSvmPtr(), va->getSize())) == HSA_STATUS_SUCCESS) {
      // assert the va is mapped and needs to be removed
      assert(amd::MemObjMap::FindMemObj(vcmd.ptr()) != nullptr);
      amd::MemObjMap::RemoveMemObj(vcmd.ptr());
    } else {
      LogPrintfError("HSA Command: hsa_amd_vmem_unmap failed. Failed with status:%d \n",
                     hsa_status);
    }
  }

  profilingEnd(vcmd);
}
|
||||
|
||||
// ================================================================================================
|
||||
void VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd) {
|
||||
// Make sure VirtualGPU has an exclusive access to the resources
|
||||
|
||||
@@ -336,6 +336,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
void flush(amd::Command* list = nullptr, bool wait = false);
|
||||
void submitFillMemory(amd::FillMemoryCommand& cmd);
|
||||
void submitStreamOperation(amd::StreamOperationCommand& cmd);
|
||||
void submitVirtualMap(amd::VirtualMapCommand& cmd);
|
||||
void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
|
||||
|
||||
void submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd);
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#define CL_MEM_VA_RANGE_AMD (1u << 28)
|
||||
#define ROCCLR_MEM_HSA_UNCACHED (1u << 27)
|
||||
#define ROCCLR_MEM_INTERPROCESS (1u << 26)
|
||||
#define ROCCLR_MEM_PHYMEM (1u << 25)
|
||||
|
||||
namespace device {
|
||||
class Memory;
|
||||
@@ -148,6 +149,7 @@ class Memory : public amd::RuntimeObject {
|
||||
{
|
||||
int deviceId = 0; //!< Device ID memory is allocated on
|
||||
void* data = nullptr; //!< Opaque user data from CL or HIP or etc.
|
||||
uint64_t hsa_handle = 0; //!<Opaque hsa handle saved for Virtual memories
|
||||
unsigned int flags = 0; //!< HIP memory flags
|
||||
//! hipMallocPitch allocates buffer using width & height and returns pitch & device pointer.
|
||||
//! Since device pointer is void*, It looses the values of width & height used for allocation.
|
||||
|
||||
@@ -142,6 +142,7 @@ class RuntimeObject : public ReferenceCountedObject, public ICDDispatchedObject
|
||||
ObjectTypeQueue = 8,
|
||||
ObjectTypeSampler = 9,
|
||||
ObjectTypeThreadTrace = 10,
|
||||
ObjectTypeVMMAlloc = 11
|
||||
};
|
||||
|
||||
virtual ObjectType objectType() const = 0;
|
||||
|
||||
@@ -239,6 +239,8 @@ release(cstring, HIPRTC_COMPILE_OPTIONS_APPEND, "", \
|
||||
"Set compile options needed for hiprtc compilation") \
|
||||
release(cstring, HIPRTC_LINK_OPTIONS_APPEND, "", \
|
||||
"Set link options needed for hiprtc compilation") \
|
||||
release(bool, HIP_VMEM_MANAGE_SUPPORT, false, \
|
||||
"Virtual Memory Management Support") \
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user