SWDEV-546146 - Added support for hipMemLocationTypeHost in hipMemSetAccess (#682)

Этот коммит содержится в:
Ioannis Assiouras
2025-09-10 23:06:20 +01:00
коммит произвёл GitHub
родитель dd1a2dbf8a
Коммит 35629e433d
11 изменённых файлов: 197 добавлений и 37 удалений
+48 -22
Просмотреть файл
@@ -87,7 +87,8 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
// Currently we do not support Pinned memory
if (handle == nullptr || size == 0 || flags != 0 || prop == nullptr ||
(prop->type != hipMemAllocationTypePinned && prop->type != hipMemAllocationTypeUncached) ||
prop->location.type != hipMemLocationTypeDevice) {
(prop->location.type != hipMemLocationTypeDevice &&
prop->location.type != hipMemLocationTypeHost)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -106,8 +107,15 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
ihipFlags |= CL_MEM_SVM_ATOMICS | ROCCLR_MEM_HSA_UNCACHED;
}
// Device info validation
const auto& dev_info = g_devices[prop->location.id]->devices()[0]->info();
bool useHostDevice = (prop->location.type == hipMemLocationTypeHost);
amd::Context* curDevContext = hip::getCurrentDevice()->asContext();
amd::Context* amdContext = useHostDevice ? hip::host_context : curDevContext;
if (amdContext == nullptr) {
return hipErrorOutOfMemory;
}
const auto& dev_info = amdContext->devices()[0]->info();
if (dev_info.maxPhysicalMemAllocSize_ < size) {
HIP_RETURN(hipErrorOutOfMemory);
@@ -116,10 +124,8 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
HIP_RETURN(hipErrorInvalidValue);
}
amd::Context* amdContext = g_devices[prop->location.id]->asContext();
void* ptr = amd::SvmBuffer::malloc(*amdContext, ihipFlags, size,
dev_info.memBaseAddrAlign_, nullptr);
void* ptr = amd::SvmBuffer::malloc(*amdContext, ihipFlags, size, dev_info.memBaseAddrAlign_,
useHostDevice ? curDevContext->svmDevices()[0] : nullptr);
// Handle out of memory cases,
if (ptr == nullptr) {
@@ -139,6 +145,7 @@ hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
amd::Memory* phys_mem_obj = getMemoryObject(ptr, offset);
// saves the current device id so that it can be accessed later
phys_mem_obj->getUserData().deviceId = prop->location.id;
phys_mem_obj->getUserData().locationType = prop->location.type;
phys_mem_obj->getUserData().data = new hip::GenericAllocation(*phys_mem_obj, size, *prop);
*handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(phys_mem_obj->getUserData().data);
@@ -203,17 +210,23 @@ hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* loca
hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop,
hipMemAllocationGranularity_flags option) {
HIP_INIT_API(hipMemGetAllocationGranularity, granularity, prop, option);
if (granularity == nullptr || prop == nullptr || (prop->type != hipMemAllocationTypePinned &&
prop->type != hipMemAllocationTypeUncached) ||
prop->location.type != hipMemLocationTypeDevice || prop->location.id >= g_devices.size() ||
(prop->location.type != hipMemLocationTypeDevice &&
prop->location.type != hipMemLocationTypeHost) ||
prop->location.id >= g_devices.size() ||
(option != hipMemAllocationGranularityMinimum &&
option != hipMemAllocationGranularityRecommended)) {
HIP_RETURN(hipErrorInvalidValue);
}
const auto& dev_info = g_devices[prop->location.id]->devices()[0]->info();
bool useHostDevice = (prop->location.type == hipMemLocationTypeHost);
amd::Context* curDevContext = hip::getCurrentDevice()->asContext();
amd::Context* amdContext = useHostDevice ? hip::host_context : curDevContext;
const auto& dev_info = amdContext->devices()[0]->info();
*granularity = dev_info.virtualMemAllocGranularity_;
@@ -347,26 +360,38 @@ hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc,
// Ensure that the specified size parameter matches the total size of a complete set of
// sub-buffers, disallowing partial sub-buffer coverage
auto mem_object = amd::MemObjMap::FindMemObj(ptr);
if (mem_object && mem_object->parent()) {
size_t accumulated_buffer_size = 0;
for (auto sub_buffer : mem_object->parent()->subBuffers()) {
accumulated_buffer_size += sub_buffer->getSize();
if (accumulated_buffer_size > size) {
hipMemLocationType memLocationType = hipMemLocationTypeNone;
if (mem_object) {
memLocationType = static_cast<hipMemLocationType>(mem_object->getUserData().locationType);
if (mem_object->parent()) {
size_t accumulated_buffer_size = 0;
for (auto sub_buffer : mem_object->parent()->subBuffers()) {
accumulated_buffer_size += sub_buffer->getSize();
if (accumulated_buffer_size > size) {
HIP_RETURN(hipErrorInvalidValue);
} else if (accumulated_buffer_size == size) {
break;
}
}
if (accumulated_buffer_size != size) {
HIP_RETURN(hipErrorInvalidValue);
} else if (accumulated_buffer_size == size) {
break;
}
}
if (accumulated_buffer_size != size) {
HIP_RETURN(hipErrorInvalidValue);
}
} else {
HIP_RETURN(hipErrorInvalidValue);
}
for (size_t desc_idx = 0; desc_idx < count; ++desc_idx) {
if (desc[desc_idx].location.type != hipMemLocationTypeDevice) {
hipMemLocationType accessLocationType = desc[desc_idx].location.type;
if (accessLocationType != hipMemLocationTypeDevice && accessLocationType != hipMemLocationTypeHost) {
HIP_RETURN(hipErrorInvalidValue);
}
if (accessLocationType == hipMemLocationTypeHost &&
memLocationType != hipMemLocationTypeHost) {
HIP_RETURN(hipErrorInvalidValue)
}
if (desc[desc_idx].location.id >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidValue)
@@ -380,7 +405,8 @@ hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc,
HIP_RETURN(hipErrorInvalidValue);
}
if (!dev->devices()[0]->SetMemAccess(ptr, size, access_flags)) {
if (!dev->devices()[0]->SetMemAccess(ptr, size, access_flags,
static_cast<amd::Device::VmmLocationType>(accessLocationType))) {
HIP_RETURN(hipErrorInvalidValue);
}
}
+2 -1
Просмотреть файл
@@ -526,7 +526,7 @@ bool Device::ValidateVirtualAddressRange(amd::Memory* vaddr_base_obj, amd::Memor
//==================================================================================================
amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vptr, size_t size,
int deviceId, bool parent, bool kForceAlloc) {
int deviceId, int locationType, bool parent, bool kForceAlloc) {
amd::Memory* vaddr_base_obj = nullptr;
amd::Memory* vaddr_sub_obj = nullptr;
constexpr bool kSysMemAlloc = false;
@@ -572,6 +572,7 @@ amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vpt
}
vaddr_sub_obj->getUserData().deviceId = deviceId;
vaddr_sub_obj->getUserData().locationType = locationType;
if (!ValidateVirtualAddressRange(vaddr_base_obj, vaddr_sub_obj)) {
LogError("Validation failed on address range, returning nullptr");
+5 -2
Просмотреть файл
@@ -1658,6 +1658,8 @@ class Device : public RuntimeObject {
//<! Enum describing the access permissions of Virtual memory
enum class VmmAccess { kNone = 0x0, kReadOnly = 0x1, kReadWrite = 0x3 };
//<! Enum describing the location of Virtual memory
enum class VmmLocationType { kNone = 0x0, kDevice = 0x1, kHost = 0x2 };
typedef std::pair<LinkAttribute, int32_t /* value */> LinkAttrType;
@@ -1892,7 +1894,7 @@ class Device : public RuntimeObject {
* @param ForceAlloc force_alloc
*/
amd::Memory* CreateVirtualBuffer(Context& device_context, void* vptr, size_t size, int deviceId,
bool parent, bool kForceAlloc = false);
int locationType, bool parent, bool kForceAlloc = false);
/**
* Deletes Virtual Buffer and creates memob
@@ -1918,7 +1920,8 @@ class Device : public RuntimeObject {
* @param access_flags Access permissions
* @param count Number of access permissions
*/
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags) = 0;
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice) = 0;
/**
* Get Access permisions for a virtual memory object.
+3 -2
Просмотреть файл
@@ -2528,7 +2528,7 @@ void Device::svmFree(void* ptr) const {
void* Device::virtualAlloc(void* addr, size_t size, size_t alignment) {
constexpr bool kParent = true;
constexpr bool kForceAlloc = true;
amd::Memory* mem = CreateVirtualBuffer(context(), addr, size, -1, kParent, kForceAlloc);
amd::Memory* mem = CreateVirtualBuffer(context(), addr, size, -1, -1, kParent, kForceAlloc);
assert(mem != nullptr);
return mem->getSvmPtr();
}
@@ -2549,7 +2549,8 @@ bool Device::virtualFree(void* addr) {
}
// ================================================================================================
bool Device::SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags) {
bool Device::SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType access_location) {
amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(va_addr);
if (amd_mem_obj == nullptr) {
// If the amd_mem_obj is null, the check if this is a valid va_addr, but not-mapped,
+6 -2
Просмотреть файл
@@ -145,7 +145,10 @@ class NullDevice : public amd::Device {
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) { return nullptr; };
virtual bool virtualFree(void* addr) { return true; }
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags) { return true; }
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice) {
return true;
}
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) const { return true; }
@@ -555,7 +558,8 @@ class Device : public NullDevice {
virtual bool virtualFree(void* addr);
//! Set/Get memory access set by the app
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags);
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice);
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) const;
virtual bool ValidateMemAccess(amd::Memory& mem, bool read_write) const;
+1 -1
Просмотреть файл
@@ -2285,7 +2285,7 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
constexpr bool kParent = false;
vaddr_sub_obj = phys_mem_obj->getContext().devices()[0]->CreateVirtualBuffer(
phys_mem_obj->getContext(), const_cast<void*>(vcmd.ptr()), vcmd.size(),
phys_mem_obj->getUserData().deviceId, kParent);
phys_mem_obj->getUserData().deviceId, phys_mem_obj->getUserData().locationType, kParent);
// Calculate the offset from the original pointer.
vaddr_offset = (reinterpret_cast<address>(vaddr_sub_obj->getSvmPtr()) -
+5 -3
Просмотреть файл
@@ -2290,7 +2290,7 @@ void* Device::virtualAlloc(void* req_addr, size_t size, size_t alignment) {
}
constexpr bool kParent = true;
amd::Memory* mem = CreateVirtualBuffer(context(), vptr, size, -1, kParent);
amd::Memory* mem = CreateVirtualBuffer(context(), vptr, size, -1, -1, kParent);
if (mem == nullptr) {
LogPrintfError("Cannot create Virtual Buffer for vptr: %p of size: %u", vptr, size);
}
@@ -2316,11 +2316,13 @@ bool Device::virtualFree(void* addr) {
return true;
}
bool Device::SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags) {
bool Device::SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType access_location) {
hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
hsa_amd_memory_access_desc_t desc;
desc.permissions = static_cast<hsa_access_permission_t>(access_flags);
desc.agent_handle = getBackendDevice();
desc.agent_handle =
access_location == VmmLocationType::kDevice ? getBackendDevice() : getCpuAgent();
if ((hsa_status = hsa_amd_vmem_set_access(va_addr, va_size, &desc, 1)) != HSA_STATUS_SUCCESS) {
LogPrintfError("Failed hsa_amd_vmem_set_access. Failed with status:%d \n", hsa_status);
+4 -2
Просмотреть файл
@@ -230,7 +230,8 @@ class NullDevice : public amd::Device {
return true;
}
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags) override {
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice) override {
ShouldNotReachHere();
return false;
}
@@ -442,7 +443,8 @@ class Device : public NullDevice {
virtual void* virtualAlloc(void* req_addr, size_t size, size_t alignment);
virtual bool virtualFree(void* addr);
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags);
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice);
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) const;
virtual bool ValidateMemAccess(amd::Memory& mem, bool read_write) const { return true; }
+1 -1
Просмотреть файл
@@ -2948,7 +2948,7 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
constexpr bool kParent = false;
amd::Memory* vaddr_sub_obj = phys_mem_obj->getContext().devices()[0]->CreateVirtualBuffer(
phys_mem_obj->getContext(), const_cast<void*>(vcmd.ptr()), vcmd.size(),
phys_mem_obj->getUserData().deviceId, kParent);
phys_mem_obj->getUserData().deviceId, phys_mem_obj->getUserData().locationType, kParent);
// Map the physical to virtual address the hsa api
hsa_amd_vmem_alloc_handle_t opaque_hsa_handle;
opaque_hsa_handle.handle = phys_mem_obj->getUserData().hsa_handle;
+3 -1
Просмотреть файл
@@ -146,7 +146,9 @@ class Memory : public amd::RuntimeObject {
};
struct UserData {
int deviceId = 0; //!< Device ID memory is allocated on
int deviceId = 0; //!< Device ID memory is allocated on
int locationType =
0; //!< The type of the location (i.e. device or host) memory is allocated on
void* data = nullptr; //!< Opaque user data from CL or HIP or etc.
amd::Memory* phys_mem_obj = nullptr; //<! Physical mem obj, only set on virtual mem
amd::Memory* vaddr_mem_obj = nullptr; //<! Virtual address mem obj, only set on virtual mem
+119
Просмотреть файл
@@ -59,6 +59,12 @@ static __global__ void square_kernel(int* Buff) {
Buff[i] = temp;
}
// Simple HIP kernel: read from host-backed memory and write to a device buffer
__global__ void copyFromHostMem(const int* hostMem, int* devOut, int N) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < N) devOut[i] = hostMem[i];
}
/**
* Test Description
* ------------------------
@@ -1320,6 +1326,119 @@ TEST_CASE("Unit_hipMemSetAccess_negative") {
CTX_DESTROY();
}
TEST_CASE("Unit_hipMemSetAccessHostDevice_hostalloc") {
// Ensure device 0 is selected
REQUIRE(hipSetDevice(0) == hipSuccess);
// ---- Describe a HOST-backed allocation (NUMA-unaware) ----
hipMemAllocationProp prop{};
prop.type = hipMemAllocationTypePinned; // pinned system RAM
prop.location.type = hipMemLocationTypeHost; // generic host
prop.location.id = 0; // host id must be 0
prop.requestedHandleType = hipMemHandleTypeNone;
constexpr size_t N = 1024;
constexpr size_t bytes = N * sizeof(int);
// get minimum granularity
size_t gran = 0;
HIP_CHECK(hipMemGetAllocationGranularity(&gran, &prop, hipMemAllocationGranularityMinimum));
size_t mapSize = ((bytes + gran - 1) / gran) * gran;
// Create host-backed allocation handle
hipMemGenericAllocationHandle_t handle{};
HIP_CHECK(hipMemCreate(&handle, mapSize, &prop, 0 /*flags*/));
// Reserve VA and map
void* addr = nullptr;
HIP_CHECK(hipMemAddressReserve(&addr, mapSize, 0 /*align*/, 0 /*addr*/, 0 /*flags*/));
HIP_CHECK(hipMemMap(addr, mapSize, 0 /*offset*/, handle, 0 /*flags*/));
// Grant HOST access so the CPU can touch the VA range
hipMemAccessDesc accHost{};
accHost.flags = hipMemAccessFlagsProtReadWrite;
accHost.location.type = hipMemLocationTypeHost;
accHost.location.id = 0;
HIP_CHECK(hipMemSetAccess(addr, mapSize, &accHost, 1));
// Also grant DEVICE access so GPU can read/write it
hipMemAccessDesc accDev{};
accDev.flags = hipMemAccessFlagsProtReadWrite;
accDev.location.type = hipMemLocationTypeDevice;
accDev.location.id = 0;
HIP_CHECK(hipMemSetAccess(addr, mapSize, &accDev, 1));
// ---- CPU can now safely write to the mapping ----
int* hostPtr = reinterpret_cast<int*>(addr);
for (size_t i = 0; i < N; ++i) hostPtr[i] = static_cast<int>(i);
// Device output buffer
int* dOut = nullptr;
HIP_CHECK(hipMalloc(&dOut, bytes));
// Launch kernel to read host memory and write to device buffer
dim3 block(256), grid((N + block.x - 1) / block.x);
hipLaunchKernelGGL(copyFromHostMem, grid, block, 0, 0, reinterpret_cast<const int*>(addr), dOut,
static_cast<int>(N));
HIP_CHECK(hipDeviceSynchronize());
// Verify
std::vector<int> out(N, -1);
HIP_CHECK(hipMemcpy(out.data(), dOut, bytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; ++i) {
REQUIRE(out[i] == static_cast<int>(i));
}
// Cleanup
HIP_CHECK(hipFree(dOut));
HIP_CHECK(hipMemUnmap(addr, mapSize));
HIP_CHECK(hipMemAddressFree(addr, mapSize));
HIP_CHECK(hipMemRelease(handle));
}
TEST_CASE("Unit_hipMemSetAccessHost_devicealloc") {
// Ensure device 0 is selected
REQUIRE(hipSetDevice(0) == hipSuccess);
// ---- Describe a DEVICE-backed allocation
hipMemAllocationProp prop{};
prop.type = hipMemAllocationTypePinned; // pinned system RAM
prop.location.type = hipMemLocationTypeDevice; // generic host
prop.location.id = 0; // host id must be 0
prop.requestedHandleType = hipMemHandleTypeNone;
constexpr size_t N = 1024;
constexpr size_t bytes = N * sizeof(int);
//get minimum granularity
size_t gran = 0;
HIP_CHECK(hipMemGetAllocationGranularity(&gran, &prop, hipMemAllocationGranularityMinimum));
size_t mapSize = ((bytes + gran - 1) / gran) * gran;
// Create host-backed allocation handle
hipMemGenericAllocationHandle_t handle{};
HIP_CHECK(hipMemCreate(&handle, mapSize, &prop, 0 /*flags*/));
// Reserve VA and map
void* addr = nullptr;
HIP_CHECK(hipMemAddressReserve(&addr, mapSize, 0 /*align*/, 0 /*addr*/, 0 /*flags*/));
HIP_CHECK(hipMemMap(addr, mapSize, 0 /*offset*/, handle, 0 /*flags*/));
// Grant HOST access.
hipMemAccessDesc accHost{};
accHost.flags = hipMemAccessFlagsProtReadWrite;
accHost.location.type = hipMemLocationTypeHost;
accHost.location.id = 0;
HIP_CHECK_ERROR(hipMemSetAccess(addr, mapSize, &accHost, 1), hipErrorInvalidValue);
HIP_CHECK(hipMemUnmap(addr, mapSize));
HIP_CHECK(hipMemAddressFree(addr, mapSize));
HIP_CHECK(hipMemRelease(handle));
}
/**
* End doxygen group VirtualMemoryManagementTest.
* @}