rocr: Improve memory protection and WSL compatibility (#2274)

* rocr: Add ProtectMemory API and use it in RemoveAccess
Replace munmap + mmap with mprotect when removing memory access.
This improves performance by 5-10x, ensures atomicity (no race
condition window), and prepares for WSL/DXG compatibility fixes.

Suggested-by: David Yat Sin <David.YatSin@amd.com>
Signed-off-by: Flora Cui <flora.cui@amd.com>
Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>

* rocr: Skip CPU mapping operations on WSL
On WSL, CPU cannot access GPU VRAM due to platform restrictions.
CPU access would fault-in system RAM instead, causing data corruption
and memory leaks. Return HSA_STATUS_ERROR to fail fast rather than
silently creating broken mappings. GPU-to-GPU mappings remain functional.

Signed-off-by: Flora Cui <flora.cui@amd.com>
Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>

* rocr: reduce ifdef linux
v2: Fix IsDXG check logic

Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>

---------
Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Signed-off-by: Flora Cui <flora.cui@amd.com>
This commit is contained in:
hongkzha-amd
2026-01-14 02:08:20 +08:00
zatwierdzone przez GitHub
rodzic 2e8c863341
commit b3c4e94e70
4 zmienionych plików z 31 dodań i 14 usunięć
@@ -3810,11 +3810,13 @@ Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() {
hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) {
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
if (!core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) {
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
return HSA_STATUS_ERROR;
}
#if defined(__linux__)
if (core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) return HSA_STATUS_ERROR;
#endif
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
return HSA_STATUS_ERROR;
}
} else {
hsa_status_t status = targetAgent->driver().Map(
@@ -3829,12 +3831,11 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
if (permissions != HSA_ACCESS_PERMISSION_NONE) {
#if defined(__linux__)
if (core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) return HSA_STATUS_ERROR;
#endif
hsa_access_permission_t perms = HSA_ACCESS_PERMISSION_NONE;
if (!rocr::os::UnmapMemory(va, size)) {
return HSA_STATUS_ERROR;
}
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
if (!rocr::os::ProtectMemory(va, size, PermissionsToMemProt(perms))) {
return HSA_STATUS_ERROR;
}
permissions = perms;
@@ -3855,17 +3856,19 @@ Runtime::MappedHandle::MappedHandle(MemoryHandle *mem_handle, AddressHandle *add
{
/* Create a CPU mapping with PROT_NONE */
#if defined(__linux__)
if (core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) return;
#endif
auto cpu_agent = static_cast<AMD::GpuAgent*>(agentOwner())->GetNearestCpuAgent();
auto agentPermsIt = allowed_agents.emplace(std::piecewise_construct,
std::forward_as_tuple(cpu_agent),
std::forward_as_tuple(this, cpu_agent, va,
size, HSA_ACCESS_PERMISSION_NONE))
std::forward_as_tuple(cpu_agent),
std::forward_as_tuple(this, cpu_agent, va,
size, HSA_ACCESS_PERMISSION_NONE))
.first;
auto ret = agentPermsIt->second.EnableAccess(HSA_ACCESS_PERMISSION_NONE);
if (ret != HSA_STATUS_SUCCESS)
throw AMD::hsa_exception(ret, "Failed to create default CPU mapping");
#endif
}
// Note: VMemorySetAccessPerHandle should be called with &memory_lock_ held
@@ -930,6 +930,10 @@ bool UncommitMemory(void* addr, size_t size) {
0) != MAP_FAILED;
}
bool ProtectMemory(void* va, size_t size, MemProt perms) {
return ::mprotect(va, size, MemProtToOsProt(perms)) == 0;
}
uint64_t HostTotalPhysicalMemory() {
static uint64_t totalPhys = 0;
@@ -355,6 +355,8 @@ bool UncommitMemory(void* addr, size_t size);
bool UnmapMemory(void* addr, size_t size);
bool MapMemory(void* addr, size_t size, MemProt prot, int fd, uint64_t cpu_addr);
bool ProtectMemory(void* va, size_t size, MemProt perms);
uint64_t HostTotalPhysicalMemory();
/// Find First Set for any OS
@@ -470,6 +470,14 @@ bool MapMemory(void* addr, size_t size, MemProt perms, int fd [[maybe_unused]],
return VirtualProtect(addr, size, memProtToOsProt(perms), &OldProtect) != 0;
}
bool ProtectMemory(void* va, size_t size, MemProt perms) {
if (perms == MEM_PROT_NONE) {
return UncommitMemory(addr, size);
}
DWORD oldProt;
return VirtualProtect(va, size, memProtToOsProt(perms), &oldProt) != 0;
}
int Ffs(int i) {
int res = 0;
unsigned long index;