rocr: Improve memory protection and WSL compatibility (#2274)
* rocr: Add ProtectMemory API and use it in RemoveAccess

  Replace munmap + mmap with mprotect when removing memory access. This improves performance by 5-10x, ensures atomicity (no race condition window), and prepares for WSL/DXG compatibility fixes.

  Suggested-by: David Yat Sin <David.YatSin@amd.com>
  Signed-off-by: Flora Cui <flora.cui@amd.com>
  Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>

* rocr: Skip CPU mapping operations on WSL

  On WSL, CPU cannot access GPU VRAM due to platform restrictions. CPU access would fault-in system RAM instead, causing data corruption and memory leaks. Return HSA_STATUS_ERROR to fail fast rather than silently creating broken mappings. GPU-to-GPU mappings remain functional.

  Signed-off-by: Flora Cui <flora.cui@amd.com>
  Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>

* rocr: reduce ifdef linux

  v2: Fix IsDXG check logic

  Signed-off-by: David Yat Sin <David.YatSin@amd.com>
  Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>

---------

Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Signed-off-by: Flora Cui <flora.cui@amd.com>
This commit is contained in:
zatwierdzone przez
GitHub
rodzic
2e8c863341
commit
b3c4e94e70
@@ -3810,11 +3810,13 @@ Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() {
|
||||
|
||||
hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) {
|
||||
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
|
||||
if (!core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) {
|
||||
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
|
||||
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
#if defined(__linux__)
|
||||
if (core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) return HSA_STATUS_ERROR;
|
||||
#endif
|
||||
|
||||
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
|
||||
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
} else {
|
||||
hsa_status_t status = targetAgent->driver().Map(
|
||||
@@ -3829,12 +3831,11 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi
|
||||
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
|
||||
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
|
||||
if (permissions != HSA_ACCESS_PERMISSION_NONE) {
|
||||
#if defined(__linux__)
|
||||
if (core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) return HSA_STATUS_ERROR;
|
||||
#endif
|
||||
hsa_access_permission_t perms = HSA_ACCESS_PERMISSION_NONE;
|
||||
if (!rocr::os::UnmapMemory(va, size)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
if (!rocr::os::MapMemory(va, size, PermissionsToMemProt(perms), mappedHandle->drm_fd,
|
||||
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr))) {
|
||||
if (!rocr::os::ProtectMemory(va, size, PermissionsToMemProt(perms))) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
permissions = perms;
|
||||
@@ -3855,17 +3856,19 @@ Runtime::MappedHandle::MappedHandle(MemoryHandle *mem_handle, AddressHandle *add
|
||||
{
|
||||
/* Create a CPU mapping with PROT_NONE */
|
||||
#if defined(__linux__)
|
||||
if (core::Runtime::runtime_singleton_->thunkLoader()->IsDXG()) return;
|
||||
#endif
|
||||
|
||||
auto cpu_agent = static_cast<AMD::GpuAgent*>(agentOwner())->GetNearestCpuAgent();
|
||||
auto agentPermsIt = allowed_agents.emplace(std::piecewise_construct,
|
||||
std::forward_as_tuple(cpu_agent),
|
||||
std::forward_as_tuple(this, cpu_agent, va,
|
||||
size, HSA_ACCESS_PERMISSION_NONE))
|
||||
std::forward_as_tuple(cpu_agent),
|
||||
std::forward_as_tuple(this, cpu_agent, va,
|
||||
size, HSA_ACCESS_PERMISSION_NONE))
|
||||
.first;
|
||||
|
||||
auto ret = agentPermsIt->second.EnableAccess(HSA_ACCESS_PERMISSION_NONE);
|
||||
if (ret != HSA_STATUS_SUCCESS)
|
||||
throw AMD::hsa_exception(ret, "Failed to create default CPU mapping");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Note: VMemorySetAccessPerHandle should be called with &memory_lock_ held
|
||||
|
||||
@@ -930,6 +930,10 @@ bool UncommitMemory(void* addr, size_t size) {
|
||||
0) != MAP_FAILED;
|
||||
}
|
||||
|
||||
bool ProtectMemory(void* va, size_t size, MemProt perms) {
|
||||
return ::mprotect(va, size, MemProtToOsProt(perms)) == 0;
|
||||
}
|
||||
|
||||
uint64_t HostTotalPhysicalMemory() {
|
||||
static uint64_t totalPhys = 0;
|
||||
|
||||
|
||||
@@ -355,6 +355,8 @@ bool UncommitMemory(void* addr, size_t size);
|
||||
bool UnmapMemory(void* addr, size_t size);
|
||||
bool MapMemory(void* addr, size_t size, MemProt prot, int fd, uint64_t cpu_addr);
|
||||
|
||||
/// Change the access protection of the range [va, va + size) to perms.
/// Returns true on success, false on failure.
bool ProtectMemory(void* va, size_t size, MemProt perms);
|
||||
|
||||
uint64_t HostTotalPhysicalMemory();
|
||||
|
||||
/// Find First Set for any OS
|
||||
|
||||
@@ -470,6 +470,14 @@ bool MapMemory(void* addr, size_t size, MemProt perms, int fd [[maybe_unused]],
|
||||
return VirtualProtect(addr, size, memProtToOsProt(perms), &OldProtect) != 0;
|
||||
}
|
||||
|
||||
bool ProtectMemory(void* va, size_t size, MemProt perms) {
|
||||
if (perms == MEM_PROT_NONE) {
|
||||
return UncommitMemory(addr, size);
|
||||
}
|
||||
DWORD oldProt;
|
||||
return VirtualProtect(va, size, memProtToOsProt(perms), &oldProt) != 0;
|
||||
}
|
||||
|
||||
int Ffs(int i) {
|
||||
int res = 0;
|
||||
unsigned long index;
|
||||
|
||||
Reference in New Issue
Block a user