rocr: Fix Windows build and Ctz implementation (#1634)

このコミットが含まれているのは:
German Andryeyev
2025-11-03 12:07:11 -05:00
committed by GitHub
コミット ee1158b7b8
2個のファイルの変更19行の追加2行の削除
+17
ファイルの表示
@@ -377,6 +377,7 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
//track the exporter BO to clear meta data via set_metadata
//clear the set metadata here if possible if theres an existing ldrm_bo
if (it->second.ldrm_bo) {
#if defined(__linux__)
struct amdgpu_bo_info info = {0};
auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info);
@@ -384,6 +385,9 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
amdgpu_bo_metadata zero_metadata = {0};
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata);
#else
assert(!"Unimplemented!");
#endif
}
allocation_map_.erase(it);
@@ -1402,6 +1406,7 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
if (err != HSA_STATUS_SUCCESS) return err;
if (agent->device_type() == Agent::kAmdGpuDevice) {
#if defined(__linux__)
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
amdgpu_bo_import_result res;
@@ -1426,6 +1431,9 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
allocation_map_[ptr].ldrm_bo = res.buf_handle;
}
#else
assert(!"Unimplemented!");
#endif
}
close(dmabuf_fd);
@@ -1587,6 +1595,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
if (ipc_dmabuf_supported_ && !isSysMem) {
#if defined(__linux__)
// use the bo from the allocation map
// Only check metadata for GPU memory
struct amdgpu_bo_info info = {0};
@@ -1597,6 +1606,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
#else
assert(!"Unimplemented!");
#endif
}
return HSA_STATUS_SUCCESS;
};
@@ -3777,6 +3789,7 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
if (permissions != HSA_ACCESS_PERMISSION_NONE) {
#if defined(__linux__)
if (munmap(va, size) != 0) return HSA_STATUS_ERROR;
/* We need to keep the CPU mapping. So change it to PROT_NONE */
@@ -3787,6 +3800,10 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
return HSA_STATUS_ERROR;
permissions = HSA_ACCESS_PERMISSION_NONE;
#else
assert(!"Unimplemented!");
#endif
}
} else {
return targetAgent->driver().Unmap(
+2 -2
ファイルの表示
@@ -470,8 +470,8 @@ int Ffs(int i) {
int Ctz(uint64_t i) {
unsigned long index;
if (_BitScanReverse64(&index, i)) {
return sizeof(i) * 8 - 1 - index;
if (_BitScanForward64(&index, i)) {
return index;
} else {
return sizeof(i) * 8;
}