rocr: Fix Windows build and Ctz implementation (#1634)
このコミットが含まれているのは:
@@ -377,6 +377,7 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
|
||||
//track the exporter BO to clear meta data via set_metadata
|
||||
//clear the set metadata here if possible if theres an existing ldrm_bo
|
||||
if (it->second.ldrm_bo) {
|
||||
#if defined(__linux__)
|
||||
struct amdgpu_bo_info info = {0};
|
||||
auto err = amdgpu_bo_query_info(it->second.ldrm_bo, &info);
|
||||
|
||||
@@ -384,6 +385,9 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
|
||||
amdgpu_bo_metadata zero_metadata = {0};
|
||||
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
|
||||
amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata);
|
||||
#else
|
||||
assert(!"Unimplemented!");
|
||||
#endif
|
||||
}
|
||||
|
||||
allocation_map_.erase(it);
|
||||
@@ -1402,6 +1406,7 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
if (agent->device_type() == Agent::kAmdGpuDevice) {
|
||||
#if defined(__linux__)
|
||||
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
|
||||
amdgpu_bo_import_result res;
|
||||
|
||||
@@ -1426,6 +1431,9 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
|
||||
amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
|
||||
allocation_map_[ptr].ldrm_bo = res.buf_handle;
|
||||
}
|
||||
#else
|
||||
assert(!"Unimplemented!");
|
||||
#endif
|
||||
}
|
||||
|
||||
close(dmabuf_fd);
|
||||
@@ -1587,6 +1595,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
|
||||
|
||||
if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
if (ipc_dmabuf_supported_ && !isSysMem) {
|
||||
#if defined(__linux__)
|
||||
// use the bo from the allocation map
|
||||
// Only check metadata for GPU memory
|
||||
struct amdgpu_bo_info info = {0};
|
||||
@@ -1597,6 +1606,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
|
||||
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
#else
|
||||
assert(!"Unimplemented!");
|
||||
#endif
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
@@ -3777,6 +3789,7 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi
|
||||
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
|
||||
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
|
||||
if (permissions != HSA_ACCESS_PERMISSION_NONE) {
|
||||
#if defined(__linux__)
|
||||
if (munmap(va, size) != 0) return HSA_STATUS_ERROR;
|
||||
|
||||
/* We need to keep the CPU mapping. So change it to PROT_NONE */
|
||||
@@ -3787,6 +3800,10 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
permissions = HSA_ACCESS_PERMISSION_NONE;
|
||||
#else
|
||||
assert(!"Unimplemented!");
|
||||
#endif
|
||||
|
||||
}
|
||||
} else {
|
||||
return targetAgent->driver().Unmap(
|
||||
|
||||
@@ -470,8 +470,8 @@ int Ffs(int i) {
|
||||
|
||||
int Ctz(uint64_t i) {
|
||||
unsigned long index;
|
||||
if (_BitScanReverse64(&index, i)) {
|
||||
return sizeof(i) * 8 - 1 - index;
|
||||
if (_BitScanForward64(&index, i)) {
|
||||
return index;
|
||||
} else {
|
||||
return sizeof(i) * 8;
|
||||
}
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする