SWDEV-547108 - Fix compilation errors under Windows (#867)

Interop and NUMA are not enabled.
Esse commit está contido em:
Andryeyev, German
2025-08-17 02:33:31 -04:00
commit de GitHub
commit 0ac913e64c
14 arquivos alterados com 110 adições e 80 exclusões
+5 -1
Ver Arquivo
@@ -45,7 +45,11 @@ void init(bool* status) {
#if DISABLE_DIRECT_DISPATCH
constexpr bool kDirectDispatch = false;
#else
constexpr bool kDirectDispatch = IS_LINUX;
#ifndef WITHOUT_HSA_BACKEND
constexpr bool kDirectDispatch = true;
#else
constexpr bool kDirectDispatch = false;
#endif
#endif
AMD_DIRECT_DISPATCH = flagIsDefault(AMD_DIRECT_DISPATCH) ? kDirectDispatch : AMD_DIRECT_DISPATCH;
if (!amd::Runtime::init()) {
+3 -1
Ver Arquivo
@@ -239,7 +239,7 @@ struct mesa_glinterop_export_out {
/* Structure version 1 ends here. */
};
#if IS_LINUX
/**
* Query device information.
*
@@ -301,6 +301,8 @@ typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext con
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out);
#endif // IS_LINUX
#ifdef __cplusplus
}
#endif
+1 -1
Ver Arquivo
@@ -2750,7 +2750,7 @@ bool KernelBlitManager::runScheduler(uint64_t vqVM,
sp->child_queue = reinterpret_cast<uint64_t>(schedulerQueue);
sp->complete_signal = gpu().Barriers().ActiveSignal(kInitSignalValueOne, nullptr);
sp->vqueue_header = vqVM;
sp->parentAQL = reinterpret_cast<uint64_t>(aql_wrap);
sp->parentAQL = aql_wrap;
if (dev().info().maxEngineClockFrequency_ > 0) {
sp->eng_clk = (1000 * 1024) / dev().info().maxEngineClockFrequency_;
+7 -16
Ver Arquivo
@@ -56,7 +56,6 @@
#include <iostream>
#include <iomanip>
#include <memory>
#include <sys/resource.h>
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
#include <numa.h>
#include <numaif.h>
@@ -1017,7 +1016,7 @@ bool Sampler::create(const amd::Sampler& owner) {
return false;
}
hwSrd_ = reinterpret_cast<uint64_t>(hsa_sampler.handle);
hwSrd_ = hsa_sampler.handle;
hwState_ = reinterpret_cast<address>(hsa_sampler.handle);
return true;
@@ -1272,8 +1271,7 @@ bool Device::populateOCLDeviceConstants() {
assert(alloc_granularity_ > 0);
} else {
// We suppose half of physical memory can be used by GPU in APU system
info_.globalMemSize_ =
uint64_t(sysconf(_SC_PAGESIZE)) * uint64_t(sysconf(_SC_PHYS_PAGES)) / 2;
info_.globalMemSize_ = amd::Os::hostTotalPhysicalMemory() / 2;
info_.globalMemSize_ = std::max(info_.globalMemSize_, uint64_t(1 * Gi));
info_.globalMemSize_ = (static_cast<uint64_t>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
static_cast<uint64_t>(info_.globalMemSize_)) / 100u;
@@ -3161,19 +3159,16 @@ void Device::releaseQueue(hsa_queue_t* queue, const std::vector<uint32_t>& cuMas
void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue,
const std::vector<uint32_t>& cuMask) {
decltype(queuePool_)::value_type::iterator qIter;
bool found = false;
if (!coop_queue) {
for (auto &it : cuMask.size() == 0 ? queuePool_ : queueWithCUMaskPool_) {
qIter = it.find(queue);
if (qIter != it.end()) {
found = true;
break;
}
}
if (cuMask.size() == 0) {
assert(qIter != queuePool_[QueuePriority::High].end());
} else {
assert(qIter != queueWithCUMaskPool_[QueuePriority::High].end());
}
assert(found && "Couldn't find queue");
if (qIter->second.hostcallBuffer_) {
return qIter->second.hostcallBuffer_;
@@ -3408,9 +3403,7 @@ hsa_status_t Device::BackendErrorCallBackHandler(const hsa_amd_event_t* event, v
}
// Execute the default handler if a GPU core file should be generated ...
struct rlimit rlimit;
if ((getrlimit(RLIMIT_CORE, &rlimit) == 0 && rlimit.rlim_cur != 0) ||
!HIP_SKIP_ABORT_ON_GPU_ERROR) {
if (amd::Os::DumpCoreFile() || !HIP_SKIP_ABORT_ON_GPU_ERROR) {
return HSA_STATUS_ERROR;
}
@@ -3656,9 +3649,7 @@ void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) {
errorMsg, status);
}
struct rlimit rlimit;
if ((getrlimit(RLIMIT_CORE, &rlimit) == 0 && rlimit.rlim_cur != 0) ||
!HIP_SKIP_ABORT_ON_GPU_ERROR) {
if (amd::Os::DumpCoreFile() || !HIP_SKIP_ABORT_ON_GPU_ERROR) {
abort();
}
amd::Device::gpu_error_ = ConvertHSAErrorIntoCLError(status);
+6
Ver Arquivo
@@ -26,8 +26,14 @@
#include <GL/glx.h>
#include <EGL/egl.h>
#else
#include <windows.h>
#include <GL/gl.h>
#include <GL/glext.h>
#include <EGL/egl.h>
#ifndef GLX_H
struct _XDisplay;
struct __GLXcontextRec;
#endif
typedef _XDisplay Display;
typedef __GLXcontextRec* GLXContext;
#endif
+1 -5
Ver Arquivo
@@ -21,7 +21,6 @@
#pragma once
#include <memory>
#include <cxxabi.h>
#include "rocprogram.hpp"
#include "top.hpp"
#include "rocprintf.hpp"
@@ -61,10 +60,7 @@ class Kernel : public device::Kernel {
private:
//! Lazily computes demangled_name_ from name() the first time it is needed.
//! Demangling is delegated to amd::Os::CxaDemangle so this header carries no
//! direct dependency on <cxxabi.h>; that helper is declared as returning the
//! same name when it cannot demangle.
void initDemangledName() {
  // Only populate once: a non-empty demangled_name_ is treated as already initialized.
  if (demangled_name_.empty()) {
    amd::Os::CxaDemangle(name(), &demangled_name_);
  }
}
+9 -4
Ver Arquivo
@@ -204,11 +204,17 @@ void Memory::cpuUnmap(device::VirtualDevice& vDev) {
}
// ================================================================================================
hsa_status_t Memory::interopMapBuffer(int fd) {
hsa_status_t Memory::interopMapBuffer(amd::Os::FileDesc fdn) {
hsa_agent_t agent = dev().getBackendDevice();
size_t size;
size_t metadata_size = 0;
void* metadata;
#if IS_WINDOWS
int fd = 0;
assert(!"Unimplemented");
#else
auto fd = fdn;
#endif
hsa_status_t status = hsa_amd_interop_map_buffer(
1, &agent, fd, 0, &size, &interop_deviceMemory_,
&metadata_size, (const void**)&metadata);
@@ -232,7 +238,7 @@ hsa_status_t Memory::interopMapBuffer(int fd) {
// Setup an interop buffer (dmabuf handle) as an OpenCL buffer
// ================================================================================================
bool Memory::createInteropBuffer(GLenum targetType, int miplevel) {
#if defined(_WIN32)
#if IS_WINDOWS
return false;
#else
assert(owner()->isInterop() && "Object is not an interop object.");
@@ -851,8 +857,7 @@ bool Buffer::create(bool alloc_local) {
return false;
}
deviceMemory_ = const_cast<long int*>(signalValuePtr); // conversion to void * is
// implicit
deviceMemory_ = const_cast<void*>(reinterpret_cast<volatile void*>(signalValuePtr));
// Disable host access to force blit path for memeory writes.
flags_ &= ~HostMemoryDirectAccess;
+1 -1
Ver Arquivo
@@ -128,7 +128,7 @@ class Memory : public device::Memory {
// Free / deregister device memory.
virtual void destroy() = 0;
hsa_status_t interopMapBuffer(int fd);
hsa_status_t interopMapBuffer(amd::Os::FileDesc fdn);
// Place interop object into HSA's flat address space
bool createInteropBuffer(GLenum targetType, int miplevel);
-11
Ver Arquivo
@@ -23,17 +23,6 @@
/*! \addtogroup GPU GPU Device Implementation
* @{
*/
#ifndef isinf
#ifdef _MSC_VER
#define isinf(X) (!_finite(X) && !_isnan(X))
#endif //_MSC_VER
#endif // isinf
#ifndef isnan
#ifdef _MSC_VER
#define isnan(X) (_isnan(X))
#endif //_MSC_VER
#endif // isnan
#ifndef copysign
#ifdef _MSC_VER
+22 -8
Ver Arquivo
@@ -44,6 +44,8 @@
#include <string>
#include <thread>
#include <vector>
#include <atomic>
#include <cinttypes>
#if defined(__AVX__)
#if defined(__MINGW64__)
@@ -925,7 +927,12 @@ uint64_t VirtualGPU::getQueueID() {
// ================================================================================================
// Writes one 32-bit word to *packet with release ordering.
//   header - placed in bits [15:0] of the stored word
//   rest   - placed in bits [31:16] of the stored word
// The word is stored exactly once; the store uses std::atomic_ref when IS_WINDOWS is set
// and the __atomic_store_n builtin otherwise.
static inline void packet_store_release(uint32_t* packet, uint16_t header, uint16_t rest) {
  // Widen before shifting: uint16_t operands are promoted to signed int, so shifting a value
  // with bit 15 set left by 16 would not be representable in int.
  const uint32_t word = static_cast<uint32_t>(header) | (static_cast<uint32_t>(rest) << 16);
#if IS_WINDOWS
  std::atomic_ref<uint32_t> atomic_header(*packet);
  atomic_header.store(word, std::memory_order_release);
#else
  __atomic_store_n(packet, word, __ATOMIC_RELEASE);
#endif
}
// ================================================================================================
@@ -968,12 +975,12 @@ void VirtualGPU::AnalyzeAqlQueue() const {
} else {
printf("VGPU(%p) Queue(%p). Couldn't find kernel\n", this, gpu_queue_);
}
printf("VGPU=%p SWq=%p, HWq=%p, id=%ld\n\tDispatch Header = "
printf("VGPU=%p SWq=%p, HWq=%p, id=%" PRIu64 "\n\tDispatch Header ="
"0x%x (type=%d, barrier=%d, acquire=%d, release=%d), "
"setup=%d\n\tgrid=[%u, %u, %u], workgroup=[%u, %u, %u]\n\tprivate_seg_size=%u, "
"group_seg_size=%u\n\tkernel_obj=0x%lx, "
"kernarg_address=0x%p\n\tcompletion_signal=0x%lx, "
"correlation_id=%lu\n\trptr=%lu, wptr=%lu\n",
"group_seg_size=%u\n\tkernel_obj=0x%" PRIx64 ", "
"kernarg_address=0x%p\n\tcompletion_signal=0x%" PRIx64 ", "
"correlation_id=%" PRIu64 "\n\trptr=%" PRIu64 ", wptr=%" PRIu64 "\n ",
this, gpu_queue_, gpu_queue_->base_address, gpu_queue_->id, header,
extractAqlBits(header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE),
extractAqlBits(header, HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER),
@@ -987,8 +994,8 @@ void VirtualGPU::AnalyzeAqlQueue() const {
packet.kernarg_address, packet.completion_signal.handle, packet.reserved2,
read, index);
} else {
printf("VGPU(%p) Queue(%p) rptr=%lu, wptr=%lu. A barrier packet in the queue!\n",
this, gpu_queue_, read, index);
printf("VGPU(%p) Queue(%p) rptr=%" PRIu64 ", wptr=%" PRIu64
". A barrier packet in the queue!\n", this, gpu_queue_, read, index);
}
} else {
printf("VGPU(%p) Queue(%p) is idle\n", this, gpu_queue_);
@@ -1261,7 +1268,7 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal,
hsa_barrier_and_packet_t* aql_loc =
&(reinterpret_cast<hsa_barrier_and_packet_t*>(gpu_queue_->base_address))[index & queueMask];
*aql_loc = barrier_packet_;
__atomic_store_n(reinterpret_cast<uint32_t*>(aql_loc), packetHeader, __ATOMIC_RELEASE);
packet_store_release(reinterpret_cast<uint32_t*>(aql_loc), packetHeader, 0);
hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index);
ClPrint(amd::LOG_DEBUG, amd::LOG_AQL,
@@ -3224,6 +3231,7 @@ bool VirtualGPU::createVirtualQueue(uint deviceQueueSize)
}
// ================================================================================================
#if IS_LINUX
__attribute__((optimize("unroll-all-loops"), always_inline))
static inline void nontemporalMemcpy(
void* __restrict dst, const void* __restrict src, size_t size) {
@@ -3271,6 +3279,12 @@ static inline void nontemporalMemcpy(
std::memcpy(dst, src, size);
#endif
}
#else
// Fallback used when IS_LINUX is not set: performs an ordinary std::memcpy of `size` bytes
// from `src` to `dst`.
static inline void nontemporalMemcpy(void* __restrict dst, const void* __restrict src,
                                     size_t size) {
  // memcpy returns dst; the value is not needed here.
  static_cast<void>(std::memcpy(dst, src, size));
}
#endif
void VirtualGPU::HiddenHeapInit() { const_cast<Device&>(dev()).HiddenHeapInit(*this); }
+6 -5
Ver Arquivo
@@ -37,16 +37,17 @@
namespace amd {
// ================================================================================================
// Reports whether `desc` holds a usable file descriptor/handle.
//   - IS_WINDOWS builds: valid when the descriptor is not nullptr.
//   - other builds:      valid when the descriptor is strictly greater than zero.
// NOTE(review): descriptor 0 is reported as invalid on the non-Windows path; presumably 0 is
// used as the "unset" value by callers - confirm before relaxing the check.
bool Os::isValidFileDesc(const amd::Os::FileDesc& desc) {
#if IS_WINDOWS
  return desc != nullptr;
#else
  return desc > 0;
#endif
}
// ================================================================================================
void* Os::loadLibrary(const char* libraryname) {
void* handle;
@@ -57,7 +58,7 @@ void* Os::loadLibrary(const char* libraryname) {
namestart = (namestart != std::string::npos) ? namestart + 1 : 0;
if (namestart == 0) {
#if defined(ATI_OS_WIN)
#if IS_WINDOWS
// Try with the path of the current loaded dll(OCL runtime) first
HMODULE hm = NULL;
if (!GetModuleHandleExA(
+7 -1
Ver Arquivo
@@ -332,8 +332,14 @@ class Os : AllStatic {
//! Return the current process id
static int getProcessId();
// Prints the location of the currently loaded library (shared object or DLL)
//! Prints the location of the currently loaded library (shared object or DLL)
static void PrintLibraryLocation();
//! Checks if a core dump must be generated (rocgdb detection). Returns false in Windows
static bool DumpCoreFile();
//! Demangle a C++ name. The function will return the same name if couldn't demangle
static void CxaDemangle(const std::string& name, std::string* demangle);
};
/*@}*/
+35 -26
Ver Arquivo
@@ -39,8 +39,10 @@
#include <pthread.h>
#include <dlfcn.h>
#include <signal.h>
#include <cxxabi.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <link.h>
#include <time.h>
@@ -238,7 +240,8 @@ address Os::reserveMemory(address start, size_t size, size_t alignment, MemProt
if (size >= kLargePageSize) {
int status = madvise(aligned, size, MADV_HUGEPAGE);
if (status) {
ClPrint(amd::LOG_DEBUG, amd::LOG_CODE, "madvise with advice MADV_HUGEPAGE"
ClPrint(amd::LOG_DEBUG, amd::LOG_CODE,
"madvise with advice MADV_HUGEPAGE"
" starting at address %p and page size 0x%zx, returned %d, errno: %s",
aligned, size, status, strerror(errno));
}
@@ -335,7 +338,7 @@ void Os::setPreferredNumaNode(uint32_t node) {
numa_free_cpumask(bm);
}
#endif //ROCCLR_SUPPORT_NUMA_POLICY
#endif // ROCCLR_SUPPORT_NUMA_POLICY
}
void* Thread::entry(Thread* thread) {
@@ -736,8 +739,7 @@ void Os::getAppPathAndFileName(std::string& appName, std::string& appPathAndName
// Get filename without path and extension.
appName = std::string(basename(buff.get()));
appPathAndName = std::string(buff.get());
}
else {
} else {
appName = "";
appPathAndName = "";
}
@@ -747,9 +749,8 @@ void Os::getAppPathAndFileName(std::string& appName, std::string& appPathAndName
bool Os::GetURIFromMemory(const void* image, size_t image_size, std::string& uri) {
pid_t pid = getpid();
std::ostringstream uri_stream;
//Create a unique resource indicator to the memory address
uri_stream << "memory://" << pid
<< "#offset=0x" << std::hex << (uintptr_t)image << std::dec
// Create a unique resource indicator to the memory address
uri_stream << "memory://" << pid << "#offset=0x" << std::hex << (uintptr_t)image << std::dec
<< "&size=" << image_size;
uri = uri_stream.str();
return true;
@@ -757,7 +758,7 @@ bool Os::GetURIFromMemory(const void* image, size_t image_size, std::string& uri
bool Os::CloseFileHandle(FileDesc fdesc) {
// Return false if close system call fails
if(close(fdesc) < 0) {
if (close(fdesc) < 0) {
return false;
}
@@ -776,7 +777,7 @@ bool Os::GetFileHandle(const char* fname, FileDesc* fd_ptr, size_t* sz_ptr) {
return false;
}
//Retrieve stat info and size
// Retrieve stat info and size
if (fstat(*fd_ptr, &stat_buf) != 0) {
close(*fd_ptr);
return false;
@@ -789,7 +790,6 @@ bool Os::GetFileHandle(const char* fname, FileDesc* fd_ptr, size_t* sz_ptr) {
bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr,
size_t* foffset_ptr) {
// Get the list of mapped file list
bool ret_value = false;
std::ifstream proc_maps;
@@ -804,9 +804,7 @@ bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr,
char dash;
std::stringstream tokens(line);
uintptr_t low_address, high_address;
tokens >> std::hex >> low_address >> std::dec
>> dash
>> std::hex >> high_address >> std::dec;
tokens >> std::hex >> low_address >> std::dec >> dash >> std::hex >> high_address >> std::dec;
if (dash != '-') {
continue;
}
@@ -818,10 +816,7 @@ bool amd::Os::FindFileNameFromAddress(const void* image, std::string* fname_ptr,
std::string permissions, device, uri_file_path;
size_t offset;
uint64_t inode;
tokens >> permissions
>> std::hex >> offset >> std::dec
>> device
>> inode;
tokens >> permissions >> std::hex >> offset >> std::dec >> device >> inode;
std::getline(tokens >> std::ws, uri_file_path);
if (inode == 0 || uri_file_path.empty()) {
@@ -870,7 +865,7 @@ bool Os::MemoryMapFile(const char* fname, const void** mmap_ptr, size_t* mmap_si
struct stat stat_buf;
int fd = open(fname, O_RDONLY);
if (fd < 0 ) {
if (fd < 0) {
return false;
}
@@ -897,15 +892,15 @@ bool Os::MemoryMapFileTruncated(const char* fname, const void** mmap_ptr, size_t
}
struct stat stat_buf;
int fd = shm_open(fname, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO);
if (fd < 0 ) {
int fd = shm_open(fname, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
if (fd < 0) {
return false;
}
if (ftruncate(fd, mmap_size) != 0) {
return false;
}
*mmap_ptr = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
*mmap_ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
close(fd);
@@ -916,13 +911,11 @@ bool Os::MemoryMapFileTruncated(const char* fname, const void** mmap_ptr, size_t
return true;
}
// Returns the id of the calling process as reported by ::getpid().
int Os::getProcessId() { return ::getpid(); }
// ================================================================================================
void* Os::CreateIpcMemory(const char* fname, size_t size, FileDesc* desc) {
*desc = shm_open(fname, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO);
*desc = shm_open(fname, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
if (*desc < 0) {
return nullptr;
}
@@ -940,7 +933,7 @@ void* Os::CreateIpcMemory(const char* fname, size_t size, FileDesc* desc) {
void* Os::OpenIpcMemory(const char* fname, const FileDesc desc, size_t size) {
FileDesc handle = desc;
if (fname != nullptr) {
handle = shm_open(fname, O_RDWR, S_IRWXU|S_IRWXG|S_IRWXO);
handle = shm_open(fname, O_RDWR, S_IRWXU | S_IRWXG | S_IRWXO);
}
if (handle < 0) {
@@ -961,6 +954,7 @@ void Os::CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size) {
}
}
// ================================================================================================
void Os::PrintLibraryLocation() {
Dl_info dl_info;
if (dladdr(reinterpret_cast<void*>(Os::loadLibrary), &dl_info) && dl_info.dli_fname) {
@@ -970,6 +964,21 @@ void Os::PrintLibraryLocation() {
}
}
// ================================================================================================
// Reports whether a core file may be generated: true only when getrlimit(RLIMIT_CORE)
// succeeds and the current (soft) limit is non-zero.
bool Os::DumpCoreFile() {
  struct rlimit core_limit;
  if (getrlimit(RLIMIT_CORE, &core_limit) != 0) {
    // The limit could not be queried.
    return false;
  }
  return core_limit.rlim_cur != 0;
}
// ================================================================================================
// Demangles `name` with abi::__cxa_demangle and stores the outcome in *result.
// When demangling fails (non-zero status or null buffer) *result receives `name` unchanged.
void Os::CxaDemangle(const std::string& name, std::string* result) {
  int rc = 0;
  char* buffer = abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &rc);
  const bool demangled_ok = (rc == 0) && (buffer != nullptr);
  if (demangled_ok) {
    *result = buffer;
  } else {
    *result = name;
  }
  // The buffer returned by __cxa_demangle must be released with free(); free(nullptr) is a no-op.
  free(buffer);
}
} // namespace amd
#endif // !defined(_WIN32) && !defined(__CYGWIN__)
+7
Ver Arquivo
@@ -735,6 +735,7 @@ void Os::CloseIpcMemory(const FileDesc desc, const void* ptr, size_t size) {
}
}
// ================================================================================================
void Os::PrintLibraryLocation() {
HMODULE hm = NULL;
if (GetModuleHandleExA(
@@ -749,6 +750,12 @@ void Os::PrintLibraryLocation() {
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HIP Library Path: <unknown>");
}
// ================================================================================================
// Windows/Cygwin implementation: a core file is never requested, so this always returns false.
bool Os::DumpCoreFile() {
  return false;
}
// ================================================================================================
// Windows/Cygwin implementation: no demangling is attempted; `name` is copied into *result as is.
void Os::CxaDemangle(const std::string& name, std::string* result) {
  result->assign(name);
}
} // namespace amd
#endif // _WIN32 || __CYGWIN__