diff --git a/rocclr/runtime/device/rocm/pro/lnxheaders.h b/rocclr/runtime/device/rocm/pro/lnxheaders.h index 9929d40c04..a5d5d23325 100644 --- a/rocclr/runtime/device/rocm/pro/lnxheaders.h +++ b/rocclr/runtime/device/rocm/pro/lnxheaders.h @@ -22,6 +22,7 @@ extern "C" #include #include #include +#include constexpr int32_t InvalidFd = -1; // value representing a invalid file descriptor for Linux diff --git a/rocclr/runtime/device/rocm/pro/prodevice.cpp b/rocclr/runtime/device/rocm/pro/prodevice.cpp index d00ee6415d..c45b318e39 100644 --- a/rocclr/runtime/device/rocm/pro/prodevice.cpp +++ b/rocclr/runtime/device/rocm/pro/prodevice.cpp @@ -14,8 +14,17 @@ namespace roc { constexpr uint32_t kMaxDevices = 32; constexpr uint32_t kAtiVendorId = 0x1002; +void* ProDevice::lib_drm_handle_ = nullptr; +bool ProDevice::initialized_ = false; +drm::Funcs ProDevice::funcs_; + IProDevice* IProDevice::Init(uint32_t bus, uint32_t dev, uint32_t func) { + // Make sure DRM lib is initialized + if (!ProDevice::DrmInit()) { + return nullptr; + } + ProDevice* pro_device = new ProDevice(); if (pro_device == nullptr || !pro_device->Create(bus, dev, func)) { @@ -29,13 +38,58 @@ ProDevice::~ProDevice() { delete alloc_ops_; if (dev_handle_ != nullptr) { - amdgpu_device_deinitialize(dev_handle_); + Funcs().AmdgpuDeviceDeinitialize(dev_handle_); } if (file_desc_ > 0) { close(file_desc_); } } +bool ProDevice::DrmInit() +{ + if (initialized_ == false) { + // Find symbols in libdrm_amdgpu.so.1 + lib_drm_handle_ = dlopen("libdrm_amdgpu.so.1", RTLD_NOW); + if (lib_drm_handle_ == nullptr) { + return false; + } else { + funcs_.DrmGetDevices = reinterpret_cast(dlsym( + lib_drm_handle_, + "drmGetDevices")); + funcs_.AmdgpuDeviceInitialize = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_device_initialize")); + funcs_.AmdgpuDeviceDeinitialize = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_device_deinitialize")); + funcs_.AmdgpuQueryGpuInfo = reinterpret_cast(dlsym( + lib_drm_handle_, + 
"amdgpu_query_gpu_info")); + funcs_.AmdgpuQueryInfo = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_query_info")); + funcs_.AmdgpuBoAlloc = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_bo_alloc")); + funcs_.AmdgpuBoExport = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_bo_export")); + funcs_.AmdgpuBoFree = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_bo_free")); + funcs_.AmdgpuBoCpuMap = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_bo_cpu_map")); + funcs_.AmdgpuBoCpuUnmap = reinterpret_cast(dlsym( + lib_drm_handle_, + "amdgpu_bo_cpu_unmap")); + } + } + + initialized_ = true; + return true; +} + #ifndef AMDGPU_CAPABILITY_SSG_FLAG #define AMDGPU_CAPABILITY_SSG_FLAG 4 #endif @@ -44,8 +98,8 @@ ProDevice::~ProDevice() { // Open drm device and initialize it. And also get the drm information. bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) { drmDevicePtr devices[kMaxDevices] = { }; - int32_t device_count = drmGetDevices(devices, kMaxDevices); - bool result = false; + int32_t device_count = Funcs().DrmGetDevices(devices, kMaxDevices); + bool result = false; for (int32_t i = 0; i < device_count; i++) { // Check if the device vendor is AMD @@ -54,7 +108,7 @@ bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) { } if ((devices[i]->businfo.pci->bus == bus) && (devices[i]->businfo.pci->dev == device) && - (devices[i]->businfo.pci->func == func)) { + (devices[i]->businfo.pci->func == func)) { // pDevices[i]->nodes[DRM_NODE_PRIMARY]; // Using render node here so that we can do the off-screen rendering without authentication @@ -64,14 +118,14 @@ bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) { void* data, *file, *cap; // Initialize the admgpu device. 
- if (amdgpu_device_initialize(file_desc_, &major_ver_, - &minor_ver_, &dev_handle_) == 0) { + if (Funcs().AmdgpuDeviceInitialize(file_desc_, &major_ver_, + &minor_ver_, &dev_handle_) == 0) { uint32_t version = 0; // amdgpu_query_gpu_info will never fail only if it is initialized - amdgpu_query_gpu_info(dev_handle_, &gpu_info_); + Funcs().AmdgpuQueryGpuInfo(dev_handle_, &gpu_info_); drm_amdgpu_capability cap = {}; - amdgpu_query_info(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap); + Funcs().AmdgpuQueryInfo(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap); // Check if DGMA and SSG are available if ((cap.flag & (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) == @@ -107,34 +161,34 @@ void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) req.preferred_heap = AMDGPU_GEM_DOMAIN_DGMA; // Allocate buffer in DGMA heap - if (0 == amdgpu_bo_alloc(dev_handle_, &req, &buf_handle)) { + if (0 == Funcs().AmdgpuBoAlloc(dev_handle_, &req, &buf_handle)) { amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd; uint32_t shared_handle = 0; // Find the base driver handle - if (0 == amdgpu_bo_export(buf_handle, type, &shared_handle)) { + if (0 == Funcs().AmdgpuBoExport(buf_handle, type, &shared_handle)) { uint32_t flags = 0; size_t buf_size = 0; // Map memory object to HSA device if (0 == hsa_amd_interop_map_buffer(1, &agent, shared_handle, flags, &buf_size, &ptr, nullptr, nullptr)) { // Ask GPUPro driver to provide CPU access to allocation - if (0 == amdgpu_bo_cpu_map(buf_handle, host_ptr)) { + if (0 == Funcs().AmdgpuBoCpuMap(buf_handle, host_ptr)) { allocs_.insert(std::pair>( ptr, std::pair(buf_handle, shared_handle))); } else { hsa_amd_interop_unmap_buffer(ptr); close(shared_handle); - amdgpu_bo_free(buf_handle); + Funcs().AmdgpuBoFree(buf_handle); } } else { close(shared_handle); - amdgpu_bo_free(buf_handle); + Funcs().AmdgpuBoFree(buf_handle); } } else { - 
amdgpu_bo_free(buf_handle); + Funcs().AmdgpuBoFree(buf_handle); } } @@ -146,12 +200,12 @@ void ProDevice::FreeDmaBuffer(void* ptr) const amd::ScopedLock l(alloc_ops_); auto it = allocs_.find(ptr); if (it != allocs_.end()) { - amdgpu_bo_cpu_unmap(it->second.first); + Funcs().AmdgpuBoCpuUnmap(it->second.first); // Unmap memory from HSA device hsa_amd_interop_unmap_buffer(ptr); // Close shared handle close(it->second.second); - int error = amdgpu_bo_free(it->second.first); + int error = Funcs().AmdgpuBoFree(it->second.first); allocs_.erase(it); } } diff --git a/rocclr/runtime/device/rocm/pro/prodevice.hpp b/rocclr/runtime/device/rocm/pro/prodevice.hpp index 280d34c0d2..8e0b3370ce 100644 --- a/rocclr/runtime/device/rocm/pro/prodevice.hpp +++ b/rocclr/runtime/device/rocm/pro/prodevice.hpp @@ -6,6 +6,7 @@ #ifndef WITHOUT_HSA_BACKEND +#include "profuncs.hpp" #include "prodriver.hpp" #include "thread/monitor.hpp" #include @@ -19,6 +20,8 @@ namespace roc { class ProDevice : public IProDevice { public: + static bool DrmInit(); + ProDevice() : file_desc_(0) , major_ver_(0) @@ -34,6 +37,11 @@ public: virtual void FreeDmaBuffer(void* ptr) const override; private: + static void* lib_drm_handle_; + static bool initialized_; + static drm::Funcs funcs_; + const drm::Funcs& Funcs() const { return funcs_; } + int32_t file_desc_; //!< File descriptor for the device uint32_t major_ver_; //!< Major driver version uint32_t minor_ver_; //!< Minor driver version diff --git a/rocclr/runtime/device/rocm/pro/profuncs.hpp b/rocclr/runtime/device/rocm/pro/profuncs.hpp new file mode 100644 index 0000000000..9fb436f455 --- /dev/null +++ b/rocclr/runtime/device/rocm/pro/profuncs.hpp @@ -0,0 +1,68 @@ +// +// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. 
+//
+#pragma once
+
+namespace roc
+{
+namespace drm
+{
+// Function-pointer types for the libdrm / libdrm_amdgpu entry points that
+// ProDevice::DrmInit() resolves with dlsym(); the comment above each alias names
+// the C symbol it is bound to. This header includes nothing itself, so
+// drmDevicePtr, the amdgpu_* types and uint32_t must already be declared.
+
+// drmGetDevices
+using DrmGetDevices = int (*)(drmDevicePtr* pDevices, int maxDevices);
+
+// amdgpu_device_initialize
+using AmdgpuDeviceInitialize = int (*)(int fd, uint32_t* pMajorVersion, uint32_t* pMinorVersion,
+                                       amdgpu_device_handle* pDeviceHandle);
+
+// amdgpu_device_deinitialize
+using AmdgpuDeviceDeinitialize = int (*)(amdgpu_device_handle hDevice);
+
+// amdgpu_query_gpu_info
+using AmdgpuQueryGpuInfo = int (*)(amdgpu_device_handle hDevice, struct amdgpu_gpu_info* pInfo);
+
+// amdgpu_query_info
+using AmdgpuQueryInfo = int (*)(amdgpu_device_handle hDevice, unsigned infoId, unsigned size,
+                                void* pValue);
+
+// amdgpu_bo_alloc
+using AmdgpuBoAlloc = int (*)(amdgpu_device_handle hDevice,
+                              struct amdgpu_bo_alloc_request* pAllocBuffer,
+                              amdgpu_bo_handle* pBufferHandle);
+
+// amdgpu_bo_export
+using AmdgpuBoExport = int (*)(amdgpu_bo_handle hBuffer, enum amdgpu_bo_handle_type type,
+                               uint32_t* pFd);
+
+// amdgpu_bo_free
+using AmdgpuBoFree = int (*)(amdgpu_bo_handle hBuffer);
+
+// amdgpu_bo_cpu_map
+using AmdgpuBoCpuMap = int (*)(amdgpu_bo_handle hBuffer, void** ppCpuAddress);
+
+// amdgpu_bo_cpu_unmap
+using AmdgpuBoCpuUnmap = int (*)(amdgpu_bo_handle hBuffer);
+
+// Table of the resolved entry points. Member types are namespace-qualified: each
+// member shares its name with its alias, and an unqualified type name would change
+// meaning inside the class, which is ill-formed (GCC: "changes meaning of ...").
+struct Funcs
+{
+    drm::DrmGetDevices            DrmGetDevices;
+    drm::AmdgpuDeviceInitialize   AmdgpuDeviceInitialize;
+    drm::AmdgpuDeviceDeinitialize AmdgpuDeviceDeinitialize;
+    drm::AmdgpuQueryGpuInfo       AmdgpuQueryGpuInfo;
+    drm::AmdgpuQueryInfo          AmdgpuQueryInfo;
+    drm::AmdgpuBoAlloc            AmdgpuBoAlloc;
+    drm::AmdgpuBoExport           AmdgpuBoExport;
+    drm::AmdgpuBoFree             AmdgpuBoFree;
+    drm::AmdgpuBoCpuMap           AmdgpuBoCpuMap;
+    drm::AmdgpuBoCpuUnmap         AmdgpuBoCpuUnmap;
+};
+
+} //namespace drm
+} //namespace roc