From 7fe63d653c06072fc34760b3b9efabe5bea8df4f Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 20 Jun 2017 17:27:45 -0400
Subject: [PATCH] P4 to Git Change 1424893 by gandryey@gera-w8 on 2017/06/20
17:18:40
SWDEV-79445 - OCL generic changes and code clean-up
- Remove build dependency from libdrm and libdrm-amdgpu libs
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#157 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/lnxheaders.h#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/profuncs.hpp#1 add
---
rocclr/runtime/device/rocm/pro/lnxheaders.h | 1 +
rocclr/runtime/device/rocm/pro/prodevice.cpp | 86 ++++++++++++++++----
rocclr/runtime/device/rocm/pro/prodevice.hpp | 8 ++
rocclr/runtime/device/rocm/pro/profuncs.hpp | 68 ++++++++++++++++
4 files changed, 147 insertions(+), 16 deletions(-)
create mode 100644 rocclr/runtime/device/rocm/pro/profuncs.hpp
diff --git a/rocclr/runtime/device/rocm/pro/lnxheaders.h b/rocclr/runtime/device/rocm/pro/lnxheaders.h
index 9929d40c04..a5d5d23325 100644
--- a/rocclr/runtime/device/rocm/pro/lnxheaders.h
+++ b/rocclr/runtime/device/rocm/pro/lnxheaders.h
@@ -22,6 +22,7 @@ extern "C"
#include
#include
#include
+#include
constexpr int32_t InvalidFd = -1; // value representing a invalid file descriptor for Linux
diff --git a/rocclr/runtime/device/rocm/pro/prodevice.cpp b/rocclr/runtime/device/rocm/pro/prodevice.cpp
index d00ee6415d..c45b318e39 100644
--- a/rocclr/runtime/device/rocm/pro/prodevice.cpp
+++ b/rocclr/runtime/device/rocm/pro/prodevice.cpp
@@ -14,8 +14,17 @@ namespace roc {
constexpr uint32_t kMaxDevices = 32;
constexpr uint32_t kAtiVendorId = 0x1002;
+void* ProDevice::lib_drm_handle_ = nullptr;  // handle returned by dlopen("libdrm_amdgpu.so.1") in DrmInit()
+bool ProDevice::initialized_ = false;  // set to true at the end of a DrmInit() call that loaded the library
+drm::Funcs ProDevice::funcs_;  // DRM/amdgpu entry points that DrmInit() looks up with dlsym()
+
IProDevice* IProDevice::Init(uint32_t bus, uint32_t dev, uint32_t func)
{
+ // Make sure DRM lib is initialized
+ if (!ProDevice::DrmInit()) {
+ return nullptr;
+ }
+
ProDevice* pro_device = new ProDevice();
if (pro_device == nullptr || !pro_device->Create(bus, dev, func)) {
@@ -29,13 +38,58 @@ ProDevice::~ProDevice() {
delete alloc_ops_;
if (dev_handle_ != nullptr) {
- amdgpu_device_deinitialize(dev_handle_);
+ Funcs().AmdgpuDeviceDeinitialize(dev_handle_);
}
if (file_desc_ > 0) {
close(file_desc_);
}
}
+// Loads libdrm_amdgpu.so.1 on first use and resolves every entry point stored in funcs_.
+// Returns true once all symbols are bound; false if the library or any symbol is missing.
+bool ProDevice::DrmInit()
+{
+    if (initialized_) {
+        return true;
+    }
+    lib_drm_handle_ = dlopen("libdrm_amdgpu.so.1", RTLD_NOW);
+    if (lib_drm_handle_ == nullptr) {
+        return false;
+    }
+    // decltype names each pointer type, so every cast matches the funcs_ member it fills
+    funcs_.DrmGetDevices = reinterpret_cast<decltype(funcs_.DrmGetDevices)>(
+        dlsym(lib_drm_handle_, "drmGetDevices"));
+    funcs_.AmdgpuDeviceInitialize = reinterpret_cast<decltype(funcs_.AmdgpuDeviceInitialize)>(
+        dlsym(lib_drm_handle_, "amdgpu_device_initialize"));
+    funcs_.AmdgpuDeviceDeinitialize = reinterpret_cast<decltype(funcs_.AmdgpuDeviceDeinitialize)>(
+        dlsym(lib_drm_handle_, "amdgpu_device_deinitialize"));
+    funcs_.AmdgpuQueryGpuInfo = reinterpret_cast<decltype(funcs_.AmdgpuQueryGpuInfo)>(
+        dlsym(lib_drm_handle_, "amdgpu_query_gpu_info"));
+    funcs_.AmdgpuQueryInfo = reinterpret_cast<decltype(funcs_.AmdgpuQueryInfo)>(
+        dlsym(lib_drm_handle_, "amdgpu_query_info"));
+    funcs_.AmdgpuBoAlloc = reinterpret_cast<decltype(funcs_.AmdgpuBoAlloc)>(
+        dlsym(lib_drm_handle_, "amdgpu_bo_alloc"));
+    funcs_.AmdgpuBoExport = reinterpret_cast<decltype(funcs_.AmdgpuBoExport)>(
+        dlsym(lib_drm_handle_, "amdgpu_bo_export"));
+    funcs_.AmdgpuBoFree = reinterpret_cast<decltype(funcs_.AmdgpuBoFree)>(
+        dlsym(lib_drm_handle_, "amdgpu_bo_free"));
+    funcs_.AmdgpuBoCpuMap = reinterpret_cast<decltype(funcs_.AmdgpuBoCpuMap)>(
+        dlsym(lib_drm_handle_, "amdgpu_bo_cpu_map"));
+    funcs_.AmdgpuBoCpuUnmap = reinterpret_cast<decltype(funcs_.AmdgpuBoCpuUnmap)>(
+        dlsym(lib_drm_handle_, "amdgpu_bo_cpu_unmap"));
+    // A null entry would crash on its first call, so a partial table counts as a failed init
+    if (!funcs_.DrmGetDevices || !funcs_.AmdgpuDeviceInitialize || !funcs_.AmdgpuDeviceDeinitialize ||
+        !funcs_.AmdgpuQueryGpuInfo || !funcs_.AmdgpuQueryInfo || !funcs_.AmdgpuBoAlloc ||
+        !funcs_.AmdgpuBoExport || !funcs_.AmdgpuBoFree || !funcs_.AmdgpuBoCpuMap ||
+        !funcs_.AmdgpuBoCpuUnmap) {
+        dlclose(lib_drm_handle_);
+        lib_drm_handle_ = nullptr;
+        return false;
+    }
+    initialized_ = true;
+    return true;
+}
+
#ifndef AMDGPU_CAPABILITY_SSG_FLAG
#define AMDGPU_CAPABILITY_SSG_FLAG 4
#endif
@@ -44,8 +98,8 @@ ProDevice::~ProDevice() {
// Open drm device and initialize it. And also get the drm information.
bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) {
drmDevicePtr devices[kMaxDevices] = { };
- int32_t device_count = drmGetDevices(devices, kMaxDevices);
- bool result = false;
+ int32_t device_count = Funcs().DrmGetDevices(devices, kMaxDevices);
+ bool result = false;
for (int32_t i = 0; i < device_count; i++) {
// Check if the device vendor is AMD
@@ -54,7 +108,7 @@ bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) {
}
if ((devices[i]->businfo.pci->bus == bus) &&
(devices[i]->businfo.pci->dev == device) &&
- (devices[i]->businfo.pci->func == func)) {
+ (devices[i]->businfo.pci->func == func)) {
// pDevices[i]->nodes[DRM_NODE_PRIMARY];
// Using render node here so that we can do the off-screen rendering without authentication
@@ -64,14 +118,14 @@ bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) {
void* data, *file, *cap;
// Initialize the admgpu device.
- if (amdgpu_device_initialize(file_desc_, &major_ver_,
- &minor_ver_, &dev_handle_) == 0) {
+ if (Funcs().AmdgpuDeviceInitialize(file_desc_, &major_ver_,
+ &minor_ver_, &dev_handle_) == 0) {
uint32_t version = 0;
// amdgpu_query_gpu_info will never fail only if it is initialized
- amdgpu_query_gpu_info(dev_handle_, &gpu_info_);
+ Funcs().AmdgpuQueryGpuInfo(dev_handle_, &gpu_info_);
drm_amdgpu_capability cap = {};
- amdgpu_query_info(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap);
+ Funcs().AmdgpuQueryInfo(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap);
// Check if DGMA and SSG are available
if ((cap.flag & (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) ==
@@ -107,34 +161,34 @@ void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr)
req.preferred_heap = AMDGPU_GEM_DOMAIN_DGMA;
// Allocate buffer in DGMA heap
- if (0 == amdgpu_bo_alloc(dev_handle_, &req, &buf_handle)) {
+ if (0 == Funcs().AmdgpuBoAlloc(dev_handle_, &req, &buf_handle)) {
amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
uint32_t shared_handle = 0;
// Find the base driver handle
- if (0 == amdgpu_bo_export(buf_handle, type, &shared_handle)) {
+ if (0 == Funcs().AmdgpuBoExport(buf_handle, type, &shared_handle)) {
uint32_t flags = 0;
size_t buf_size = 0;
// Map memory object to HSA device
if (0 == hsa_amd_interop_map_buffer(1, &agent, shared_handle,
flags, &buf_size, &ptr, nullptr, nullptr)) {
// Ask GPUPro driver to provide CPU access to allocation
- if (0 == amdgpu_bo_cpu_map(buf_handle, host_ptr)) {
+ if (0 == Funcs().AmdgpuBoCpuMap(buf_handle, host_ptr)) {
allocs_.insert(std::pair>(
ptr, std::pair(buf_handle, shared_handle)));
}
else {
hsa_amd_interop_unmap_buffer(ptr);
close(shared_handle);
- amdgpu_bo_free(buf_handle);
+ Funcs().AmdgpuBoFree(buf_handle);
}
}
else {
close(shared_handle);
- amdgpu_bo_free(buf_handle);
+ Funcs().AmdgpuBoFree(buf_handle);
}
}
else {
- amdgpu_bo_free(buf_handle);
+ Funcs().AmdgpuBoFree(buf_handle);
}
}
@@ -146,12 +200,12 @@ void ProDevice::FreeDmaBuffer(void* ptr) const
amd::ScopedLock l(alloc_ops_);
auto it = allocs_.find(ptr);
if (it != allocs_.end()) {
- amdgpu_bo_cpu_unmap(it->second.first);
+ Funcs().AmdgpuBoCpuUnmap(it->second.first);
// Unmap memory from HSA device
hsa_amd_interop_unmap_buffer(ptr);
// Close shared handle
close(it->second.second);
- int error = amdgpu_bo_free(it->second.first);
+ int error = Funcs().AmdgpuBoFree(it->second.first);
allocs_.erase(it);
}
}
diff --git a/rocclr/runtime/device/rocm/pro/prodevice.hpp b/rocclr/runtime/device/rocm/pro/prodevice.hpp
index 280d34c0d2..8e0b3370ce 100644
--- a/rocclr/runtime/device/rocm/pro/prodevice.hpp
+++ b/rocclr/runtime/device/rocm/pro/prodevice.hpp
@@ -6,6 +6,7 @@
#ifndef WITHOUT_HSA_BACKEND
+#include "profuncs.hpp"
#include "prodriver.hpp"
#include "thread/monitor.hpp"
#include