From 0da70b03dec38d3bfda9593498a0865ef3b01f2a Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 16 May 2017 17:22:59 -0400
Subject: [PATCH] P4 to Git Change 1410373 by gandryey@gera-w8 on 2017/05/16
17:16:52
SWDEV-120180 - [amdgpu-pro] OpenCL support for SSG
- Add initial support of DGMA memory under ROCr backend.
- The implementation requires amdgpu-pro stack initialization and memory allocation.
- An interop with HSA device is created for ROCr access
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/Makefile#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#153 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#285 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#351 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/lnxheaders.h#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodriver.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#101 edit
... //depot/stg/opencl/drivers/opencl/runtime/runtimedefs#41 edit
---
rocclr/runtime/device/device.hpp | 2 +-
rocclr/runtime/device/gpu/gpusettings.cpp | 2 +-
rocclr/runtime/device/rocm/pro/lnxheaders.h | 30 ++++
rocclr/runtime/device/rocm/pro/prodevice.cpp | 162 ++++++++++++++++++
rocclr/runtime/device/rocm/pro/prodevice.hpp | 53 ++++++
rocclr/runtime/device/rocm/pro/prodriver.hpp | 35 ++++
rocclr/runtime/device/rocm/rocdevice.cpp | 167 ++++++++++---------
rocclr/runtime/device/rocm/rocdevice.hpp | 9 +-
rocclr/runtime/device/rocm/rocmemory.cpp | 22 ++-
rocclr/runtime/device/rocm/rocsettings.cpp | 1 +
rocclr/runtime/device/rocm/rocvirtual.cpp | 52 ++++++
rocclr/runtime/device/rocm/rocvirtual.hpp | 1 +
rocclr/runtime/platform/memory.hpp | 2 +-
13 files changed, 454 insertions(+), 84 deletions(-)
create mode 100644 rocclr/runtime/device/rocm/pro/lnxheaders.h
create mode 100644 rocclr/runtime/device/rocm/pro/prodevice.cpp
create mode 100644 rocclr/runtime/device/rocm/pro/prodevice.hpp
create mode 100644 rocclr/runtime/device/rocm/pro/prodriver.hpp
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index 5a9871878a..658a8f02b3 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -157,7 +157,7 @@ static const char* OclExtensionsString[] = {"cl_khr_fp64 ",
"cl_khr_mipmap_image ",
"cl_khr_mipmap_image_writes ",
"",
- (IS_LINUX) ? "" : "cl_amd_liquid_flash ",
+ "cl_amd_liquid_flash ",
NULL};
namespace device {
diff --git a/rocclr/runtime/device/gpu/gpusettings.cpp b/rocclr/runtime/device/gpu/gpusettings.cpp
index 3c57d08165..57395bbc7c 100644
--- a/rocclr/runtime/device/gpu/gpusettings.cpp
+++ b/rocclr/runtime/device/gpu/gpusettings.cpp
@@ -313,6 +313,7 @@ bool Settings::create(const CALdeviceattribs& calAttr, bool reportAsOCL12Device,
maxWorkloadTime_ = modifyMaxWorkload.time;
}
}
+ enableExtension(ClAMDLiquidFlash);
#endif // defined(_WIN32)
// Enable atomics support
@@ -332,7 +333,6 @@ bool Settings::create(const CALdeviceattribs& calAttr, bool reportAsOCL12Device,
// Enable some platform extensions
enableExtension(ClAmdDeviceAttributeQuery);
enableExtension(ClKhrSpir);
- enableExtension(ClAMDLiquidFlash);
hwLDSSize_ = 32 * Ki;
diff --git a/rocclr/runtime/device/rocm/pro/lnxheaders.h b/rocclr/runtime/device/rocm/pro/lnxheaders.h
new file mode 100644
index 0000000000..9929d40c04
--- /dev/null
+++ b/rocclr/runtime/device/rocm/pro/lnxheaders.h
@@ -0,0 +1,30 @@
+//
+// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#pragma once
+
+// NOTE: Some of the Linux driver stack's headers don't wrap their C-style interface names in 'extern "C" { ... }'
+// blocks when building with a C++ compiler, so we need to add that ourselves.
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+constexpr int32_t InvalidFd = -1; // value representing an invalid file descriptor for Linux
+
+#if __cplusplus
+} // extern "C"
+#endif
diff --git a/rocclr/runtime/device/rocm/pro/prodevice.cpp b/rocclr/runtime/device/rocm/pro/prodevice.cpp
new file mode 100644
index 0000000000..d00ee6415d
--- /dev/null
+++ b/rocclr/runtime/device/rocm/pro/prodevice.cpp
@@ -0,0 +1,162 @@
+//
+// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifndef WITHOUT_HSA_BACKEND
+
+#include "hsa_ext_amd.h"
+#include "lnxheaders.h"
+#include "prodevice.hpp"
+#include "amdgpu_drm.h"
+
+namespace roc {
+
+constexpr uint32_t kMaxDevices = 32;  // capacity of the array handed to drmGetDevices()
+constexpr uint32_t kAtiVendorId = 0x1002;  // PCI vendor id a DRM device must report to be considered
+
+// Builds a ProDevice for the given PCI bus/device/function; returns nullptr if Create() fails.
+IProDevice* IProDevice::Init(uint32_t bus, uint32_t dev, uint32_t func)
+{
+  ProDevice* device = new ProDevice();
+  if ((device != nullptr) && device->Create(bus, dev, func)) {
+    return device;
+  }
+  delete device;  // drop the object that could not be set up
+  return nullptr;
+}
+
+// Releases the allocation lock, then the amdgpu device handle, then the DRM file descriptor.
+ProDevice::~ProDevice() {
+  delete alloc_ops_;
+  if (nullptr != dev_handle_) {
+    amdgpu_device_deinitialize(dev_handle_);
+  }
+  if (0 < file_desc_) {
+    close(file_desc_);
+  }
+}
+
+#ifndef AMDGPU_CAPABILITY_SSG_FLAG  // fallback value, used only when no header defined the flag
+#define AMDGPU_CAPABILITY_SSG_FLAG 4
+#endif
+
+// ================================================================================================
+// Opens the DRM render node of the AMD GPU at PCI bus/device/func and initializes amdgpu on it.
+// Returns true only if the GPU reports both DGMA and SSG and the allocation lock was created.
+bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) {
+  drmDevicePtr devices[kMaxDevices] = { };
+  int32_t device_count = drmGetDevices(devices, kMaxDevices);
+  bool result = false;
+
+  for (int32_t i = 0; i < device_count; i++) {
+    // Check if the device vendor is AMD
+    if (devices[i]->deviceinfo.pci->vendor_id != kAtiVendorId) {
+      continue;
+    }
+    if ((devices[i]->businfo.pci->bus == bus) &&
+        (devices[i]->businfo.pci->dev == device) &&
+        (devices[i]->businfo.pci->func == func)) {
+      // Using render node here so that we can do the off-screen rendering without authentication
+      file_desc_ = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR, 0);
+      if (file_desc_ > 0) {
+        // Initialize the amdgpu device.
+        if (amdgpu_device_initialize(file_desc_, &major_ver_,
+                                     &minor_ver_, &dev_handle_) == 0) {
+          // Return value is not checked: the device was initialized just above
+          amdgpu_query_gpu_info(dev_handle_, &gpu_info_);
+          drm_amdgpu_capability cap = {};
+          amdgpu_query_info(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap);
+
+          // Check if DGMA and SSG are available
+          if ((cap.flag & (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) ==
+              (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) {
+            result = true;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Free the device list filled in by drmGetDevices(); nothing below needs it
+  if (device_count > 0) {
+    drmFreeDevices(devices, device_count);
+  }
+
+  if (result) {
+    alloc_ops_ = new amd::Monitor("DGMA mem alloc lock", true);
+    // AllocDmaBuffer()/FreeDmaBuffer() lock through alloc_ops_, so a missing lock is a failure
+    if (nullptr == alloc_ops_) {
+      return false;
+    }
+  }
+
+  return result;
+}
+
+// Allocates 'size' bytes in the DGMA heap and maps the buffer for both the HSA 'agent' and the CPU.
+// Returns the HSA address and stores the CPU address in *host_ptr. On any failure the resources
+// acquired so far are released, nullptr is returned and *host_ptr is left as nullptr.
+void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) const
+{
+  amd::ScopedLock l(alloc_ops_);
+  void* ptr = nullptr;
+  amdgpu_bo_handle buf_handle = 0;
+  amdgpu_bo_alloc_request req = {0};
+  *host_ptr = nullptr;
+
+  req.alloc_size = size;
+  req.phys_alignment = 64 * Ki;
+  req.preferred_heap = AMDGPU_GEM_DOMAIN_DGMA;
+
+  // Allocate buffer in DGMA heap
+  if (0 != amdgpu_bo_alloc(dev_handle_, &req, &buf_handle)) {
+    return nullptr;
+  }
+  // Export the buffer as a dma-buf handle that can be shared with the HSA runtime
+  uint32_t shared_handle = 0;
+  if (0 != amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_dma_buf_fd, &shared_handle)) {
+    amdgpu_bo_free(buf_handle);
+    return nullptr;
+  }
+  // Map memory object to HSA device
+  uint32_t flags = 0;
+  size_t buf_size = 0;
+  if (0 != hsa_amd_interop_map_buffer(1, &agent, shared_handle,
+                                      flags, &buf_size, &ptr, nullptr, nullptr)) {
+    close(shared_handle);
+    amdgpu_bo_free(buf_handle);
+    return nullptr;
+  }
+  // Ask GPUPro driver to provide CPU access to allocation
+  if (0 != amdgpu_bo_cpu_map(buf_handle, host_ptr)) {
+    hsa_amd_interop_unmap_buffer(ptr);
+    close(shared_handle);
+    amdgpu_bo_free(buf_handle);
+    *host_ptr = nullptr;
+    return nullptr;  // 'ptr' was just unmapped and must not reach the caller
+  }
+
+  allocs_.insert(std::make_pair(ptr, std::make_pair(buf_handle, shared_handle)));
+  return ptr;
+}
+
+// Undoes AllocDmaBuffer() for 'ptr'; pointers that are not tracked in allocs_ are ignored.
+void ProDevice::FreeDmaBuffer(void* ptr) const
+{
+  amd::ScopedLock l(alloc_ops_);
+  auto it = allocs_.find(ptr);
+  if (it != allocs_.end()) {
+    amdgpu_bo_cpu_unmap(it->second.first);
+    hsa_amd_interop_unmap_buffer(ptr);  // Unmap memory from HSA device
+    close(it->second.second);           // Close shared handle
+    // The status of the free was only ever stored in an unused local, so it is not kept
+    amdgpu_bo_free(it->second.first);
+    allocs_.erase(it);
+  }
+}
+
+}
+
+#endif // WITHOUT_HSA_BACKEND
+
diff --git a/rocclr/runtime/device/rocm/pro/prodevice.hpp b/rocclr/runtime/device/rocm/pro/prodevice.hpp
new file mode 100644
index 0000000000..3628df3f9e
--- /dev/null
+++ b/rocclr/runtime/device/rocm/pro/prodevice.hpp
@@ -0,0 +1,53 @@
+//
+// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#pragma once
+
+#ifndef WITHOUT_HSA_BACKEND
+
+#include "prodriver.hpp"
+#include "thread/monitor.hpp"
+#include