P4 to Git Change 1410373 by gandryey@gera-w8 on 2017/05/16 17:16:52

SWDEV-120180 - [amdgpu-pro] OpenCL support for SSG
	- Add initial support of DGMA memory under ROCr backend.
	 - The implementation requires amdgpu-pro stack initialization and memory allocation.
	 - An interop with HSA device is created for ROCr access

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/Makefile#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#153 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#285 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#351 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/lnxheaders.h#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.cpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodevice.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/pro/prodriver.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#101 edit
... //depot/stg/opencl/drivers/opencl/runtime/runtimedefs#41 edit
Этот коммит содержится в:
foreman
2017-05-16 17:22:59 -04:00
родитель df45033047
Коммит 0da70b03de
13 изменённых файлов: 454 добавлений и 84 удалений
+1 -1
Просмотреть файл
@@ -157,7 +157,7 @@ static const char* OclExtensionsString[] = {"cl_khr_fp64 ",
"cl_khr_mipmap_image ",
"cl_khr_mipmap_image_writes ",
"",
(IS_LINUX) ? "" : "cl_amd_liquid_flash ",
"cl_amd_liquid_flash ",
NULL};
namespace device {
+1 -1
Просмотреть файл
@@ -313,6 +313,7 @@ bool Settings::create(const CALdeviceattribs& calAttr, bool reportAsOCL12Device,
maxWorkloadTime_ = modifyMaxWorkload.time;
}
}
enableExtension(ClAMDLiquidFlash);
#endif // defined(_WIN32)
// Enable atomics support
@@ -332,7 +333,6 @@ bool Settings::create(const CALdeviceattribs& calAttr, bool reportAsOCL12Device,
// Enable some platform extensions
enableExtension(ClAmdDeviceAttributeQuery);
enableExtension(ClKhrSpir);
enableExtension(ClAMDLiquidFlash);
hwLDSSize_ = 32 * Ki;
+30
Просмотреть файл
@@ -0,0 +1,30 @@
//
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
//
#pragma once
// NOTE: Some of the Linux driver stack's headers don't wrap their C-style interface names in
// 'extern "C" { ... }' blocks when building with a C++ compiler, so we need to add that ourselves.
#if __cplusplus
extern "C"
{
#endif
// amdgpu / libdrm driver interfaces
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <amdgpu_shared.h>
#include <xf86drm.h>
#include <xf86drmMode.h>
// POSIX headers needed for opening and closing the device node
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
// Value representing an invalid file descriptor on Linux.
// NOTE(review): 'constexpr' is C++ only, so despite the '#if __cplusplus' guards around the
// linkage block this header can only be consumed by a C++ translation unit - confirm intent.
constexpr int32_t InvalidFd = -1;
#if __cplusplus
} // extern "C"
#endif
+162
Просмотреть файл
@@ -0,0 +1,162 @@
//
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef WITHOUT_HSA_BACKEND
#include "hsa_ext_amd.h"
#include "lnxheaders.h"
#include "prodevice.hpp"
#include "amdgpu_drm.h"
namespace roc {
constexpr uint32_t kMaxDevices = 32;
constexpr uint32_t kAtiVendorId = 0x1002;
// Factory for the amdgpu-pro device interface.
// Builds a ProDevice and tries to bind it to the GPU at the given PCI bus/device/function.
// Returns the new object on success; on any failure the object is destroyed and nullptr
// is returned.
IProDevice* IProDevice::Init(uint32_t bus, uint32_t dev, uint32_t func)
{
  ProDevice* instance = new ProDevice();

  if ((instance != nullptr) && instance->Create(bus, dev, func)) {
    return instance;
  }

  // Either the allocation or the device initialization failed
  delete instance;
  return nullptr;
}
// Tears the object down in this order: the allocation lock, the amdgpu device handle
// and finally the DRM file descriptor.
ProDevice::~ProDevice() {
  delete alloc_ops_;

  // The amdgpu device is deinitialized only if a handle is present
  if (nullptr != dev_handle_) {
    amdgpu_device_deinitialize(dev_handle_);
  }

  // Only strictly positive descriptors are considered open and get closed here
  if (0 < file_desc_) {
    close(file_desc_);
  }
}
#ifndef AMDGPU_CAPABILITY_SSG_FLAG
#define AMDGPU_CAPABILITY_SSG_FLAG 4
#endif
// ================================================================================================
// Open drm device and initialize it. And also get the drm information.
bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) {
drmDevicePtr devices[kMaxDevices] = { };
int32_t device_count = drmGetDevices(devices, kMaxDevices);
bool result = false;
for (int32_t i = 0; i < device_count; i++) {
// Check if the device vendor is AMD
if (devices[i]->deviceinfo.pci->vendor_id != kAtiVendorId) {
continue;
}
if ((devices[i]->businfo.pci->bus == bus) &&
(devices[i]->businfo.pci->dev == device) &&
(devices[i]->businfo.pci->func == func)) {
// pDevices[i]->nodes[DRM_NODE_PRIMARY];
// Using render node here so that we can do the off-screen rendering without authentication
file_desc_ = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR, 0);
if (file_desc_ > 0) {
void* data, *file, *cap;
// Initialize the admgpu device.
if (amdgpu_device_initialize(file_desc_, &major_ver_,
&minor_ver_, &dev_handle_) == 0) {
uint32_t version = 0;
// amdgpu_query_gpu_info will never fail only if it is initialized
amdgpu_query_gpu_info(dev_handle_, &gpu_info_);
drm_amdgpu_capability cap = {};
amdgpu_query_info(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap);
// Check if DGMA and SSG are available
if ((cap.flag & (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) ==
(AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) {
result = true;
break;
}
}
}
}
}
if (result) {
alloc_ops_ = new amd::Monitor("DGMA mem alloc lock", true);
if (nullptr == alloc_ops_) {
return true;
}
}
return result;
}
// Allocates a buffer in the DGMA heap through amdgpu-pro, maps it into the given HSA agent
// and provides CPU access to it. Every successful allocation is recorded in allocs_, keyed by
// the returned pointer, so FreeDmaBuffer() can undo all steps.
//
// \param agent     HSA agent the buffer is mapped to
// \param size      Requested allocation size
// \param host_ptr  [out] Receives the CPU mapping of the buffer; nullptr on failure
// \return pointer produced by hsa_amd_interop_map_buffer(), or nullptr if any step failed
void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) const
{
  amd::ScopedLock l(alloc_ops_);
  *host_ptr = nullptr;

  amdgpu_bo_alloc_request req = {0};
  req.alloc_size = size;
  req.phys_alignment = 64 * Ki;
  req.preferred_heap = AMDGPU_GEM_DOMAIN_DGMA;

  // Allocate buffer in DGMA heap
  amdgpu_bo_handle buf_handle = 0;
  if (0 != amdgpu_bo_alloc(dev_handle_, &req, &buf_handle)) {
    return nullptr;
  }

  // Find the base driver handle
  uint32_t shared_handle = 0;
  if (0 != amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_dma_buf_fd, &shared_handle)) {
    amdgpu_bo_free(buf_handle);
    return nullptr;
  }

  // Map memory object to HSA device
  void* ptr = nullptr;
  const uint32_t flags = 0;
  size_t buf_size = 0;
  if (0 != hsa_amd_interop_map_buffer(1, &agent, shared_handle,
                                      flags, &buf_size, &ptr, nullptr, nullptr)) {
    close(shared_handle);
    amdgpu_bo_free(buf_handle);
    return nullptr;
  }

  // Ask GPUPro driver to provide CPU access to allocation
  if (0 != amdgpu_bo_cpu_map(buf_handle, host_ptr)) {
    hsa_amd_interop_unmap_buffer(ptr);
    close(shared_handle);
    amdgpu_bo_free(buf_handle);
    // The mapping is gone: never hand the stale device pointer back to the caller
    *host_ptr = nullptr;
    return nullptr;
  }

  allocs_.emplace(ptr, std::make_pair(buf_handle, shared_handle));
  return ptr;
}
// Releases a buffer previously returned by AllocDmaBuffer(). Pointers that are not present
// in the allocation map are ignored.
void ProDevice::FreeDmaBuffer(void* ptr) const
{
  amd::ScopedLock l(alloc_ops_);

  auto entry = allocs_.find(ptr);
  if (entry == allocs_.end()) {
    return;
  }

  // first: amdgpu buffer object, second: exported shared handle
  const auto& handles = entry->second;

  // Drop CPU access
  amdgpu_bo_cpu_unmap(handles.first);
  // Unmap memory from HSA device
  hsa_amd_interop_unmap_buffer(ptr);
  // Close shared handle
  close(handles.second);
  // Release the buffer object itself
  amdgpu_bo_free(handles.first);

  allocs_.erase(entry);
}
}
#endif // WITHOUT_HSA_BACKEND
+53
Просмотреть файл
@@ -0,0 +1,53 @@
//
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
//
#pragma once
#ifndef WITHOUT_HSA_BACKEND
#include "prodriver.hpp"
#include "thread/monitor.hpp"
#include <map>
/*! \addtogroup HSA
* @{
*/
//! HSA Device Implementation
namespace roc {
class ProDevice : public IProDevice {
public:
ProDevice()
: file_desc_(0)
, major_ver_(0)
, minor_ver_(0)
, cp_ver_(0)
, alloc_ops_(nullptr) {}
virtual ~ProDevice() override;
bool Create(uint32_t bus, uint32_t device, uint32_t func);
virtual void* AllocDmaBuffer(
hsa_agent_t agent, size_t size, void** host_ptr) const override;
virtual void FreeDmaBuffer(void* ptr) const override;
private:
int32_t file_desc_; //!< File descriptor for the device
uint32_t major_ver_; //!< Major driver version
uint32_t minor_ver_; //!< Minor driver version
uint32_t cp_ver_; //!< CP ucode version
amdgpu_device_handle dev_handle_; //!< AMD gpu device handle
amdgpu_gpu_info gpu_info_; //!< GPU info structure
amdgpu_heap_info heap_info_; //!< Information about memory
mutable std::map<void*, std::pair<amdgpu_bo_handle, uint32_t>> allocs_; //!< Alloced memory mapping
amd::Monitor* alloc_ops_; //!< Serializes memory allocations/destructions
};
} // namespace roc
/**
* @}
*/
#endif /*WITHOUT_HSA_BACKEND*/
+35
Просмотреть файл
@@ -0,0 +1,35 @@
//
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
//
#pragma once
#ifndef WITHOUT_HSA_BACKEND
#include "top.hpp"
#include "hsa.h"
/*! \addtogroup HSA
* @{
*/
namespace roc {
//! Pro Device Interface
class IProDevice : public amd::HeapObject {
public:
static IProDevice* Init(uint32_t bus, uint32_t device, uint32_t func);
virtual void* AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) const = 0;
virtual void FreeDmaBuffer(void* ptr) const = 0;
IProDevice() {}
virtual ~IProDevice() {}
};
} // namespace roc
/**
* @}
*/
#endif /*WITHOUT_HSA_BACKEND*/
+89 -78
Просмотреть файл
@@ -25,6 +25,7 @@
#endif // !defined(WITH_LIGHTNING_COMPILER)
#include "device/rocm/rocmemory.hpp"
#include "device/rocm/rocglinterop.hpp"
#include "pro/prodriver.hpp"
#include <cstring>
#include <fstream>
#include <sstream>
@@ -111,16 +112,18 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
}
Device::Device(hsa_agent_t bkendDevice)
: mapCacheOps_(nullptr),
mapCache_(nullptr),
_bkendDevice(bkendDevice),
gpuvm_segment_max_alloc_(0),
alloc_granularity_(0),
context_(nullptr),
xferQueue_(nullptr),
xferRead_(nullptr),
xferWrite_(nullptr),
numOfVgpus_(0) {
: mapCacheOps_(nullptr)
, mapCache_(nullptr)
, _bkendDevice(bkendDevice)
, gpuvm_segment_max_alloc_(0)
, alloc_granularity_(0)
, context_(nullptr)
, xferQueue_(nullptr)
, xferRead_(nullptr)
, xferWrite_(nullptr)
, pro_device_(nullptr)
, pro_ena_(false)
, numOfVgpus_(0) {
group_segment_.handle = 0;
system_segment_.handle = 0;
system_coarse_segment_.handle = 0;
@@ -128,6 +131,10 @@ Device::Device(hsa_agent_t bkendDevice)
}
Device::~Device() {
#ifdef WITH_AMDGPU_PRO
delete pro_device_;
#endif
// Release cached map targets
for (uint i = 0; mapCache_ != nullptr && i < mapCache_->size(); ++i) {
if ((*mapCache_)[i] != nullptr) {
@@ -474,16 +481,22 @@ bool Device::init() {
roc_device->deviceInfo_.gfxipVersion_ = major * 100 + minor * 10 + stepping;
if (!roc_device->mapHSADeviceToOpenCLDevice(agent)) {
LogError("Failed mapping of HsaDevice to Device.");
continue;
}
if (!roc_device->create()) {
LogError("Error creating new instance of Device.");
continue;
}
// Setup System Memory to be Non-Coherent per user
// request via environment variable. By default the
// System Memory is setup to be Coherent
if (roc_device->settings().enableNCMode_) {
hsa_status_t err = hsa_amd_coherency_set_type(agent, HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
if (err != HSA_STATUS_SUCCESS) {
LogError("Unable to set NC memory policy!");
continue;
}
}
if (selectedDevices[ordinal++] &&
(flagIsDefault(GPU_DEVICE_NAME) || GPU_DEVICE_NAME == 0 || GPU_DEVICE_NAME[0] == '\0' ||
!strcmp(GPU_DEVICE_NAME, roc_device->info_.name_))) {
@@ -500,10 +513,71 @@ void Device::tearDown() {
}
bool Device::create() {
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) {
return false;
}
// Create HSA settings
settings_ = new Settings();
roc::Settings* hsaSettings = static_cast<roc::Settings*>(settings_);
if ((hsaSettings == nullptr) ||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipVersion_)) {
return false;
}
if (!amd::Device::create()) {
return false;
}
uint32_t hsa_bdf_id = 0;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &hsa_bdf_id)) {
return false;
}
info_.deviceTopology_.pcie.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
info_.deviceTopology_.pcie.bus = (hsa_bdf_id & (0xFF << 8)) >> 8;
info_.deviceTopology_.pcie.device = (hsa_bdf_id & (0x1F << 3)) >> 3;
info_.deviceTopology_.pcie.function = (hsa_bdf_id & 0x07);
#ifdef WITH_AMDGPU_PRO
// Create amdgpu-pro device interface for SSG support
pro_device_ = IProDevice::Init(
info_.deviceTopology_.pcie.bus,
info_.deviceTopology_.pcie.device,
info_.deviceTopology_.pcie.function);
if (pro_device_ != nullptr) {
pro_ena_ = true;
settings_->enableExtension(ClAMDLiquidFlash);
}
#endif
if (populateOCLDeviceConstants() == false) {
return false;
}
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
std::ostringstream cacheTarget;
cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
amd::CacheCompilation* compObj = new amd::CacheCompilation(
cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
if (!compObj) {
LogError("Unable to create cache compilation object!");
return false;
}
cacheCompilation_.reset(compObj);
#endif
amd::Context::Info info = {0};
std::vector<amd::Device*> devices;
devices.push_back(this);
@@ -568,59 +642,6 @@ device::Program* Device::createProgram(amd::option::Options* options) {
return new roc::HSAILProgram(*this);
}
// Sets up the OpenCL side of an HSA device: queries the agent profile, creates the
// device settings, fills in the OpenCL device constants, optionally switches system memory
// to non-coherent mode and (with the lightning compiler) creates the compilation cache.
// Returns false as soon as any of these steps fails.
// NOTE(review): the profile is queried from _bkendDevice while the coherency type is set
// on the 'dev' argument - presumably both refer to the same agent; confirm against callers.
bool Device::mapHSADeviceToOpenCLDevice(hsa_agent_t dev) {
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) {
return false;
}
// Create HSA settings
settings_ = new Settings();
roc::Settings* hsaSettings = static_cast<roc::Settings*>(settings_);
// The full-profile flag and the gfxip version drive the settings creation
if ((hsaSettings == nullptr) ||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipVersion_)) {
return false;
}
if (populateOCLDeviceConstants() == false) {
return false;
}
// Setup System Memory to be Non-Coherent per user
// request via environment variable. By default the
// System Memory is setup to be Coherent
if (hsaSettings->enableNCMode_) {
hsa_status_t err = hsa_amd_coherency_set_type(dev, HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
if (err != HSA_STATUS_SUCCESS) {
LogError("Unable to set NC memory policy!");
return false;
}
}
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
// gfxipVersion_ is decomposed as major * 100 + minor * 10 + stepping
int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
std::ostringstream cacheTarget;
cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
amd::CacheCompilation* compObj = new amd::CacheCompilation(
cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
if (!compObj) {
LogError("Unable to create cache compilation object!");
return false;
}
// cacheCompilation_ takes over the object
cacheCompilation_.reset(compObj);
#endif
return true;
}
hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, void* data) {
if (data == nullptr) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
@@ -734,16 +755,6 @@ bool Device::populateOCLDeviceConstants() {
info_.type_ = CL_DEVICE_TYPE_GPU;
uint32_t hsa_bdf_id = 0;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &hsa_bdf_id)) {
return false;
}
info_.deviceTopology_.pcie.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
info_.deviceTopology_.pcie.bus = (hsa_bdf_id & (0xFF << 8)) >> 8;
info_.deviceTopology_.pcie.device = (hsa_bdf_id & (0x1F << 3)) >> 3;
info_.deviceTopology_.pcie.function = (hsa_bdf_id & 0x07);
info_.extensions_ = getExtensionString();
info_.nativeVectorWidthDouble_ = info_.preferredVectorWidthDouble_ =
(settings().doublePrecision_) ? 1 : 0;
+7 -2
Просмотреть файл
@@ -58,6 +58,7 @@ class Memory;
class Resource;
class VirtualDevice;
class PrintfDbg;
class IProDevice;
// A NULL Device type used only for offline compilation
// Only functions that are used for compilation will be in this device
@@ -276,8 +277,6 @@ class Device : public NullDevice {
//! Destructor for the physical HSA device
virtual ~Device();
bool mapHSADeviceToOpenCLDevice(hsa_agent_t hsadevice);
// Temporary, delete it later when HSA Runtime and KFD are fully functional.
void fake_device();
@@ -388,6 +387,10 @@ class Device : public NullDevice {
amd::Context& context() const { return *context_; }
// Returns AMD GPU Pro interfaces
const IProDevice& iPro() const { return *pro_device_; }
bool ProEna() const { return pro_ena_; }
private:
static hsa_ven_amd_loader_1_00_pfn_t amd_loader_ext_table;
@@ -415,6 +418,8 @@ class Device : public NullDevice {
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
const IProDevice* pro_device_; //!< AMDGPUPro device
bool pro_ena_; //!< Extra functionality with AMDGPUPro device, beyond ROCr
public:
amd::Atomic<uint> numOfVgpus_; //!< Virtual gpu unique index
+21 -1
Просмотреть файл
@@ -20,6 +20,7 @@
#include "platform/memory.hpp"
#include "platform/sampler.hpp"
#include "amdocl/cl_gl_amd.hpp"
#include "pro/prodriver.hpp"
namespace roc {
@@ -548,7 +549,12 @@ void Buffer::destroy() {
}
const cl_mem_flags memFlags = owner()->getMemFlags();
#ifdef WITH_AMDGPU_PRO
if ((memFlags & CL_MEM_USE_PERSISTENT_MEM_AMD) && dev().ProEna()) {
dev().iPro().FreeDmaBuffer(deviceMemory_);
return;
}
#endif
if ((deviceMemory_ != nullptr) && (deviceMemory_ != owner()->getHostMem())) {
// if they are identical, the host pointer will be
// deallocated later on => avoid double deallocation
@@ -611,6 +617,20 @@ bool Buffer::create() {
// Allocate backing storage in device local memory unless UHP or AHP are set
const cl_mem_flags memFlags = owner()->getMemFlags();
#ifdef WITH_AMDGPU_PRO
if ((memFlags & CL_MEM_USE_PERSISTENT_MEM_AMD) && dev().ProEna()) {
void* host_ptr = nullptr;
deviceMemory_ = dev().iPro().AllocDmaBuffer(dev().getGpuAgents()[0], size(), &host_ptr);
if (deviceMemory_ == nullptr) {
return false;
}
flags_ |= HostMemoryDirectAccess;
owner()->setHostMem(host_ptr);
return true;
}
#endif
if (!(memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) {
deviceMemory_ = dev().deviceLocalAlloc(size());
+1
Просмотреть файл
@@ -95,6 +95,7 @@ bool Settings::create(bool fullProfile, int gfxipVersion) {
enableExtension(ClKhr3DImageWrites);
enableExtension(ClAmdMediaOps);
enableExtension(ClAmdMediaOps2);
enableExtension(ClAMDLiquidFlash);
if (MesaInterop::Supported()) {
enableExtension(ClKhrGlSharing);
}
+52
Просмотреть файл
@@ -1757,4 +1757,56 @@ amd::Memory* VirtualGPU::findPinnedMem(void* addr, size_t size) {
}
void VirtualGPU::enableSyncBlit() const { blitMgr_->enableSynchronization(); }
//! Transfers data between a buffer and an SSG file in chunks that go through a staging buffer.
//! CL_COMMAND_READ_SSG_FILE_AMD fills the buffer from the file (file -> staging -> buffer),
//! CL_COMMAND_WRITE_SSG_FILE_AMD stores the buffer into the file (buffer -> staging -> file).
//! Any failed step sets the command status to CL_INVALID_OPERATION and stops the transfer.
void VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd) {
  assert((cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD) ||
         (cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD));
  // Reading from the file means writing into the buffer
  const bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD);

  Memory* mem = dev().getRocMemory(&cmd.memory());
  size_t copySize = cmd.size()[0];
  size_t fileOffset = cmd.fileOffset();
  size_t memOffset = cmd.origin()[0];
  // NOTE(review): only the first staging buffer is ever used - confirm that reusing it
  // for consecutive chunks is safe with respect to the blit submitted for the previous chunk
  const uint idx = 0;

  while (copySize > 0) {
    Memory* staging = dev().getRocMemory(&cmd.staging(idx));
    size_t chunkSize = amd::TransferBufferFileCommand::StagingBufferSize;
    chunkSize = std::min(chunkSize, copySize);

    // Buffer -> file: bring the next chunk into the staging buffer first
    if (!writeBuffer &&
        !blitMgr().copyBuffer(*mem, *staging, memOffset, 0, chunkSize, false)) {
      cmd.setStatus(CL_INVALID_OPERATION);
      return;
    }

    void* hostPtr = staging->cpuMap(*this);
    if (hostPtr == nullptr) {
      cmd.setStatus(CL_INVALID_OPERATION);
      return;
    }
    const bool transferred = cmd.file()->transferBlock(writeBuffer, hostPtr, staging->size(),
                                                       fileOffset, 0, chunkSize);
    // Unmap regardless of the outcome, so a failed transfer doesn't leave staging mapped
    staging->cpuUnmap(*this);
    if (!transferred) {
      cmd.setStatus(CL_INVALID_OPERATION);
      return;
    }

    // File -> buffer: push the freshly read chunk to its final location
    if (writeBuffer &&
        !blitMgr().copyBuffer(*staging, *mem, 0, memOffset, chunkSize, false)) {
      cmd.setStatus(CL_INVALID_OPERATION);
      return;
    }

    fileOffset += chunkSize;
    memOffset += chunkSize;
    copySize -= chunkSize;
  }
}
} // End of roc namespace
+1
Просмотреть файл
@@ -190,6 +190,7 @@ class VirtualGPU : public device::VirtualDevice {
virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd);
virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd);
virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd);
void submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand& cmd) {}
void submitThreadTrace(amd::ThreadTraceCommand& vcmd) {}
+1 -1
Просмотреть файл
@@ -595,7 +595,7 @@ class SvmBuffer : AllStatic {
//! Liquid flash extension
class LiquidFlashFile : public RuntimeObject {
private:
const wchar_t* name_;
std::wstring name_;
cl_file_flags_amd flags_;
void* handle_;
uint32_t blockSize_;