diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp index 5a9871878a..658a8f02b3 100644 --- a/rocclr/runtime/device/device.hpp +++ b/rocclr/runtime/device/device.hpp @@ -157,7 +157,7 @@ static const char* OclExtensionsString[] = {"cl_khr_fp64 ", "cl_khr_mipmap_image ", "cl_khr_mipmap_image_writes ", "", - (IS_LINUX) ? "" : "cl_amd_liquid_flash ", + "cl_amd_liquid_flash ", NULL}; namespace device { diff --git a/rocclr/runtime/device/gpu/gpusettings.cpp b/rocclr/runtime/device/gpu/gpusettings.cpp index 3c57d08165..57395bbc7c 100644 --- a/rocclr/runtime/device/gpu/gpusettings.cpp +++ b/rocclr/runtime/device/gpu/gpusettings.cpp @@ -313,6 +313,7 @@ bool Settings::create(const CALdeviceattribs& calAttr, bool reportAsOCL12Device, maxWorkloadTime_ = modifyMaxWorkload.time; } } + enableExtension(ClAMDLiquidFlash); #endif // defined(_WIN32) // Enable atomics support @@ -332,7 +333,6 @@ bool Settings::create(const CALdeviceattribs& calAttr, bool reportAsOCL12Device, // Enable some platform extensions enableExtension(ClAmdDeviceAttributeQuery); enableExtension(ClKhrSpir); - enableExtension(ClAMDLiquidFlash); hwLDSSize_ = 32 * Ki; diff --git a/rocclr/runtime/device/rocm/pro/lnxheaders.h b/rocclr/runtime/device/rocm/pro/lnxheaders.h new file mode 100644 index 0000000000..9929d40c04 --- /dev/null +++ b/rocclr/runtime/device/rocm/pro/lnxheaders.h @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. +// + +#pragma once + +// NOTE: Some of the Linux driver stack's headers don't wrap their C-style interface names in 'extern "C" { ... }' +// blocks when building with a C++ compiler, so we need to add that ourselves. +#if __cplusplus +extern "C" +{ +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +constexpr int32_t InvalidFd = -1; // value representing a invalid file descriptor for Linux + +#if __cplusplus +} // extern "C" +#endif diff --git a/rocclr/runtime/device/rocm/pro/prodevice.cpp b/rocclr/runtime/device/rocm/pro/prodevice.cpp new file mode 100644 index 0000000000..d00ee6415d --- /dev/null +++ b/rocclr/runtime/device/rocm/pro/prodevice.cpp @@ -0,0 +1,162 @@ +// +// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. +// + +#ifndef WITHOUT_HSA_BACKEND + +#include "hsa_ext_amd.h" +#include "lnxheaders.h" +#include "prodevice.hpp" +#include "amdgpu_drm.h" + +namespace roc { + +constexpr uint32_t kMaxDevices = 32; +constexpr uint32_t kAtiVendorId = 0x1002; + +IProDevice* IProDevice::Init(uint32_t bus, uint32_t dev, uint32_t func) +{ + ProDevice* pro_device = new ProDevice(); + + if (pro_device == nullptr || !pro_device->Create(bus, dev, func)) { + delete pro_device; + return nullptr; + } + return pro_device; +} + +ProDevice::~ProDevice() { + delete alloc_ops_; + + if (dev_handle_ != nullptr) { + amdgpu_device_deinitialize(dev_handle_); + } + if (file_desc_ > 0) { + close(file_desc_); + } +} + +#ifndef AMDGPU_CAPABILITY_SSG_FLAG +#define AMDGPU_CAPABILITY_SSG_FLAG 4 +#endif + +// ================================================================================================ +// Open drm device and initialize it. And also get the drm information. +bool ProDevice::Create(uint32_t bus, uint32_t device, uint32_t func) { + drmDevicePtr devices[kMaxDevices] = { }; + int32_t device_count = drmGetDevices(devices, kMaxDevices); + bool result = false; + + for (int32_t i = 0; i < device_count; i++) { + // Check if the device vendor is AMD + if (devices[i]->deviceinfo.pci->vendor_id != kAtiVendorId) { + continue; + } + if ((devices[i]->businfo.pci->bus == bus) && + (devices[i]->businfo.pci->dev == device) && + (devices[i]->businfo.pci->func == func)) { + + // pDevices[i]->nodes[DRM_NODE_PRIMARY]; + // Using render node here so that we can do the off-screen rendering without authentication + file_desc_ = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR, 0); + + if (file_desc_ > 0) { + void* data, *file, *cap; + + // Initialize the admgpu device. + if (amdgpu_device_initialize(file_desc_, &major_ver_, + &minor_ver_, &dev_handle_) == 0) { + uint32_t version = 0; + // amdgpu_query_gpu_info will never fail only if it is initialized + amdgpu_query_gpu_info(dev_handle_, &gpu_info_); + + drm_amdgpu_capability cap = {}; + amdgpu_query_info(dev_handle_, AMDGPU_INFO_CAPABILITY, sizeof(drm_amdgpu_capability), &cap); + + // Check if DGMA and SSG are available + if ((cap.flag & (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) == + (AMDGPU_CAPABILITY_DIRECT_GMA_FLAG | AMDGPU_CAPABILITY_SSG_FLAG)) { + result = true; + break; + } + } + } + } + } + + if (result) { + alloc_ops_ = new amd::Monitor("DGMA mem alloc lock", true); + if (nullptr == alloc_ops_) { + return true; + } + } + + return result; +} + +void* ProDevice::AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) const +{ + amd::ScopedLock l(alloc_ops_); + void* ptr = nullptr; + amdgpu_bo_handle buf_handle = 0; + amdgpu_bo_alloc_request req = {0}; + *host_ptr = nullptr; + + req.alloc_size = size; + req.phys_alignment = 64 * Ki; + req.preferred_heap = AMDGPU_GEM_DOMAIN_DGMA; + + // Allocate buffer in DGMA heap + if (0 == amdgpu_bo_alloc(dev_handle_, &req, &buf_handle)) { + amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd; + uint32_t shared_handle = 0; + // Find the base driver handle + if (0 == amdgpu_bo_export(buf_handle, type, &shared_handle)) { + uint32_t flags = 0; + size_t buf_size = 0; + // Map memory object to HSA device + if (0 == hsa_amd_interop_map_buffer(1, &agent, shared_handle, + flags, &buf_size, &ptr, nullptr, nullptr)) { + // Ask GPUPro driver to provide CPU access to allocation + if (0 == amdgpu_bo_cpu_map(buf_handle, host_ptr)) { + allocs_.insert(std::pair>( + ptr, std::pair(buf_handle, shared_handle))); + } + else { + hsa_amd_interop_unmap_buffer(ptr); + close(shared_handle); + amdgpu_bo_free(buf_handle); + } + } + else { + close(shared_handle); + amdgpu_bo_free(buf_handle); + } + } + else { + amdgpu_bo_free(buf_handle); + } + } + + return ptr; +} + +void ProDevice::FreeDmaBuffer(void* ptr) const +{ + amd::ScopedLock l(alloc_ops_); + auto it = allocs_.find(ptr); + if (it != allocs_.end()) { + amdgpu_bo_cpu_unmap(it->second.first); + // Unmap memory from HSA device + hsa_amd_interop_unmap_buffer(ptr); + // Close shared handle + close(it->second.second); + int error = amdgpu_bo_free(it->second.first); + allocs_.erase(it); + } +} + +} + +#endif // WITHOUT_HSA_BACKEND + diff --git a/rocclr/runtime/device/rocm/pro/prodevice.hpp b/rocclr/runtime/device/rocm/pro/prodevice.hpp new file mode 100644 index 0000000000..3628df3f9e --- /dev/null +++ b/rocclr/runtime/device/rocm/pro/prodevice.hpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. +// + +#pragma once + +#ifndef WITHOUT_HSA_BACKEND + +#include "prodriver.hpp" +#include "thread/monitor.hpp" +#include + +/*! \addtogroup HSA + * @{ + */ + +//! HSA Device Implementation +namespace roc { + +class ProDevice : public IProDevice { +public: + ProDevice() + : file_desc_(0) + , major_ver_(0) + , minor_ver_(0) + , cp_ver_(0) + , alloc_ops_(nullptr) {} + virtual ~ProDevice() override; + + bool Create(uint32_t bus, uint32_t device, uint32_t func); + + virtual void* AllocDmaBuffer( + hsa_agent_t agent, size_t size, void** host_ptr) const override; + virtual void FreeDmaBuffer(void* ptr) const override; + +private: + int32_t file_desc_; //!< File descriptor for the device + uint32_t major_ver_; //!< Major driver version + uint32_t minor_ver_; //!< Minor driver version + uint32_t cp_ver_; //!< CP ucode version + amdgpu_device_handle dev_handle_; //!< AMD gpu device handle + amdgpu_gpu_info gpu_info_; //!< GPU info structure + amdgpu_heap_info heap_info_; //!< Information about memory + mutable std::map> allocs_; //!< Alloced memory mapping + amd::Monitor* alloc_ops_; //!< Serializes memory allocations/destructions +}; + +} // namespace roc + +/** + * @} + */ +#endif /*WITHOUT_HSA_BACKEND*/ diff --git a/rocclr/runtime/device/rocm/pro/prodriver.hpp b/rocclr/runtime/device/rocm/pro/prodriver.hpp new file mode 100644 index 0000000000..85feafbb9e --- /dev/null +++ b/rocclr/runtime/device/rocm/pro/prodriver.hpp @@ -0,0 +1,35 @@ +// +// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. +// + +#pragma once + +#ifndef WITHOUT_HSA_BACKEND + +#include "top.hpp" +#include "hsa.h" + +/*! \addtogroup HSA + * @{ + */ + +namespace roc { + +//! Pro Device Interface +class IProDevice : public amd::HeapObject { +public: + static IProDevice* Init(uint32_t bus, uint32_t device, uint32_t func); + + virtual void* AllocDmaBuffer(hsa_agent_t agent, size_t size, void** host_ptr) const = 0; + virtual void FreeDmaBuffer(void* ptr) const = 0; + + IProDevice() {} + virtual ~IProDevice() {} +}; + +} // namespace roc + +/** + * @} + */ +#endif /*WITHOUT_HSA_BACKEND*/ diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp index 04f7ad1cc9..110b2c7891 100644 --- a/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/rocclr/runtime/device/rocm/rocdevice.cpp @@ -25,6 +25,7 @@ #endif // !defined(WITH_LIGHTNING_COMPILER) #include "device/rocm/rocmemory.hpp" #include "device/rocm/rocglinterop.hpp" +#include "pro/prodriver.hpp" #include #include #include @@ -111,16 +112,18 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) { } Device::Device(hsa_agent_t bkendDevice) - : mapCacheOps_(nullptr), - mapCache_(nullptr), - _bkendDevice(bkendDevice), - gpuvm_segment_max_alloc_(0), - alloc_granularity_(0), - context_(nullptr), - xferQueue_(nullptr), - xferRead_(nullptr), - xferWrite_(nullptr), - numOfVgpus_(0) { + : mapCacheOps_(nullptr) + , mapCache_(nullptr) + , _bkendDevice(bkendDevice) + , gpuvm_segment_max_alloc_(0) + , alloc_granularity_(0) + , context_(nullptr) + , xferQueue_(nullptr) + , xferRead_(nullptr) + , xferWrite_(nullptr) + , pro_device_(nullptr) + , pro_ena_(false) + , numOfVgpus_(0) { group_segment_.handle = 0; system_segment_.handle = 0; system_coarse_segment_.handle = 0; @@ -128,6 +131,10 @@ Device::Device(hsa_agent_t bkendDevice) } Device::~Device() { +#ifdef WITH_AMDGPU_PRO + delete pro_device_; +#endif + // Release cached map targets for (uint i = 0; mapCache_ != nullptr && i < mapCache_->size(); ++i) { if ((*mapCache_)[i] != nullptr) { @@ -474,16 +481,22 @@ bool Device::init() { roc_device->deviceInfo_.gfxipVersion_ = major * 100 + minor * 10 + stepping; - if (!roc_device->mapHSADeviceToOpenCLDevice(agent)) { - LogError("Failed mapping of HsaDevice to Device."); - continue; - } - if (!roc_device->create()) { LogError("Error creating new instance of Device."); continue; } + // Setup System Memory to be Non-Coherent per user + // request via environment variable. By default the + // System Memory is setup to be Coherent + if (roc_device->settings().enableNCMode_) { + hsa_status_t err = hsa_amd_coherency_set_type(agent, HSA_AMD_COHERENCY_TYPE_NONCOHERENT); + if (err != HSA_STATUS_SUCCESS) { + LogError("Unable to set NC memory policy!"); + continue; + } + } + if (selectedDevices[ordinal++] && (flagIsDefault(GPU_DEVICE_NAME) || GPU_DEVICE_NAME == 0 || GPU_DEVICE_NAME[0] == '\0' || !strcmp(GPU_DEVICE_NAME, roc_device->info_.name_))) { @@ -500,10 +513,71 @@ void Device::tearDown() { } bool Device::create() { + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) { + return false; + } + + // Create HSA settings + settings_ = new Settings(); + roc::Settings* hsaSettings = static_cast(settings_); + if ((hsaSettings == nullptr) || + !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipVersion_)) { + return false; + } + if (!amd::Device::create()) { return false; } + uint32_t hsa_bdf_id = 0; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &hsa_bdf_id)) { + return false; + } + + info_.deviceTopology_.pcie.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD; + info_.deviceTopology_.pcie.bus = (hsa_bdf_id & (0xFF << 8)) >> 8; + info_.deviceTopology_.pcie.device = (hsa_bdf_id & (0x1F << 3)) >> 3; + info_.deviceTopology_.pcie.function = (hsa_bdf_id & 0x07); + +#ifdef WITH_AMDGPU_PRO + // Create amdgpu-pro device interface for SSG support + pro_device_ = IProDevice::Init( + info_.deviceTopology_.pcie.bus, + info_.deviceTopology_.pcie.device, + info_.deviceTopology_.pcie.function); + if (pro_device_ != nullptr) { + pro_ena_ = true; + settings_->enableExtension(ClAMDLiquidFlash); + } +#endif + + if (populateOCLDeviceConstants() == false) { + return false; + } + +#if defined(WITH_LIGHTNING_COMPILER) + // create compilation object with cache support + int gfxipMajor = deviceInfo_.gfxipVersion_ / 100; + int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10; + int gfxipStepping = deviceInfo_.gfxipVersion_ % 10; + + // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) + // with dash as delimiter to be compatible with Windows directory name + std::ostringstream cacheTarget; + cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; + + amd::CacheCompilation* compObj = new amd::CacheCompilation( + cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); + if (!compObj) { + LogError("Unable to create cache compilation object!"); + return false; + } + + cacheCompilation_.reset(compObj); +#endif + amd::Context::Info info = {0}; std::vector devices; devices.push_back(this); @@ -568,59 +642,6 @@ device::Program* Device::createProgram(amd::option::Options* options) { return new roc::HSAILProgram(*this); } -bool Device::mapHSADeviceToOpenCLDevice(hsa_agent_t dev) { - if (HSA_STATUS_SUCCESS != - hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) { - return false; - } - - // Create HSA settings - settings_ = new Settings(); - roc::Settings* hsaSettings = static_cast(settings_); - if ((hsaSettings == nullptr) || - !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipVersion_)) { - return false; - } - - if (populateOCLDeviceConstants() == false) { - return false; - } - - // Setup System Memory to be Non-Coherent per user - // request via environment variable. By default the - // System Memory is setup to be Coherent - if (hsaSettings->enableNCMode_) { - hsa_status_t err = hsa_amd_coherency_set_type(dev, HSA_AMD_COHERENCY_TYPE_NONCOHERENT); - if (err != HSA_STATUS_SUCCESS) { - LogError("Unable to set NC memory policy!"); - return false; - } - } - -#if defined(WITH_LIGHTNING_COMPILER) - // create compilation object with cache support - int gfxipMajor = deviceInfo_.gfxipVersion_ / 100; - int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10; - int gfxipStepping = deviceInfo_.gfxipVersion_ % 10; - - // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) - // with dash as delimiter to be compatible with Windows directory name - std::ostringstream cacheTarget; - cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; - - amd::CacheCompilation* compObj = new amd::CacheCompilation( - cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); - if (!compObj) { - LogError("Unable to create cache compilation object!"); - return false; - } - - cacheCompilation_.reset(compObj); -#endif - - return true; -} - hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, void* data) { if (data == nullptr) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; @@ -734,16 +755,6 @@ bool Device::populateOCLDeviceConstants() { info_.type_ = CL_DEVICE_TYPE_GPU; - uint32_t hsa_bdf_id = 0; - if (HSA_STATUS_SUCCESS != - hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &hsa_bdf_id)) { - return false; - } - - info_.deviceTopology_.pcie.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD; - info_.deviceTopology_.pcie.bus = (hsa_bdf_id & (0xFF << 8)) >> 8; - info_.deviceTopology_.pcie.device = (hsa_bdf_id & (0x1F << 3)) >> 3; - info_.deviceTopology_.pcie.function = (hsa_bdf_id & 0x07); info_.extensions_ = getExtensionString(); info_.nativeVectorWidthDouble_ = info_.preferredVectorWidthDouble_ = (settings().doublePrecision_) ? 1 : 0; diff --git a/rocclr/runtime/device/rocm/rocdevice.hpp b/rocclr/runtime/device/rocm/rocdevice.hpp index 37d1bd74b3..7e7d6fd7f7 100644 --- a/rocclr/runtime/device/rocm/rocdevice.hpp +++ b/rocclr/runtime/device/rocm/rocdevice.hpp @@ -58,6 +58,7 @@ class Memory; class Resource; class VirtualDevice; class PrintfDbg; +class IProDevice; // A NULL Device type used only for offline compilation // Only functions that are used for compilation will be in this device @@ -276,8 +277,6 @@ class Device : public NullDevice { //! Destructor for the physical HSA device virtual ~Device(); - bool mapHSADeviceToOpenCLDevice(hsa_agent_t hsadevice); - // Temporary, delete it later when HSA Runtime and KFD is fully fucntional. void fake_device(); @@ -388,6 +387,10 @@ class Device : public NullDevice { amd::Context& context() const { return *context_; } + // Returns AMD GPU Pro interfaces + const IProDevice& iPro() const { return *pro_device_; } + bool ProEna() const { return pro_ena_; } + private: static hsa_ven_amd_loader_1_00_pfn_t amd_loader_ext_table; @@ -415,6 +418,8 @@ class Device : public NullDevice { XferBuffers* xferRead_; //!< Transfer buffers read XferBuffers* xferWrite_; //!< Transfer buffers write + const IProDevice* pro_device_; //!< AMDGPUPro device + bool pro_ena_; //!< Extra functionality with AMDGPUPro device, beyond ROCr public: amd::Atomic numOfVgpus_; //!< Virtual gpu unique index diff --git a/rocclr/runtime/device/rocm/rocmemory.cpp b/rocclr/runtime/device/rocm/rocmemory.cpp index 7130476f68..4f19bd8cff 100644 --- a/rocclr/runtime/device/rocm/rocmemory.cpp +++ b/rocclr/runtime/device/rocm/rocmemory.cpp @@ -20,6 +20,7 @@ #include "platform/memory.hpp" #include "platform/sampler.hpp" #include "amdocl/cl_gl_amd.hpp" +#include "pro/prodriver.hpp" namespace roc { @@ -548,7 +549,12 @@ void Buffer::destroy() { } const cl_mem_flags memFlags = owner()->getMemFlags(); - +#ifdef WITH_AMDGPU_PRO + if ((memFlags & CL_MEM_USE_PERSISTENT_MEM_AMD) && dev().ProEna()) { + dev().iPro().FreeDmaBuffer(deviceMemory_); + return; + } +#endif if ((deviceMemory_ != nullptr) && (deviceMemory_ != owner()->getHostMem())) { // if they are identical, the host pointer will be // deallocated later on => avoid double deallocation @@ -611,6 +617,20 @@ bool Buffer::create() { // Allocate backing storage in device local memory unless UHP or AHP are set const cl_mem_flags memFlags = owner()->getMemFlags(); + +#ifdef WITH_AMDGPU_PRO + if ((memFlags & CL_MEM_USE_PERSISTENT_MEM_AMD) && dev().ProEna()) { + void* host_ptr = nullptr; + deviceMemory_ = dev().iPro().AllocDmaBuffer(dev().getGpuAgents()[0], size(), &host_ptr); + if (deviceMemory_ == nullptr) { + return false; + } + flags_ |= HostMemoryDirectAccess; + owner()->setHostMem(host_ptr); + return true; + } +#endif + if (!(memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) { deviceMemory_ = dev().deviceLocalAlloc(size()); diff --git a/rocclr/runtime/device/rocm/rocsettings.cpp b/rocclr/runtime/device/rocm/rocsettings.cpp index 3bb8c9d9e3..3d3192cbd2 100644 --- a/rocclr/runtime/device/rocm/rocsettings.cpp +++ b/rocclr/runtime/device/rocm/rocsettings.cpp @@ -95,6 +95,7 @@ bool Settings::create(bool fullProfile, int gfxipVersion) { enableExtension(ClKhr3DImageWrites); enableExtension(ClAmdMediaOps); enableExtension(ClAmdMediaOps2); + enableExtension(ClAMDLiquidFlash); if (MesaInterop::Supported()) { enableExtension(ClKhrGlSharing); } diff --git a/rocclr/runtime/device/rocm/rocvirtual.cpp b/rocclr/runtime/device/rocm/rocvirtual.cpp index ee802c0581..ec630a9568 100644 --- a/rocclr/runtime/device/rocm/rocvirtual.cpp +++ b/rocclr/runtime/device/rocm/rocvirtual.cpp @@ -1757,4 +1757,56 @@ amd::Memory* VirtualGPU::findPinnedMem(void* addr, size_t size) { } void VirtualGPU::enableSyncBlit() const { blitMgr_->enableSynchronization(); } + +void VirtualGPU::submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd) { + size_t copySize = cmd.size()[0]; + size_t fileOffset = cmd.fileOffset(); + Memory* mem = dev().getRocMemory(&cmd.memory()); + uint idx = 0; + + assert((cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD) || + (cmd.type() == CL_COMMAND_WRITE_SSG_FILE_AMD)); + const bool writeBuffer(cmd.type() == CL_COMMAND_READ_SSG_FILE_AMD); + + if (writeBuffer) { + size_t dstOffset = cmd.origin()[0]; + while (copySize > 0) { + Memory* staging = dev().getRocMemory(&cmd.staging(idx)); + size_t dstSize = amd::TransferBufferFileCommand::StagingBufferSize; + dstSize = std::min(dstSize, copySize); + void* dstBuffer = staging->cpuMap(*this); + if (!cmd.file()->transferBlock(writeBuffer, dstBuffer, staging->size(), fileOffset, 0, + dstSize)) { + cmd.setStatus(CL_INVALID_OPERATION); + return; + } + staging->cpuUnmap(*this); + + bool result = blitMgr().copyBuffer(*staging, *mem, 0, dstOffset, dstSize, false); + fileOffset += dstSize; + dstOffset += dstSize; + copySize -= dstSize; + } + } else { + size_t srcOffset = cmd.origin()[0]; + while (copySize > 0) { + Memory* staging = dev().getRocMemory(&cmd.staging(idx)); + size_t srcSize = amd::TransferBufferFileCommand::StagingBufferSize; + srcSize = std::min(srcSize, copySize); + bool result = blitMgr().copyBuffer(*mem, *staging, srcOffset, 0, srcSize, false); + + void* srcBuffer = staging->cpuMap(*this); + if (!cmd.file()->transferBlock(writeBuffer, srcBuffer, staging->size(), fileOffset, 0, + srcSize)) { + cmd.setStatus(CL_INVALID_OPERATION); + return; + } + staging->cpuUnmap(*this); + + fileOffset += srcSize; + srcOffset += srcSize; + copySize -= srcSize; + } + } +} } // End of roc namespace diff --git a/rocclr/runtime/device/rocm/rocvirtual.hpp b/rocclr/runtime/device/rocm/rocvirtual.hpp index 4af1002580..74bb4f8cf7 100644 --- a/rocclr/runtime/device/rocm/rocvirtual.hpp +++ b/rocclr/runtime/device/rocm/rocvirtual.hpp @@ -190,6 +190,7 @@ class VirtualGPU : public device::VirtualDevice { virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd); virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd); virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd); + virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd); void submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand& cmd) {} void submitThreadTrace(amd::ThreadTraceCommand& vcmd) {} diff --git a/rocclr/runtime/platform/memory.hpp b/rocclr/runtime/platform/memory.hpp index 539ef646ae..227738f888 100644 --- a/rocclr/runtime/platform/memory.hpp +++ b/rocclr/runtime/platform/memory.hpp @@ -595,7 +595,7 @@ class SvmBuffer : AllStatic { //! Liquid flash extension class LiquidFlashFile : public RuntimeObject { private: - const wchar_t* name_; + std::wstring name_; cl_file_flags_amd flags_; void* handle_; uint32_t blockSize_;