From 7df0da7bacee7dbd5fca6513107b6795831d569a Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 6 Nov 2015 03:40:14 -0500
Subject: [PATCH] P4 to Git Change 1208254 by nhaustov@nhaustov_hsa on
2015/11/06 03:25:21
SWDEV-77584 - Remove old OpenCL hsa device and loader.
Reviewed by: Evgeniy Mankov
Testing: pre-checkin
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#128 edit
... //depot/stg/opencl/drivers/opencl/compiler/loader/Makefile#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/Makefile#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/build/Makefile#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/build/Makefile.libloader#11 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/hsacore_symbol_loader.cpp#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/hsacore_symbol_loader.hpp#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/loader.cpp#14 delete
... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/loader.hpp#6 delete
... //depot/stg/opencl/drivers/opencl/runtime/Makefile#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/build/Makefile.runtime#61 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#190 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/Makefile#8 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/build/Makefile#5 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/build/Makefile.oclhsa#23 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaappprofile.cpp#4 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaappprofile.hpp#4 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsabinary.cpp#8 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsabinary.hpp#5 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsablit.cpp#10 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsablit.hpp#3 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacompiler.cpp#27 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacompilerlib.cpp#13 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacompilerlib.hpp#10 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacore_symbol_loader.cpp#8 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacore_symbol_loader.hpp#8 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacounters.cpp#5 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacounters.hpp#3 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadefs.hpp#5 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.cpp#95 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.hpp#51 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsakernel.cpp#27 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsakernel.hpp#20 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.cpp#43 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.hpp#28 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.cpp#39 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.hpp#20 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.cpp#40 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.hpp#13 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.cpp#99 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.hpp#29 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/oclhsa.def#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/oclhsa_common.hpp#4 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/services_symbol_loader.cpp#10 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/services_symbol_loader.hpp#11 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/system_memory.h#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsaappprofile.cpp#2 edit
---
rocclr/runtime/device/device.cpp | 2 +-
rocclr/runtime/device/hsa/hsaappprofile.cpp | 61 -
rocclr/runtime/device/hsa/hsaappprofile.hpp | 27 -
rocclr/runtime/device/hsa/hsabinary.cpp | 152 --
rocclr/runtime/device/hsa/hsabinary.hpp | 56 -
rocclr/runtime/device/hsa/hsablit.cpp | 1838 -----------------
rocclr/runtime/device/hsa/hsablit.hpp | 401 ----
rocclr/runtime/device/hsa/hsacompiler.cpp | 163 --
rocclr/runtime/device/hsa/hsacompilerlib.cpp | 67 -
rocclr/runtime/device/hsa/hsacompilerlib.hpp | 92 -
.../device/hsa/hsacore_symbol_loader.cpp | 53 -
.../device/hsa/hsacore_symbol_loader.hpp | 75 -
rocclr/runtime/device/hsa/hsacounters.cpp | 144 --
rocclr/runtime/device/hsa/hsacounters.hpp | 103 -
rocclr/runtime/device/hsa/hsadefs.hpp | 42 -
rocclr/runtime/device/hsa/hsadevice.cpp | 896 --------
rocclr/runtime/device/hsa/hsadevice.hpp | 334 ---
rocclr/runtime/device/hsa/hsakernel.cpp | 573 -----
rocclr/runtime/device/hsa/hsakernel.hpp | 161 --
rocclr/runtime/device/hsa/hsamemory.cpp | 938 ---------
rocclr/runtime/device/hsa/hsamemory.hpp | 202 --
rocclr/runtime/device/hsa/hsaprogram.cpp | 726 -------
rocclr/runtime/device/hsa/hsaprogram.hpp | 160 --
rocclr/runtime/device/hsa/hsasettings.cpp | 81 -
rocclr/runtime/device/hsa/hsasettings.hpp | 65 -
rocclr/runtime/device/hsa/hsavirtual.cpp | 1544 --------------
rocclr/runtime/device/hsa/hsavirtual.hpp | 181 --
rocclr/runtime/device/hsa/oclhsa.def | 3 -
rocclr/runtime/device/hsa/oclhsa_common.hpp | 26 -
.../device/hsa/services_symbol_loader.cpp | 52 -
.../device/hsa/services_symbol_loader.hpp | 78 -
rocclr/runtime/device/hsa/system_memory.h | 97 -
32 files changed, 1 insertion(+), 9392 deletions(-)
delete mode 100644 rocclr/runtime/device/hsa/hsaappprofile.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsaappprofile.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsabinary.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsabinary.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsablit.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsablit.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsacompiler.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsacompilerlib.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsacompilerlib.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsacore_symbol_loader.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsacore_symbol_loader.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsacounters.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsacounters.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsadefs.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsadevice.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsadevice.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsakernel.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsakernel.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsamemory.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsamemory.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsaprogram.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsaprogram.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsasettings.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsasettings.hpp
delete mode 100644 rocclr/runtime/device/hsa/hsavirtual.cpp
delete mode 100644 rocclr/runtime/device/hsa/hsavirtual.hpp
delete mode 100644 rocclr/runtime/device/hsa/oclhsa.def
delete mode 100644 rocclr/runtime/device/hsa/oclhsa_common.hpp
delete mode 100644 rocclr/runtime/device/hsa/services_symbol_loader.cpp
delete mode 100644 rocclr/runtime/device/hsa/services_symbol_loader.hpp
delete mode 100644 rocclr/runtime/device/hsa/system_memory.h
diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index 378fd59a37..132e884720 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -7,7 +7,7 @@
#include "thread/monitor.hpp"
#if defined(WITH_HSA_DEVICE)
-#include "device/hsa/hsadevice.hpp"
+#include "device/hsa_foundation/hsadevice.hpp"
extern amd::AppProfile* oclhsaCreateAppProfile();
#endif
diff --git a/rocclr/runtime/device/hsa/hsaappprofile.cpp b/rocclr/runtime/device/hsa/hsaappprofile.cpp
deleted file mode 100644
index ae19bd7c0e..0000000000
--- a/rocclr/runtime/device/hsa/hsaappprofile.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-
-#ifndef WITHOUT_FSA_BACKEND
-
-#include "top.hpp"
-#include "device/device.hpp"
-#include "device/appprofile.hpp"
-#include "device/hsa/hsaappprofile.hpp"
-
-#include
-
-amd::AppProfile* oclhsaCreateAppProfile()
-{
- amd::AppProfile* appProfile = new oclhsa::AppProfile;
-
- if ((appProfile == NULL) || !appProfile->init()) {
- return NULL;
- }
-
- return appProfile;
-}
-
-namespace oclhsa {
-
-bool AppProfile::ParseApplicationProfile()
-{
- std::string appName("Explorer");
-
- std::transform(appName.begin(), appName.end(), appName.begin(), ::tolower);
- std::transform(appFileName_.begin(), appFileName_.end(), appFileName_.begin(), ::tolower);
-
- if (appFileName_.compare(appName) == 0 ) {
- hsaDeviceHint_ = CL_HSA_DISABLED_AMD;
- gpuvmHighAddr_ = false;
- noHsaInit_ = true;
- profileOverridesAllSettings_ = true;
- }
-
- // Setting both bits is invalid, make it niether.
- if (hsaDeviceHint_ & CL_HSA_ENABLED_AMD
- && hsaDeviceHint_ & CL_HSA_DISABLED_AMD) {
- hsaDeviceHint_ = 0;
- }
-
- if (noHsaInit_) {
- // If no HSA initialization, then force hint flag to non-HSA device.
- // Even if this is not forced, the device selection logic will endure it.
- // After all hint flags are treated as hint only - depending on
- // availibility.
- hsaDeviceHint_ = CL_HSA_DISABLED_AMD;
- }
-
- return true;
-}
-
-}
-
-#endif
diff --git a/rocclr/runtime/device/hsa/hsaappprofile.hpp b/rocclr/runtime/device/hsa/hsaappprofile.hpp
deleted file mode 100644
index e2cac7d71f..0000000000
--- a/rocclr/runtime/device/hsa/hsaappprofile.hpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-#ifndef HSAAPPPROFILE_HPP_
-#define HSAAPPPROFILE_HPP_
-
-
-#ifndef WITHOUT_FSA_BACKEND
-
-namespace oclhsa {
-
-class AppProfile : public amd::AppProfile
-{
-public:
- AppProfile(): amd::AppProfile() {}
-
-protected:
- //! parse application profile based on application file name
- virtual bool ParseApplicationProfile();
-};
-
-}
-
-#endif
-
-#endif
-
diff --git a/rocclr/runtime/device/hsa/hsabinary.cpp b/rocclr/runtime/device/hsa/hsabinary.cpp
deleted file mode 100644
index 342b8214c7..0000000000
--- a/rocclr/runtime/device/hsa/hsabinary.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-
-#ifndef WITHOUT_FSA_BACKEND
-
-
-#include "hsabinary.hpp"
-#include "hsaprogram.hpp"
-#include "hsakernel.hpp"
-#include "utils/options.hpp"
-#include "os/os.hpp"
-#include
-#include
-
-
-
-namespace oclhsa {
- /*
-bool
-ClBinary::loadKernels(FSAILProgram& program, NameKernelMap &kernels)
-{
- return true;
-
-
- const char _kernel[] = "_kernel";
- const char __FSA_[] = "__FSA_";
- const char _header[] = "_header";
- const char _fsail[] = "_fsail";
- bool hasKernels = false;
-
- // TODO : jugu
- // Target should be 15 bit maximum. Should check this somewhere.
- uint32_t target = static_cast(21);//dev().calTarget());
- uint16_t elf_target;
- amd::OclElf::oclElfPlatform platform;
- if (!elfIn()->getTarget(elf_target, platform) ||
- (platform != amd::OclElf::CAL_PLATFORM) ||
- ((uint32_t)target != elf_target)) {
- // warning !
- // LogError("The OCL binary image loading failed: different target");
-
- // LHOWES TODO: target in kannan's elf is wrong so skip this for now
- // We may want a special HSA target or a similar more substantial change.
- // return false;
- }
-
- for (amd::Sym_Handle sym = elfIn()->nextSymbol(NULL);
- sym != NULL;
- sym = elfIn()->nextSymbol(sym)) {
- amd::OclElf::SymbolInfo symInfo;
- if (!elfIn()->getSymbolInfo(sym, &symInfo)) {
- LogError("LoadKernelFromElf: getSymbolInfo() fails");
- return false;
- }
-
- std::string elfSymName(symInfo.sym_name);
-
- const size_t offset = sizeof(__FSA_) - 1;
- if (elfSymName.compare(0, offset, __FSA_) != 0) {
- continue;
- }
-
- // Assume this elfSymName is associated with a kernel name. The folloiwng code will adjust
- // If it isn't.
- const size_t suffixPos = elfSymName.rfind('_');
- bool isKernel = true; // assume it is a kernel
- std::string functionName = elfSymName.substr(sizeof(__FSA_)-1, suffixPos-(sizeof(__FSA_)-1));
- //"__OpenCL_";
- //functionName.append(elfSymName.substr(sizeof(__FSA_)-1, suffixPos-(sizeof(__FSA_)-1)));
- //functionName.append("_kernel"); // make the kernel's linkage name
-
- // Find kernel in map and get its kernel representation
- NameKernelMap::iterator searchIterator = kernels.find(functionName);
- Kernel *currentKernel = 0;
- if( searchIterator == kernels.end() ) {
- // TODO: note, this will need to be decided on based on the the device type. As we have no CPU yet...
- //currentKernel = new Kernel(functionName);
- //kernels[functionName] = currentKernel;
- } else {
- currentKernel = static_cast(searchIterator->second);
- }
-
-
- // Add info for this elf symbol into tempobj's functionNameMap[]
- if (elfSymName.compare(suffixPos, sizeof(_fsail) - 1, _fsail) == 0) {
-
- assert (currentKernel->hasFSAIL() &&
- "More than one fsail symbol for a kernel");
- // LHOWES TODO: Currently this is using the section address and size because
- // we only have a single kernel and there is a bug in the current AMP compiler.
- // Kannan is working on fixing this and once we have the symbol address and size
- // correct in the metadata then we can change this and it'll work properly for
- // multiple kernels.
- std::string options("");
- std::string fsailString(symInfo.sec_addr, symInfo.sec_addr + symInfo.sec_size);
- currentKernel->setFSAIL(fsailString);
- //currentKernel->compile(options);
-
- }
-
-
- // LHOWES
- // Hack to assume that this is the AMP path for now
- // until we have kernel metadata we need a way to generate the parameter list.
- {
- device::Kernel::parameters_t parameterList;
- // Is AMP code
-
- amd::KernelParameterDescriptor desc;
- desc.name_ = "Functor";
- desc.type_ = T_POINTER;
-
- desc.size_ = sizeof(void*);
- desc.offset_ = 0;
-
- // BKENDALL HACK
- desc.typeName_ = "";
- desc.typeQualifier_ = 0;
- desc.accessQualifier_ = 0;
- desc.addressQualifier_ = 0;
- // !BKENDALL HACK
-
- parameterList.push_back(desc);
- // oclhsa OpenCL integration
- }
-
- hasKernels = true;
- }
-
-
- return hasKernels;
-
-}
- */
-/*
-bool
-ClBinary::clearElfOut()
-{
- // Recreate libelf elf object
- if (!elfOut()->Clear()) {
- return false;
- }
-
- // Need to re-setup target
- return setElfTarget();
-}
-*/
-} // namespace oclhsa
-
-#endif // WITHOUT_FSA_BACKEND
diff --git a/rocclr/runtime/device/hsa/hsabinary.hpp b/rocclr/runtime/device/hsa/hsabinary.hpp
deleted file mode 100644
index 5fa3ab53ba..0000000000
--- a/rocclr/runtime/device/hsa/hsabinary.hpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-#ifndef HSABINARY_HPP_
-#define HSABINARY_HPP_
-
-#include "top.hpp"
-#include "hsadevice.hpp"
-
-#ifndef WITHOUT_FSA_BACKEND
-
-namespace oclhsa {
-
-
-typedef std::map NameKernelMap;
-
-class FSAILProgram;
-
-class ClBinary : public device::ClBinary
-{
-public:
- ClBinary(const Device& dev, BinaryImageFormat bifVer = BIF_VERSION3)
- : device::ClBinary(dev, bifVer)
- {}
-
- //! Destructor
- ~ClBinary() {}
-
-
-protected:
- bool setElfTarget() {
- uint32_t target = static_cast(21);//dev().calTarget());
- assert (((0xFFFF8000 & target) == 0) && "ASIC target ID >= 2^15");
- uint16_t elf_target = (uint16_t)(0x7FFF & target);
- return elfOut()->setTarget(elf_target, amd::OclElf::CAL_PLATFORM);
- return true;
- }
-
-private:
- //! Disable default copy constructor
- ClBinary(const ClBinary&);
-
- //! Disable default operator=
- ClBinary& operator=(const ClBinary&);
-
- //! Returns the HSA device for this object
- const Device& dev() const { return static_cast(dev_); }
-
-};
-
-} // namespace oclhsa
-
-#endif // WITHOUT_FSA_BACKEND
-
-#endif // HSABINARY_HPP_
-
diff --git a/rocclr/runtime/device/hsa/hsablit.cpp b/rocclr/runtime/device/hsa/hsablit.cpp
deleted file mode 100644
index ff7a735534..0000000000
--- a/rocclr/runtime/device/hsa/hsablit.cpp
+++ /dev/null
@@ -1,1838 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#include "platform/commandqueue.hpp"
-#include "device/hsa/hsadevice.hpp"
-#include "device/hsa/hsablit.hpp"
-#include "device/hsa/hsamemory.hpp"
-#include "device/hsa/hsavirtual.hpp"
-#include "device/hsa/oclhsa_common.hpp"
-#include "utils/debug.hpp"
-
-namespace oclhsa {
-HsaBlitManager::HsaBlitManager(device::VirtualDevice& vDev, Setup setup)
- : HostBlitManager(vDev, setup)
-{ }
-
-bool
-HsaBlitManager::readBuffer(
- device::Memory& srcMemory,
- void* dstHost,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableReadBuffer_ || srcMemory.isHostMemDirectAccess()) {
- return HostBlitManager::readBuffer(
- srcMemory, dstHost, origin, size, entire);
- }
-
- void *src = static_cast(srcMemory).getDeviceMemory();
-
- // Copy memory
- HsaStatus status = hsacoreapi->HsaCopyMemory(
- dstHost, reinterpret_cast(src) + origin[0], size[0]);
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
- return true;
-}
-
-bool
-HsaBlitManager::readBufferRect(
- device::Memory& srcMemory,
- void* dstHost,
- const amd::BufferRect& bufRect,
- const amd::BufferRect& hostRect,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableReadBufferRect_ || srcMemory.isHostMemDirectAccess()) {
- return HostBlitManager::readBufferRect(
- srcMemory, dstHost, bufRect, hostRect, size, entire);
- }
-
- void *src = static_cast(srcMemory).getDeviceMemory();
-
- size_t srcOffset;
- size_t dstOffset;
-
- for (size_t z = 0; z < size[2]; ++z) {
- for (size_t y = 0; y < size[1]; ++y) {
- srcOffset = bufRect.offset(0, y, z);
- dstOffset = hostRect.offset(0, y, z);
-
- // Copy memory line by line
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- (reinterpret_cast(dstHost) + dstOffset),
- (reinterpret_cast(src) + srcOffset),
- size[0]);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::readImage(
- device::Memory& srcMemory,
- void* dstHost,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- size_t rowPitch,
- size_t slicePitch,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- oclhsa::Image &image = static_cast(srcMemory);
-
- const uint8_t *src = static_cast(image.getDeviceMemory());
- uint8_t* dst = static_cast(dstHost);
-
- const amd::Coord3D srcOffset = origin;
- const amd::Coord3D dstOffset = amd::Coord3D(0);
-
- size_t srcRowPitch = image.getDeviceRowPitchSize();
- size_t srcSlicePitch = image.getDeviceSlicePitchSize();
-
- size_t elementSize =
- srcMemory.owner()->asImage()->getImageFormat().getElementSize();
- size_t dstRowPitch =
- (rowPitch == 0) ? (size[0] * elementSize) : rowPitch;
- size_t dstSlicePitch =
- (slicePitch == 0) ? (size[1] * dstRowPitch) : slicePitch;
-
- const amd::Coord3D& sizeToCopy = size;
-
- return importExportImage(
- dst, src, dstOffset, dstRowPitch, dstSlicePitch, srcOffset, srcRowPitch,
- srcSlicePitch, sizeToCopy, elementSize);
-}
-
-bool
-HsaBlitManager::writeBuffer(
- const void* srcHost,
- device::Memory& dstMemory,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess()) {
- return HostBlitManager::writeBuffer(
- srcHost, dstMemory, origin, size, entire);
- }
-
- void *dst = static_cast(dstMemory).getDeviceMemory();
-
- // Copy memory
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- reinterpret_cast(dst) + origin[0], srcHost, size[0]);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::writeBufferRect(
- const void* srcHost,
- device::Memory& dstMemory,
- const amd::BufferRect& hostRect,
- const amd::BufferRect& bufRect,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess()) {
- return HostBlitManager::writeBufferRect(
- srcHost, dstMemory, hostRect, bufRect, size, entire);
- }
-
- void *dst = static_cast(dstMemory).getDeviceMemory();
-
- size_t srcOffset;
- size_t dstOffset;
-
- for (size_t z = 0; z < size[2]; ++z) {
- for (size_t y = 0; y < size[1]; ++y) {
- srcOffset = hostRect.offset(0, y, z);
- dstOffset = bufRect.offset(0, y, z);
-
- // Copy memory line by line
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- (reinterpret_cast(dst) + dstOffset),
- (reinterpret_cast(srcHost) + srcOffset),
- size[0]);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::writeImage(
- const void* srcHost,
- device::Memory& dstMemory,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- size_t rowPitch,
- size_t slicePitch,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- oclhsa::Image &image = static_cast(dstMemory);
-
- const uint8_t* src = static_cast(srcHost);
- uint8_t *dst = static_cast(image.getDeviceMemory());
-
- const amd::Coord3D srcOffset = amd::Coord3D(0);
- const amd::Coord3D dstOffset = origin;
-
- size_t elementSize =
- dstMemory.owner()->asImage()->getImageFormat().getElementSize();
- size_t srcRowPitch =
- (rowPitch == 0) ? (size[0] * elementSize) : rowPitch;
- size_t srcSlicePitch =
- (slicePitch == 0) ? (size[1] * srcRowPitch) : slicePitch;
-
- size_t dstRowPitch = image.getDeviceRowPitchSize();
- size_t dstSlicePitch = image.getDeviceSlicePitchSize();
-
- const amd::Coord3D& sizeToCopy = size;
-
- return importExportImage(
- dst, src, dstOffset, dstRowPitch, dstSlicePitch, srcOffset, srcRowPitch,
- srcSlicePitch, sizeToCopy, elementSize);
-}
-
-bool
-HsaBlitManager::copyBuffer(
- device::Memory& srcMemory,
- device::Memory& dstMemory,
- const amd::Coord3D& srcOrigin,
- const amd::Coord3D& dstOrigin,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableCopyBuffer_ ||
- (srcMemory.isHostMemDirectAccess() &&
- dstMemory.isHostMemDirectAccess())) {
- return HostBlitManager::copyBuffer(
- srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
- }
-
- void *src = static_cast(srcMemory).getDeviceMemory();
- void *dst = static_cast(dstMemory).getDeviceMemory();
-
- // Straight forward buffer copy
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- (reinterpret_cast(dst) + dstOrigin[0]),
- (reinterpret_cast(src) + srcOrigin[0]),
- size[0]);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::copyBufferRect(
- device::Memory& srcMemory,
- device::Memory& dstMemory,
- const amd::BufferRect& srcRect,
- const amd::BufferRect& dstRect,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableCopyBuffer_ ||
- (srcMemory.isHostMemDirectAccess() &&
- dstMemory.isHostMemDirectAccess())) {
- return HostBlitManager::copyBufferRect(
- srcMemory, dstMemory, srcRect, dstRect, size, entire);
- }
-
- void *src = static_cast(srcMemory).getDeviceMemory();
- void *dst = static_cast(dstMemory).getDeviceMemory();
-
- for (size_t z = 0; z < size[2]; ++z) {
- for (size_t y = 0; y < size[1]; ++y) {
- size_t srcOffset = srcRect.offset(0, y, z);
- size_t dstOffset = dstRect.offset(0, y, z);
-
- // Copy memory line by line
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- (reinterpret_cast(dst) + dstOffset),
- (reinterpret_cast(src) + srcOffset),
- size[0]);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::copyImageToBuffer(
- device::Memory& srcMemory,
- device::Memory& dstMemory,
- const amd::Coord3D& srcOrigin,
- const amd::Coord3D& dstOrigin,
- const amd::Coord3D& size,
- bool entire,
- size_t rowPitch,
- size_t slicePitch) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- oclhsa::Image& srcImage = static_cast(srcMemory);
- oclhsa::Buffer& destBuff = static_cast(dstMemory);
-
- const uint8_t *src = static_cast(srcImage.getDeviceMemory());
- uint8_t* dst = static_cast(destBuff.getDeviceMemory());
-
- size_t elementSize =
- srcMemory.owner()->asImage()->getImageFormat().getElementSize();
- size_t dstRowPitch = size[0] * elementSize;
- size_t dstSlicePitch = size[1] * dstRowPitch;
-
- size_t srcRowPitch = srcImage.getDeviceRowPitchSize();
- size_t srcSlicePitch = srcImage.getDeviceSlicePitchSize();
-
- return importExportImage(
- dst, src, dstOrigin, dstRowPitch, dstSlicePitch, srcOrigin, srcRowPitch,
- srcSlicePitch, size, elementSize);
-}
-
-bool
-HsaBlitManager::copyBufferToImage(
- device::Memory& srcMemory,
- device::Memory& dstMemory,
- const amd::Coord3D& srcOrigin,
- const amd::Coord3D& dstOrigin,
- const amd::Coord3D& size,
- bool entire,
- size_t rowPitch,
- size_t slicePitch) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- oclhsa::Buffer& srcBuff = static_cast(srcMemory);
- oclhsa::Image& dstImage = static_cast(dstMemory);
-
- const uint8_t *src = static_cast(srcBuff.getDeviceMemory());
- uint8_t* dst = static_cast(dstImage.getDeviceMemory());
-
- size_t elementSize =
- dstMemory.owner()->asImage()->getImageFormat().getElementSize();
- size_t srcRowPitch = size[0] * elementSize;
- size_t srcSlicePitch = size[1] * srcRowPitch;
-
- size_t dstRowPitch = dstImage.getDeviceRowPitchSize();
- size_t dstSlicePitch = dstImage.getDeviceSlicePitchSize();
-
- return importExportImage(
- dst, src, dstOrigin, dstRowPitch, dstSlicePitch, srcOrigin, srcRowPitch,
- srcSlicePitch, size, elementSize);
-}
-
-bool
-HsaBlitManager::copyImage(
- device::Memory& srcMemory,
- device::Memory& dstMemory,
- const amd::Coord3D& srcOrigin,
- const amd::Coord3D& dstOrigin,
- const amd::Coord3D& size,
- bool entire) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- oclhsa::Image& srcImage = static_cast(srcMemory);
- oclhsa::Image& destImage = static_cast(dstMemory);
-
- const uint8_t *src = static_cast(srcImage.getDeviceMemory());
- uint8_t* dst = static_cast(destImage.getDeviceMemory());
-
- size_t srcRowPitch = srcImage.getDeviceRowPitchSize();
- size_t srcSlicePitch = srcImage.getDeviceSlicePitchSize();
-
- size_t dstRowPitch = destImage.getDeviceRowPitchSize();
- size_t dstSlicePitch = destImage.getDeviceSlicePitchSize();
-
- size_t elementSize =
- srcMemory.owner()->asImage()->getImageFormat().getElementSize();
-
- return importExportImage(
- dst, src, dstOrigin, dstRowPitch, dstSlicePitch, srcOrigin, srcRowPitch,
- srcSlicePitch, size, elementSize);
-}
-
-bool
-HsaBlitManager::fillBuffer(
- device::Memory& memory,
- const void* pattern,
- size_t patternSize,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- bool entire
- ) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- if (setup_.disableFillBuffer_ || memory.isHostMemDirectAccess()) {
- return HostBlitManager::fillBuffer(memory, pattern, patternSize,
- origin, size, entire);
- }
-
- void *fillMem = static_cast(memory).getDeviceMemory();
-
- size_t offset = origin[0];
- size_t fillSize = size[0];
-
- if ((fillSize % patternSize) != 0) {
- LogError("Misaligned buffer size and pattern size!");
- }
-
- // Fill the buffer memory with a pattern
- for (size_t i = 0; i < (fillSize / patternSize); i++) {
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- (reinterpret_cast(fillMem) + offset),
- (reinterpret_cast(pattern)),
- patternSize);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
-
- offset += patternSize;
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::fillImage(
- device::Memory& memory,
- const void* pattern,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- bool entire
- ) const
-{
- // Wait on the last outstanding kernel.
- gpu().releaseGpuMemoryFence();
-
- oclhsa::Image& image = static_cast(memory);
-
- void *fillMem = image.getDeviceMemory();
-
- size_t elementSize =
- memory.owner()->asImage()->getImageFormat().getElementSize();
-
- float fillValue[4];
- memset(fillValue, 0, sizeof(fillValue));
- memory.owner()->asImage()->getImageFormat().formatColor(
- pattern, fillValue);
-
- size_t rowPitchSize = image.getDeviceRowPitchSize();
- size_t slicePitchSize = image.getDeviceSlicePitchSize();
-
- size_t offset = origin[0] * elementSize;
-
- // Adjust offset with Y dimension
- offset += rowPitchSize * origin[1];
-
- // Adjust offset with Z dimension
- offset += slicePitchSize * origin[2];
-
- size_t offsetOrg = offset;
-
- // Fill the image memory with a pattern
- for (size_t slice = 0; slice < size[2]; ++slice) {
- offset = offsetOrg + slice * slicePitchSize;
-
- for (size_t rows = 0; rows < size[1]; ++rows) {
- size_t pixOffset = offset;
-
- // Copy memory pixel by pixel
- for (size_t column = 0; column < size[0]; ++column) {
- HsaStatus status =
- hsacoreapi->HsaCopyMemory(
- (reinterpret_cast(fillMem) + pixOffset),
- (reinterpret_cast(fillValue)),
- elementSize);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA buffer failed with code %d", status);
- return false;
- }
-
- pixOffset += elementSize;
- }
-
- offset += rowPitchSize;
- }
- }
-
- return true;
-}
-
-bool
-HsaBlitManager::importExportImage(
- uint8_t* dst,
- const uint8_t* src,
- const amd::Coord3D& dstOffset,
- size_t dstRowPitch,
- size_t dstSlicePitch,
- const amd::Coord3D& srcOffset,
- size_t srcRowPitch,
- size_t srcSlicePitch,
- const amd::Coord3D& sizeToCopy,
- size_t elementSize) const
-{
- for (size_t zDim = 0; zDim < sizeToCopy[2]; ++zDim) {
- for (size_t yDim = 0; yDim < sizeToCopy[1]; ++yDim) {
- size_t srcImgOffset =
- srcOffset[0] * elementSize + (srcOffset[1] + yDim) * srcRowPitch +
- (srcOffset[2] + zDim) * srcSlicePitch;
- size_t dstImgOffset =
- dstOffset[0] * elementSize + (dstOffset[1] + yDim) * dstRowPitch +
- (dstOffset[2] + zDim) * dstSlicePitch;
- HsaStatus status = hsacoreapi->HsaCopyMemory(
- dst + dstImgOffset, src + srcImgOffset, sizeToCopy[0]*elementSize);
-
- if (status != kHsaStatusSuccess) {
- LogPrintfError("DMA import/export image failed with code %d", status);
- return false;
- }
- }
- }
-
- return true;
-}
-
-// Computes the row and slice pitches, in elements, that are handed to the
-// image <-> buffer blit kernels.
-//
-// pitch      - output; pitch[0] receives the row pitch and pitch[1] the
-//              slice pitch
-// copySize   - extent of the copied region
-// rowPitch   - requested row pitch, 0 selects copySize[0]
-// slicePitch - requested slice pitch, 0 derives it from the row pitch
-// mem        - device memory of the image; its owner supplies the image
-//              type and the element size used to scale the pitches
-static void
-CalcRowSlicePitches(
-    cl_ulong* pitch, const cl_int* copySize,
-    size_t rowPitch, size_t slicePitch, const Memory& mem)
-{
-    const bool img1Darray =
-        (mem.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY);
-    size_t memFmtSize = mem.owner()->asImage()->getImageFormat().getElementSize();
-
-    if (rowPitch == 0) {
-        pitch[0] = copySize[0];
-    }
-    else {
-        pitch[0] = rowPitch / memFmtSize;
-    }
-    if (slicePitch == 0) {
-        // A 1D array has a single row per slice
-        pitch[1] = pitch[0] * (img1Darray ? 1 : copySize[1]);
-    }
-    else {
-        pitch[1] = slicePitch / memFmtSize;
-    }
-    assert((pitch[0] <= pitch[1]) && "rowPitch must be <= slicePitch");
-
-    if (img1Darray) {
-        // For a 1D array rowPitch = slicePitch
-        pitch[0] = pitch[1];
-    }
-}
-
-// Builds the manager on top of HsaBlitManager with no context, no program
-// and every kernel slot empty.
-KernelBlitManager::KernelBlitManager(device::VirtualDevice& vDev, Setup setup)
-    : HsaBlitManager(vDev, setup)
-    , context_(NULL)
-    , program_(NULL)
-{
-    uint slot = 0;
-    while (slot < BlitTotal) {
-        kernels_[slot] = NULL;
-        ++slot;
-    }
-}
-
-// Releases every kernel that was set, then the program, then the context.
-KernelBlitManager::~KernelBlitManager()
-{
-    for (uint slot = 0; slot < BlitTotal; ++slot) {
-        if (kernels_[slot] == NULL) {
-            continue;
-        }
-        kernels_[slot]->release();
-    }
-
-    if (program_ != NULL) {
-        program_->release();
-    }
-
-    // The context held here is a dummy one
-    if (context_ != NULL) {
-        context_->release();
-    }
-}
-
-// Reads a region of a device buffer into host memory.
-//
-// Falls back to HsaBlitManager::readBuffer when the kernel path is disabled
-// in setup_ or the source is directly host accessible. Otherwise dstHost is
-// wrapped in a temporary CL_MEM_USE_HOST_PTR buffer of size[0] and the
-// transfer is done by copyBuffer(). Returns false when the temporary buffer
-// cannot be set up, otherwise the result of copyBuffer().
-bool
-KernelBlitManager::readBuffer(
-    device::Memory& srcMemory,
-    void* dstHost,
-    const amd::Coord3D& origin,
-    const amd::Coord3D& size,
-    bool entire) const
-{
-    if (setup_.disableReadBuffer_ || srcMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::readBuffer(srcMemory, dstHost, origin,
-            size, entire);
-    }
-
-    amd::Buffer *dstMemory = new (*context_) amd::Buffer(
-        *context_, CL_MEM_USE_HOST_PTR, size[0]);
-
-    if (!dstMemory->create(const_cast<void*>(dstHost))) {
-        LogError("[OCL] Fail to create mem object for destination");
-        // Drop the temporary wrapper instead of leaking it
-        dstMemory->release();
-        return false;
-    }
-
-    device::Memory *devDstMemory = dstMemory->getDeviceMemory(dev_);
-    if (devDstMemory == NULL) {
-        LogError("[OCL] Fail to create device mem object for destination");
-        // Drop the temporary wrapper instead of leaking it
-        dstMemory->release();
-        return false;
-    }
-
-    bool result = copyBuffer(
-        srcMemory, *devDstMemory, origin, amd::Coord3D(0), size, entire);
-
-    // Wait for the transfer to finish so that we could safely release the
-    // destination memory object.
-    // TODO: we could remove this if issue on implicit memory registration is
-    // fixed by KFD, so that we could pass the pattern as SVM.
-    gpu().releaseGpuMemoryFence();
-
-    dstMemory->release();
-
-    return result;
-}
-
-// Reads a rectangular region of a device buffer into host memory.
-//
-// Falls back to HsaBlitManager::readBufferRect when the kernel path is
-// disabled in setup_ or the source is directly host accessible. Otherwise
-// dstHost is wrapped in a temporary CL_MEM_USE_HOST_PTR buffer that spans
-// hostRect.start_ + hostRect.end_ and the transfer is done by
-// copyBufferRect(). Returns false when the temporary buffer cannot be set
-// up, otherwise the result of copyBufferRect().
-bool
-KernelBlitManager::readBufferRect(
-    device::Memory& srcMemory,
-    void* dstHost,
-    const amd::BufferRect& bufRect,
-    const amd::BufferRect& hostRect,
-    const amd::Coord3D& size,
-    bool entire) const
-{
-    if (setup_.disableReadBufferRect_ || srcMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::readBufferRect(
-            srcMemory, dstHost, bufRect, hostRect, size, entire);
-    }
-
-    size_t dstSize = hostRect.start_ + hostRect.end_;
-    amd::Buffer *dstMemory =
-        new (*context_) amd::Buffer(*context_, CL_MEM_USE_HOST_PTR, dstSize);
-
-    if (!dstMemory->create(const_cast<void*>(dstHost))) {
-        LogError("[OCL] Fail to create mem object for destination");
-        // Drop the temporary wrapper instead of leaking it
-        dstMemory->release();
-        return false;
-    }
-
-    device::Memory *devDstMemory = dstMemory->getDeviceMemory(dev_);
-    if (devDstMemory == NULL) {
-        LogError("[OCL] Fail to create device mem object for destination");
-        // Drop the temporary wrapper instead of leaking it
-        dstMemory->release();
-        return false;
-    }
-
-    bool result = copyBufferRect(
-        srcMemory, *devDstMemory, bufRect, hostRect, size, entire);
-
-    // Wait for the transfer to finish so that we could safely release the
-    // destination memory object.
-    // TODO: we could remove this if issue on implicit memory registration is
-    // fixed by KFD, so that we could pass the pattern as SVM.
-    gpu().releaseGpuMemoryFence();
-
-    dstMemory->release();
-
-    return result;
-}
-
-// Computes the size of the linear host region that backs an image transfer
-// and normalizes the pitches.
-//
-// linearSize - output; size of the linear region
-// size       - extent of the transfer
-// rowPitch   - in/out; reset to 0 when it is 0 already or equals the
-//              tightly packed row size (size[0] * element size)
-// slicePitch - in/out; reset to 0 when it is 0 already or equals the slice
-//              size derived from the row size
-// mem        - device memory of the image; its owner supplies the element
-//              size, the number of dimensions and the image type
-void
-FindLinearSize(
-    size_t& linearSize, const amd::Coord3D& size,
-    size_t& rowPitch, size_t& slicePitch, const device::Memory& mem)
-{
-    size_t elementSize = mem.owner()->asImage()->getImageFormat().getElementSize();
-
-    linearSize = size[0] * elementSize;
-    if ((rowPitch == 0) || (rowPitch == linearSize)) {
-        rowPitch = 0;
-    }
-    else {
-        linearSize = rowPitch;
-    }
-
-    // Calculate the pin size, which should be equal to the copy size
-    for (uint i = 1; i < mem.owner()->asImage()->getDims(); ++i) {
-        linearSize *= size[i];
-        if (i == 1) {
-            if ((slicePitch == 0) || (slicePitch == linearSize)) {
-                slicePitch = 0;
-            }
-            else {
-                if (mem.owner()->getType() != CL_MEM_OBJECT_IMAGE1D_ARRAY) {
-                    linearSize = slicePitch;
-                }
-                else {
-                    // For a 1D array size[1] counts the slices
-                    linearSize = slicePitch * size[i];
-                }
-            }
-        }
-    }
-}
-
-// Data used when image views are created: some formats have to be replaced
-// by another one before a kernel blit operation.
-struct FormatConvertion {
-    cl_uint clOldType_;     // rejected value
-    cl_uint clNewType_;     // replacement used for the blit
-};
-
-// Rejected channel data types and their replacements
-static const FormatConvertion RejectedData[] =
-{
-    { CL_UNORM_INT8,    CL_UNSIGNED_INT8  },
-    { CL_UNORM_INT16,   CL_UNSIGNED_INT16 },
-    { CL_SNORM_INT8,    CL_UNSIGNED_INT8  },
-    { CL_SNORM_INT16,   CL_UNSIGNED_INT16 },
-    { CL_HALF_FLOAT,    CL_UNSIGNED_INT16 },
-    { CL_FLOAT,         CL_UNSIGNED_INT32 },
-    { CL_SIGNED_INT8,   CL_UNSIGNED_INT8  },
-    { CL_SIGNED_INT16,  CL_UNSIGNED_INT16 },
-    { CL_SIGNED_INT32,  CL_UNSIGNED_INT32 }
-};
-
-// Rejected channel orders and their replacements
-static const FormatConvertion RejectedOrder[] =
-{
-    { CL_A,         CL_R    },
-    { CL_RA,        CL_RG   },
-    { CL_LUMINANCE, CL_R    },
-    { CL_INTENSITY, CL_R    },
-    { CL_BGRA,      CL_RGBA },
-    { CL_ARGB,      CL_RGBA }
-};
-
-// Number of entries in each of the two tables
-const uint RejectedFormatDataTotal =
-    sizeof(RejectedData) / sizeof(RejectedData[0]);
-const uint RejectedFormatChannelTotal =
-    sizeof(RejectedOrder) / sizeof(RejectedOrder[0]);
-
-// Returns the format a blit kernel should use for an image of oldFormat.
-//
-// The channel data type and the channel order are looked up independently
-// in RejectedData and RejectedOrder; a value found there is replaced by its
-// table entry, any other value is passed through unchanged.
-amd::Image::Format
-KernelBlitManager::filterFormat(amd::Image::Format oldFormat) const
-{
-    cl_image_format filtered;
-    filtered.image_channel_order = oldFormat.image_channel_order;
-    filtered.image_channel_data_type = oldFormat.image_channel_data_type;
-
-    // Replace an unsupported channel data type
-    const FormatConvertion* const dataEnd =
-        RejectedData + RejectedFormatDataTotal;
-    for (const FormatConvertion* entry = RejectedData; entry != dataEnd; ++entry) {
-        if (entry->clOldType_ == oldFormat.image_channel_data_type) {
-            filtered.image_channel_data_type = entry->clNewType_;
-            break;
-        }
-    }
-
-    // Replace an unsupported channel order
-    const FormatConvertion* const orderEnd =
-        RejectedOrder + RejectedFormatChannelTotal;
-    for (const FormatConvertion* entry = RejectedOrder; entry != orderEnd; ++entry) {
-        if (entry->clOldType_ == oldFormat.image_channel_order) {
-            filtered.image_channel_order = entry->clNewType_;
-            break;
-        }
-    }
-
-    return amd::Image::Format(filtered);
-}
-
-// Creates a view of an image with a different format.
-//
-// A view of parent's owner is created with newFormat, an oclhsa::Image
-// device object is built for it and installed as the view's device memory
-// for dev_. Returns the device object of the view, or NULL on failure; in
-// the failure case everything created here is released again.
-//
-// NOTE(review): the template arguments of the two static_casts were missing
-// in this copy of the code; the target types below are inferred from the
-// surrounding declarations - confirm against the original file.
-device::Memory *
-KernelBlitManager::createImageView(
-    device::Memory &parent,
-    amd::Image::Format newFormat) const
-{
-    amd::Image *image =
-        parent.owner()->asImage()->createView(parent.owner()->getContext(), newFormat, &gpu());
-
-    if (image == NULL) {
-        LogError("[OCL] Fail to allocate view of image object");
-        return NULL;
-    }
-
-    Image* devImage = new oclhsa::Image(static_cast<const oclhsa::Device&>(dev_), *image);
-    if (devImage == NULL) {
-        LogError("[OCL] Fail to allocate device mem object for the view");
-        image->release();
-        return NULL;
-    }
-
-    if (!devImage->createView(static_cast<oclhsa::Image &>(parent))) {
-        LogError("[OCL] Fail to create device mem object for the view");
-        delete devImage;
-        image->release();
-        return NULL;
-    }
-
-    image->replaceDeviceMemory(&dev_, devImage);
-
-    return devImage;
-}
-
-// Reads a region of a device image into host memory.
-//
-// Falls back to HsaBlitManager::readImage when the kernel path is disabled
-// in setup_ or the source is directly host accessible. Otherwise dstHost is
-// wrapped in a temporary CL_MEM_USE_HOST_PTR buffer whose size comes from
-// FindLinearSize() (which may also reset rowPitch/slicePitch to 0) and the
-// transfer is done by copyImageToBuffer(). Returns false when the temporary
-// buffer cannot be set up, otherwise the result of copyImageToBuffer().
-bool
-KernelBlitManager::readImage(
-    device::Memory& srcMemory,
-    void* dstHost,
-    const amd::Coord3D& origin,
-    const amd::Coord3D& size,
-    size_t rowPitch,
-    size_t slicePitch,
-    bool entire) const
-{
-    if (setup_.disableReadImage_ || srcMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::readImage(
-            srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
-    }
-
-    size_t linearSize = 0;
-    FindLinearSize(linearSize, size, rowPitch, slicePitch, srcMemory);
-    amd::Buffer *dstMemory =
-        new (*context_) amd::Buffer(*context_, CL_MEM_USE_HOST_PTR, linearSize);
-
-    if (!dstMemory->create(const_cast<void*>(dstHost))) {
-        LogError("[OCL] Fail to create mem object for destination");
-        // Drop the temporary wrapper instead of leaking it
-        dstMemory->release();
-        return false;
-    }
-
-    device::Memory *devDstMemory = dstMemory->getDeviceMemory(dev_);
-    if (devDstMemory == NULL) {
-        LogError("[OCL] Fail to create device mem object for destination");
-        // Drop the temporary wrapper instead of leaking it
-        dstMemory->release();
-        return false;
-    }
-
-    bool result = copyImageToBuffer(
-        srcMemory, *devDstMemory, origin, amd::Coord3D(0), size, entire, rowPitch,
-        slicePitch);
-
-    // Wait for the transfer to finish so that we could safely release the
-    // destination memory object.
-    // TODO: we could remove this if issue on implicit memory registration is
-    // fixed by KFD, so that we could pass the pattern as SVM.
-    gpu().releaseGpuMemoryFence();
-
-    dstMemory->release();
-
-    return result;
-}
-
-// Writes host memory into a region of a device buffer.
-//
-// Falls back to HsaBlitManager::writeBuffer when the kernel path is disabled
-// in setup_ or the destination is directly host accessible. Otherwise
-// srcHost is wrapped in a temporary CL_MEM_USE_HOST_PTR buffer of size[0]
-// and the transfer is done by copyBuffer(). Returns false when the temporary
-// buffer cannot be set up, otherwise the result of copyBuffer().
-bool
-KernelBlitManager::writeBuffer(
-    const void* srcHost,
-    device::Memory& dstMemory,
-    const amd::Coord3D& origin,
-    const amd::Coord3D& size,
-    bool entire) const
-{
-    if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::writeBuffer(srcHost, dstMemory, origin, size,
-            entire);
-    }
-
-    amd::Buffer *srcMemory =
-        new (*context_) amd::Buffer(*context_, CL_MEM_USE_HOST_PTR, size[0]);
-
-    // The host pointer is only read from, the cast satisfies create()
-    if (!srcMemory->create(const_cast<void*>(srcHost))) {
-        LogError("[OCL] Fail to create mem object for source");
-        // Drop the temporary wrapper instead of leaking it
-        srcMemory->release();
-        return false;
-    }
-
-    device::Memory *devSrcMemory = srcMemory->getDeviceMemory(dev_);
-    if (devSrcMemory == NULL) {
-        LogError("[OCL] Fail to create device mem object for source");
-        // Drop the temporary wrapper instead of leaking it
-        srcMemory->release();
-        return false;
-    }
-
-    bool result =
-        copyBuffer(*devSrcMemory, dstMemory, amd::Coord3D(0), origin, size, entire);
-
-    // Wait for the transfer to finish so that we could safely release the
-    // source memory object.
-    // TODO: we could remove this if issue on implicit memory registration is
-    // fixed by KFD, so that we could pass the pattern as SVM.
-    gpu().releaseGpuMemoryFence();
-
-    srcMemory->release();
-
-    return result;
-}
-
-// Writes host memory into a rectangular region of a device buffer.
-//
-// Falls back to HsaBlitManager::writeBufferRect when the kernel path is
-// disabled in setup_ or the destination is directly host accessible.
-// Otherwise srcHost is wrapped in a temporary CL_MEM_USE_HOST_PTR buffer
-// that spans hostRect.start_ + hostRect.end_ and the transfer is done by
-// copyBufferRect(). Returns false when the temporary buffer cannot be set
-// up, otherwise the result of copyBufferRect().
-bool
-KernelBlitManager::writeBufferRect(
-    const void* srcHost,
-    device::Memory& dstMemory,
-    const amd::BufferRect& hostRect,
-    const amd::BufferRect& bufRect,
-    const amd::Coord3D& size,
-    bool entire) const
-{
-    if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::writeBufferRect(
-            srcHost, dstMemory, hostRect, bufRect, size, entire);
-    }
-
-    size_t srcSize = hostRect.start_ + hostRect.end_;
-    amd::Buffer *srcMemory =
-        new (*context_) amd::Buffer(*context_, CL_MEM_USE_HOST_PTR, srcSize);
-
-    // The host pointer is only read from, the cast satisfies create()
-    if (!srcMemory->create(const_cast<void*>(srcHost))) {
-        LogError("[OCL] Fail to create mem object for source");
-        // Drop the temporary wrapper instead of leaking it
-        srcMemory->release();
-        return false;
-    }
-
-    device::Memory *devSrcMemory = srcMemory->getDeviceMemory(dev_);
-    if (devSrcMemory == NULL) {
-        LogError("[OCL] Fail to create device mem object for source");
-        // Drop the temporary wrapper instead of leaking it
-        srcMemory->release();
-        return false;
-    }
-
-    bool result = copyBufferRect(
-        *devSrcMemory, dstMemory, hostRect, bufRect, size, entire);
-
-    // Wait for the transfer to finish so that we could safely release the
-    // source memory object.
-    // TODO: we could remove this if issue on implicit memory registration is
-    // fixed by KFD, so that we could pass the pattern as SVM.
-    gpu().releaseGpuMemoryFence();
-
-    srcMemory->release();
-
-    return result;
-}
-
-// Writes host memory into a region of a device image.
-//
-// Falls back to HsaBlitManager::writeImage when the kernel path is disabled
-// in setup_ or the destination is directly host accessible. Otherwise
-// srcHost is wrapped in a temporary CL_MEM_USE_HOST_PTR buffer whose size
-// comes from FindLinearSize() (which may also reset rowPitch/slicePitch to
-// 0) and the transfer is done by copyBufferToImage(). Returns false when the
-// temporary buffer cannot be set up, otherwise the result of
-// copyBufferToImage().
-bool
-KernelBlitManager::writeImage(
-    const void* srcHost,
-    device::Memory& dstMemory,
-    const amd::Coord3D& origin,
-    const amd::Coord3D& size,
-    size_t rowPitch,
-    size_t slicePitch,
-    bool entire) const
-{
-    if (setup_.disableWriteImage_ || dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::writeImage(
-            srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
-    }
-
-    size_t linearSize = 0;
-    FindLinearSize(linearSize, size, rowPitch, slicePitch, dstMemory);
-    amd::Buffer *srcMemory =
-        new (*context_) amd::Buffer(*context_, CL_MEM_USE_HOST_PTR, linearSize);
-
-    // The host pointer is only read from, the cast satisfies create()
-    if (!srcMemory->create(const_cast<void*>(srcHost))) {
-        LogError("[OCL] Fail to create mem object for source");
-        // Drop the temporary wrapper instead of leaking it
-        srcMemory->release();
-        return false;
-    }
-
-    device::Memory *devSrcMemory = srcMemory->getDeviceMemory(dev_);
-    if (devSrcMemory == NULL) {
-        LogError("[OCL] Fail to create device mem object for source");
-        // Drop the temporary wrapper instead of leaking it
-        srcMemory->release();
-        return false;
-    }
-
-    bool result = copyBufferToImage(
-        *devSrcMemory, dstMemory, amd::Coord3D(0), origin, size, entire,
-        rowPitch, slicePitch);
-
-    // Wait for the transfer to finish so that we could safely release the
-    // source memory object.
-    // TODO: we could remove this if issue on implicit memory registration is
-    // fixed by KFD, so that we could pass the pattern as SVM.
-    gpu().releaseGpuMemoryFence();
-
-    srcMemory->release();
-
-    return result;
-}
-
-// Copies a linear range between two buffers with a blit kernel.
-//
-// Falls back to HsaBlitManager::copyBuffer when the kernel path is disabled
-// in setup_ or either buffer is directly host accessible. When both origins
-// and the size are multiples of 16 or of 4, the BlitCopyBufferAligned kernel
-// is used and origins and size are passed in units of that alignment.
-// Otherwise BlitCopyBuffer is dispatched with a size of sizeIn[0] / 4 + 1
-// and sizeIn[0] % 4 as the remainder. Returns the result of the kernel
-// submission.
-bool
-KernelBlitManager::copyBuffer(
-    device::Memory& srcMemory,
-    device::Memory& dstMemory,
-    const amd::Coord3D& srcOrigin,
-    const amd::Coord3D& dstOrigin,
-    const amd::Coord3D& sizeIn,
-    bool entire) const
-{
-    if (setup_.disableCopyBuffer_ ||
-        srcMemory.isHostMemDirectAccess() ||
-        dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::copyBuffer(
-            srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
-    }
-
-    uint blitType = BlitCopyBuffer;
-    size_t globalWorkOffset[3] = { 0, 0, 0 };
-    size_t globalWorkSize = 0;
-    size_t localWorkSize = 0;
-
-    const static uint CopyBuffAlignment[3] = { 16, 4, 1 };
-    amd::Coord3D size(sizeIn[0], sizeIn[1], sizeIn[2]);
-
-    // The last entry (1) divides everything, so the loop always leaves
-    // through the break with i inside CopyBuffAlignment.
-    bool aligned;
-    uint i;
-    for (i = 0; i < 3; ++i) {
-        // Check source alignments
-        aligned = ((srcOrigin[0] % CopyBuffAlignment[i]) == 0);
-        // Check destination alignments
-        aligned &= ((dstOrigin[0] % CopyBuffAlignment[i]) == 0);
-        // Check copy size alignment in the first dimension
-        aligned &= ((sizeIn[0] % CopyBuffAlignment[i]) == 0);
-
-        if (aligned) {
-            if (CopyBuffAlignment[i] != 1) {
-                blitType = BlitCopyBufferAligned;
-            }
-            break;
-        }
-    }
-
-    // Only meaningful for the unaligned kernel
-    cl_uint remain = 0;
-    if (blitType == BlitCopyBufferAligned) {
-        size.c[0] /= CopyBuffAlignment[i];
-    }
-    else {
-        remain = size[0] % 4;
-        size.c[0] /= 4;
-        size.c[0] += 1;
-    }
-
-    // Program the dispatch dimensions
-    localWorkSize = 256;
-    globalWorkSize = amd::alignUp(size[0] , 256);
-
-    // Program kernels arguments for the blit operation
-    cl_mem clmem = ((cl_mem) as_cl(srcMemory.owner()));
-    kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
-    clmem = ((cl_mem) as_cl(dstMemory.owner()));
-    kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
-    // Program source origin
-    cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
-    kernels_[blitType]->parameters().set(2, sizeof(srcOffset), &srcOffset);
-
-    // Program destination origin
-    cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
-    kernels_[blitType]->parameters().set(3, sizeof(dstOffset), &dstOffset);
-
-    cl_ulong copySize = size[0];
-    kernels_[blitType]->parameters().set(4, sizeof(copySize), &copySize);
-
-    // The last argument is the alignment for the aligned kernel and the
-    // remainder for the unaligned one
-    if (blitType == BlitCopyBufferAligned) {
-        cl_int alignment = CopyBuffAlignment[i];
-        kernels_[blitType]->parameters().set(5, sizeof(alignment), &alignment);
-    }
-    else {
-        kernels_[blitType]->parameters().set(5, sizeof(remain), &remain);
-    }
-
-    // Create ND range object for the kernel's execution
-    amd::NDRangeContainer ndrange(
-        1, globalWorkOffset, &globalWorkSize, &localWorkSize);
-
-    // Execute the blit
-    address parameters = kernels_[blitType]->parameters().capture(dev_);
-    bool result = gpu().submitKernelInternal(
-        ndrange, *kernels_[blitType], parameters, NULL);
-    kernels_[blitType]->parameters().release(const_cast<address>(parameters), dev_);
-    return result;
-}
-
-// Copies a rectangular region between two buffers with a blit kernel.
-//
-// Falls back to HsaBlitManager::copyBufferRect when the kernel path is
-// disabled in setup_ or both buffers are directly host accessible. The
-// largest of the alignments 16, 4, 1 that divides every pitch, both start
-// offsets and the width is selected; for 16 and 4 the
-// BlitCopyBufferRectAligned kernel is used. Pitches, offsets and the width
-// are passed to the kernel in units of the selected alignment. Returns the
-// result of the kernel submission.
-bool
-KernelBlitManager::copyBufferRect(
-    device::Memory& srcMemory,
-    device::Memory& dstMemory,
-    const amd::BufferRect& srcRectIn,
-    const amd::BufferRect& dstRectIn,
-    const amd::Coord3D& sizeIn,
-    bool entire) const
-{
-    // NOTE(review): this tests disableCopyBuffer_ while the read/write rect
-    // paths test dedicated *Rect_ flags - confirm this is intended.
-    if (setup_.disableCopyBuffer_ ||
-        (srcMemory.isHostMemDirectAccess() && dstMemory.isHostMemDirectAccess())) {
-        return HsaBlitManager::copyBufferRect(
-            srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire);
-    }
-
-    uint blitType = BlitCopyBufferRect;
-    size_t dim = 3;
-    size_t globalWorkOffset[3] = { 0, 0, 0 };
-    size_t globalWorkSize[3];
-    size_t localWorkSize[3];
-
-    const static uint CopyRectAlignment[3] = { 16, 4, 1 };
-
-    // The last entry (1) divides everything, so the loop always leaves
-    // through the break with i inside CopyRectAlignment.
-    bool aligned;
-    uint i;
-    for (i = 0; i < sizeof(CopyRectAlignment) / sizeof(uint); i++) {
-        // Check source alignments
-        aligned = ((srcRectIn.rowPitch_ % CopyRectAlignment[i]) == 0);
-        aligned &= ((srcRectIn.slicePitch_ % CopyRectAlignment[i]) == 0);
-        aligned &= ((srcRectIn.start_ % CopyRectAlignment[i]) == 0);
-
-        // Check destination alignments
-        aligned &= ((dstRectIn.rowPitch_ % CopyRectAlignment[i]) == 0);
-        aligned &= ((dstRectIn.slicePitch_ % CopyRectAlignment[i]) == 0);
-        aligned &= ((dstRectIn.start_ % CopyRectAlignment[i]) == 0);
-
-        // Check copy size alignment in the first dimension
-        aligned &= ((sizeIn[0] % CopyRectAlignment[i]) == 0);
-
-        if (aligned) {
-            if (CopyRectAlignment[i] != 1) {
-                blitType = BlitCopyBufferRectAligned;
-            }
-            break;
-        }
-    }
-
-    amd::BufferRect srcRect;
-    amd::BufferRect dstRect;
-    amd::Coord3D size(sizeIn[0], sizeIn[1], sizeIn[2]);
-
-    // Express both rectangles and the width in units of the alignment
-    srcRect.rowPitch_ = srcRectIn.rowPitch_ / CopyRectAlignment[i];
-    srcRect.slicePitch_ = srcRectIn.slicePitch_ / CopyRectAlignment[i];
-    srcRect.start_ = srcRectIn.start_ / CopyRectAlignment[i];
-    srcRect.end_ = srcRectIn.end_ / CopyRectAlignment[i];
-
-    dstRect.rowPitch_ = dstRectIn.rowPitch_ / CopyRectAlignment[i];
-    dstRect.slicePitch_ = dstRectIn.slicePitch_ / CopyRectAlignment[i];
-    dstRect.start_ = dstRectIn.start_ / CopyRectAlignment[i];
-    dstRect.end_ = dstRectIn.end_ / CopyRectAlignment[i];
-
-    size.c[0] /= CopyRectAlignment[i];
-
-    // Program the kernel's workload depending on the transfer dimensions
-    if ((size[1] == 1) && (size[2] == 1)) {
-        globalWorkSize[0] = amd::alignUp(size[0], 256);
-        globalWorkSize[1] = 1;
-        globalWorkSize[2] = 1;
-        localWorkSize[0] = 256;
-        localWorkSize[1] = 1;
-        localWorkSize[2] = 1;
-    }
-    else if (size[2] == 1) {
-        globalWorkSize[0] = amd::alignUp(size[0], 16);
-        globalWorkSize[1] = amd::alignUp(size[1], 16);
-        globalWorkSize[2] = 1;
-        localWorkSize[0] = localWorkSize[1] = 16;
-        localWorkSize[2] = 1;
-    }
-    else {
-        globalWorkSize[0] = amd::alignUp(size[0], 8);
-        globalWorkSize[1] = amd::alignUp(size[1], 8);
-        globalWorkSize[2] = amd::alignUp(size[2], 4);
-        localWorkSize[0] = localWorkSize[1] = 8;
-        localWorkSize[2] = 4;
-    }
-
-
-    // Program kernels arguments for the blit operation
-    cl_mem clmem = ((cl_mem) as_cl(srcMemory.owner()));
-    kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
-    clmem = ((cl_mem) as_cl(dstMemory.owner()));
-    kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
-    cl_ulong src[4] = { srcRect.rowPitch_,
-                        srcRect.slicePitch_,
-                        srcRect.start_, 0 };
-    kernels_[blitType]->parameters().set(2, sizeof(src), src);
-    cl_ulong dst[4] = { dstRect.rowPitch_,
-                        dstRect.slicePitch_,
-                        dstRect.start_, 0 };
-    kernels_[blitType]->parameters().set(3, sizeof(dst), dst);
-    // The fourth component carries the selected alignment
-    cl_ulong copySize[4] = { size[0],
-                             size[1],
-                             size[2],
-                             CopyRectAlignment[i] };
-    kernels_[blitType]->parameters().set(4, sizeof(copySize), copySize);
-
-    // Create ND range object for the kernel's execution
-    amd::NDRangeContainer ndrange(dim,
-        globalWorkOffset, globalWorkSize, localWorkSize);
-
-    // Execute the blit
-    address parameters = kernels_[blitType]->parameters().capture(dev_);
-    bool result = gpu().submitKernelInternal(
-        ndrange, *kernels_[blitType], parameters, NULL);
-    kernels_[blitType]->parameters().release(const_cast<address>(parameters), dev_);
-    return result;
-}
-
-// Copies a region of an image into a buffer with a blit kernel.
-//
-// Falls back to HsaBlitManager::copyImageToBuffer when both objects are
-// directly host accessible. If filterFormat() rejects the image format, the
-// copy goes through a temporary view of the image that is released before
-// returning. Returns false when that view cannot be created, otherwise the
-// result of the kernel submission.
-bool
-KernelBlitManager::copyImageToBuffer(
-    device::Memory& srcMemory,
-    device::Memory& dstMemory,
-    const amd::Coord3D& srcOrigin,
-    const amd::Coord3D& dstOrigin,
-    const amd::Coord3D& size,
-    bool entire,
-    size_t rowPitch,
-    size_t slicePitch) const
-{
-    if (srcMemory.isHostMemDirectAccess() && dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::copyImageToBuffer(
-            srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
-            rowPitch, slicePitch);
-    }
-
-    amd::Image::Format oldFormat = srcMemory.owner()->asImage()->getImageFormat();
-    amd::Image::Format newFormat = filterFormat(oldFormat);
-    bool useView = false;
-
-    device::Memory *srcView = &srcMemory;
-    if (oldFormat != newFormat) {
-        srcView = createImageView(srcMemory, newFormat);
-        if (srcView == NULL) {
-            // createImageView() has logged the reason already
-            return false;
-        }
-        useView = true;
-    }
-
-    oclhsa::Image &srcImage = static_cast<oclhsa::Image &>(*srcView);
-
-    uint blitType = BlitCopyImageToBuffer;
-    size_t dim = 0;
-    size_t globalWorkOffset[3] = { 0, 0, 0 };
-    size_t globalWorkSize[3];
-    size_t localWorkSize[3];
-
-    // Program the kernels workload depending on the blit dimensions
-    const size_t imageDims = srcImage.owner()->asImage()->getDims();
-    dim = 3;
-    // Find the current blit type
-    if (imageDims == 1) {
-        globalWorkSize[0] = amd::alignUp(size[0], 256);
-        globalWorkSize[1] = amd::alignUp(size[1], 1);
-        globalWorkSize[2] = amd::alignUp(size[2], 1);
-        localWorkSize[0] = 256;
-        localWorkSize[1] = localWorkSize[2] = 1;
-    }
-    else if (imageDims == 2) {
-        globalWorkSize[0] = amd::alignUp(size[0], 16);
-        globalWorkSize[1] = amd::alignUp(size[1], 16);
-        globalWorkSize[2] = amd::alignUp(size[2], 1);
-        localWorkSize[0] = localWorkSize[1] = 16;
-        localWorkSize[2] = 1;
-    }
-    else {
-        globalWorkSize[0] = amd::alignUp(size[0], 8);
-        globalWorkSize[1] = amd::alignUp(size[1], 8);
-        globalWorkSize[2] = amd::alignUp(size[2], 4);
-        localWorkSize[0] = localWorkSize[1] = 8;
-        localWorkSize[2] = 4;
-    }
-
-    // Program kernels arguments for the blit operation
-    cl_mem clmem = ((cl_mem) as_cl(srcImage.owner()));
-    kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
-    clmem = ((cl_mem) as_cl(dstMemory.owner()));
-    kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
-
-    // Update extra parameters for USHORT and UBYTE pointers.
-    // Only then compiler can optimize the kernel to use
-    // UAV Raw for other writes
-    kernels_[blitType]->parameters().set(2, sizeof(cl_mem), &clmem);
-    kernels_[blitType]->parameters().set(3, sizeof(cl_mem), &clmem);
-
-    cl_int srcOrg[4] = { (cl_int)srcOrigin[0],
-                         (cl_int)srcOrigin[1],
-                         (cl_int)srcOrigin[2], 0 };
-    cl_int copySize[4] = { (cl_int)size[0],
-                           (cl_int)size[1],
-                           (cl_int)size[2], 0 };
-
-    kernels_[blitType]->parameters().set(4, sizeof(srcOrg), srcOrg);
-
-    const size_t elementSize =
-        srcImage.owner()->asImage()->getImageFormat().getElementSize();
-    const size_t numChannels =
-        srcImage.owner()->asImage()->getImageFormat().getNumChannels();
-
-    // 1 element granularity for writes by default
-    cl_int granularity = 1;
-    if (elementSize == 2) {
-        granularity = 2;
-    }
-    else if (elementSize >= 4) {
-        granularity = 4;
-    }
-    CondLog(((dstOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
-    cl_ulong dstOrg[4] = { dstOrigin[0] / granularity,
-                           dstOrigin[1],
-                           dstOrigin[2],
-                           0 };
-    kernels_[blitType]->parameters().set(5, sizeof(dstOrg), dstOrg);
-    kernels_[blitType]->parameters().set(6, sizeof(copySize), copySize);
-
-    // Program memory format
-    uint multiplier = elementSize / sizeof(uint32_t);
-    multiplier = (multiplier == 0) ? 1 : multiplier;
-    cl_uint format[4] = { (cl_uint)numChannels,
-                          (cl_uint)(elementSize / numChannels),
-                          multiplier, 0 };
-    kernels_[blitType]->parameters().set(7, sizeof(format), format);
-
-    // Program row and slice pitches
-    cl_ulong pitch[4] = { 0 };
-    CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, srcImage);
-    kernels_[blitType]->parameters().set(8, sizeof(pitch), pitch);
-
-    // Create ND range object for the kernel's execution
-    amd::NDRangeContainer ndrange(dim,
-        globalWorkOffset, globalWorkSize, localWorkSize);
-
-    // Execute the blit
-    address parameters = kernels_[blitType]->parameters().capture(dev_);
-    bool result = gpu().submitKernelInternal(
-        ndrange, *kernels_[blitType], parameters, NULL);
-    kernels_[blitType]->parameters().release(const_cast<address>(parameters), dev_);
-
-    // Releasing the owner drops the temporary view
-    if (useView) {
-        srcView->owner()->release();
-    }
-
-    return result;
-}
-
-// Copies a buffer into a region of an image with a blit kernel.
-//
-// Falls back to HsaBlitManager::copyBufferToImage when both objects are
-// directly host accessible. If filterFormat() rejects the image format, the
-// copy goes through a temporary view of the image that is released before
-// returning. Returns false when that view cannot be created, otherwise the
-// result of the kernel submission.
-bool
-KernelBlitManager::copyBufferToImage(
-    device::Memory& srcMemory,
-    device::Memory& dstMemory,
-    const amd::Coord3D& srcOrigin,
-    const amd::Coord3D& dstOrigin,
-    const amd::Coord3D& size,
-    bool entire,
-    size_t rowPitch,
-    size_t slicePitch) const
-{
-    if (srcMemory.isHostMemDirectAccess() && dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::copyBufferToImage(
-            srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
-            rowPitch, slicePitch);
-    }
-
-    amd::Image::Format oldFormat = dstMemory.owner()->asImage()->getImageFormat();
-    amd::Image::Format newFormat = filterFormat(oldFormat);
-    bool useView = false;
-
-    device::Memory *dstView = &dstMemory;
-    if (oldFormat != newFormat) {
-        dstView = createImageView(dstMemory, newFormat);
-        if (dstView == NULL) {
-            // createImageView() has logged the reason already
-            return false;
-        }
-        useView = true;
-    }
-
-    oclhsa::Image &dstImage = static_cast<oclhsa::Image &>(*dstView);
-
-    // Use a common blit type with three dimensions by default
-    uint blitType = BlitCopyBufferToImage;
-    size_t dim = 0;
-    size_t globalWorkOffset[3] = { 0, 0, 0 };
-    size_t globalWorkSize[3];
-    size_t localWorkSize[3];
-
-    // Program the kernels workload depending on the blit dimensions
-    const size_t imageDims = dstImage.owner()->asImage()->getDims();
-    dim = 3;
-    if (imageDims == 1) {
-        globalWorkSize[0] = amd::alignUp(size[0], 256);
-        globalWorkSize[1] = amd::alignUp(size[1], 1);
-        globalWorkSize[2] = amd::alignUp(size[2], 1);
-        localWorkSize[0] = 256;
-        localWorkSize[1] = localWorkSize[2] = 1;
-    }
-    else if (imageDims == 2) {
-        globalWorkSize[0] = amd::alignUp(size[0], 16);
-        globalWorkSize[1] = amd::alignUp(size[1], 16);
-        globalWorkSize[2] = amd::alignUp(size[2], 1);
-        localWorkSize[0] = localWorkSize[1] = 16;
-        localWorkSize[2] = 1;
-    }
-    else {
-        globalWorkSize[0] = amd::alignUp(size[0], 8);
-        globalWorkSize[1] = amd::alignUp(size[1], 8);
-        globalWorkSize[2] = amd::alignUp(size[2], 4);
-        localWorkSize[0] = localWorkSize[1] = 8;
-        localWorkSize[2] = 4;
-    }
-
-    // Program kernels arguments for the blit operation
-    cl_mem clmem = ((cl_mem) as_cl(srcMemory.owner()));
-    kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
-    clmem = ((cl_mem) as_cl(dstImage.owner()));
-    kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
-
-    const size_t elementSize =
-        dstImage.owner()->asImage()->getImageFormat().getElementSize();
-    const size_t numChannels =
-        dstImage.owner()->asImage()->getImageFormat().getNumChannels();
-
-    // 1 element granularity for writes by default
-    cl_int granularity = 1;
-    if (elementSize == 2) {
-        granularity = 2;
-    }
-    else if (elementSize >= 4) {
-        granularity = 4;
-    }
-    CondLog(((srcOrigin[0] % granularity) != 0), "Unaligned offset in blit!");
-    cl_ulong srcOrg[4] = { srcOrigin[0] / granularity,
-                           srcOrigin[1],
-                           srcOrigin[2], 0 };
-    kernels_[blitType]->parameters().set(2, sizeof(srcOrg), srcOrg);
-
-    cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
-                         (cl_int)dstOrigin[1],
-                         (cl_int)dstOrigin[2], 0 };
-    cl_int copySize[4] = { (cl_int)size[0],
-                           (cl_int)size[1],
-                           (cl_int)size[2], 0 };
-
-    kernels_[blitType]->parameters().set(3, sizeof(dstOrg), dstOrg);
-    kernels_[blitType]->parameters().set(4, sizeof(copySize), copySize);
-
-    // Program memory format
-    uint multiplier = elementSize / sizeof(uint32_t);
-    multiplier = (multiplier == 0) ? 1 : multiplier;
-    cl_uint format[4] = { (cl_uint)numChannels,
-                          (cl_uint)(elementSize / numChannels),
-                          multiplier, 0 };
-    kernels_[blitType]->parameters().set(5, sizeof(format), format);
-
-    // Program row and slice pitches
-    cl_ulong pitch[4] = { 0 };
-    CalcRowSlicePitches(pitch, copySize, rowPitch, slicePitch, dstImage);
-    kernels_[blitType]->parameters().set(6, sizeof(pitch), pitch);
-
-    // Create ND range object for the kernel's execution
-    amd::NDRangeContainer ndrange(dim,
-        globalWorkOffset, globalWorkSize, localWorkSize);
-
-    // Execute the blit
-    address parameters = kernels_[blitType]->parameters().capture(dev_);
-    bool result = gpu().submitKernelInternal(
-        ndrange, *kernels_[blitType], parameters, NULL);
-    kernels_[blitType]->parameters().release(const_cast<address>(parameters), dev_);
-
-    // Releasing the owner drops the temporary view
-    if (useView) {
-        dstView->owner()->release();
-    }
-
-    return result;
-}
-
-// Copies a region between two images with a blit kernel.
-//
-// Falls back to HsaBlitManager::copyImage when both images are directly
-// host accessible. Each image whose format is rejected by filterFormat() is
-// accessed through a temporary view that is released before returning.
-// BlitCopyImage1DA is used when either image is a 1D image array,
-// BlitCopyImage otherwise. Returns false when a view cannot be created,
-// otherwise the result of the kernel submission.
-bool
-KernelBlitManager::copyImage(
-    device::Memory& srcMemory,
-    device::Memory& dstMemory,
-    const amd::Coord3D& srcOrigin,
-    const amd::Coord3D& dstOrigin,
-    const amd::Coord3D& size,
-    bool entire) const
-{
-    if (srcMemory.isHostMemDirectAccess() &&
-        dstMemory.isHostMemDirectAccess()) {
-        return HsaBlitManager::copyImage(
-            srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
-    }
-
-    amd::Image::Format srcOldFormat = srcMemory.owner()->asImage()->getImageFormat();
-    amd::Image::Format srcNewFormat = filterFormat(srcOldFormat);
-    bool useSrcView = false;
-
-    device::Memory *srcView = &srcMemory;
-    if (srcOldFormat != srcNewFormat) {
-        srcView = createImageView(srcMemory, srcNewFormat);
-        if (srcView == NULL) {
-            // createImageView() has logged the reason already
-            return false;
-        }
-        useSrcView = true;
-    }
-
-    oclhsa::Image &srcImage = static_cast<oclhsa::Image &>(*srcView);
-
-    // The destination view depends on the destination's own format
-    amd::Image::Format dstOldFormat = dstMemory.owner()->asImage()->getImageFormat();
-    amd::Image::Format dstNewFormat = filterFormat(dstOldFormat);
-    bool useDstView = false;
-
-    device::Memory *dstView = &dstMemory;
-    if (dstOldFormat != dstNewFormat) {
-        dstView = createImageView(dstMemory, dstNewFormat);
-        if (dstView == NULL) {
-            // Do not leak the source view created above
-            if (useSrcView) {
-                srcView->owner()->release();
-            }
-            return false;
-        }
-        useDstView = true;
-    }
-
-    oclhsa::Image &dstImage = static_cast<oclhsa::Image &>(*dstView);
-
-    uint blitType = BlitCopyImage;
-    size_t dim = 0;
-    size_t globalWorkOffset[3] = { 0, 0, 0 };
-    size_t globalWorkSize[3];
-    size_t localWorkSize[3];
-
-    // Program the kernels workload depending on the blit dimensions
-    dim = 3;
-    // Find the current blit type
-    const size_t srcDimSize = srcImage.owner()->asImage()->getDims();
-    const size_t dstDimSize = dstImage.owner()->asImage()->getDims();
-    if ((srcDimSize == 1) ||
-        (dstDimSize == 1)) {
-        globalWorkSize[0] = amd::alignUp(size[0], 256);
-        globalWorkSize[1] = amd::alignUp(size[1], 1);
-        globalWorkSize[2] = amd::alignUp(size[2], 1);
-        localWorkSize[0] = 256;
-        localWorkSize[1] = localWorkSize[2] = 1;
-    }
-    else if ((srcDimSize == 2) ||
-             (dstDimSize == 2)) {
-        globalWorkSize[0] = amd::alignUp(size[0], 16);
-        globalWorkSize[1] = amd::alignUp(size[1], 16);
-        globalWorkSize[2] = amd::alignUp(size[2], 1);
-        localWorkSize[0] = localWorkSize[1] = 16;
-        localWorkSize[2] = 1;
-    }
-    else {
-        globalWorkSize[0] = amd::alignUp(size[0], 8);
-        globalWorkSize[1] = amd::alignUp(size[1], 8);
-        globalWorkSize[2] = amd::alignUp(size[2], 4);
-        localWorkSize[0] = localWorkSize[1] = 8;
-        localWorkSize[2] = 4;
-    }
-
-    // The current OpenCL spec allows "copy images from a 1D image
-    // array object to a 1D image array object" only.
-    if ((srcImage.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) ||
-        (dstImage.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY)) {
-        blitType = BlitCopyImage1DA;
-    }
-
-    // Program kernels arguments for the blit operation
-    cl_mem clmem = ((cl_mem) as_cl(srcImage.owner()));
-    kernels_[blitType]->parameters().set(0, sizeof(cl_mem), &clmem);
-    clmem = ((cl_mem) as_cl(dstImage.owner()));
-    kernels_[blitType]->parameters().set(1, sizeof(cl_mem), &clmem);
-
-    // Program source origin
-    cl_int srcOrg[4] = { (cl_int)srcOrigin[0],
-                         (cl_int)srcOrigin[1],
-                         (cl_int)srcOrigin[2], 0 };
-
-    kernels_[blitType]->parameters().set(2, sizeof(srcOrg), srcOrg);
-
-    // Program destination origin
-    cl_int dstOrg[4] = { (cl_int)dstOrigin[0],
-                         (cl_int)dstOrigin[1],
-                         (cl_int)dstOrigin[2], 0 };
-    kernels_[blitType]->parameters().set(3, sizeof(dstOrg), dstOrg);
-
-    cl_int copySize[4] = { (cl_int)size[0],
-                           (cl_int)size[1],
-                           (cl_int)size[2], 0 };
-    kernels_[blitType]->parameters().set(4, sizeof(copySize), copySize);
-
-    // Create ND range object for the kernel's execution
-    amd::NDRangeContainer ndrange(
-        dim, globalWorkOffset, globalWorkSize, localWorkSize);
-
-    // Execute the blit
-    address parameters = kernels_[blitType]->parameters().capture(dev_);
-    bool result = gpu().submitKernelInternal(
-        ndrange, *kernels_[blitType], parameters, NULL);
-    kernels_[blitType]->parameters().release(const_cast<address>(parameters), dev_);
-
-    // Releasing the owners drops the temporary views
-    if (useSrcView) {
-        srcView->owner()->release();
-    }
-
-    if (useDstView) {
-        dstView->owner()->release();
-    }
-
-    return result;
-}
-
-bool
-KernelBlitManager::fillBuffer(
- device::Memory& memory,
- const void* pattern,
- size_t patternSize,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- bool entire
- ) const
-{
- if (setup_.disableFillBuffer_ || memory.isHostMemDirectAccess()) {
- return HsaBlitManager::fillBuffer(
- memory, pattern, patternSize, origin, size, entire);
- }
-
- uint fillType = FillBuffer;
- size_t globalWorkOffset[3] = { 0, 0, 0 };
- cl_ulong fillSize = size[0] / patternSize;
- size_t globalWorkSize = amd::alignUp(fillSize, 256);
- size_t localWorkSize = 256;
- bool dwordAligned =
- ((patternSize % sizeof(uint32_t)) == 0) ? true : false;
-
- // Program kernels arguments for the fill operation
- if (dwordAligned) {
- kernels_[fillType]->parameters().set(0, sizeof(cl_mem), NULL);
- cl_mem clmem = ((cl_mem) as_cl(memory.owner()));
- kernels_[fillType]->parameters().set(1, sizeof(cl_mem), &clmem);
- }
- else {
- cl_mem clmem = ((cl_mem) as_cl(memory.owner()));
- kernels_[fillType]->parameters().set(0, sizeof(cl_mem), &clmem);
- kernels_[fillType]->parameters().set(1, sizeof(cl_mem), NULL);
- }
-
- amd::Buffer *fillMemory =
- new (*context_) amd::Buffer(*context_, CL_MEM_USE_HOST_PTR, patternSize);
-
- if (!fillMemory->create(const_cast(pattern))) {
- LogError("[OCL] Fail to create mem object for destination");
- return false;
- }
-
- if (fillMemory->getDeviceMemory(dev_) == NULL) {
- LogError("[OCL] Fail to create device mem object for destination");
- return false;
- }
-
- cl_mem clmem = ((cl_mem) as_cl(fillMemory));
- kernels_[fillType]->parameters().set(2, sizeof(cl_mem), &clmem);
- cl_ulong offset = origin[0];
- if (dwordAligned) {
- patternSize /= sizeof(uint32_t);
- offset /= sizeof(uint32_t);
- }
- kernels_[fillType]->parameters().set(3, sizeof(cl_uint), &patternSize);
- kernels_[fillType]->parameters().set(4, sizeof(offset), &offset);
- kernels_[fillType]->parameters().set(5, sizeof(fillSize), &fillSize);
-
- // Create ND range object for the kernel's execution
- amd::NDRangeContainer ndrange(1,
- globalWorkOffset, &globalWorkSize, &localWorkSize);
-
- // Execute the blit
- address parameters = kernels_[fillType]->parameters().capture(dev_);
- bool result = gpu().submitKernelInternal(
- ndrange, *kernels_[fillType], parameters, NULL);
- kernels_[fillType]->parameters().release(const_cast(parameters), dev_);
-
- // Wait for the transfer to finish so that we could safely release the
- // fill memory object.
- // TODO: we could remove this if issue on implicit memory registration is
- // fixed by KFD, so that we could pass the pattern as SVM.
- gpu().releaseGpuMemoryFence();
-
- fillMemory->release();
-
- return result;
-}
-
-bool
-KernelBlitManager::fillImage(
- device::Memory& memory,
- const void* pattern,
- const amd::Coord3D& origin,
- const amd::Coord3D& size,
- bool entire
- ) const
-{
- if (memory.isHostMemDirectAccess()) {
- return HsaBlitManager::fillImage(memory, pattern, origin, size, entire);
- }
-
- amd::Image *image = memory.owner()->asImage();
-
- uint fillType;
- size_t dim = 0;
- size_t globalWorkOffset[3] = { 0, 0, 0 };
- size_t globalWorkSize[3];
- size_t localWorkSize[3];
-
- // Program the kernels workload depending on the fill dimensions
- fillType = FillImage;
- dim = 3;
- // Find the current blit type
- const size_t dimSize = image->getDims();
- if (dimSize == 1) {
- globalWorkSize[0] = amd::alignUp(size[0], 256);
- globalWorkSize[1] = amd::alignUp(size[1], 1);
- globalWorkSize[2] = amd::alignUp(size[2], 1);
- localWorkSize[0] = 256;
- localWorkSize[1] = localWorkSize[2] = 1;
- }
- else if (dimSize == 2) {
- globalWorkSize[0] = amd::alignUp(size[0], 16);
- globalWorkSize[1] = amd::alignUp(size[1], 16);
- globalWorkSize[2] = amd::alignUp(size[2], 1);
- localWorkSize[0] = localWorkSize[1] = 16;
- localWorkSize[2] = 1;
- }
- else {
- globalWorkSize[0] = amd::alignUp(size[0], 8);
- globalWorkSize[1] = amd::alignUp(size[1], 8);
- globalWorkSize[2] = amd::alignUp(size[2], 4);
- localWorkSize[0] = localWorkSize[1] = 8;
- localWorkSize[2] = 4;
- }
-
- // Program kernels arguments for the blit operation
- cl_mem clmem = ((cl_mem) as_cl(memory.owner()));
- kernels_[fillType]->parameters().set(0, sizeof(cl_mem), &clmem);
- kernels_[fillType]->parameters().set(1, sizeof(cl_float4), pattern);
- kernels_[fillType]->parameters().set(2, sizeof(cl_int4), pattern);
- kernels_[fillType]->parameters().set(3, sizeof(cl_uint4), pattern);
-
- cl_int fillOrigin[4] = { (cl_int)origin[0],
- (cl_int)origin[1],
- (cl_int)origin[2], 0 };
- cl_int fillSize[4] = { (cl_int)size[0],
- (cl_int)size[1],
- (cl_int)size[2], 0 };
- kernels_[fillType]->parameters().set(4, sizeof(fillOrigin), fillOrigin);
- kernels_[fillType]->parameters().set(5, sizeof(fillSize), fillSize);
-
- // Find the type of image
- uint32_t type = 0;
- amd::Image::Format format(image->getImageFormat());
- switch (format.image_channel_data_type) {
- case CL_SNORM_INT8:
- case CL_SNORM_INT16:
- case CL_UNORM_INT8:
- case CL_UNORM_INT16:
- case CL_UNORM_SHORT_565:
- case CL_UNORM_SHORT_555:
- case CL_UNORM_INT_101010:
- case CL_HALF_FLOAT:
- case CL_FLOAT:
- type = 0;
- break;
- case CL_SIGNED_INT8:
- case CL_SIGNED_INT16:
- case CL_SIGNED_INT32:
- type = 1;
- break;
- case CL_UNSIGNED_INT8:
- case CL_UNSIGNED_INT16:
- case CL_UNSIGNED_INT32:
- type = 2;
- break;
- }
- kernels_[fillType]->parameters().set(6, sizeof(type), &type);
-
- // Create ND range object for the kernel's execution
- amd::NDRangeContainer ndrange(dim,
- globalWorkOffset, globalWorkSize, localWorkSize);
-
- // Execute the blit
- address parameters = kernels_[fillType]->parameters().capture(dev_);
- bool result = gpu().submitKernelInternal(
- ndrange, *kernels_[fillType], parameters, NULL);
- kernels_[fillType]->parameters().release(const_cast(parameters), dev_);
-
- return result;
-}
-
-bool
-KernelBlitManager::create(amd::Device& device)
-{
- if (!createProgram(static_cast(device))) {
- return false;
- }
-
- return true;
-}
-
-bool
-KernelBlitManager::createProgram(Device& device)
-{
- // Save context and program for this device
- context_ = device.blitProgram()->context_;
- context_->retain();
- program_ = device.blitProgram()->program_;
- program_->retain();
-
- bool result = false;
- do {
- // Create kernel objects for all blits
- for (uint i = 0; i < BlitTotal; ++i) {
- const amd::Symbol* symbol = program_->findSymbol(BlitName[i]);
- if (symbol == NULL) {
- break;
- }
- kernels_[i] = new amd::Kernel(*program_, *symbol, BlitName[i]);
- if (kernels_[i] == NULL) {
- break;
- }
- }
-
- result = true;
- } while(!result);
-
- return result;
-}
-
-} // namespace oclhsa
diff --git a/rocclr/runtime/device/hsa/hsablit.hpp b/rocclr/runtime/device/hsa/hsablit.hpp
deleted file mode 100644
index b24a61a8cd..0000000000
--- a/rocclr/runtime/device/hsa/hsablit.hpp
+++ /dev/null
@@ -1,401 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef HSABLIT_HPP_
-#define HSABLIT_HPP_
-
-#include "top.hpp"
-#include "platform/command.hpp"
-#include "platform/commandqueue.hpp"
-#include "device/device.hpp"
-#include "device/blit.hpp"
-
-/*! \addtogroup HSA Blit Implementation
- * @{
- */
-
-//! HSA Blit Manager Implementation
-namespace oclhsa {
-
-class Device;
-class Kernel;
-class Memory;
-class VirtualGPU;
-
-//! DMA Blit Manager
-class HsaBlitManager : public device::HostBlitManager
-{
-public:
- //! Constructor
- HsaBlitManager(
- device::VirtualDevice& vdev, //!< Virtual GPU to be used for blits
- Setup setup = Setup() //!< Specifies HW accelerated blits
- );
-
- //! Destructor
- virtual ~HsaBlitManager() { }
-
- //! Creates HostBlitManager object
- virtual bool create(amd::Device& device) { return true; }
-
- //! Copies a buffer object to system memory
- virtual bool readBuffer(
- device::Memory& srcMemory, //!< Source memory object
- void* dstHost, //!< Destination host memory
- const amd::Coord3D& origin, //!< Source origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies a buffer object to system memory
- virtual bool readBufferRect(
- device::Memory& srcMemory, //!< Source memory object
- void* dstHost, //!< Destinaiton host memory
- const amd::BufferRect& bufRect, //!< Source rectangle
- const amd::BufferRect& hostRect, //!< Destination rectangle
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies an image object to system memory
- virtual bool readImage(
- device::Memory& srcMemory, //!< Source memory object
- void* dstHost, //!< Destination host memory
- const amd::Coord3D& origin, //!< Source origin
- const amd::Coord3D& size, //!< Size of the copy region
- size_t rowPitch, //!< Row pitch for host memory
- size_t slicePitch, //!< Slice pitch for host memory
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies system memory to a buffer object
- virtual bool writeBuffer(
- const void* srcHost, //!< Source host memory
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies system memory to a buffer object
- virtual bool writeBufferRect(
- const void* srcHost, //!< Source host memory
- device::Memory& dstMemory, //!< Destination memory object
- const amd::BufferRect& hostRect, //!< Destination rectangle
- const amd::BufferRect& bufRect, //!< Source rectangle
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies system memory to an image object
- virtual bool writeImage(
- const void* srcHost, //!< Source host memory
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- size_t rowPitch, //!< Row pitch for host memory
- size_t slicePitch, //!< Slice pitch for host memory
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies a buffer object to another buffer object
- virtual bool copyBuffer(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies a buffer object to another buffer object
- virtual bool copyBufferRect(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::BufferRect& srcRect, //!< Source rectangle
- const amd::BufferRect& dstRect, //!< Destination rectangle
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies an image object to a buffer object
- virtual bool copyImageToBuffer(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false, //!< Entire buffer will be updated
- size_t rowPitch = 0, //!< Pitch for buffer
- size_t slicePitch = 0 //!< Slice for buffer
- ) const;
-
- //! Copies a buffer object to an image object
- virtual bool copyBufferToImage(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false, //!< Entire buffer will be updated
- size_t rowPitch = 0, //!< Pitch for buffer
- size_t slicePitch = 0 //!< Slice for buffer
- ) const;
-
- //! Copies an image object to another image object
- virtual bool copyImage(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Fills a buffer memory with a pattern data
- virtual bool fillBuffer(
- device::Memory& memory, //!< Memory object to fill with pattern
- const void* pattern, //!< Pattern data
- size_t patternSize, //!< Pattern size
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Fills an image memory with a pattern data
- virtual bool fillImage(
- device::Memory& dstMemory, //!< Memory object to fill with pattern
- const void* pattern, //!< Pattern data
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
-protected:
- //! Returns the virtual GPU object
- VirtualGPU& gpu() const { return static_cast(vDev_); }
-
-private:
- //! Disable copy constructor
- HsaBlitManager(const HsaBlitManager&);
-
- //! Disable operator=
- HsaBlitManager& operator=(const HsaBlitManager&);
-
- bool importExportImage(
- uint8_t* dst,
- const uint8_t* src,
- const amd::Coord3D& dstOffset,
- size_t dstRowPitch,
- size_t dstSlicePitch,
- const amd::Coord3D& srcOffset,
- size_t srcRowPitch,
- size_t srcSlicePitch,
- const amd::Coord3D& sizeToCopy,
- size_t elementSize) const;
-};
-
-//! Kernel Blit Manager
-class KernelBlitManager : public HsaBlitManager
-{
-public:
- enum {
- BlitCopyImage = 0,
- BlitCopyImage1DA,
- BlitCopyImageToBuffer,
- BlitCopyBufferToImage,
- BlitCopyBufferRect,
- BlitCopyBufferRectAligned,
- BlitCopyBuffer,
- BlitCopyBufferAligned,
- FillBuffer,
- FillImage,
- BlitTotal
- };
-
- //! Constructor
- KernelBlitManager(
- device::VirtualDevice& vdev, //!< Virtual GPU to be used for blits
- Setup setup = Setup() //!< Specifies HW accelerated blits
- );
-
- //! Destructor
- virtual ~KernelBlitManager();
-
- //! Creates HostBlitManager object
- virtual bool create(amd::Device& device);
-
- //! Copies a buffer object to system memory
- virtual bool readBuffer(
- device::Memory& srcMemory, //!< Source memory object
- void* dstHost, //!< Destination host memory
- const amd::Coord3D& origin, //!< Source origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies a buffer object to system memory
- virtual bool readBufferRect(
- device::Memory& srcMemory, //!< Source memory object
- void* dstHost, //!< Destinaiton host memory
- const amd::BufferRect& bufRect, //!< Source rectangle
- const amd::BufferRect& hostRect, //!< Destination rectangle
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies an image object to system memory
- virtual bool readImage(
- device::Memory& srcMemory, //!< Source memory object
- void* dstHost, //!< Destination host memory
- const amd::Coord3D& origin, //!< Source origin
- const amd::Coord3D& size, //!< Size of the copy region
- size_t rowPitch, //!< Row pitch for host memory
- size_t slicePitch, //!< Slice pitch for host memory
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies system memory to a buffer object
- virtual bool writeBuffer(
- const void* srcHost, //!< Source host memory
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies system memory to a buffer object
- virtual bool writeBufferRect(
- const void* srcHost, //!< Source host memory
- device::Memory& dstMemory, //!< Destination memory object
- const amd::BufferRect& hostRect, //!< Destination rectangle
- const amd::BufferRect& bufRect, //!< Source rectangle
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies system memory to an image object
- virtual bool writeImage(
- const void* srcHost, //!< Source host memory
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- size_t rowPitch, //!< Row pitch for host memory
- size_t slicePitch, //!< Slice pitch for host memory
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies a buffer object to another buffer object
- virtual bool copyBuffer(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies a buffer object to another buffer object
- virtual bool copyBufferRect(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::BufferRect& srcRect, //!< Source rectangle
- const amd::BufferRect& dstRect, //!< Destination rectangle
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Copies an image object to a buffer object
- virtual bool copyImageToBuffer(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false, //!< Entire buffer will be updated
- size_t rowPitch = 0, //!< Pitch for buffer
- size_t slicePitch = 0 //!< Slice for buffer
- ) const;
-
- //! Copies a buffer object to an image object
- virtual bool copyBufferToImage(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false, //!< Entire buffer will be updated
- size_t rowPitch = 0, //!< Pitch for buffer
- size_t slicePitch = 0 //!< Slice for buffer
- ) const;
-
- //! Copies an image object to another image object
- virtual bool copyImage(
- device::Memory& srcMemory, //!< Source memory object
- device::Memory& dstMemory, //!< Destination memory object
- const amd::Coord3D& srcOrigin, //!< Source origin
- const amd::Coord3D& dstOrigin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Fills a buffer memory with a pattern data
- virtual bool fillBuffer(
- device::Memory& memory, //!< Memory object to fill with pattern
- const void* pattern, //!< Pattern data
- size_t patternSize, //!< Pattern size
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
- //! Fills an image memory with a pattern data
- virtual bool fillImage(
- device::Memory& dstMemory, //!< Memory object to fill with pattern
- const void* pattern, //!< Pattern data
- const amd::Coord3D& origin, //!< Destination origin
- const amd::Coord3D& size, //!< Size of the copy region
- bool entire = false //!< Entire buffer will be updated
- ) const;
-
-private:
- //! Disable copy constructor
- KernelBlitManager(const KernelBlitManager&);
-
- //! Disable operator=
- KernelBlitManager& operator=(const KernelBlitManager&);
-
- //! Creates a program for all blit operations
- bool createProgram(
- Device& device //!< Device object
- );
-
- amd::Image::Format filterFormat(amd::Image::Format oldFormat) const;
-
- device::Memory *createImageView(
- device::Memory &parent,
- amd::Image::Format newFormat) const;
-
- amd::Context *context_; //!< A dummy context
- amd::Program *program_; //!< GPU program obejct
- amd::Kernel *kernels_[BlitTotal]; //!< GPU kernels for blit
-};
-
-static const char* BlitName[KernelBlitManager::BlitTotal] = {
- "copyImage",
- "copyImage1DA",
- "copyImageToBuffer",
- "copyBufferToImage",
- "copyBufferRect",
- "copyBufferRectAligned",
- "copyBuffer",
- "copyBufferAligned",
- "fillBuffer",
- "fillImage"
- };
-
-/*@}*/
-} // namespace oclhsa
-
-#endif /*HSABLIT_HPP_*/
diff --git a/rocclr/runtime/device/hsa/hsacompiler.cpp b/rocclr/runtime/device/hsa/hsacompiler.cpp
deleted file mode 100644
index 1c1fb5987b..0000000000
--- a/rocclr/runtime/device/hsa/hsacompiler.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-//
-// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
-//
-#ifndef WITHOUT_FSA_BACKEND
-
-#include
-#include
-#include
-#include
-
-#include "os/os.hpp"
-#include "hsadevice.hpp"
-#include "hsaprogram.hpp"
-#include "hsacompilerlib.hpp"
-//#include "gpukernel.hpp"
-//#include "compiler/compiler.hpp"
-#include "utils/options.hpp"
-#include
-
-//CLC_IN_PROCESS_CHANGE
-extern int openclFrontEnd(const char* cmdline, std::string*, std::string* typeInfo = NULL);
-
-
-namespace oclhsa {
-
-
-/* Temporary log function for the compiler library */
-static void logFunction(const char* msg, size_t size)
-{
- std::cout<< "Compiler Log: " << msg << std::endl;
-}
-
-static int programsCount = 0;
-
-
-bool
-FSAILProgram::compileImpl(const std::string& sourceCode,
- const std::vector& headers,
- const char** headerIncludeNames,
- amd::option::Options* options)
-{
-
- acl_error errorCode;
- aclTargetInfo target;
- target = g_complibApi._aclGetTargetInfo(LP64_SWITCH("hsail","hsail-64"),
- dev().deviceInfo().targetName_,
- &errorCode);
-
- //end if asic info is ready
- // We dump the source code for each program (param: headers)
- // into their filenames (headerIncludeNames) into the TEMP
- // folder specific to the OS and add the include path while
- // compiling
-
- //Find the temp folder for the OS
- std::string tempFolder = amd::Os::getEnvironment("TEMP");
- if (tempFolder.empty()) {
- tempFolder = amd::Os::getEnvironment("TMP");
- if (tempFolder.empty()) {
- tempFolder = WINDOWS_SWITCH(".","/tmp");;
- }
- }
- //Iterate through each source code and dump it into tmp
- std::fstream f;
- std::vector headerFileNames(headers.size());
- std::vector newDirs;
- for (size_t i = 0; i < headers.size(); ++i) {
- std::string headerPath = tempFolder;
- std::string headerIncludeName(headerIncludeNames[i]);
- // replace / in path with current os's file separator
- if ( amd::Os::fileSeparator() != '/') {
- for (std::string::iterator it = headerIncludeName.begin(),
- end = headerIncludeName.end();
- it != end;
- ++it) {
- if (*it == '/') *it = amd::Os::fileSeparator();
- }
- }
- size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
- if (pos != std::string::npos) {
- headerPath += amd::Os::fileSeparator();
- headerPath += headerIncludeName.substr(0, pos);
- headerIncludeName = headerIncludeName.substr(pos+1);
- }
- if (!amd::Os::pathExists(headerPath)) {
- bool ret = amd::Os::createPath(headerPath);
- assert(ret && "failed creating path!");
- newDirs.push_back(headerPath);
- }
- std::string headerFullName
- = headerPath + amd::Os::fileSeparator() + headerIncludeName;
- headerFileNames[i] = headerFullName;
- f.open(headerFullName.c_str(), std::fstream::out);
- //Should we allow asserts
- assert(!f.fail() && "failed creating header file!");
- f.write(headers[i]->c_str(), headers[i]->length());
- f.close();
- }
-
- //Create Binary
- binaryElf_ = g_complibApi._aclBinaryInit(sizeof(aclBinary),
- &target,
- &binOpts_,
- &errorCode);
-
- if( errorCode!=ACL_SUCCESS ) {
- buildLog_ += "Error while compiling opencl source:\
- aclBinary init failure \n";
- LogWarning("aclBinaryInit failed");
- return false;
- }
-
- //Insert opencl into binary
- errorCode = g_complibApi._aclInsertSection(device().compiler(),
- binaryElf_,
- sourceCode.c_str(),
- strlen(sourceCode.c_str()),
- aclSOURCE);
-
- if ( errorCode != ACL_SUCCESS ) {
- buildLog_ += "Error while converting to BRIG: \
- Inserting openCl Source \n";
- }
-
-
- //Set the options for the compiler
- //Set the include path for the temp folder that contains the includes
- if(!headers.empty()) {
- this->compileOptions_.append(" -I");
- this->compileOptions_.append(tempFolder);
- }
-
- //Add only for CL2.0 and later
- if (options->oVariables->CLStd[2] >= '2') {
- std::stringstream opts;
- opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
- << device().info().maxGlobalVariableSize_;
- compileOptions_.append(opts.str());
- }
-
- //Compile source to IR
- this->compileOptions_.append(hsailOptions());
- errorCode = g_complibApi._aclCompile(device().compiler(),
- binaryElf_,
- //"-Wf,--support_all_extensions",
- this->compileOptions_.c_str(),
- ACL_TYPE_OPENCL,
- ACL_TYPE_LLVMIR_BINARY,
- logFunction);
- buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
- if( errorCode!=ACL_SUCCESS ) {
- LogWarning("aclCompile failed");
- buildLog_ += "Error while compiling \
- opencl source: Compiling CL to IR";
- return false;
- }
- // Save the binary in the interface class
- saveBinaryAndSetType(TYPE_COMPILED);
- return true;
-
-}
-}
-#endif // WITHOUT_GPU_BACKEND
diff --git a/rocclr/runtime/device/hsa/hsacompilerlib.cpp b/rocclr/runtime/device/hsa/hsacompilerlib.cpp
deleted file mode 100644
index ca568dc683..0000000000
--- a/rocclr/runtime/device/hsa/hsacompilerlib.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#include "hsacompilerlib.hpp"
-#include "utils/flags.hpp"
-
-#include "acl.h"
-
-namespace oclhsa {
-
-void* g_complibModule = NULL;
-struct CompLibApi g_complibApi;
-
-
-//
-// g_complibModule is defined in LoadCompLib(). This macro must be used only in LoadCompLib() function.
-//
-#define LOADSYMBOL(api) \
- g_complibApi._##api = (pfn_##api) amd::Os::getSymbol(g_complibModule, #api); \
- if( g_complibApi._##api == NULL ) { \
- LogError ("amd::Os::getSymbol() for exported func " #api " failed."); \
- amd::Os::unloadLibrary(g_complibModule); \
- return false; \
- }
-
-
-bool LoadCompLib(bool offline)
-{
- g_complibModule = amd::Os::loadLibrary("amdhsacl" LP64_SWITCH(LINUX_SWITCH("32",""), "64"));
- if( g_complibModule == NULL ) {
- if (!offline) {
- LogError( "amd::Os::loadLibrary() for loading of amdhsacl.dll failed.");
- }
- return false;
- }
-
- LOADSYMBOL(aclCompilerInit)
- LOADSYMBOL(aclGetTargetInfo)
- LOADSYMBOL(aclBinaryInit)
- LOADSYMBOL(aclInsertSection)
- LOADSYMBOL(aclCompile)
- LOADSYMBOL(aclCompilerFini)
- LOADSYMBOL(aclBinaryFini)
- LOADSYMBOL(aclExtractSection)
- LOADSYMBOL(aclWriteToMem)
- LOADSYMBOL(aclQueryInfo)
- LOADSYMBOL(aclGetDeviceBinary)
- LOADSYMBOL(aclExtractSymbol)
- LOADSYMBOL(aclGetCompilerLog)
- LOADSYMBOL(aclCreateFromBinary)
- LOADSYMBOL(aclReadFromMem)
-
- LOADSYMBOL(aclRemoveSymbol)
- LOADSYMBOL(aclInsertSymbol)
- LOADSYMBOL(aclWriteToFile)
- LOADSYMBOL(aclBinaryVersion)
- LOADSYMBOL(aclLink)
-
- return true;
-}
-
-void UnloadCompLib()
-{
- if( g_complibModule )
- {
- amd::Os::unloadLibrary(g_complibModule);
- }
-}
-
-} // namespace oclhsa
\ No newline at end of file
diff --git a/rocclr/runtime/device/hsa/hsacompilerlib.hpp b/rocclr/runtime/device/hsa/hsacompilerlib.hpp
deleted file mode 100644
index 1d245ddede..0000000000
--- a/rocclr/runtime/device/hsa/hsacompilerlib.hpp
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef HSACOMPILERLIB_HPP_
-#define HSACOMPILERLIB_HPP_
-
-//
-// This file hsa the code for explicity loading amdoclcl.dll.
-// Exported functions from amdoclcl.dll can be added for usage as need-basis.
-// With explicit/dynamic loading oclhsa will not have any linkage to amdoclcl.lib.
-//
-
-#include "thread/thread.hpp"
-#include "acl.h"
-#include "utils/debug.hpp"
-
-using namespace amd;
-
-namespace oclhsa {
-
-//
-// To use any new exported function from amdhsacl.dll please add/make that function specific changes
-// in typedef below, struct CompLibApi and in hsacompilerLib.cpp::LoadCompLib() function.
-//
-
-//
-// Convention: The typedefed function name must be prefixed with pfn_
-//
-typedef aclCompiler* (ACL_API_ENTRY *pfn_aclCompilerInit) (aclCompilerOptions *opts, acl_error *error_code);
-typedef aclTargetInfo (ACL_API_ENTRY *pfn_aclGetTargetInfo) (const char*, const char*, acl_error*);
-typedef aclBinary* (ACL_API_ENTRY *pfn_aclBinaryInit) (size_t, const aclTargetInfo*, const aclBinaryOptions*, acl_error*);
-typedef acl_error (ACL_API_ENTRY *pfn_aclInsertSection) (aclCompiler *cl, aclBinary *binary, const void *data, size_t data_size, aclSections id);
-typedef acl_error (ACL_API_ENTRY *pfn_aclCompile) (aclCompiler *cl, aclBinary *bin, const char *options, aclType from, aclType to, aclLogFunction compile_callback);
-typedef acl_error (ACL_API_ENTRY *pfn_aclCompilerFini) (aclCompiler *cl);
-typedef acl_error (ACL_API_ENTRY *pfn_aclBinaryFini) (aclBinary *bin);
-typedef const void* (ACL_API_ENTRY *pfn_aclExtractSection) (aclCompiler *cl, const aclBinary *binary, size_t *size, aclSections id, acl_error *error_code);
-typedef acl_error (ACL_API_ENTRY *pfn_aclWriteToMem) (aclBinary *bin,void **mem, size_t *size);
-typedef acl_error (ACL_API_ENTRY *pfn_aclQueryInfo) (aclCompiler *cl, const aclBinary *binary, aclQueryType query, const char *kernel, void *data_ptr, size_t *ptr_size);
-
-
-typedef const void* (ACL_API_ENTRY *pfn_aclGetDeviceBinary) (aclCompiler *cl,const aclBinary *bin,const char *kernel,size_t *size,acl_error *error_code);
-typedef const void* (ACL_API_ENTRY *pfn_aclExtractSymbol) (aclCompiler *cl,const aclBinary *binary,size_t *size,aclSections id,const char *symbol,acl_error *error_code);
-typedef aclBinary* (ACL_API_ENTRY *pfn_aclReadFromMem) (void *mem,size_t size, acl_error *error_code);
-typedef acl_error (ACL_API_ENTRY *pfn_aclRemoveSymbol) (aclCompiler *cl, aclBinary *binary, aclSections id, const char *symbol);
-typedef acl_error (ACL_API_ENTRY *pfn_aclInsertSymbol) (aclCompiler *cl, aclBinary *binary, const void *data, size_t data_size, aclSections id, const char *symbol);
-typedef acl_error (ACL_API_ENTRY *pfn_aclWriteToFile) (aclBinary *bin, const char *str);
-
-
-typedef char* (ACL_API_ENTRY *pfn_aclGetCompilerLog) (aclCompiler* cl);
-typedef aclBinary* (ACL_API_ENTRY *pfn_aclCreateFromBinary) (const aclBinary *binary,aclBIFVersion version);
-typedef aclBIFVersion (ACL_API_ENTRY *pfn_aclBinaryVersion) (const aclBinary *binary);
-typedef acl_error (ACL_API_ENTRY *pfn_aclLink) (aclCompiler* cl, aclBinary *src_bin, unsigned int num_libs, aclBinary **libs, aclType link_mode,const char* options, aclLogFunction link_callback);
-//
-// Convention: prefix struct member variable with with underscore '_'
-// would be nice if there was no underscore prfix, but on Linux the token
-// pasting in the macro is srtict and his is the workaround.
-//
-struct CompLibApi
-{
- pfn_aclCompilerInit _aclCompilerInit;
- pfn_aclGetTargetInfo _aclGetTargetInfo;
- pfn_aclBinaryInit _aclBinaryInit;
- pfn_aclInsertSection _aclInsertSection;
- pfn_aclCompile _aclCompile;
- pfn_aclCompilerFini _aclCompilerFini;
- pfn_aclBinaryFini _aclBinaryFini;
- pfn_aclExtractSection _aclExtractSection;
- pfn_aclWriteToMem _aclWriteToMem;
- pfn_aclQueryInfo _aclQueryInfo;
- pfn_aclGetDeviceBinary _aclGetDeviceBinary;
- pfn_aclExtractSymbol _aclExtractSymbol;
- pfn_aclReadFromMem _aclReadFromMem;
- pfn_aclRemoveSymbol _aclRemoveSymbol;
- pfn_aclInsertSymbol _aclInsertSymbol;
- pfn_aclWriteToFile _aclWriteToFile;
- pfn_aclGetCompilerLog _aclGetCompilerLog;
- pfn_aclCreateFromBinary _aclCreateFromBinary;
- pfn_aclBinaryVersion _aclBinaryVersion;
- pfn_aclLink _aclLink;
-};
-
-
-//
-// Use g_ prefix for all global variables.
-//
-extern void* g_complibModule;
-extern CompLibApi g_complibApi;
-
-// Note: initializes global variable g_complibApi.
-// Not sure what error values we have, for now returning false on failure.
-bool LoadCompLib(bool isOfflineDevice=false);
-void UnloadCompLib();
-
-} // namespace oclhsa
-#endif
diff --git a/rocclr/runtime/device/hsa/hsacore_symbol_loader.cpp b/rocclr/runtime/device/hsa/hsacore_symbol_loader.cpp
deleted file mode 100644
index d71b16d089..0000000000
--- a/rocclr/runtime/device/hsa/hsacore_symbol_loader.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-// Implementation of the the loading of dll and loading of all the exported
-// function symbols.
-
-
-#include "runtime/device/hsa/hsacore_symbol_loader.hpp"
-
-#include "runtime/thread/thread.hpp"
-#include "runtime/utils/debug.hpp"
-#include "runtime/os/os.hpp"
-
-#include
-#include
-
-HsacoreApiSymbols* HsacoreApiSymbols::instance_ = NULL;
-// hsacore_dll_handle_ is defined in HsacoreApiSymbols class.
-// This macro must be used only in member functions of HsacoreApiSymbols
-// class.
-#define LOADSYMBOL(api) \
- api = (pfn_ ## api) amd::Os::getSymbol(hsacore_dll_handle_, # api); \
- if (api == NULL) { \
- amd::log_printf(amd::LOG_ERROR, __FILE__, __LINE__, \
- "amd::Os::getSymbol() for exported func " # api " failed."); \
- amd::Os::unloadLibrary(hsacore_dll_handle_); \
- abort(); \
- }
-
-HsacoreApiSymbols::HsacoreApiSymbols()
- : hsacore_dll_name_(HSACORE_DLL_NAME) {
- hsacore_dll_handle_ = amd::Os::loadLibrary(hsacore_dll_name_.c_str());
- if( hsacore_dll_handle_ == NULL) {
- // Do not print, otherwise tests fail when HSA core and services DLLs are
- // not installed, in which case only ORCA stack is initialized and it is
- // not an error..
- //amd::log_printf(amd::LOG_INFO, __FILE__, __LINE__,
- // "Cannot load hsa core dll. HSA DLLs may not be installed on the machine."
- // " OpenCL requirement, returning without error.");
- return;
- }
-
- LOADSYMBOL(HsaGetCoreApiTable)
-}
-
-HsacoreApiSymbols::~HsacoreApiSymbols() {
- if (hsacore_dll_handle_) {
- amd::Os::unloadLibrary(hsacore_dll_handle_);
- hsacore_dll_handle_ = NULL;
- }
-}
-
diff --git a/rocclr/runtime/device/hsa/hsacore_symbol_loader.hpp b/rocclr/runtime/device/hsa/hsacore_symbol_loader.hpp
deleted file mode 100644
index 4133ecdfdf..0000000000
--- a/rocclr/runtime/device/hsa/hsacore_symbol_loader.hpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef _OPENCL_RUNTIME_DEVICE_HSA_HSACORE_SYMBOL_LOADER_HPP_
-#define _OPENCL_RUNTIME_DEVICE_HSA_HSACORE_SYMBOL_LOADER_HPP_
-
-// File: hsacore_symbol_loader.hpp
-// The main purpose of this file (class HsacoreApiSymbols), is to load the HSA
-// API function symbol HsaGetCoreApiTable() from hsacore DLL/so module.
-// This function outputs HsaCoreApiTable which has pointers to the rest of the
-// hsacore API functions, which should be used to invoke the API functions.
-
-#include "newcore.h"
-#include "hsacoreagent.h"
-
-#include
-
-// In case of change in the name of hsacore dll name, change the
-// #define HSACORE_DLL_NAME value. this is the only place the DLL name should
-// be changed or referred to.
-#define HSACORE_DLL_NAME "newhsacore" LP64_ONLY("64")
-
-// Convention: The typedefed function name must be prefixed with pfn_ indicating
-// it as pointer-to-function.
-typedef HsaStatus (*pfn_HsaGetCoreApiTable)(const HsaCoreApiTable **api_table);
-
-
-// Singleton HsacoreApiSymbols class contains the module handle and loaded
-// symbols of one accessor API accessor function.
-// To call hsacore API funciton, instance of this class must be used.
-// Example:
-// // In initialization code
-// const HsaCoreApiTable *hsacoreapi = NULL;
-// HsacoreApiSymbols::Instance().HsaGetCoreApiTable(&hsacoreapi);
-// ...
-// ...
-// // Calling the core api.
-// hsacoreapi->HsaGetDevices(...);
-// hsacoreapi->HsaRegisterMemory(...);
-class HsacoreApiSymbols {
- public:
- // Only the access function symbol is loaded, which in turn has pointers to
- // rest of the hsacore api.
- pfn_HsaGetCoreApiTable HsaGetCoreApiTable;
-
- static HsacoreApiSymbols &Instance() {
- if (instance_ == NULL) {
- instance_ = new HsacoreApiSymbols();
- }
- return *instance_;
- }
- static void teardown(){
- if (instance_ != NULL){
- delete instance_;
- }
- }
- static bool IsDllLoaded() {
- return Instance().hsacore_dll_handle_ ? true : false;
- };
-
- private:
-
- static HsacoreApiSymbols* instance_;
- // Force singleton pattern.export LD_LIBRAR
- explicit HsacoreApiSymbols();
- ~HsacoreApiSymbols();
- HsacoreApiSymbols(const HsacoreApiSymbols &) {}
- const HsacoreApiSymbols &operator=(const HsacoreApiSymbols &) {return *this; }
-
- // Data.
- void *hsacore_dll_handle_;
- const std::string hsacore_dll_name_;
-};
-#endif // header guard
diff --git a/rocclr/runtime/device/hsa/hsacounters.cpp b/rocclr/runtime/device/hsa/hsacounters.cpp
deleted file mode 100644
index 448d7f6d03..0000000000
--- a/rocclr/runtime/device/hsa/hsacounters.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#include "device/hsa/oclhsa_common.hpp"
-#include "device/hsa/hsacounters.hpp"
-#include "device/hsa/hsavirtual.hpp"
-
-namespace oclhsa {
-
-PerfCounter::~PerfCounter()
-{
- // Destroy the corresponding HSA counter object
- HsaStatus status;
- status = servicesapi->HsaPmuDestroyCounter(counter_block_, counter_);
- if (status != kHsaStatusSuccess) {
- LogError("Destroy counter failed");
- return;
- }
-
- // If no enabled counter corresponding to the PMU,
- // Release the PMU
- uint32_t counter_num;
- if (!getEnabledCounterNum(counter_num)) {
- LogError("getEnabledCounterNum failed");
- return;
- }
-
- if (counter_num == 0) {
- status = servicesapi->HsaReleasePmu(hsaPmu_);
- if (status != kHsaStatusSuccess) {
- LogError("Destroy pmu failed");
- return;
- }
- }
-}
-
-bool
-PerfCounter::create(HsaPmu hsaPmu)
-{
- HsaStatus status;
- hsaPmu_ = hsaPmu;
- uint32_t blockIndex = static_cast(info()->blockIndex_);
- status = servicesapi->HsaPmuGetCounterBlockById(hsaPmu_, blockIndex, &counter_block_);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuGetCounterBlockById, failed");
- return false;
- }
-
- status = servicesapi->HsaPmuCreateCounter(counter_block_, &counter_);
- if (status != kHsaStatusSuccess) {
- LogPrintfError("HsaPmuCreateCounter, failed.\
- Block: %d, counter: #d, event: %d",
- info()->blockIndex_,
- info()->counterIndex_,
- info()->eventIndex_);
-
- return false;
- }
-
- status = servicesapi->HsaPmuCounterSetEnabled(counter_, true);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuCounterSetEnabled, failed");
- return false;
- }
-
- uint32_t eventIndex = static_cast(info()->eventIndex_);
- status = servicesapi->HsaPmuCounterSetParameter(counter_,
- kHsaCounterParameterEventIndex,
- sizeof(uint32_t), (void *)&eventIndex);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuCounterSetParameter, failed");
- return false;
- }
-
- return true;
-}
-
-uint64_t
-PerfCounter::getInfo(uint64_t infoType) const
-{
- switch (infoType) {
- case CL_PERFCOUNTER_GPU_BLOCK_INDEX: {
- // Return the GPU block index
- return info()->blockIndex_;
- }
- case CL_PERFCOUNTER_GPU_COUNTER_INDEX: {
- // Return the GPU counter index
- return info()->counterIndex_;
- }
- case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
- // Return the GPU event index
- return info()->eventIndex_;
- }
- case CL_PERFCOUNTER_DATA: {
- HsaStatus status;
- uint64_t counterValue;
- status = servicesapi->HsaPmuCounterGetResult(counter_, &counterValue);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuCounterGetResult, failed");
- }
- return counterValue;
- }
- default:
- LogError("Wrong PerfCounter::getInfo parameter");
- }
-
- return 0;
-}
-
-bool
-PerfCounter::getEnabledCounterNum(uint32_t &counter_num)
-{
- // Collect all the program counter blocks
- uint32_t counterblock_num, num;
- uint32_t i;
- HsaStatus status;
- HsaCounter *pp_counters;
- HsaCounterBlock *pp_counterblocks;
- status = servicesapi->HsaPmuGetAllCounterBlocks(hsaPmu_,
- &pp_counterblocks,
- &counterblock_num);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuGetAllCounterBlocks, failed");
- return false;
- }
-
- counter_num = 0;
- for (i = 0; i < counterblock_num; i++) {
- // Retrieve all enabled pp_counters in each counter block
- status = servicesapi->HsaPmuGetEnabledCounters(pp_counterblocks[i],
- &pp_counters, &num);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuGetEnabledCounters, failed");
- return false;
- }
- counter_num += num;
- }
-
- return true;
-}
-
-
-} // namespace oclhsa
diff --git a/rocclr/runtime/device/hsa/hsacounters.hpp b/rocclr/runtime/device/hsa/hsacounters.hpp
deleted file mode 100644
index 3f6669e98a..0000000000
--- a/rocclr/runtime/device/hsa/hsacounters.hpp
+++ /dev/null
@@ -1,103 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-#ifndef HSACOUNTERS_HPP_
-#define HSACOUNTERS_HPP_
-
-#include "top.hpp"
-#include "device/device.hpp"
-#include "device/hsa/hsadevice.hpp"
-
-namespace oclhsa {
-
-class VirtualGPU;
-
-//! Performance counter implementation on GPU
-class PerfCounter : public device::PerfCounter
-{
-public:
- //! The performance counter info
- struct Info : public amd::EmbeddedObject
- {
- uint blockIndex_; //!< Index of the block to configure
- uint counterIndex_; //!< Index of the hardware counter
- uint eventIndex_; //!< Event you wish to count with the counter
- };
-
- //! The PerfCounter flags
- enum Flags
- {
- BeginIssued = 0x00000001,
- EndIssued = 0x00000002,
- ResultReady = 0x00000004
- };
-
- //! Constructor for the GPU PerfCounter object
- PerfCounter(
- const HsaDevice *device, //!< A GPU device object
- const VirtualGPU& gpu, //!< Virtual GPU device object
- cl_uint blockIndex, //!< HW block index
- cl_uint counterIndex, //!< Counter index within the block
- cl_uint eventIndex) //!< Event index for profiling
- : gpuDevice_(device)
- , gpu_(gpu)
- , hsaPmu_(NULL)
- , flags_(0)
- , counter_(0)
- , index_(0)
- {
- info_.blockIndex_ = blockIndex;
- info_.counterIndex_ = counterIndex;
- info_.eventIndex_ = eventIndex;
- }
-
- //! Destructor for the GPU PerfCounter object
- virtual ~PerfCounter();
-
- //! Creates the counter object
- bool create(
- HsaPmu hsaPmu //!< Reference counter
- );
-
- //! Returns the specific information about the counter
- uint64_t getInfo(
- uint64_t infoType //!< The type of returned information
- ) const;
-
- //! Returns the GPU device, associated with the current object
- const HsaDevice * dev() const { return gpuDevice_; }
-
- //! Returns the virtual GPU device
- const VirtualGPU& gpu() const { return gpu_; }
-
- //! Returns the CAL performance counter descriptor
- const Info* info() const { return &info_; }
-
- //! Returns the Info structure for performance counter
- HsaPmu getCounterPmu() const { return hsaPmu_; }
-
-private:
- //! Disable default copy constructor
- PerfCounter(const PerfCounter&);
-
- //! Disable default operator=
- PerfCounter& operator=(const PerfCounter&);
-
- //! Get enabled counter number
- bool getEnabledCounterNum(uint32_t &counter_num);
-
- const HsaDevice *gpuDevice_; //!< The backend device
- const VirtualGPU& gpu_; //!< The virtual GPU device object
-
- HsaPmu hsaPmu_; //!< Hsa pmu
- uint flags_; //!< The perfcounter object state
- Info info_; //!< The info structure for perfcounter
- HsaCounter counter_; //!< HSA counter object
- HsaCounterBlock counter_block_; //!< counter block that the counter belongs to
- uint index_; //!< Counter index in the CAL container
-};
-
-} // namespace oclhsa
-
-#endif // HSACOUNTERS_HPP_
-
diff --git a/rocclr/runtime/device/hsa/hsadefs.hpp b/rocclr/runtime/device/hsa/hsadefs.hpp
deleted file mode 100644
index 35a9964a7a..0000000000
--- a/rocclr/runtime/device/hsa/hsadefs.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _OPENCL_RUNTIME_DEVICE_HSA_HSADEFS_HPP_
-#define _OPENCL_RUNTIME_DEVICE_HSA_HSADEFS_HPP_
-
-#ifndef WITHOUT_FSA_BACKEND
-
-namespace oclhsa {
-
-typedef uint HsaDeviceId;
-
-struct AMDDeviceInfo {
- HsaDeviceId hsaDeviceId_; //!< Machine id
- const char* targetName_; //!< Target name for compilation
- const char* machineTarget_; //!< Machine target
- uint simdPerCU_; //!< Number of SIMDs per CU
- uint simdWidth_; //!< Number of workitems processed per SIMD
- uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
- uint memChannelBankWidth_; //!< Memory channel bank width
- uint localMemSizePerCU_; //!< Local memory size per CU
- uint localMemBanks_; //!< Number of banks of local memory
-};
-
-//The device ID must match with the device's index into DeviceInfo
-const HsaDeviceId HSA_SPECTRE_ID = 0;
-const HsaDeviceId HSA_SPOOKY_ID = 1;
-const HsaDeviceId HSA_TONGA_ID = 2;
-const HsaDeviceId HSA_CARRIZO_ID = 3;
-const HsaDeviceId HSA_ICELAND_ID = 4;
-const HsaDeviceId HSA_INVALID_DEVICE_ID = -1;
-
-static const AMDDeviceInfo DeviceInfoTable[] = {
- // targetName machineTarget
-/* TARGET_KAVERI_SPECTRE */ {HSA_SPECTRE_ID, "Spectre", "Spectre", 4, 16, 1, 256, 64 * Ki, 32 },
-/* TARGET_KAVERI_SPOOKY */ {HSA_SPOOKY_ID, "Spooky", "Spooky", 4, 16, 1, 256, 64 * Ki, 32 },
-/* TARGET_TONGA */ {HSA_TONGA_ID, "Tonga", "Tonga", 4, 16, 1, 256, 64 * Ki, 32},
-/* TARGET_CARRIZO */ {HSA_CARRIZO_ID, "Carrizo", "Carrizo", 4, 16, 1, 256, 64 * Ki, 32},
-/* TARGET_ICELAND */ {HSA_ICELAND_ID, "Topaz", "Topaz", 4, 16, 1, 256, 64 * Ki, 32}
-};
-
-
-}
-#endif
-#endif
\ No newline at end of file
diff --git a/rocclr/runtime/device/hsa/hsadevice.cpp b/rocclr/runtime/device/hsa/hsadevice.cpp
deleted file mode 100644
index 20356d3227..0000000000
--- a/rocclr/runtime/device/hsa/hsadevice.cpp
+++ /dev/null
@@ -1,896 +0,0 @@
-//
-// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-
-#ifndef WITHOUT_FSA_BACKEND
-
-
-#include "platform/program.hpp"
-#include "platform/kernel.hpp"
-#include "os/os.hpp"
-#include "utils/debug.hpp"
-#include "utils/flags.hpp"
-#include "utils/versions.hpp"
-#include "thread/monitor.hpp"
-#include "CL/cl_ext.h"
-
-#include "newcore.h"
-
-#include "amdocl/cl_common.hpp"
-#include "device/hsa/hsadevice.hpp"
-#include "device/hsa/hsavirtual.hpp"
-#include "device/hsa/hsaprogram.hpp"
-#include "device/hsa/hsablit.hpp"
-#include "device/hsa/hsacompilerlib.hpp"
-#include "device/hsa/hsamemory.hpp"
-#include "hsacore_symbol_loader.hpp"
-#include "device/hsa/oclhsa_common.hpp"
-#include "kv_id.h"
-#include "vi_id.h"
-#include "cz_id.h"
-#include "hsainterop.h"
-
-#include
-#include
-#include "CL/cl_gl.h"
-
-#ifdef _WIN32
-#include "CL/cl_d3d10.h"
-#endif // _WIN32
-
-#include
-#include
-#include
-#include
-#include
-#include
-#endif // WITHOUT_FSA_BACKEND
-
-const HsaCoreApiTable *hsacoreapi = NULL;
-const HsaServicesApiTable *servicesapi = NULL;
-#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
-
-#ifndef WITHOUT_FSA_BACKEND
-namespace device {
-extern const char* BlitSourceCode;
-}
-
-namespace oclhsa {
-
-aclCompiler* NullDevice::compilerHandle_;
-bool oclhsa::Device::isHsaInitialized_ = false;
-const bool oclhsa::Device::offlineDevice_ = false;
-const bool oclhsa::NullDevice::offlineDevice_= true;
-
-static HsaDeviceId getHsaDeviceId(const HsaDevice *device) {
- /*
- * Use the device id to determine the ASIC family
- */
- switch (device->device_id) {
- case DEVICE_ID_SPECTRE_MOBILE:
- case DEVICE_ID_SPECTRE_DESKTOP:
- case DEVICE_ID_SPECTRE_LITE_MOBILE_1309:
- case DEVICE_ID_SPECTRE_LITE_MOBILE_130A:
- case DEVICE_ID_SPECTRE_SL_MOBILE_130B:
- case DEVICE_ID_SPECTRE_MOBILE_130C:
- case DEVICE_ID_SPECTRE_LITE_MOBILE_130D:
- case DEVICE_ID_SPECTRE_SL_MOBILE_130E:
- case DEVICE_ID_SPECTRE_DESKTOP_130F:
- case DEVICE_ID_SPECTRE_WORKSTATION_1310:
- case DEVICE_ID_SPECTRE_WORKSTATION_1311:
- case DEVICE_ID_SPECTRE_LITE_DESKTOP_1313:
- case DEVICE_ID_SPECTRE_SL_DESKTOP_1315:
- case DEVICE_ID_SPECTRE_SL_MOBILE_1318:
- case DEVICE_ID_SPECTRE_SL_EMBEDDED_131B:
- case DEVICE_ID_SPECTRE_EMBEDDED_131C:
- case DEVICE_ID_SPECTRE_LITE_EMBEDDED_131D:
- return HSA_SPECTRE_ID;
- case DEVICE_ID_SPOOKY_MOBILE:
- case DEVICE_ID_SPOOKY_DESKTOP:
- case DEVICE_ID_SPOOKY_DESKTOP_1312:
- case DEVICE_ID_SPOOKY_DESKTOP_1316:
- case DEVICE_ID_SPOOKY_MOBILE_1317:
- return HSA_SPOOKY_ID;
- case DEVICE_ID_VI_TONGA_P_6920:
- case DEVICE_ID_VI_TONGA_P_6921:
- case DEVICE_ID_VI_TONGA_P_6928:
- case DEVICE_ID_VI_TONGA_P_692B:
- case DEVICE_ID_VI_TONGA_P_692F:
- case DEVICE_ID_VI_TONGA_P_6938:
- case DEVICE_ID_VI_TONGA_P_6939:
- return HSA_TONGA_ID;
- case DEVICE_ID_CZ_9870:
- case DEVICE_ID_CZ_9874:
- case DEVICE_ID_CZ_9875:
- case DEVICE_ID_CZ_9876:
- case DEVICE_ID_CZ_9877:
- return HSA_CARRIZO_ID;
- case DEVICE_ID_VI_ICELAND_M_6900:
- case DEVICE_ID_VI_ICELAND_M_6901:
- case DEVICE_ID_VI_ICELAND_M_6902:
- case DEVICE_ID_VI_ICELAND_M_6903:
- case DEVICE_ID_VI_ICELAND_M_6907:
- return HSA_ICELAND_ID;
- default:
- return HSA_INVALID_DEVICE_ID;
- }
-}
-bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
- online_ = false;
- deviceInfo_ = deviceInfo;
- // Mark the device as GPU type
- info_.type_ = CL_DEVICE_TYPE_GPU | CL_HSA_ENABLED_AMD;
- info_.vendorId_ = 0x1002;
-
- settings_ = new Settings();
- oclhsa::Settings* hsaSettings = static_cast(settings_);
- if ((hsaSettings == NULL) ||
- // @Todo sramalin Use double precision from constsant
- !hsaSettings->create((true) & 0x1)) {
- LogError("Error creating settings for NULL HSA device");
- return false;
- }
- // Report the device name
- ::strcpy(info_.name_, deviceInfo_.machineTarget_);
- info_.extensions_ = getExtensionString();
- info_.maxWorkGroupSize_ = hsaSettings->maxWorkGroupSize_;
- ::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
- info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
- std::string driverVersion = AMD_BUILD_STRING;
- driverVersion.append(" (HSA)");
- strcpy(info_.driverVersion_, driverVersion.c_str());
- info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
- return true;
-}
-
-Device::Device(const HsaDevice *bkendDevice)
- : _bkendDevice(bkendDevice), context_(NULL), xferQueue_(NULL)
-{
-}
-
-Device::~Device()
-{
- // Destroy transfer queue
- if (xferQueue_ && xferQueue_->terminate()) {
- delete xferQueue_;
- xferQueue_ = NULL;
- }
-
- if (blitProgram_) {
- delete blitProgram_;
- blitProgram_ = NULL;
- }
-
- if (context_ != NULL) {
- context_->release();
- }
-
- if (info_.extensions_) {
- delete[]info_.extensions_;
- info_.extensions_ = NULL;
- }
-
- if (settings_) {
- delete settings_;
- settings_ = NULL;
- }
-}
-bool NullDevice::initCompiler(bool isOffline) {
- // Initializes g_complibModule and g_complibApi if they were not initialized
- if( g_complibModule == NULL ){
- if (!LoadCompLib(isOffline)) {
- if (!isOffline) {
- LogError("Error - could not find the compiler library");
- }
- return false;
- }
- }
- //Initialize the compiler handle if has already not been initialized
- //This is destroyed in Device::teardown
- acl_error error;
- if (!compilerHandle_) {
- compilerHandle_ = g_complibApi._aclCompilerInit(NULL, &error);
- if (error != ACL_SUCCESS) {
- LogError("Error initializing the compiler handle");
- return false;
- }
- }
- return true;
-}
-
-bool NullDevice::destroyCompiler() {
- if (compilerHandle_ != NULL) {
- acl_error error = g_complibApi._aclCompilerFini(compilerHandle_);
- if (error != ACL_SUCCESS) {
- LogError("Error closing the compiler");
- return false;
- }
- }
- if( g_complibModule != NULL ){
- UnloadCompLib();
- }
- return true;
-}
-
-void NullDevice::tearDown() {
- destroyCompiler();
-}
-bool NullDevice::init() {
- //Initialize the compiler
- if (!initCompiler(offlineDevice_)){
- return false;
- }
- //If there is an HSA enabled device online then skip any offline device
- std::vector devices;
- devices = getDevices(CL_DEVICE_TYPE_GPU | CL_HSA_ENABLED_AMD, false);
-
- //Load the offline devices
- //Iterate through the set of available offline devices
- for (uint id = 0; id < sizeof(DeviceInfoTable)/sizeof(AMDDeviceInfo); id++) {
- bool isOnline = false;
- //Check if the particular device is online
- for (unsigned int i=0; i< devices.size(); i++) {
- if (static_cast(devices[i])->deviceInfo_.hsaDeviceId_ ==
- DeviceInfoTable[id].hsaDeviceId_){
- isOnline = true;
- }
- }
- if (isOnline) {
- continue;
- }
- NullDevice* nullDevice = new NullDevice();
- if (!nullDevice->create(DeviceInfoTable[id])) {
- LogError("Error creating new instance of Device.");
- delete nullDevice;
- return false;
- }
- nullDevice->registerDevice();
- }
- return true;
-}
-NullDevice::~NullDevice() {
- if (info_.extensions_) {
- delete[]info_.extensions_;
- info_.extensions_ = NULL;
- }
-
- if (settings_) {
- delete settings_;
- settings_ = NULL;
- }
-}
-bool Device::init() {
- // Assumption: init() will be called by ocl only once at the start of program
- // with a matching tearDown() when program exits.
- // TODO(papte) Check if init(),
- // tearDown(), init(), tearDown() repeat sequence is possible in one session
- // (process lifetime). If so we will be calling LoadLibrary() and
- // FreeLibrary() ifcn the similar repeat sequence. Investigate the effect of
- // this on the HSA Device and Core runtime's initialzers, where the device list
- // is generated in the runtime.
-#ifdef BUILD_STATIC_HSA
- HsaGetCoreApiTable(&hsacoreapi);
- HsaGetServicesApiTable(&servicesapi);
-#else
- bool core_dll_loaded = HsacoreApiSymbols::Instance().IsDllLoaded();
- bool service_dll_loaded = ServicesApiSymbols::Instance().IsDllLoaded();
-
- if (!core_dll_loaded && !service_dll_loaded ) {
- // Both DLLs are not loaded, assume HSA not installed on a non-HSA
- // machine, returning true.
- LogInfo("HSA stack not available.");
- return true; // Return true, indicating nothing is wrong and
- // assuming HSA not installed.
- } else if (core_dll_loaded ^ service_dll_loaded) {
- // If Only one of the two HSA DLLs failed, then its an ERROR.
- LogError("One of the HSA libraies, core or services failed to load.\n");
- return false;
- } else {
- // Both DLLs loaded, continue initializing HSA stack.
- LogInfo("Initializing HSA stack.");
- }
-
- // First thing first, initialize hsacoreapi and servicesapi to call core and
- // services API respectively.
- HsacoreApiSymbols::Instance().HsaGetCoreApiTable(&hsacoreapi);
- ServicesApiSymbols::Instance().HsaGetServicesApiTable(&servicesapi);
-#endif
- isHsaInitialized_ = false;
- if (hsacoreapi->HsaAmdInitialize() != kHsaStatusSuccess) {
- // Either an error in HSA core initialization or
- // KFD not installed on the machine.
- // Return without error, so OpenCL can continue without HSA stack.
- return true;
- }
- isHsaInitialized_ = true;
-
- // Initialize the structure used to configure the
- // behavior of Hsa Runtime
- // TODO (PA) : verify if this ito be called or not.
- // Latest code does not call.
- // SetHsaEnvConfig();
-
- //Initialize the compiler
- if (!initCompiler(offlineDevice_)){
- return false;
- }
-
- const HsaDevice *devices = NULL;
- unsigned num_devices = 0;
-
- // Initialize the Hsa Service layer
- servicesapi->HsaInitServices(128);
-
- HsaStatus status = hsacoreapi->HsaGetDevices(&num_devices, &devices);
- if (status != kHsaStatusSuccess) {
- LogPrintfError(
- "in %s(), Call to newcore HsaGetDevices() failed, HsaStatus: %d",
- __FUNCTION__, status);
- return false;
- }
-
- for (unsigned int i = 0; i < num_devices; i++) {
- Device *oclhsa_device = new Device(&devices[i]);
- if (!oclhsa_device) {
- LogError("Error creating new instance of Device on then heap.");
- return false;
- }
- HsaDeviceId deviceId = getHsaDeviceId(&devices[i]);
- if (deviceId == HSA_INVALID_DEVICE_ID) {
- LogError(" Invalid HSA device");
- return false;
- }
- //Find device id in the table
- unsigned sizeOfTable = sizeof(DeviceInfoTable)/sizeof(AMDDeviceInfo);
- uint id;
- for (id = 0; id < sizeOfTable; id++) {
- if (DeviceInfoTable[id].hsaDeviceId_ == deviceId){
- break;
- }
- }
- //If the AmdDeviceInfo for the HsaDevice Id could not be found return false
- if (id == sizeOfTable) {
- return false;
- }
- oclhsa_device->deviceInfo_ = DeviceInfoTable[id];
-
- if (!oclhsa_device->mapHSADeviceToOpenCLDevice(&devices[i])) {
- LogError("Failed mapping of HsaDevice to Device.");
- return false;
- }
-
- if (!oclhsa_device->create()) {
- LogError("Error creating new instance of Device.");
- return false;
- }
- oclhsa_device->registerDevice(); // no return code for this function
- }
- return true;
-}
-
-void
-Device::tearDown()
-{
- if (isHsaInitialized_) {
- if (servicesapi != NULL && servicesapi->HsaDestroyServices != NULL) {
- servicesapi->HsaDestroyServices();
- }
- hsacoreapi->HsaAmdShutdown();
- }
- NullDevice::tearDown();
- HsacoreApiSymbols::teardown();
- ServicesApiSymbols::teardown();
-}
-
-bool
-Device::create()
-{
- amd::Context::Info info = {0};
- std::vector devices;
- devices.push_back(this);
-
- // Create a dummy context
- context_ = new amd::Context(devices, info);
- if (context_ == NULL) {
- return false;
- }
-
- blitProgram_ = new BlitProgram(context_);
- // Create blit programs
- if (blitProgram_ == NULL || !blitProgram_->create(this)) {
- delete blitProgram_;
- blitProgram_ = NULL;
- LogError("Couldn't create blit kernels!");
- return false;
- }
-
- return true;
-}
-
-oclhsa::Memory*
-Device::getOclHsaMemory(amd::Memory* mem) const
-{
- return static_cast(mem->getDeviceMemory(*this));
-}
-
-device::Program*
-NullDevice::createProgram(bool hsail) {
- return new oclhsa::FSAILProgram(*this);
-}
-
-device::Program*
-Device::createProgram(bool hsail) {
- return new oclhsa::FSAILProgram(*this);
-}
-
-cl_device_svm_capabilities
-Device::getSvmCapabilities(const HsaDevice* device)
-{
- // KV supports all types of SVM
- if (device->device_id >= DEVICE_ID_SPECTRE_MOBILE &&
- device->device_id <= DEVICE_ID_SPECTRE_EMBEDDED_131C) {
-
- cl_bitfield atomics = CL_DEVICE_SVM_ATOMICS;
- // Atomics are allowed in 32 bits if a environment variable is set
- if (Is32Bits() && !settings().enableSvm32BitsAtomics_) {
- atomics = 0;
- }
- return CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
- CL_DEVICE_SVM_FINE_GRAIN_BUFFER |
- CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
- atomics;
- }
- // Devices such as Bonaire enable some HSA features but they do not include
- // CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (because of addresses above 2^40) or
- // CL_DEVICE_SVM_ATOMICS capabilities.
- return CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
- CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
-}
-
-bool
-Device::mapHSADeviceToOpenCLDevice(const HsaDevice *dev)
-{
- // Create HSA settings
- settings_ = new Settings();
- oclhsa::Settings* hsaSettings = static_cast(settings_);
- if ((hsaSettings == NULL) ||
- !hsaSettings->create((dev->is_double_precision) & 0x1)) {
- return false;
- }
- // Report the device name
- ::strcpy(info_.name_, deviceInfo_.machineTarget_);
- strcpy(info_.boardName_, dev->device_name);
-
- if (dev->number_cache_descriptors != 0) {
- HsaCacheDescriptor* cacheDesc = dev->cache_descriptors;
- info_.globalMemCacheLineSize_ = cacheDesc->cache_line_size;
- info_.globalMemCacheSize_ = cacheDesc->cache_size * Ki;
-
- info_.globalMemCacheType_ = (cacheDesc->cache_type.value == 0) ?
- CL_NONE : CL_READ_WRITE_CACHE;
- }
- else {
- info_.globalMemCacheType_ = CL_NONE;
- info_.globalMemCacheLineSize_ = 0;
- info_.globalMemCacheSize_ = 0;
- }
-
- // Map HSA device types to OCL device types.
- // if (dev->device_type == kHsaDeviceTypeThroughput)
- info_.type_ = CL_DEVICE_TYPE_GPU | CL_HSA_ENABLED_AMD;
-
- info_.maxComputeUnits_ = dev->number_compute_units;
- info_.deviceTopology_.pcie.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
- info_.deviceTopology_.pcie.bus = (dev->location_id&(0xFF<<8))>>8;
- info_.deviceTopology_.pcie.device = (dev->location_id&(0x1F<<3))>>3;
- info_.deviceTopology_.pcie.function = (dev->location_id&0x07);
- info_.extensions_ = getExtensionString();
- info_.nativeVectorWidthDouble_ =
- info_.preferredVectorWidthDouble_ = (settings().doublePrecision_) ? 1 : 0;
-
- info_.maxWorkGroupSize_ = dev->wave_front_size * dev->max_waves_per_simd;
- info_.maxClockFrequency_ = dev->max_clock_rate_of_f_compute;
- //info_.imageSupport_ = dev->is_image_support;
- info_.imageSupport_ = false;
-
- info_.localMemSizePerCU_ = dev->group_memory_size;
-
- if (populateOCLDeviceConstants() == false) {
- return false;
- }
-
- // Populate the single config setting.
- info_.singleFPConfig_ = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
- CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_FMA;
-
- if (hsaSettings->doublePrecision_) {
- info_.doubleFPConfig_ = info_.singleFPConfig_ | CL_FP_DENORM;
- info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
- }
-
- info_.svmCapabilities_ = getSvmCapabilities(dev);
- info_.preferredPlatformAtomicAlignment_ = 0;
- info_.preferredGlobalAtomicAlignment_ = 0;
- info_.preferredLocalAtomicAlignment_ = 0;
-
- return true;
-}
-
-static bool
-isFrameBufferDescriptor(HsaMemoryDescriptor &desc)
-{
- return (desc.heap_type == kHsaHeapTypeFrameBufferPrivate);
-}
-
-bool
-Device::populateOCLDeviceConstants()
-{
- info_.available_ = true;
- /*info_.maxWorkGroupSize_ = 256;*/
- info_.maxWorkItemDimensions_ = 3;
-
- // Get frame buffer memory descriptor.
- HsaMemoryDescriptor *memDescBegin = _bkendDevice->memory_descriptors;
- HsaMemoryDescriptor *memDescEnd =
- memDescBegin + _bkendDevice->number_memory_descriptors;
- HsaMemoryDescriptor *hsaFbDesc =
- std::find_if(memDescBegin, memDescEnd, isFrameBufferDescriptor);
-
- if ((hsaFbDesc != memDescEnd) && (hsaFbDesc->size_in_bytes > 0)) {
- // Device local memory exists. Populate OpenCL info field with
- // attributes of HSA GPU local memory descriptor.
- info_.globalMemSize_ = hsaFbDesc->size_in_bytes;
-
- info_.maxMemAllocSize_ =
- std::max(std::min(cl_ulong(1 * Gi), info_.globalMemSize_ / 4),
- cl_ulong(128 * Mi));
-
- // Make sure the max allocation size is not larger than the available
- // memory size.
- info_.maxMemAllocSize_ =
- std::min(info_.maxMemAllocSize_, info_.globalMemSize_);
- }
- else {
- // The HSA device backend does not have local memory, so we use system
- // memory as default.
- info_.globalMemSize_ = Os::getPhysicalMemSize();
- if (info_.globalMemSize_ == 0) {
- return false;
- }
-
- // Cap global memory
-#if defined (_LP64)
- // Cap at 8TiB for 64-bit
- const cl_ulong maxGlobalMemSize = 8ULL * Ki * Gi;
-#elif defined (_WIN32)
- // Cap at 2GiB (see http://msdn.microsoft.com/en-us/library/aa366778.aspx)
- const cl_ulong maxGlobalMemSize = 2ULL * Gi;
-#else // linux
- // Cap at 3.5GiB
- const cl_ulong maxGlobalMemSize = 3584ULL * Mi;
-#endif
- info_.globalMemSize_ = std::min(info_.globalMemSize_, maxGlobalMemSize);
-
- info_.maxMemAllocSize_ =
- info_.globalMemSize_ * CPU_MAX_ALLOC_PERCENT / 100;
- if (flagIsDefault(CPU_MAX_ALLOC_PERCENT)) {
- const cl_ulong minAllocSize = LP64_SWITCH(1ULL * Gi, 2ULL * Gi);
- info_.maxMemAllocSize_ = std::max(info_.maxMemAllocSize_,
- std::min(info_.globalMemSize_, minAllocSize));
- }
- }
-
- /*make sure we don't run anything over 8 params for now*/
- info_.maxParameterSize_ = 1024; // [TODO]: CAL stack values: 1024*
- // constant
- info_.maxWorkItemSizes_[0] = 256;
- info_.maxWorkItemSizes_[1] = 256;
- info_.maxWorkItemSizes_[2] = 256;
-
- info_.nativeVectorWidthChar_ = info_.preferredVectorWidthChar_ = 4;
- info_.nativeVectorWidthShort_ = info_.preferredVectorWidthShort_ = 2;
- info_.nativeVectorWidthInt_ = info_.preferredVectorWidthInt_ = 1;
- info_.nativeVectorWidthLong_ = info_.preferredVectorWidthLong_ = 1;
- info_.nativeVectorWidthFloat_ = info_.preferredVectorWidthFloat_ = 1;
-
- info_.localMemSize_ = 32 * 1024;
- info_.hostUnifiedMemory_ = CL_TRUE;
- info_.memBaseAddrAlign_ = 8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ?
- sizeof(cl_long16) : MEMOBJ_BASE_ADDR_ALIGN);
- info_.minDataTypeAlignSize_ = sizeof(cl_long16);
-
- info_.maxConstantArgs_ = 8;
- info_.maxConstantBufferSize_ = 64 * 1024;
- info_.localMemType_ = CL_LOCAL;
- info_.errorCorrectionSupport_ = false;
- info_.profilingTimerResolution_ = 1;
- info_.littleEndian_ = true;
- info_.compilerAvailable_ = true;
- info_.executionCapabilities_ = CL_EXEC_KERNEL;
- info_.queueProperties_ = CL_QUEUE_PROFILING_ENABLE;
- info_.platform_ = AMD_PLATFORM;
- info_.profile_ = "FULL_PROFILE";
- strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
-
- info_.addressBits_ = LP64_SWITCH(32, 64);
- info_.maxSamplers_ = 16;
- info_.maxReadImageArgs_ = 128;
- info_.maxWriteImageArgs_ = 8;
- info_.maxReadWriteImageArgs_ = 64;
- info_.image2DMaxWidth_ = 16 * 1024;
- info_.image2DMaxHeight_ = 16 * 1024;
- info_.image3DMaxWidth_ = 2 * 1024;
- info_.image3DMaxHeight_ = 2 * 1024;
- info_.image3DMaxDepth_ = 2 * 1024;
- info_.imageMaxArraySize_ = 2 * 1024;
- info_.imageMaxBufferSize_ = 64 * 1024;
- info_.imagePitchAlignment_ = 256;
- info_.imageBaseAddressAlignment_ = 256;
- info_.imageMaxArraySize_ = 2048;
- info_.imageMaxBufferSize_ = 65536;
- info_.bufferFromImageSupport_ = CL_TRUE;
- info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
- std::string driverVersion = AMD_BUILD_STRING;
- driverVersion.append(" (HSA)");
- strcpy(info_.driverVersion_, driverVersion.c_str());
- info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
-
- info_.builtInKernels_ = "";
- info_.linkerAvailable_ = true;
- info_.preferredInteropUserSync_ = true;
- info_.printfBufferSize_ = 1000 * 1024;
- info_.vendorId_ = 0x1002; // from gpudevice
-
- info_.maxGlobalVariableSize_ = static_cast(info_.maxMemAllocSize_);
- info_.globalVariablePreferredTotalSize_ =
- static_cast(info_.globalMemSize_);
- return true;
-}
-
-device::VirtualDevice*
-Device::createVirtualDevice(amd::CommandQueue* queue)
-{
- bool interopQueue = (queue != NULL) &&
- (0 != (queue->context().info().flags_ &
- (amd::Context::GLDeviceKhr |
- amd::Context::D3D10DeviceKhr |
- amd::Context::D3D11DeviceKhr)));
-
- // Initialization of heap and other resources occur during the command
- // queue creation time.
- HsaQueueType type = kHsaQueueTypeCompute;
- if (interopQueue) {
- type = kHsaQueueTypeInterop;
- }
-
- VirtualGPU *virtualDevice = new VirtualGPU(*this);
-
- if (!virtualDevice->create(type)) {
- delete virtualDevice;
- virtualDevice = NULL;
- }
-
- return virtualDevice;
-}
-
-bool
-Device::globalFreeMemory(size_t *freeMemory) const
-{
- return false;
-}
-
-bool
-Device::bindExternalDevice(
- intptr_t type,
- void* gfxDevice,
- void* gfxContext,
- bool validateOnly)
-{
- switch (type) {
-#ifdef _WIN32
- case CL_CONTEXT_D3D10_DEVICE_KHR:
- if (kHsaStatusSuccess != hsacoreapi->HsaBeginD3D10Interop(
- _bkendDevice, reinterpret_cast(gfxDevice))) {
- LogError("Failed HsaBeginD3D10Interop()");
- return false;
- }
- break;
- case CL_CONTEXT_D3D11_DEVICE_KHR:
- if (kHsaStatusSuccess != hsacoreapi->HsaBeginD3D11Interop(
- _bkendDevice, reinterpret_cast(gfxDevice))) {
- LogError("Failed HsaBeginD3D11Interop()");
- return false;
- }
- break;
-#endif // _WIN32
- case CL_GL_CONTEXT_KHR:
- if (kHsaStatusSuccess != hsacoreapi->HsaBeginGLInterop(
- _bkendDevice, reinterpret_cast(gfxContext))) {
- LogError("Failed HsaBeginGLInterop()");
- return false;
- }
- break;
- default:
- LogError("Unknown external device!");
- return false;
- }
-
- if (validateOnly) {
- return unbindExternalDevice(type, gfxDevice, gfxContext, validateOnly);
- }
- return true;
-}
-
-bool
-Device::unbindExternalDevice(
- intptr_t type,
- void* gfxDevice,
- void* gfxContext,
- bool validateOnly)
-{
- switch (type) {
-#ifdef _WIN32
- case CL_CONTEXT_D3D10_DEVICE_KHR:
- if (kHsaStatusSuccess != hsacoreapi->HsaEndD3D10Interop(
- _bkendDevice, reinterpret_cast(gfxDevice))) {
- LogError("Failed HsaEndD3D10Interop()");
- return false;
- }
- break;
- case CL_CONTEXT_D3D11_DEVICE_KHR:
- if (kHsaStatusSuccess != hsacoreapi->HsaEndD3D11Interop(
- _bkendDevice, reinterpret_cast(gfxDevice))) {
- LogError("Failed HsaEndD3D11Interop()");
- return false;
- }
- break;
-#endif // _WIN32
- case CL_GL_CONTEXT_KHR:
- if (kHsaStatusSuccess != hsacoreapi->HsaEndGLInterop(
- _bkendDevice, reinterpret_cast(gfxContext))) {
- LogError("Failed HsaEndGLInterop()");
- return false;
- }
- break;
- default:
- LogError("Unknown external device!");
- return false;
- }
-
- return true;
-}
-
-device::Memory*
-Device::createMemory(amd::Memory &owner) const
-{
- oclhsa::Memory* memory = NULL;
-
- if (owner.asBuffer()) {
- memory = new oclhsa::Buffer(*this, owner);
- }
- else if (owner.asImage()) {
- memory = new oclhsa::Image(*this, owner);
- }
- else {
- LogError("Unknown memory type");
- }
-
- if (memory == NULL) {
- return NULL;
- }
-
- bool result = false;
- if (owner.isInterop() && (owner.parent() == NULL)) {
- result = memory->createInterop();
- }
- else {
- result = memory->create();
- }
-
- if (!result) {
- delete memory;
- return NULL;
- }
-
- if (!memory->isHostMemDirectAccess() && owner.asImage() &&
- owner.parent() == NULL &&
- (owner.getMemFlags() &
- (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR))) {
- // To avoid recurssive call to Device::createMemory, we perform
- // data transfer to the view of the image.
- amd::Image *imageView =
- owner.asImage()->createView(
- owner.getContext(), owner.asImage()->getImageFormat(), xferQueue());
-
- if (imageView == NULL) {
- LogError("[OCL] Fail to allocate view of image object");
- return NULL;
- }
-
- Image* devImageView =
- new oclhsa::Image(static_cast(*this), *imageView);
- if (devImageView == NULL) {
- LogError("[OCL] Fail to allocate device mem object for the view");
- imageView->release();
- return NULL;
- }
-
- if (devImageView != NULL &&
- !devImageView->createView(static_cast(*memory))) {
- LogError("[OCL] Fail to create device mem object for the view");
- delete devImageView;
- imageView->release();
- return NULL;
- }
-
- imageView->replaceDeviceMemory(this, devImageView);
-
- result = xferMgr().writeImage(
- owner.getHostMem(),
- *devImageView,
- amd::Coord3D(0),
- imageView->getRegion(),
- imageView->getRowPitch(),
- imageView->getSlicePitch(),
- true);
-
- imageView->release();
- }
-
- if (!result) {
- delete memory;
- return NULL;
- }
-
- return memory;
-}
-
-void*
-Device::hostAlloc(size_t size, size_t alignment, bool atomics) const
-{
- void* ret;
- alignment = std::max(alignment, static_cast(info_.memBaseAddrAlign_));
- assert(amd::isMultipleOf(alignment, info_.memBaseAddrAlign_));
- HsaAmdSystemMemoryType type = amd::Is64Bits() && atomics
- ? kHsaAmdSystemMemoryTypeCoherent : kHsaAmdSystemMemoryTypeDefault;
- hsacoreapi->HsaAmdAllocateSystemMemory(size, alignment, type, &ret);
- return ret;
-}
-
-void
-Device::hostFree(void* ptr, size_t size) const
-{
- hsacoreapi->HsaAmdFreeSystemMemory(ptr);
-}
-
-void*
-Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const
-{
- bool atomics = (flags & CL_MEM_SVM_ATOMICS) != 0;
- return hostAlloc(size, alignment, atomics);
-}
-
-void
-Device::svmFree(void* ptr) const
-{
- hostFree(ptr);
-}
-
-VirtualGPU*
-Device::xferQueue() const
-{
- if (!xferQueue_) {
- // Create virtual device for internal memory transfer
- Device* thisDevice = const_cast(this);
- thisDevice->xferQueue_ = reinterpret_cast(
- thisDevice->createVirtualDevice());
- if (!xferQueue_) {
- LogError("Couldn't create the device transfer manager!");
- }
- }
- return xferQueue_;
-}
-
-}
-#endif // WITHOUT_FSA_BACKEND
diff --git a/rocclr/runtime/device/hsa/hsadevice.hpp b/rocclr/runtime/device/hsa/hsadevice.hpp
deleted file mode 100644
index 30cfc9fcf5..0000000000
--- a/rocclr/runtime/device/hsa/hsadevice.hpp
+++ /dev/null
@@ -1,334 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef _OPENCL_RUNTIME_DEVICE_HSA_HSADEVICE_HPP_
-#define _OPENCL_RUNTIME_DEVICE_HSA_HSADEVICE_HPP_
-
-#ifndef WITHOUT_FSA_BACKEND
-
-#include "top.hpp"
-#include "device/device.hpp"
-#include "platform/command.hpp"
-#include "platform/program.hpp"
-#include "platform/perfctr.hpp"
-#include "platform/memory.hpp"
-#include "utils/concurrent.hpp"
-#include "thread/thread.hpp"
-#include "thread/monitor.hpp"
-#include "utils/versions.hpp"
-#include "aclTypes.h"
-
-#include "device/hsa/hsasettings.hpp"
-#include "device/hsa/hsavirtual.hpp"
-#include "device/hsa/hsadefs.hpp"
-
-#include "newcore.h"
-
-#include
-
-// extern hsa::Runtime* g_hsaruntime;
-
-/*! \addtogroup HSA
- * @{
- */
-
-//! HSA Device Implementation
-namespace oclhsa {
-
-/**
- * @brief List of environment variables that could be used to
- * configure the behavior of Hsa Runtime
- */
-#define ENVVAR_HSA_POLL_KERNEL_COMPLETION "HSA_POLL_COMPLETION"
-
-//! Forward declarations
-class Command;
-class Device;
-class GpuCommand;
-class Heap;
-class HeapBlock;
-class Program;
-class Kernel;
-class Memory;
-class Resource;
-class VirtualDevice;
-class PrintfDbg;
-
-//A NULL Device type used only for offline compilation
-// Only functions that are used for compilation will be in this device
-class NullDevice : public amd::Device {
-public:
- //! constructor
- NullDevice(){};
-
- //!create the device
- bool create(const AMDDeviceInfo& deviceInfo);
-
- //! Initialise all the offline devices that can be used for compilation
- static bool init();
- //! Teardown for offline devices
- static void tearDown();
-
- //! Destructor for the Null device
- virtual ~NullDevice();
-
- aclCompiler *compiler() const { return compilerHandle_; }
-
- //! Construct an HSAIL program object from the ELF assuming it is valid
- virtual device::Program *createProgram(bool hsail = false);
-
- const AMDDeviceInfo& deviceInfo() const {
- return deviceInfo_;
- }
- //! Gets the backend device for the NULL device type
- virtual const HsaDevice* getBackendDevice() const {
- ShouldNotReachHere();
- return NULL;
- }
-
- //List of dummy functions which are disabled for NullDevice
-
- //! Create sub-devices according to the given partition scheme.
- virtual cl_int createSubDevices(
- device::CreateSubDevicesInfo& create_info,
- cl_uint num_entries,
- cl_device_id* devices,
- cl_uint* num_devices) {
- ShouldNotReachHere();
- return CL_INVALID_VALUE; };
-
- //! Create a new virtual device environment.
- virtual device::VirtualDevice* createVirtualDevice(
- amd::CommandQueue* queue = NULL) { return NULL; }
-
- virtual bool registerSvmMemory(void* ptr, size_t size) const {
- ShouldNotReachHere();
- return false;
- }
-
- virtual void deregisterSvmMemory(void* ptr) const {
- ShouldNotReachHere();
- }
-
- //! Just returns NULL for the dummy device
- virtual device::Memory* createMemory(amd::Memory& owner) const {
- ShouldNotReachHere();
- return NULL; }
-
- //! Sampler object allocation
- virtual bool createSampler(
- const amd::Sampler& owner, //!< abstraction layer sampler object
- device::Sampler** sampler //!< device sampler object
- ) const
- {
- ShouldNotReachHere();
- return true;
- }
-
- //! Just returns NULL for the dummy device
- virtual device::Memory* createView(
- amd::Memory& owner, //!< Owner memory object
- const device::Memory& parent //!< Parent device memory object for the view
- ) const {
- ShouldNotReachHere();
- return NULL;
- }
-
- //! Just returns NULL for the dummy device
- virtual void* svmAlloc(
- amd::Context& context, //!< The context used to create a buffer
- size_t size, //!< size of svm spaces
- size_t alignment, //!< alignment requirement of svm spaces
- cl_svm_mem_flags flags, //!< flags of creation svm spaces
- void* svmPtr //!< existing svm pointer for mGPU case
- ) const {
- ShouldNotReachHere();
- return NULL;
- }
-
- //! Just returns NULL for the dummy device
- virtual void svmFree(
- void* ptr //!< svm pointer needed to be freed
- ) const {
- ShouldNotReachHere();
- return;
- }
-
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory& owner) const {
- ShouldNotReachHere();
- return false;
- }
-
- //! Acquire external graphics API object in the host thread
- //! Needed for OpenGL objects on CPU device
-
- virtual bool bindExternalDevice(
- intptr_t type, void* pDevice, void* pContext, bool validateOnly) {
- ShouldNotReachHere();
- return false;
- }
-
- virtual bool unbindExternalDevice(
- intptr_t type, void* pDevice, void* pContext, bool validateOnly) {
- ShouldNotReachHere();
- return false;
- }
-
- //! Releases non-blocking map target memory
- virtual void freeMapTarget(amd::Memory& mem, void* target) { ShouldNotReachHere();}
-
- //! Empty implementation on Null device
- virtual bool globalFreeMemory(size_t* freeMemory) const {
- ShouldNotReachHere();
- return false;
- }
-
-protected:
- //! Initialize compiler instance and handle
- static bool initCompiler(bool isOffline);
- //! destroy compiler instance and handle
- static bool destroyCompiler();
- //! Handle to the the compiler
- static aclCompiler* compilerHandle_;
- //! Device Id for an HsaDevice
- AMDDeviceInfo deviceInfo_;
-private:
- static const bool offlineDevice_;
-};
-
-//! A HSA device ordinal (physical HSA device)
-class Device : public NullDevice {
-public:
- //! Initialise the whole HSA device subsystem (CAL init, device enumeration, etc).
- static bool init();
- static void tearDown();
-
- static bool loadHsaModules();
-
- bool create();
-
- //! Construct a new physical HSA device
- Device(const HsaDevice *bkendDevice);
- virtual const HsaDevice *getBackendDevice() const
- {
- return (_bkendDevice);
- }
-
- //! Destructor for the physical HSA device
- virtual ~Device();
-
- bool mapHSADeviceToOpenCLDevice(const HsaDevice *hsadevice);
-
- // Temporary, delete it later when HSA Runtime and KFD is fully fucntional.
- void fake_device();
-
- ///////////////////////////////////////////////////////////////////////////////
- // TODO: Below are all mocked up virtual functions from amd::Device, they may
- // need real implementation.
- ///////////////////////////////////////////////////////////////////////////////
-
-// #ifdef cl_ext_device_fission
- //! Create sub-devices according to the given partition scheme.
- virtual cl_int createSubDevices(
- device::CreateSubDevicesInfo &create_inf,
- cl_uint num_entries,
- cl_device_id *devices,
- cl_uint *num_devices)
- { return CL_INVALID_VALUE; }
-// #endif // cl_ext_device_fission
-
- // bool Device::create(CALuint ordinal);
-
- //! Instantiate a new virtual device
- virtual device::VirtualDevice *createVirtualDevice(
- amd::CommandQueue* queue = NULL);
-
- //! Construct an HSAIL program object from the ELF assuming it is valid
- virtual device::Program *createProgram(bool hsail = false);
-
- virtual device::Memory *createMemory(amd::Memory &owner) const;
-
- //! Sampler object allocation
- virtual bool createSampler(
- const amd::Sampler& owner, //!< abstraction layer sampler object
- device::Sampler** sampler //!< device sampler object
- ) const
- {
- //! \todo HSA team has to implement sampler allocation
- *sampler = NULL;
- return true;
- }
-
-
- //! Just returns NULL for the dummy device
- virtual device::Memory *createView(
- amd::Memory &owner, //!< Owner memory object
- const device::Memory &parent //!< Parent device memory object for the view
- ) const { return NULL; }
-
- //! Reallocates the provided buffer object
- virtual bool reallocMemory(amd::Memory &owner) const {return true; }
-
- //! Acquire external graphics API object in the host thread
- //! Needed for OpenGL objects on CPU device
- virtual bool bindExternalDevice(
- intptr_t type, void *pDevice, void *pContext, bool validateOnly);
-
- /**
- * @brief Removes the external device as an available device.
- *
- * @note: The current implementation is to avoid build break
- * and does not represent actual / correct implementation. This
- * needs to be done.
- */
- bool unbindExternalDevice(
- intptr_t type, //!< Enum val. for ext.API type: GL, D3D10, etc.
- void *gfxDevice, //!< D3D device do D3D, HDC/Display handle of X Window for GL
- void *gfxContext, //!< HGLRC/GLXContext handle
- bool validateOnly //!< Only validate if the device can inter-operate with
- //!< pDevice/pContext, do not bind.
- );
-
- //! Gets free memory on a GPU device
- virtual bool globalFreeMemory(size_t *freeMemory) const;
-
- virtual void* hostAlloc(size_t size, size_t alignment, bool atomics = false) const;
-
- virtual void hostFree(void* ptr, size_t size = 0) const;
-
- virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags = CL_MEM_READ_WRITE, void* svmPtr = NULL) const;
-
- virtual void svmFree(void* ptr) const;
-
- //! Returns a OCLHSA memory object from AMD memory object
- oclhsa::Memory* getOclHsaMemory(
- amd::Memory* mem //!< Pointer to AMD memory object
- ) const;
-
- const Settings &settings() const { return reinterpret_cast(*settings_); }
-
- //! Returns transfer engine object
- const device::BlitManager& xferMgr() const { return xferQueue()->blitMgr();}
-
-private:
- bool populateOCLDeviceConstants();
-
- cl_device_svm_capabilities getSvmCapabilities(const HsaDevice* device);
-
- VirtualGPU* xferQueue() const;
-
- static bool isHsaInitialized_;
- const HsaDevice *_bkendDevice;
- static const bool offlineDevice_;
- amd::Context *context_; //!< A dummy context for internal data transfer
- VirtualGPU *xferQueue_; //!< Transfer queue, created on demand
-}; // class oclhsa::Device
-} // namespace oclhsa
-
-/**
- * @}
- */
-#endif /*WITHOUT_FSA_BACKEND*/
-#endif /*HSA_HPP_*/
diff --git a/rocclr/runtime/device/hsa/hsakernel.cpp b/rocclr/runtime/device/hsa/hsakernel.cpp
deleted file mode 100644
index 844d28f646..0000000000
--- a/rocclr/runtime/device/hsa/hsakernel.cpp
+++ /dev/null
@@ -1,573 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#include "device/hsa/hsakernel.hpp"
-
-#include "device/hsa/oclhsa_common.hpp"
-
-#include
-
-#ifndef WITHOUT_FSA_BACKEND
-
-namespace oclhsa {
-
-inline static HSAIL_ARG_TYPE
-GetHSAILArgType(const aclArgData* argInfo)
-{
- switch (argInfo->type) {
- case ARG_TYPE_POINTER:
- return HSAIL_ARGTYPE_POINTER;
- case ARG_TYPE_VALUE:
- return HSAIL_ARGTYPE_VALUE;
- case ARG_TYPE_IMAGE:
- return HSAIL_ARGTYPE_IMAGE;
- case ARG_TYPE_SAMPLER:
- return HSAIL_ARGTYPE_SAMPLER;
- case ARG_TYPE_ERROR:
- default:
- return HSAIL_ARGTYPE_ERROR;
- }
-}
-
-inline static size_t
-GetHSAILArgAlignment(const aclArgData* argInfo)
-{
- switch (argInfo->type) {
- case ARG_TYPE_POINTER:
- return argInfo->arg.pointer.align;
- default:
- return 1;
- }
-}
-
-inline static HSAIL_ADDRESS_QUALIFIER
-GetHSAILAddrQual(const aclArgData* argInfo)
-{
- if (argInfo->type == ARG_TYPE_POINTER) {
- switch (argInfo->arg.pointer.memory) {
- case PTR_MT_CONSTANT_EMU:
- case PTR_MT_CONSTANT:
- case PTR_MT_UAV:
- case PTR_MT_GLOBAL:
- return HSAIL_ADDRESS_GLOBAL;
- case PTR_MT_LDS_EMU:
- case PTR_MT_LDS:
- return HSAIL_ADDRESS_LOCAL;
- case PTR_MT_ERROR:
- default:
- LogError("Unsupported address type");
- return HSAIL_ADDRESS_ERROR;
- }
- }
- else if ((argInfo->type == ARG_TYPE_IMAGE) ||
- (argInfo->type == ARG_TYPE_SAMPLER)) {
- return HSAIL_ADDRESS_GLOBAL;
- }
- return HSAIL_ADDRESS_ERROR;
-}
-
-/* f16 returns f32 - workaround due to comp lib */
-inline static HSAIL_DATA_TYPE
-GetHSAILDataType(const aclArgData* argInfo)
-{
- aclArgDataType dataType;
-
- if (argInfo->type == ARG_TYPE_POINTER) {
- dataType = argInfo->arg.pointer.data;
- }
- else if (argInfo->type == ARG_TYPE_VALUE) {
- dataType = argInfo->arg.value.data;
- }
- else {
- return HSAIL_DATATYPE_ERROR;
- }
- switch (dataType) {
- case DATATYPE_i1:
- return HSAIL_DATATYPE_B1;
- case DATATYPE_i8:
- return HSAIL_DATATYPE_S8;
- case DATATYPE_i16:
- return HSAIL_DATATYPE_S16;
- case DATATYPE_i32:
- return HSAIL_DATATYPE_S32;
- case DATATYPE_i64:
- return HSAIL_DATATYPE_S64;
- case DATATYPE_u8:
- return HSAIL_DATATYPE_U8;
- case DATATYPE_u16:
- return HSAIL_DATATYPE_U16;
- case DATATYPE_u32:
- return HSAIL_DATATYPE_U32;
- case DATATYPE_u64:
- return HSAIL_DATATYPE_U64;
- case DATATYPE_f16:
- return HSAIL_DATATYPE_F32;
- case DATATYPE_f32:
- return HSAIL_DATATYPE_F32;
- case DATATYPE_f64:
- return HSAIL_DATATYPE_F64;
- case DATATYPE_struct:
- return HSAIL_DATATYPE_STRUCT;
- case DATATYPE_opaque:
- return HSAIL_DATATYPE_OPAQUE;
- case DATATYPE_ERROR:
- default:
- return HSAIL_DATATYPE_ERROR;
- }
-}
-
-// returns size in number of bytes
-inline static int
-GetHSAILArgSize(const aclArgData *argInfo)
-{
- switch (argInfo->type) {
- case ARG_TYPE_VALUE:
- switch (GetHSAILDataType(argInfo)) {
- case HSAIL_DATATYPE_B1:
- return 1;
- case HSAIL_DATATYPE_B8:
- case HSAIL_DATATYPE_S8:
- case HSAIL_DATATYPE_U8:
- return 1;
- case HSAIL_DATATYPE_B16:
- case HSAIL_DATATYPE_U16:
- case HSAIL_DATATYPE_S16:
- case HSAIL_DATATYPE_F16:
- return 2;
- case HSAIL_DATATYPE_B32:
- case HSAIL_DATATYPE_U32:
- case HSAIL_DATATYPE_S32:
- case HSAIL_DATATYPE_F32:
- return 4;
- case HSAIL_DATATYPE_B64:
- case HSAIL_DATATYPE_U64:
- case HSAIL_DATATYPE_S64:
- case HSAIL_DATATYPE_F64:
- return 8;
- case HSAIL_DATATYPE_STRUCT:
- return argInfo->arg.value.numElements;
- default:
- return -1;
- }
- case ARG_TYPE_POINTER:
- case ARG_TYPE_IMAGE:
- case ARG_TYPE_SAMPLER:
- return sizeof(void*);
- default:
- return -1;
- }
-}
-
-inline static clk_value_type_t
-GetOclType(const aclArgData* argInfo)
-{
- static const clk_value_type_t ClkValueMapType[6][6] = {
- { T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
- { T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 },
- { T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 },
- { T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 },
- { T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 },
- { T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
- };
-
- uint sizeType;
- if ((argInfo->type == ARG_TYPE_POINTER) || (argInfo->type == ARG_TYPE_IMAGE)) {
- return T_POINTER;
- }
- else if (argInfo->type == ARG_TYPE_VALUE) {
- switch (argInfo->arg.value.data) {
- case DATATYPE_i8:
- case DATATYPE_u8:
- sizeType = 0;
- break;
- case DATATYPE_i16:
- case DATATYPE_u16:
- sizeType = 1;
- break;
- case DATATYPE_i32:
- case DATATYPE_u32:
- sizeType = 2;
- break;
- case DATATYPE_i64:
- case DATATYPE_u64:
- sizeType = 3;
- break;
- case DATATYPE_f16:
- case DATATYPE_f32:
- sizeType = 4;
- break;
- case DATATYPE_f64:
- sizeType = 5;
- break;
- default:
- return T_VOID;
- }
- switch (argInfo->arg.value.numElements) {
- case 1: return ClkValueMapType[sizeType][0];
- case 2: return ClkValueMapType[sizeType][1];
- case 3: return ClkValueMapType[sizeType][2];
- case 4: return ClkValueMapType[sizeType][3];
- case 8: return ClkValueMapType[sizeType][4];
- case 16: return ClkValueMapType[sizeType][5];
- default: return T_VOID;
- }
- }
- else if (argInfo->type == ARG_TYPE_SAMPLER) {
- return T_SAMPLER;
- }
- else {
- return T_VOID;
- }
-}
-
-inline static cl_kernel_arg_address_qualifier
-GetOclAddrQual(const aclArgData* argInfo)
-{
- if (argInfo->type == ARG_TYPE_POINTER) {
- switch (argInfo->arg.pointer.memory) {
- case PTR_MT_UAV:
- case PTR_MT_GLOBAL:
- return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- case PTR_MT_CONSTANT:
- case PTR_MT_UAV_CONSTANT:
- case PTR_MT_CONSTANT_EMU:
- return CL_KERNEL_ARG_ADDRESS_CONSTANT;
- case PTR_MT_LDS_EMU:
- case PTR_MT_LDS:
- return CL_KERNEL_ARG_ADDRESS_LOCAL;
- default:
- return CL_KERNEL_ARG_ADDRESS_PRIVATE;
- }
- }
- else if (argInfo->type == ARG_TYPE_IMAGE) {
- return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- }
- //default for all other cases
- return CL_KERNEL_ARG_ADDRESS_PRIVATE;
-}
-
-inline static cl_kernel_arg_access_qualifier
-GetOclAccessQual(const aclArgData* argInfo)
-{
- if (argInfo->type == ARG_TYPE_IMAGE) {
- switch (argInfo->arg.image.type) {
- case ACCESS_TYPE_RO:
- return CL_KERNEL_ARG_ACCESS_READ_ONLY;
- case ACCESS_TYPE_WO:
- return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
- case ACCESS_TYPE_RW:
- return CL_KERNEL_ARG_ACCESS_READ_WRITE;
- default:
- return CL_KERNEL_ARG_ACCESS_NONE;
- }
- }
- return CL_KERNEL_ARG_ACCESS_NONE;
-}
-
-inline static cl_kernel_arg_type_qualifier
-GetOclTypeQual(const aclArgData* argInfo)
-{
- cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
- if (argInfo->type == ARG_TYPE_POINTER) {
- if (argInfo->arg.pointer.isVolatile) {
- rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
- }
- if (argInfo->arg.pointer.isRestrict) {
- rv |= CL_KERNEL_ARG_TYPE_RESTRICT;
- }
- if (argInfo->isConst) {
- rv |= CL_KERNEL_ARG_TYPE_CONST;
- }
- switch (argInfo->arg.pointer.memory) {
- case PTR_MT_CONSTANT:
- case PTR_MT_UAV_CONSTANT:
- case PTR_MT_CONSTANT_EMU:
- rv |= CL_KERNEL_ARG_TYPE_CONST;
- break;
- default:
- break;
- }
- }
- return rv;
-}
-
-static int
-GetOclSize(const aclArgData* argInfo)
-{
- switch (argInfo->type) {
- case ARG_TYPE_POINTER: return sizeof(void *);
- case ARG_TYPE_VALUE:
- switch (argInfo->arg.value.data) {
- case DATATYPE_i8:
- case DATATYPE_u8:
- case DATATYPE_struct:
- return 1 * argInfo->arg.value.numElements;
- case DATATYPE_u16:
- case DATATYPE_i16:
- case DATATYPE_f16:
- return 2 * argInfo->arg.value.numElements;
- case DATATYPE_u32:
- case DATATYPE_i32:
- case DATATYPE_f32:
- return 4 * argInfo->arg.value.numElements;
- case DATATYPE_i64:
- case DATATYPE_u64:
- case DATATYPE_f64:
- return 8 * argInfo->arg.value.numElements;
- case DATATYPE_ERROR:
- default: return -1;
- }
- case ARG_TYPE_IMAGE: return sizeof(cl_mem);
- case ARG_TYPE_SAMPLER: return sizeof(cl_sampler);
- default: return -1;
- }
-}
-
-KernelArg::KernelArg(aclArgData *argInfo) {
- argInfo_ = argInfo;
- name_ = argInfo_->argStr;
- typeName_ = argInfo->typeStr;
-}
-
-int KernelArg::size() {
- switch (argInfo_->type) {
- case ARG_TYPE_POINTER: {
- return sizeof(void *);
- }
- case ARG_TYPE_VALUE: {
- switch (argInfo_->arg.value.data) {
- case DATATYPE_ERROR: {
- return -1;
- }
- case DATATYPE_i8:
- case DATATYPE_u8:
- case DATATYPE_struct: {
- return 1 * argInfo_->arg.value.numElements;
- }
- case DATATYPE_u16:
- case DATATYPE_i16:
- case DATATYPE_f16: {
- return 2 * argInfo_->arg.value.numElements;
- }
- case DATATYPE_u32:
- case DATATYPE_i32:
- case DATATYPE_f32: {
- return 4 * argInfo_->arg.value.numElements;
- }
- case DATATYPE_i64:
- case DATATYPE_u64:
- case DATATYPE_f64: {
- return 8 * argInfo_->arg.value.numElements;
- }
- default:
- return -1;
- }
- }
- case ARG_TYPE_IMAGE: {
- return sizeof(cl_mem);
- }
- case ARG_TYPE_SAMPLER: {
- return sizeof(cl_sampler);
- }
- default:
- return -1;
- }
-}
-
-std::string& KernelArg::name() {
- return name_;
-}
-
-std::string& KernelArg::typeName()
-{
- return typeName_;
-}
-
-void
-Kernel::initArgList(const aclArgData* aclArg)
-{
- // Initialize the hsail argument list too
- initHsailArgs(aclArg);
-
- // Iterate through the arguments and insert into parameterList
- device::Kernel::parameters_t params;
- amd::KernelParameterDescriptor desc;
- size_t offset = 0;
-
- // Reserved arguments for HSAIL launch
- aclArg += ExtraArguments;
- for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
- desc.name_ = hsailArgList_[i]->name_.c_str();
- desc.type_ = GetOclType(aclArg);
- desc.addressQualifier_ = GetOclAddrQual(aclArg);
- desc.accessQualifier_ = GetOclAccessQual(aclArg);
- desc.typeQualifier_ = GetOclTypeQual(aclArg);
- desc.typeName_ = hsailArgList_[i]->typeName_.c_str();
-
- // Make a check if it is local or global
- if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
- desc.size_ = 0;
- }
- else {
- desc.size_ = GetOclSize(aclArg);
- }
-
- // Make offset alignment to match CPU metadata, since
- // in multidevice config abstraction layer has a single signature
- // and CPU sends the paramaters as they are allocated in memory
- size_t size = desc.size_;
- if (size == 0) {
- // Local memory for CPU
- size = sizeof(cl_mem);
- }
- offset = amd::alignUp(offset, std::min(size, size_t(16)));
- desc.offset_ = offset;
- offset += amd::alignUp(size, sizeof(uint32_t));
- params.push_back(desc);
- }
- createSignature(params);
-}
-
-void
-Kernel::initHsailArgs(const aclArgData* aclArg)
-{
- int offset = 0;
-
- // Reserved arguments for HSAIL launch
- aclArg += ExtraArguments;
-
- // Iterate through the each kernel argument
- for (; aclArg->struct_size != 0; aclArg++) {
- HsailKernelArg* arg = new HsailKernelArg;
- // Initialize HSAIL kernel argument
- arg->name_ = aclArg->argStr;
- arg->typeName_ = aclArg->typeStr;
- arg->size_ = GetHSAILArgSize(aclArg);
- arg->offset_ = offset;
- arg->type_ = GetHSAILArgType(aclArg);
- arg->addrQual_ = GetHSAILAddrQual(aclArg);
- arg->dataType_ = GetHSAILDataType(aclArg);
- // If vector of args we add additional arguments to flatten it out
- arg->numElem_ = ((aclArg->type == ARG_TYPE_VALUE) &&
- (aclArg->arg.value.data != DATATYPE_struct)) ?
- aclArg->arg.value.numElements : 1;
- arg->alignment_ = GetHSAILArgAlignment(aclArg);
- offset += GetHSAILArgSize(aclArg);
- hsailArgList_.push_back(arg);
- }
-}
-
-Kernel::Kernel(std::string name,
- FSAILProgram* prog,
- HsaBrig* brig,
- std::string compileOptions):
- device::Kernel(name),
- program_(prog),
- compileOptions_(compileOptions),
- brig_(brig),
- kernelCode_(NULL),
- debugInfo_(NULL){
-}
-
-bool Kernel::init(){
- acl_error errorCode;
- //compile kernel down to ISA
- const HsaDevice *hsaDevice = program_->hsaDevice();
- std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
- HsaStatus status = hsacoreapi->HsaFinalizeBrig(
- hsaDevice, brig_,
- openClKernelName.c_str(),
- compileOptions_.c_str(),
- &kernelCode_,
- &debugInfo_);
- if (status != kHsaStatusSuccess) {
- return false;
- }
- // Pull out metadata from the ELF
- size_t sizeOfArgList;
- aclCompiler* compileHandle = program_->dev().compiler();
- errorCode = g_complibApi._aclQueryInfo(compileHandle,
- program_->binaryElf(),
- RT_ARGUMENT_ARRAY,
- openClKernelName.c_str(),
- NULL,
- &sizeOfArgList);
- if (errorCode != ACL_SUCCESS) {
- return false;
- }
- char *argList = (char *)malloc(sizeOfArgList);
- errorCode = g_complibApi._aclQueryInfo(compileHandle,
- program_->binaryElf(),
- RT_ARGUMENT_ARRAY,
- openClKernelName.c_str(),
- argList,
- &sizeOfArgList);
- if (errorCode != ACL_SUCCESS) {
- return false;
- }
- //Set the argList
- initArgList((const aclArgData *) argList);
-
- //Pull out amdKernelInfo
- HsaKernelAmdInfo kernelAmdInfo;
- status = servicesapi->HsaGetKernelAmdInfo(kernelCode_, &kernelAmdInfo);
- if (status != kHsaStatusSuccess) {
- return false;
- }
- HsaDeviceAmdInfo devInfo;
- status = servicesapi->HsaGetDeviceAmdInfo(hsaDevice, &devInfo);
- if (status != kHsaStatusSuccess) {
- return false;
- }
- //Set the workgroup information for the kernel
- memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
- workGroupInfo_.availableLDSSize_ = hsaDevice->group_memory_size;
- workGroupInfo_.availableSGPRs_ = devInfo.max_number_of_sgprs;
- workGroupInfo_.availableVGPRs_ = devInfo.max_number_of_vgprs;
- size_t sizeOfWorkGroupSize;
- errorCode = g_complibApi._aclQueryInfo(compileHandle,
- program_->binaryElf(),
- RT_WORK_GROUP_SIZE,
- openClKernelName.c_str(),
- NULL,
- &sizeOfWorkGroupSize);
- if (errorCode != ACL_SUCCESS) {
- return false;
- }
- errorCode = g_complibApi._aclQueryInfo(compileHandle,
- program_->binaryElf(),
- RT_WORK_GROUP_SIZE,
- openClKernelName.c_str(),
- workGroupInfo_.compileSize_,
- &sizeOfWorkGroupSize);
- if (errorCode != ACL_SUCCESS) {
- return false;
- }
- //Setting it the same as used LDS
- workGroupInfo_.localMemSize_ = kernelCode_->workgroup_group_segment_byte_size;
- workGroupInfo_.privateMemSize_ = kernelCode_->workitem_private_segment_byte_size;
- workGroupInfo_.usedLDSSize_ = kernelCode_->workgroup_group_segment_byte_size;
- workGroupInfo_.preferredSizeMultiple_ = hsaDevice->wave_front_size;
- workGroupInfo_.usedSGPRs_ = kernelAmdInfo.wave_front_sgpr_count;
- workGroupInfo_.usedStackSize_ = 0;
- workGroupInfo_.usedVGPRs_ = kernelAmdInfo.work_item_vgpr_count;
- workGroupInfo_.wavefrontPerSIMD_ = hsaDevice->max_waves_per_simd;
- workGroupInfo_.wavefrontSize_ = hsaDevice->wave_front_size;
- //TODO: Need to populate it from the shader object
- workGroupInfo_.size_ = 256;
- return true;
-}
-
-Kernel::~Kernel() {
- while (!hsailArgList_.empty()) {
- HsailKernelArg* kernelArgPointer = hsailArgList_.back();
- delete kernelArgPointer;
- hsailArgList_.pop_back();
- }
- hsacoreapi->HsaFreeKernelCode(kernelCode_);
- hsacoreapi->HsaFreeKernelDebug(debugInfo_);
-}
-
-} // namespace oclhsa
-#endif // WITHOUT_FSA_BACKEND
diff --git a/rocclr/runtime/device/hsa/hsakernel.hpp b/rocclr/runtime/device/hsa/hsakernel.hpp
deleted file mode 100644
index e5d3af2477..0000000000
--- a/rocclr/runtime/device/hsa/hsakernel.hpp
+++ /dev/null
@@ -1,161 +0,0 @@
-//
-// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
-//
-#ifndef HSAKERNEL_HPP_
-#define HSAKERNEL_HPP_
-
-#include "acl.h"
-#include "device/hsa/hsaprogram.hpp"
-#include "newcore.h"
-#include "top.hpp"
-
-#ifndef WITHOUT_FSA_BACKEND
-
-namespace oclhsa {
-
-#define MAX_INFO_STRING_LEN 0x40
-enum HSAIL_ADDRESS_QUALIFIER{
-HSAIL_ADDRESS_ERROR=0,
-HSAIL_ADDRESS_GLOBAL,
-HSAIL_ADDRESS_LOCAL,
-HSAIL_MAX_ADDRESS_QUALIFIERS
-} ;
-
-enum HSAIL_ARG_TYPE{
-HSAIL_ARGTYPE_ERROR=0,
-HSAIL_ARGTYPE_POINTER,
-HSAIL_ARGTYPE_VALUE,
-HSAIL_ARGTYPE_IMAGE,
-HSAIL_ARGTYPE_SAMPLER,
-HSAIL_ARGMAX_ARG_TYPES
-};
-
-enum HSAIL_DATA_TYPE{
-HSAIL_DATATYPE_ERROR=0,
-HSAIL_DATATYPE_B1,
-HSAIL_DATATYPE_B8,
-HSAIL_DATATYPE_B16,
-HSAIL_DATATYPE_B32,
-HSAIL_DATATYPE_B64,
-HSAIL_DATATYPE_S8,
-HSAIL_DATATYPE_S16,
-HSAIL_DATATYPE_S32,
-HSAIL_DATATYPE_S64,
-HSAIL_DATATYPE_U8,
-HSAIL_DATATYPE_U16,
-HSAIL_DATATYPE_U32,
-HSAIL_DATATYPE_U64,
-HSAIL_DATATYPE_F16,
-HSAIL_DATATYPE_F32,
-HSAIL_DATATYPE_F64,
-HSAIL_DATATYPE_STRUCT,
-HSAIL_DATATYPE_OPAQUE,
-HSAIL_DATATYPE_MAX_TYPES
-};
-
-struct HsailKernelArg
-{
- std::string name_; //!< Argument's name
- std::string typeName_; //!< Argument's type name
- uint size_; //!< Size in bytes
- uint offset_; //!< Argument's offset
- uint alignment_; //!< Argument's alignment
- HSAIL_ARG_TYPE type_; //!< Type of the argument
- HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
- HSAIL_DATA_TYPE dataType_; //!< The type of data
- uint numElem_; //!< Number of elements
-};
-
-class KernelArg
-{
-public:
- KernelArg(aclArgData* argInfo);
- //! Return type of the argument
- clk_value_type_t amdoclType();
- //! Global, local etc - returns amdocl types
- clk_address_space_t amdoclAddrQual();
- //! Global,localetc - returns opencl type
- cl_kernel_arg_address_qualifier oclAddrQual();
- //! read , write etc - returns amdocl type
- clk_arg_qualifier_t amdoclAccessQual();
- //! read , write etc - returns opencl type type
- cl_kernel_arg_access_qualifier oclAccessQual();
- //! const,volatile,restrict etc - returns opencl type type
- cl_kernel_arg_type_qualifier oclTypeQual();
-
- //! Name of the argument
- std::string& name();
- //! Name of the argument
- std::string& typeName();
- //! reflection
- std::string reflection(){ return name(); };
- //! Returns the size of the argument
- int size();
- //! returns the offset
- int offset();
-
- void setOffset();
-
-private:
- aclArgData* argInfo_;
- int offset_;
- std::string name_;
- std::string typeName_;
-};
-
-class Kernel : public device::Kernel
-{
-public:
- // Global offsets located in the first 3 elements
- static const uint ExtraArguments = 3;
-
- Kernel(std::string name,
- FSAILProgram* prog,
- HsaBrig* brig,
- std::string compileOptions);
-
- ~Kernel();
-
- //! Initializes the metadata required for this kernel
- bool init();
-
- const FSAILProgram* program() {
- return static_cast(program_);
- }
-
- //! Returns the AqlKernel associated with this Kernel
- const HsaKernelCode* kernelCode() { return
- static_cast(kernelCode_);
- }
-
- //! Returns the BRIG that was used to compile this kernel
- const HsaBrig* brig() {
- return static_cast(brig_);
- }
-
- //!returns a pointer to the hsail argument at the specified index
- HsailKernelArg* hsailArgAt(size_t index) {
- return hsailArgList_[index];
- }
-
-private:
- //! Populates hsailArgList_
- void initArgList(const aclArgData* aclArg);
-
- //! Initializes Hsail Argument metadata and info ;
- void initHsailArgs(const aclArgData* aclArg);
-
- FSAILProgram *program_; //!< The oclhsa::FSAILProgram context
- std::vector hsailArgList_; //!< Vector list of HSAIL Arguments
- std::string compileOptions_; //!< compile used for finalizing this kernel
- HsaBrig* brig_; //!< The brig used to generate ISA for this kernel
- HsaKernelCode* kernelCode_; //!< AQL kernel code for this kernel
- HsaKernelDebug* debugInfo_; //!< Dwarf info for this kernel
-};
-
-} // namespace oclhsa
-
-#endif // WITHOUT_FSA_BACKEND
-
-#endif // HSAKERNEL_HPP_
-
diff --git a/rocclr/runtime/device/hsa/hsamemory.cpp b/rocclr/runtime/device/hsa/hsamemory.cpp
deleted file mode 100644
index 08b0b6d5b0..0000000000
--- a/rocclr/runtime/device/hsa/hsamemory.cpp
+++ /dev/null
@@ -1,938 +0,0 @@
-//
-// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef WITHOUT_FSA_BACKEND
-
-#include "CL/cl_ext.h"
-
-#include "device/device.hpp"
-#include "device/hsa/hsamemory.hpp"
-#include "device/hsa/hsadevice.hpp"
-#include "device/hsa/hsablit.hpp"
-#include "device/hsa/oclhsa_common.hpp"
-#include "thread/monitor.hpp"
-#include "platform/memory.hpp"
-#include "platform/sampler.hpp"
-
-namespace oclhsa {
-
-/////////////////////////////////oclhsa::Memory//////////////////////////////
-Memory::Memory(const oclhsa::Device &dev, amd::Memory &owner)
- : device::Memory(owner),
- dev_(dev),
- deviceMemory_(NULL),
- interopType_(InteropNone)
-{
-}
-
-Memory::~Memory()
-{}
-
-bool
-Memory::allocateMapMemory(size_t allocationSize)
-{
- assert(mapMemory_ == NULL);
-
- void *mapData = NULL;
-
- // Use/reuse system memory from HSA system memory pool as backing
- // storage of the map target.
- if (kHsaStatusSuccess !=
- servicesapi->HsaAllocateSystemMemory(
- owner()->getSize(), 0, kHsaSystemMemoryTypeDefault, &mapData)) {
- LogError("[OCL] Fail to allocate the backing storage for map target");
- return false;
- }
-
- // Create buffer object to contain the map target.
- amd::Memory *mapMemory =
- new(owner()->getContext()) amd::Buffer(
- owner()->getContext(), CL_MEM_USE_HOST_PTR, owner()->getSize());
-
- if ((mapMemory == NULL) || (!mapMemory->create(mapData))) {
- LogError("[OCL] Fail to allocate map target object");
- servicesapi->HsaFreeSystemMemory(mapData);
- if (mapMemory) {
- mapMemory->release();
- }
- return false;
- }
-
- mapMemory_ = mapMemory;
-
- return true;
-}
-
-void
-Memory::freeMapMemory()
-{
- // Return the memory to HSA system memory pool.
- assert(mapMemory_ != NULL);
- servicesapi->HsaFreeSystemMemory(mapMemory_->getHostMem());
-
- // Release the buffer object containing the map data.
- mapMemory_->release();
- mapMemory_ = NULL;
-}
-
-void *
-Memory::allocMapTarget(const amd::Coord3D &origin,
- const amd::Coord3D &region,
- uint mapFlags,
- size_t *rowPitch,
- size_t *slicePitch)
-{
- // Map/Unmap must be serialized.
- amd::ScopedLock lock(owner()->lockMemoryOps());
-
- incIndMapCount();
-
- // If the device backing storage is direct accessible, use it.
- if (isHostMemDirectAccess()) {
- return (static_cast(deviceMemory_) + origin[0]);
- }
-
- // Otherwise, check for host memory.
- void *hostMem = owner()->getHostMem();
- if (hostMem != NULL) {
- return (static_cast(hostMem) + origin[0]);
- }
-
- // Allocate one if needed.
- if (indirectMapCount_ == 1) {
- if (!allocateMapMemory(owner()->getSize())) {
- decIndMapCount();
- return NULL;
- }
- }
- else {
- // Did the map resource allocation fail?
- if (mapMemory_ == NULL) {
- LogError("Could not map target resource");
- return NULL;
- }
- }
-
- return (static_cast(mapMemory_->getHostMem()) + origin[0]);
-}
-
-void
-Memory::decIndMapCount()
-{
- // Map/Unmap must be serialized.
- amd::ScopedLock lock(owner()->lockMemoryOps());
-
- if (indirectMapCount_ == 0) {
- LogError("decIndMapCount() called when indirectMapCount_ already zero");
- return;
- }
-
- // Decrement the counter and release indirect map if it's the last op
- if (--indirectMapCount_ == 0 &&
- mapMemory_ != NULL) {
- freeMapMemory();
- }
-}
-
-void *
-Memory::cpuMap(
- device::VirtualDevice& vDev,
- uint flags,
- uint startLayer,
- uint numLayers,
- size_t* rowPitch,
- size_t* slicePitch
- )
-{
- // Create the map target.
- void * mapTarget =
- allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), 0, rowPitch, slicePitch);
-
- // Sync to map target if no direct access.
- if (!isHostMemDirectAccess()) {
- if (!vDev.blitMgr().readBuffer(
- *this, mapTarget, amd::Coord3D(0), amd::Coord3D(size()), true)) {
- decIndMapCount();
- return NULL;
- }
- }
-
- return mapTarget;
-}
-
-void
-Memory::cpuUnmap(device::VirtualDevice& vDev)
-{
- // Sync to device backing storage if no direct access.
- if (!isHostMemDirectAccess()) {
- if (!vDev.blitMgr().writeBuffer(
- mapMemory_->getHostMem(), *this, amd::Coord3D(0),
- amd::Coord3D(size()), true)) {
- LogError("[OCL] Fail sync the device memory on cpuUnmap");
- }
- }
-
- decIndMapCount();
-}
-
-void Memory::destroyInterop()
-{
- HsaStatus status;
-#ifdef _WIN32
- if (interopType_ == InteropD3D10) {
- HsaStatus status = hsacoreapi->HsaUnmapD3D10Resource(
- dev_.getBackendDevice(), d3d10Resource_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaUnmapD3D10Resource");
- return;
- }
- }
-
- else if (interopType_ == InteropD3D11) {
- HsaStatus status = hsacoreapi->HsaUnmapD3D11Resource(
- dev_.getBackendDevice(), d3d11Resource_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaUnmapD3D11Resource");
- return;
- }
- }
-#endif
-
- if (interopType_ == InteropGL) {
- void * glContext =owner()->getContext().info().hCtx_;
- status = hsacoreapi->HsaReleaseGLResources( dev_.getBackendDevice(),
- glContext,
- &glResource_,
- 1);
- if (kHsaStatusSuccess != status) {
- LogError("[OCL] Fail on HsaReleaseGLResources");
- }
-
- status = hsacoreapi->HsaUnmapGLResource(
- dev_.getBackendDevice(), glContext, &glResource_);
-
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaUnmapGLResource");
- return;
- }
- }
-}
-
-bool
-Memory::isHsaLocalMemory() const {
- if (owner()->isInterop()) {
- return true;
- }
- else {
- if (amd::Is64Bits()) {
- uint64_t addr = reinterpret_cast(deviceMemory_);
-
- // Fast check: in 64 bits, CPU can only access the high area
- // (VA[63:47] == 0x1FFFF) and low area (VA[63:47 == 0).
- // Reference: GFXIP7_ShaderIO_Delt.doc
- addr >>= 47; // discard least significant 47 bits
- return (addr != 0x1FFFF && addr != 0);
- }
- else {
- const HsaMemoryDescriptor &memDesc =
- dev_.getBackendDevice()->memory_descriptors[0];
-
- if (memDesc.heap_type == kHsaHeapTypeFrameBufferPrivate) {
- const uintptr_t addr =
- reinterpret_cast(deviceMemory_);
- const uintptr_t gpuvmBase = memDesc.virtual_base_address;
- const size_t size = memDesc.size_in_bytes;
- return (addr >= gpuvmBase && addr < (gpuvmBase + size));
- }
- }
- }
- return false;
-}
-
-/////////////////////////////////oclhsa::Buffer//////////////////////////////
-
-Buffer::Buffer(const oclhsa::Device &dev, amd::Memory &owner)
- : oclhsa::Memory(dev, owner)
-{}
-
-Buffer::~Buffer()
-{
- destroy();
-}
-
-void
-Buffer::destroy()
-{
- if (owner()->parent() != NULL) {
- return;
- }
-
- if (owner()->isInterop()) {
- destroyInterop();
- return;
- }
-
- if (isHostMemoryRegistered()) {
- hsacoreapi->HsaDeregisterSystemMemory(deviceMemory_);
- }
- else {
- if (!isHostMemDirectAccess()) {
- hsacoreapi->HsaFreeDeviceMemory(deviceMemory_);
- }
- else if (deviceMemory_ != owner()->getHostMem()) {
- // if they are identical, the host pointer will be
- // deallocated later on => avoid double deallocation
- hsacoreapi->HsaAmdFreeSystemMemory(deviceMemory_);
- }
- }
-}
-
-bool Buffer::createInterop()
-{
- amd::InteropObject *interopObject = owner()->getInteropObj();
-
-#ifdef _WIN32
- if (interopObject->asD3D10Object() != NULL) {
- amd::D3D10Object *d3d10Object = interopObject->asD3D10Object();
- // 1. Get the D3D11 resource
- ID3D10Resource *resource = d3d10Object->getD3D10Resource();
- ID3D10Buffer *d3d10Buffer = static_cast(resource);
-
- HsaStatus status = hsacoreapi->HsaMapD3D10Buffer(
- dev_.getBackendDevice(), d3d10Buffer, &deviceMemory_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaMapD3D10Buffer");
- return false;
- }
- interopType_ = InteropD3D10;
- d3d10Resource_ = d3d10Buffer;
- }
-
- if (interopObject->asD3D11Object() != NULL) {
- amd::D3D11Object *d3d11Object = interopObject->asD3D11Object();
- // 1. Get the D3D11 resource
- ID3D11Resource *resource = d3d11Object->getD3D11Resource();
- ID3D11Buffer *d3d11Buffer = static_cast(resource);
-
- HsaStatus status = hsacoreapi->HsaMapD3D11Buffer(
- dev_.getBackendDevice(), d3d11Buffer, &deviceMemory_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaMapD3D10Buffer");
- return false;
- }
- interopType_ = InteropD3D11;
- d3d11Resource_ = d3d11Buffer;
- }
-#endif
-
- if (interopObject->asBufferGL()) {
- amd::BufferGL *buffer_gl = interopObject->asBufferGL();
- HsaGLResource gl_resource = {0};
- gl_resource.name = buffer_gl->getGLName();
- gl_resource.type = buffer_gl->getGLInternalFormat();
-
- void * glContext =owner()->getContext().info().hCtx_;
- HsaStatus status = hsacoreapi->HsaMapGLBuffer(
- dev_.getBackendDevice(), glContext, &gl_resource, &deviceMemory_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaMapGLBuffer");
- return false;
- }
-
- status = hsacoreapi->HsaAcquireGLResources( dev_.getBackendDevice(),
- glContext,
- &gl_resource,
- 1);
-
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaAcquireGLResources");
- return false;
- }
- interopType_ = InteropGL;
- glResource_ = gl_resource;
- }
- return true;
-}
-
-bool
-Buffer::create()
-{
- if (owner()->parent()) {
- // Sub-Buffer creation.
- oclhsa::Memory *parentBuffer =
- static_cast(owner()->parent()->getDeviceMemory(dev_));
-
- if (parentBuffer == NULL) {
- LogError("[OCL] Fail to allocate parent buffer");
- return false;
- }
-
- const size_t offset = owner()->getOrigin();
- deviceMemory_ =
- static_cast(parentBuffer->getDeviceMemory()) + offset;
-
- void* parentHostPtr = parentBuffer->owner()->getHostMem();
- if (parentHostPtr) {
- owner()->setHostMem(static_cast(parentHostPtr) + offset);
- }
-
- flags_ |= owner()->parent()->getMemFlags();
- return true;
- }
-
- // Allocate backing storage in device local memory unless UHP or AHP are set
- const cl_mem_flags memFlags = owner()->getMemFlags();
- if (!(memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) {
- bool useDeviceMemory = dev_.settings().enableLocalMemory_;
- size_t alignment = static_cast(dev_.info().memBaseAddrAlign_);
- if (useDeviceMemory) {
- hsacoreapi->HsaAllocateDeviceMemory(
- size(), alignment, dev_.getBackendDevice(), &deviceMemory_);
- if (deviceMemory_ && (memFlags & CL_MEM_COPY_HOST_PTR)) {
- bool ret = dev_.xferMgr().writeBuffer(owner()->getHostMem(), *this,
- amd::Coord3D(0), amd::Coord3D(size()), true);
- if (!ret) {
- hsacoreapi->HsaFreeDeviceMemory(deviceMemory_);
- deviceMemory_ = NULL;
- }
- return ret;
- }
- // if device memory is depleted, do not fall back to system memory
- return deviceMemory_ != NULL;
- }
- else if (!(owner()->getHostMem())) {
- flags_ |= HostMemoryDirectAccess;
- deviceMemory_ = dev_.hostAlloc(size(), alignment);
- // no need to copy - otherwise, the host pointer will not be NULL
- return deviceMemory_ != NULL;
- }
- }
-
- flags_ |= HostMemoryDirectAccess;
- void* hostMem = owner()->getHostMem();
- assert(hostMem);
- // If there is a host ptr, then register it only if it was not allocated,
- // (=> allocated by us)
- if (!(owner()->getHostMemRef()->alloced())) {
- // Reuse existing host memory for the backing storage and register it.
- //
- // SVM precludes a possible 64-bits optimization in which host buffers
- // allocated by the user (UHP) in the default, coherent space could be
- // mapped into the non-coherent space by means of CreateFileMapping/mmap
- // without copying any data (the "device memory" would be the
- // non-coherent buffer).
- // The optimization cannot be applied because regular buffers allocated
- // using UHP are expected to have same characteristics as the original
- // buffer, i.e., if the original buffer supports atomics then the
- // corresponding OpenCL buffer will support atomics too.
- flags_ |= HostMemoryRegistered;
- if (hsacoreapi->HsaRegisterSystemMemory(hostMem, size()) != kHsaStatusSuccess) {
- LogError("[OCL] Failed to register system memory");
- return false;
- }
- }
- deviceMemory_ = hostMem;
- return true;
-}
-
-bool
-Buffer::recreate(size_t newSize, size_t newAlignment, bool forceSystem) {
- const size_t memFlag = static_cast(owner()->getMemFlags());
- if ((memFlag & CL_MEM_ALLOC_HOST_PTR) ||
- (memFlag & CL_MEM_USE_HOST_PTR) ||
- !dev_.settings().enableLocalMemory_) {
- forceSystem = true;
- }
-
- void *newDeviceMemory = NULL;
- uint hostDirectAccess = 0;
-
- if (forceSystem) {
- newDeviceMemory = dev_.hostAlloc(newSize, newAlignment);
- if (newDeviceMemory == NULL) {
- LogError("[OCL] Fail to reallocate system memory");
- return false;
- }
-
- // Copy the old data to the new memory location.
- if (!dev_.xferMgr().readBuffer(*this, newDeviceMemory,
- amd::Coord3D(0),
- amd::Coord3D(size()),
- true)) {
- LogError("[OCL] Fail to copy the current value");
- dev_.hostFree(newDeviceMemory);
- newDeviceMemory = NULL;
- return false;
- }
-
- hostDirectAccess = HostMemoryDirectAccess;
- }
- else {
- hsacoreapi->HsaAllocateDeviceMemory(
- newSize, newAlignment, dev_.getBackendDevice(), &newDeviceMemory);
-
- if (newDeviceMemory == NULL) {
- LogError("[OCL] Fail to reallocate device local memory");
- return false;
- }
-
- assert(
- amd::isMultipleOf(static_cast(newDeviceMemory),
- newAlignment));
-
- // Copy the old data to the new memory location.
- if (!dev_.xferMgr().readBuffer(
- *this, newDeviceMemory, amd::Coord3D(0), amd::Coord3D(size()),
- true)) {
- LogError("[OCL] Fail to copy the current value");
- hsacoreapi->HsaFreeDeviceMemory(newDeviceMemory);
- newDeviceMemory = NULL;
- return false;
- }
- }
-
- destroy();
-
- deviceMemory_ = newDeviceMemory;
-
- if ((memFlag & CL_MEM_ALLOC_HOST_PTR) &&
- (owner()->getContext().devices().size() == 1)) {
- owner()->setHostMem(deviceMemory_);
- }
-
- flags_ &= (~HostMemoryDirectAccess & ~HostMemoryRegistered);
- flags_ |= hostDirectAccess;
-
- return true;
-}
-
-/////////////////////////////////oclhsa::Image//////////////////////////////
-
-Image::Image(const oclhsa::Device& dev, amd::Memory& owner) :
- oclhsa::Memory(dev, owner)
-{
- flags_ &= (~HostMemoryDirectAccess & ~HostMemoryRegistered);
- populateImageDescriptor();
-}
-
-struct ImageFormatLayout {
- cl_image_format clFormat;
- HsaImageFormat hsaFormat;
-};
-
-static const ImageFormatLayout
- ImageFormatLayoutMap[] = {
- { { CL_R, CL_UNORM_INT8 }, HSA_IMAGE_FMT_R8_UNORM },
- { { CL_R, CL_UNORM_INT16}, HSA_IMAGE_FMT_R16_UNORM },
- { { CL_R, CL_SNORM_INT8 }, HSA_IMAGE_FMT_R8_SNORM },
- { { CL_R, CL_SNORM_INT16}, HSA_IMAGE_FMT_R16_SNORM },
- { { CL_R, CL_SIGNED_INT8}, HSA_IMAGE_FMT_R8_SINT },
- { { CL_R, CL_SIGNED_INT16}, HSA_IMAGE_FMT_R16_SINT},
- { { CL_R, CL_SIGNED_INT32}, HSA_IMAGE_FMT_R32_SINT},
- { { CL_R, CL_UNSIGNED_INT8},HSA_IMAGE_FMT_R8_UINT },
- { { CL_R, CL_UNSIGNED_INT16}, HSA_IMAGE_FMT_R16_UINT},
- { { CL_R, CL_UNSIGNED_INT32}, HSA_IMAGE_FMT_R32_UINT},
- { { CL_R, CL_HALF_FLOAT}, HSA_IMAGE_FMT_R_HALFFLOAT},
- { { CL_R, CL_FLOAT }, HSA_IMAGE_FMT_R_FLOAT},
- { { CL_A, CL_UNORM_INT8 }, HSA_IMAGE_FMT_A8_UNORM},
- { { CL_A, CL_UNORM_INT16 }, HSA_IMAGE_FMT_A16_UNORM},
- { { CL_A, CL_SNORM_INT8 }, HSA_IMAGE_FMT_A8_SNORM},
- { { CL_A, CL_SNORM_INT16 }, HSA_IMAGE_FMT_A16_SNORM},
- { { CL_A, CL_SIGNED_INT8 }, HSA_IMAGE_FMT_A8_SINT},
- { { CL_A, CL_SIGNED_INT16 },HSA_IMAGE_FMT_A16_SINT},
- { { CL_A, CL_SIGNED_INT32}, HSA_IMAGE_FMT_A32_SINT},
- { { CL_A, CL_UNSIGNED_INT8 },HSA_IMAGE_FMT_A8_UINT},
- { { CL_A, CL_UNSIGNED_INT16}, HSA_IMAGE_FMT_A16_UINT},
- { { CL_A, CL_UNSIGNED_INT32}, HSA_IMAGE_FMT_A32_UINT},
- { { CL_A, CL_HALF_FLOAT}, HSA_IMAGE_FMT_A_HALFFLOAT},
- { { CL_A, CL_FLOAT}, HSA_IMAGE_FMT_A_FLOAT},
- { { CL_RG,CL_UNORM_INT8}, HSA_IMAGE_FMT_R8G8_UNORM},
- { { CL_RG,CL_UNORM_INT16},HSA_IMAGE_FMT_R16G16_UNORM},
- { { CL_RG,CL_SNORM_INT8}, HSA_IMAGE_FMT_R8G8_SNORM},
- { { CL_RG,CL_SNORM_INT16},HSA_IMAGE_FMT_R16G16_SNORM},
- { { CL_RG,CL_SIGNED_INT8},HSA_IMAGE_FMT_R8G8_SINT},
- { { CL_RG,CL_SIGNED_INT16},HSA_IMAGE_FMT_R16G16_SINT},
- { { CL_RG,CL_SIGNED_INT32},HSA_IMAGE_FMT_R32G32_SINT},
- { { CL_RG,CL_UNSIGNED_INT8},HSA_IMAGE_FMT_R8G8_UINT},
- { { CL_RG,CL_UNSIGNED_INT16},HSA_IMAGE_FMT_R16G16_UINT},
- { { CL_RG,CL_UNSIGNED_INT32},HSA_IMAGE_FMT_R32G32_UINT},
- { { CL_RG,CL_HALF_FLOAT},HSA_IMAGE_FMT_RG_HALFFLOAT},
- { { CL_RG,CL_FLOAT},HSA_IMAGE_FMT_RG_FLOAT},
- { { CL_RA,CL_UNORM_INT8}, HSA_IMAGE_FMT_R8A8_UNORM},
- { { CL_RA,CL_UNORM_INT16},HSA_IMAGE_FMT_R16A16_UNORM},
- { { CL_RA,CL_SNORM_INT8}, HSA_IMAGE_FMT_R8A8_SNORM},
- { { CL_RA,CL_SNORM_INT16},HSA_IMAGE_FMT_R16A16_SNORM},
- { { CL_RA,CL_SIGNED_INT8},HSA_IMAGE_FMT_R8A8_SINT},
- { { CL_RA,CL_SIGNED_INT16},HSA_IMAGE_FMT_R16A16_SINT},
- { { CL_RA,CL_SIGNED_INT32},HSA_IMAGE_FMT_R32A32_SINT},
- { { CL_RA,CL_UNSIGNED_INT8},HSA_IMAGE_FMT_R8A8_UINT},
- { { CL_RA,CL_UNSIGNED_INT16},HSA_IMAGE_FMT_R16A16_UINT},
- { { CL_RA,CL_UNSIGNED_INT32},HSA_IMAGE_FMT_R32A32_UINT},
- { { CL_RA,CL_HALF_FLOAT},HSA_IMAGE_FMT_RA_HALFFLOAT},
- { { CL_RA,CL_FLOAT},HSA_IMAGE_FMT_RA_FLOAT},
- { { CL_RGBA,CL_UNORM_INT8}, HSA_IMAGE_FMT_R8G8B8A8_UNORM},
- { { CL_RGBA,CL_UNORM_INT16},HSA_IMAGE_FMT_R16G16B16A16_UNORM},
- { { CL_RGBA,CL_SNORM_INT8}, HSA_IMAGE_FMT_R8G8B8A8_SNORM},
- { { CL_RGBA,CL_SNORM_INT16},HSA_IMAGE_FMT_R16G16B16A16_SNORM},
- { { CL_RGBA,CL_SIGNED_INT8},HSA_IMAGE_FMT_R8G8B8A8_SINT},
- { { CL_RGBA,CL_SIGNED_INT16},HSA_IMAGE_FMT_R16G16B16A16_SINT},
- { { CL_RGBA,CL_SIGNED_INT32},HSA_IMAGE_FMT_R32G32B32A32_SINT},
- { { CL_RGBA,CL_UNSIGNED_INT8},HSA_IMAGE_FMT_R8G8B8A8_UINT},
- { { CL_RGBA,CL_UNSIGNED_INT16},HSA_IMAGE_FMT_R16G16B16A16_UINT},
- { { CL_RGBA,CL_UNSIGNED_INT32},HSA_IMAGE_FMT_R32G32B32A32_UINT},
- { { CL_RGBA,CL_HALF_FLOAT},HSA_IMAGE_FMT_RGBA_HALFFLOAT},
- { { CL_RGBA,CL_FLOAT},HSA_IMAGE_FMT_RGBA_FLOAT},
- { { CL_ARGB,CL_UNORM_INT8},HSA_IMAGE_FMT_A8R8G8B8_UNORM},
- { { CL_ARGB,CL_SNORM_INT8},HSA_IMAGE_FMT_A8R8G8B8_SNORM},
- { { CL_ARGB,CL_SIGNED_INT8},HSA_IMAGE_FMT_A8R8G8B8_SINT},
- { { CL_ARGB,CL_UNSIGNED_INT8},HSA_IMAGE_FMT_A8R8G8B8_UINT},
- { { CL_BGRA,CL_UNORM_INT8},HSA_IMAGE_FMT_B8G8R8A8_UNORM},
- { { CL_BGRA,CL_SNORM_INT8},HSA_IMAGE_FMT_B8G8R8A8_SNORM},
- { { CL_BGRA,CL_SIGNED_INT8},HSA_IMAGE_FMT_B8G8R8A8_SINT},
- { { CL_BGRA,CL_UNSIGNED_INT8},HSA_IMAGE_FMT_B8G8R8A8_UINT},
- { {CL_LUMINANCE,CL_SNORM_INT8}, HSA_IMAGE_FMT_L8_SNORM},
- { {CL_LUMINANCE,CL_SNORM_INT16},HSA_IMAGE_FMT_L16_SNORM},
- { {CL_LUMINANCE,CL_UNORM_INT8},HSA_IMAGE_FMT_L8_UNORM},
- { {CL_LUMINANCE,CL_UNORM_INT16},HSA_IMAGE_FMT_L16_UNORM},
- { {CL_LUMINANCE,CL_HALF_FLOAT},HSA_IMAGE_FMT_L_HALFFLOAT},
- { {CL_LUMINANCE,CL_FLOAT},HSA_IMAGE_FMT_L_FLOAT},
- { {CL_INTENSITY,CL_SNORM_INT8}, HSA_IMAGE_FMT_I8_SNORM},
- { {CL_INTENSITY,CL_SNORM_INT16},HSA_IMAGE_FMT_I16_SNORM},
- { {CL_INTENSITY,CL_UNORM_INT8},HSA_IMAGE_FMT_I8_UNORM},
- { {CL_INTENSITY,CL_UNORM_INT16},HSA_IMAGE_FMT_I16_UNORM},
- { {CL_INTENSITY,CL_HALF_FLOAT},HSA_IMAGE_FMT_I_HALFFLOAT},
- { {CL_INTENSITY,CL_FLOAT},HSA_IMAGE_FMT_I_FLOAT},
- { {CL_RGB, CL_UNORM_SHORT_565},HSA_IMAGE_FMT_R5G6B5_UNORM},
- { {CL_RGB, CL_UNORM_SHORT_555},HSA_IMAGE_FMT_R5G5B5_UNORM},
- { {CL_RGB, CL_UNORM_INT_101010},HSA_IMAGE_FMT_R10G10B10_UNORM}
-};
-
-void
-Image::populateImageDescriptor()
-{
- amd::Image* image = owner()->asImage();
-
- // build HSA runtime image descriptor
- imageDescriptor_.width = image->getWidth();
- imageDescriptor_.height = image->getHeight();
- imageDescriptor_.depth = image->getDepth();
- imageDescriptor_.arraySize = 0;
-
- // Device specific image does not require rowpitch/slicepitch information.
- // Only image buffer is required to specify rowpitch size.
- imageDescriptor_.rowPitchInBytes = 0;
- imageDescriptor_.slicePitchInBytes = 0;
-
- switch (image->getType())
- {
- case CL_MEM_OBJECT_IMAGE1D:
- imageDescriptor_.geometry = HSA_GEOMETRY_1D;
- imageDescriptor_.height = 1;
- imageDescriptor_.depth = 1;
- break;
- case CL_MEM_OBJECT_IMAGE1D_BUFFER:
- imageDescriptor_.geometry = HSA_GEOMETRY_1DBuffer;
- imageDescriptor_.height = 1;
- imageDescriptor_.depth = 1;
- break;
- case CL_MEM_OBJECT_IMAGE1D_ARRAY:
- //@todo - arraySize = height ?!
- imageDescriptor_.geometry = HSA_GEOMETRY_1DArray;
- imageDescriptor_. height = 1;
- imageDescriptor_.arraySize = image->getHeight();
- break;
- case CL_MEM_OBJECT_IMAGE2D:
- imageDescriptor_.geometry = HSA_GEOMETRY_2D;
- imageDescriptor_.depth = 1;
- break;
- case CL_MEM_OBJECT_IMAGE2D_ARRAY:
- //@todo - arraySize = depth ?!
- imageDescriptor_.geometry = HSA_GEOMETRY_2DArray;
- imageDescriptor_.depth = 1;
- imageDescriptor_.arraySize = image->getDepth();
- break;
- case CL_MEM_OBJECT_IMAGE3D:
- imageDescriptor_.geometry = HSA_GEOMETRY_3D;
- break;
- }
-
- for (uint i = 0; i < sizeof(ImageFormatLayoutMap) / sizeof(ImageFormatLayout); ++i) {
- if ((image->getImageFormat().image_channel_data_type ==
- ImageFormatLayoutMap[i].clFormat.image_channel_data_type) &&
- (image->getImageFormat().image_channel_order ==
- ImageFormatLayoutMap[i].clFormat.image_channel_order)) {
- imageDescriptor_.format = ImageFormatLayoutMap[i].hsaFormat;
- }
- }
-}
-
-bool Image::createInterop() {
- amd::ScopedLock lock(owner()->lockMemoryOps());
- amd::InteropObject *interopObject = owner()->getInteropObj();
- void *hsaImageObjectInterop = NULL;
- size_t hsaImageObjectInteropSize = 0;
-#ifdef _WIN32
- if (interopObject->asD3D10Object()) {
- amd::D3D10Object *d3d10Object = interopObject->asD3D10Object();
- // 1. Get the D3D11 resource
- ID3D10Resource *resource = d3d10Object->getD3D10Resource();
- HsaStatus status = hsacoreapi->HsaMapD3D10Texture(
- dev_.getBackendDevice(), resource, &hsaImageObjectInterop,
- &hsaImageObjectInteropSize, kHsaMapFlagsReadWrite);
- if (status != kHsaStatusSuccess || hsaImageObjectInteropSize == 0 ) {
- LogError("[OCL] Fail on HsaMapD3D10Texture");
- return false;
- }
- interopType_ = InteropD3D10;
- d3d10Resource_ = resource;
- }
-
- if (interopObject->asD3D11Object()) {
- amd::D3D11Object *d3d11Object = interopObject->asD3D11Object();
-
- // 1. Get the D3D11 resource
- ID3D11Resource *resource = d3d11Object->getD3D11Resource();
- HsaStatus status = hsacoreapi->HsaMapD3D11Texture(
- dev_.getBackendDevice(), resource, &hsaImageObjectInterop,
- &hsaImageObjectInteropSize, kHsaMapFlagsReadWrite,
- d3d11Object->getPlane());
- if (status != kHsaStatusSuccess || hsaImageObjectInteropSize == 0 ) {
- LogError("[OCL] Fail on HsaMapD3D11Texture");
- return false;
- }
- interopType_ = InteropD3D11;
- d3d11Resource_ = resource;
- }
-#endif
-
- if (interopObject->asGLObject()) {
- amd::GLObject* gl_object = interopObject->asGLObject();
- HsaGLResource gl_resource = {0};
- gl_resource.name = gl_object->getGLName();
- if (gl_object->getGLTarget() != GL_TEXTURE_CUBE_MAP) {
- gl_resource.type = gl_object->getGLTarget();
- }
- else {
- gl_resource.type = gl_object->getCubemapFace();
- }
- gl_resource.mipmap_level = gl_object->getGLMipLevel();
-
- void * glContext =owner()->getContext().info().hCtx_;
-
- // Get the texture SRD.
- HsaStatus status = hsacoreapi->HsaMapGLTexture(
- dev_.getBackendDevice(), glContext, &gl_resource,
- &hsaImageObjectInterop, &hsaImageObjectInteropSize);
- if (status != kHsaStatusSuccess || hsaImageObjectInteropSize == 0) {
- LogError("[OCL] Fail on HsaMapGLTexture");
- return false;
- }
-
- status = hsacoreapi->HsaAcquireGLResources( dev_.getBackendDevice(),
- glContext,
- &gl_resource,
- 1);
-
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaAcquireGLResources");
- return false;
- }
-
- // Get the flat address for texture buffer.
- if (owner()->getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- // Map the texture buffer resource as buffer.
- HsaStatus status = hsacoreapi->HsaMapGLBuffer(
- dev_.getBackendDevice(), glContext, &gl_resource,
- &deviceMemory_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail on HsaMapGLBuffer");
- return false;
- }
- // Sanity check.
- assert((deviceMemory_ != NULL) &&
- "deviceMemory_ should not be \
- NULL upon successful return from HsaMapGLBuffer");
- }
-
- interopType_ = InteropGL;
- glResource_ = gl_resource;
- }
-
- // Populate HSA specific information to the interop image object.
- HsaStatus status = hsacoreapi->HsaAmdCreateDeviceImageView(
- &imageDescriptor_, hsaImageObjectInterop, hsaImageObject_);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail to tranform interop image SRD");
- return false;
- }
- return true;
-}
-
-bool Image::create()
-{
- if (owner()->parent()) {
- // Image view creation
- oclhsa::Image *parentImage =
- static_cast(owner()->parent()->getDeviceMemory(dev_));
-
- if (parentImage == NULL) {
- LogError("[OCL] Fail to allocate parent image");
- return false;
- }
-
- return createView(*parentImage);
- }
-
- amd::ScopedLock lock(owner()->lockMemoryOps());
-
- // Get memory size requirement for device specific image.
- HsaStatus status = hsacoreapi->HsaGetDeviceImageInfo(
- dev_.getBackendDevice(), &imageDescriptor_,
- &deviceImageInfo_);
-
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail to allocate image memory");
- return false;
- }
-
- if (dev_.settings().enableLocalMemory_) {
- status = hsacoreapi->HsaAllocateDeviceMemory(
- deviceImageInfo_.imageSizeInBytes,
- deviceImageInfo_.imageAlignmentInBytes,
- dev_.getBackendDevice(),
- &deviceMemory_);
- } else {
- status = servicesapi->HsaAllocateSystemMemory(
- deviceImageInfo_.imageSizeInBytes,
- deviceImageInfo_.imageAlignmentInBytes,
- kHsaSystemMemoryTypeDefault,
- &deviceMemory_);
- }
-
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail to allocate image memory");
- return false;
- }
-
- assert(amd::isMultipleOf(
- deviceMemory_, deviceImageInfo_.imageAlignmentInBytes));
-
- status = hsacoreapi->HsaCreateDeviceImage(
- dev_.getBackendDevice(), &imageDescriptor_,
- deviceMemory_, &hsaImageObject_[0]);
-
- return true;
-}
-
-bool
-Image::createView(Image &parent)
-{
- amd::ScopedLock lock(owner()->lockMemoryOps());
-
- if (parent.owner()->asBuffer()) {
- // Get new texture SRD since parent is a buffer.
- deviceMemory_ = parent.getDeviceMemory();
-
- // Force device specific image implementation to use rowpitch size.
- amd::Image* image = owner()->asImage();
- imageDescriptor_.rowPitchInBytes = image->getRowPitch();
-
- HsaStatus status = hsacoreapi->HsaCreateDeviceImage(
- dev_.getBackendDevice(), &imageDescriptor_,
- deviceMemory_, &hsaImageObject_[0]);
-
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail to create HSA image object");
- return false;
- }
- } else {
- // Get the view of the existing parent's SRD based on the child's image
- // descriptor.
- HsaStatus status = hsacoreapi->HsaAmdCreateDeviceImageView(
- &imageDescriptor_, parent.getHsaImageObjectAddress(),
- &hsaImageObject_[0]);
- if (status != kHsaStatusSuccess) {
- LogError("[OCL] Fail to get view of parent image");
- return false;
- }
- }
-
- return true;
-}
-
-void* Image::allocMapTarget(const amd::Coord3D& origin,
- const amd::Coord3D& region,
- uint mapFlags,
- size_t* rowPitch,
- size_t* slicePitch)
-{
- amd::ScopedLock lock(owner()->lockMemoryOps());
-
- incIndMapCount();
-
- void* pHostMem = owner()->getHostMem();
-
- if (pHostMem == NULL) {
- if (indirectMapCount_ == 1) {
- if (!allocateMapMemory(owner()->getSize())) {
- decIndMapCount();
- return NULL;
- }
- }
- else {
- // Did the map resource allocation fail?
- if (mapMemory_ == NULL) {
- LogError("Could not map target resource");
- return NULL;
- }
- }
-
- pHostMem = mapMemory_->getHostMem();
- }
-
- amd::Image* image = owner()->asImage();
-
- size_t elementSize = image->getImageFormat().getElementSize();
-
- size_t offset = origin[0] * elementSize;
-
- // Adjust offset with Y dimension
- offset += image->getRowPitch() * origin[1];
-
- // Adjust offset with Z dimension
- offset += image->getSlicePitch() * origin[2];
-
- *rowPitch = image->getRowPitch();
- if (slicePitch != NULL)
- *slicePitch = image->getSlicePitch();
-
- return (static_cast(pHostMem) + offset);
-}
-
-Image::~Image()
-{
- destroy();
-}
-
-void
-Image::destroy()
-{
- if (owner()->parent() != NULL) {
- return;
- }
-
- if (owner()->isInterop()) {
- destroyInterop();
- return;
- }
-
- if (dev_.settings().enableLocalMemory_) {
- hsacoreapi->HsaFreeDeviceMemory(deviceMemory_);
- }
- else {
- servicesapi->HsaFreeSystemMemory(deviceMemory_);
- }
-}
-}
-#endif // WITHOUT_FSA_BACKEND
diff --git a/rocclr/runtime/device/hsa/hsamemory.hpp b/rocclr/runtime/device/hsa/hsamemory.hpp
deleted file mode 100644
index 3ebdb3e7cc..0000000000
--- a/rocclr/runtime/device/hsa/hsamemory.hpp
+++ /dev/null
@@ -1,202 +0,0 @@
-#ifndef HSAMEMORY_HPP_
-#define HSAMEMORY_HPP_
-
-#include "top.hpp"
-#include "platform/memory.hpp"
-#include "utils/debug.hpp"
-#include "hsadevice.hpp"
-#include "services.h"
-#ifdef _WIN32
-#include "amdocl/cl_d3d11_amd.hpp"
-#endif
-#include "amdocl/cl_gl_amd.hpp"
-#include "hsainterop.h"
-
-namespace oclhsa {
-
-enum InteropType {
- InteropNone = 0,
- InteropD3D9 = 1,
- InteropD3D10 = 2,
- InteropD3D11 = 3,
- InteropGL = 4
-};
-
-class Memory : public device::Memory {
- public:
- Memory(const oclhsa::Device &dev, amd::Memory &owner);
-
- virtual ~Memory();
-
- // Getter for deviceMemory_.
- void *getDeviceMemory() const { return deviceMemory_; }
-
- // Gets a pointer to a region of host-visible memory for use as the target
- // of an indirect map for a given memory object
- virtual void *allocMapTarget(const amd::Coord3D &origin,
- const amd::Coord3D ®ion,
- uint mapFlags,
- size_t *rowPitch,
- size_t *slicePitch);
-
- // Create device memory according to OpenCL memory flag.
- virtual bool create() = 0;
- virtual bool createInterop() = 0;
-
- // Pins system memory associated with this memory object.
- virtual bool pinSystemMemory(void *hostPtr, // System memory address
- size_t size // Size of allocated system memory
- ) {
- Unimplemented();
- return true;
- }
-
- // Immediate blocking write from device cache to owners's backing store.
- // Marks owner as "current" by resetting the last writer to NULL.
- virtual void syncHostFromCache(SyncFlags syncFlags = SyncFlags())
- {
- // Need to revisit this when multi-devices is supported.
- }
-
- bool processGLResource (GLResourceOP operation) { return true;}
-
- // Releases indirect map surface
- void releaseIndirectMap() { decIndMapCount(); }
-
- //! Map the device memory to CPU visible
- virtual void* cpuMap(
- device::VirtualDevice& vDev, //!< Virtual device for map operaiton
- uint flags = 0, //!< flags for the map operation
- // Optimization for multilayer map/unmap
- uint startLayer = 0, //!< Start layer for multilayer map
- uint numLayers = 0, //!< End layer for multilayer map
- size_t* rowPitch = NULL,//!< Row pitch for the device memory
- size_t* slicePitch = NULL //!< Slice pitch for the device memory
- );
-
- //! Unmap the device memory
- virtual void cpuUnmap(
- device::VirtualDevice& vDev //!< Virtual device for unmap operaiton
- );
-
- bool isHsaLocalMemory() const;
-
- // Accessors for indirect map memory object
- amd::Memory *mapMemory() const { return mapMemory_; }
-
- protected:
- bool allocateMapMemory(size_t allocationSize);
-
- void freeMapMemory();
-
- // Decrement map count
- virtual void decIndMapCount();
-
- // Free / deregister device memory.
- virtual void destroy() = 0;
-
- //This function is called in the destructor ~Buffer() and ~Image(),
- //since InteropObject belonging to owner() is destroyed before
- //the destructor is called, we use the cached values of
- //interopType and Resource in this function.
- virtual void destroyInterop();
-
- // Pointer to the device associated with this memory object.
- const oclhsa::Device &dev_;
-
- // Pointer to the device memory. This could be in system or device local mem.
- void* deviceMemory_;
-
- InteropType interopType_;
-#ifdef _WIN32
- ID3D10Resource* d3d10Resource_;
- ID3D11Resource* d3d11Resource_;
-#endif
- HsaGLResource glResource_;
-
- private:
- // Disable copy constructor
- Memory(const Memory &);
-
- // Disable operator=
- Memory &operator=(const Memory &);
-};
-
-
-
-class Buffer : public oclhsa::Memory {
- public:
- Buffer(const oclhsa::Device &dev, amd::Memory &owner);
-
- virtual ~Buffer();
-
- // Create device memory according to OpenCL memory flag.
- virtual bool create();
-
- // Recreate the device memory using new size and alignment.
- bool recreate(size_t newSize, size_t newAlignment, bool forceSystem);
-
- //! Create a interop memory
- bool createInterop();
-
- private:
- // Disable copy constructor
- Buffer(const Buffer &);
-
- // Disable operator=
- Buffer &operator=(const Buffer &);
-
- // Free / deregister device memory.
- void destroy();
-};
-
-class Image : public oclhsa::Memory
-{
-public:
- Image(const oclhsa::Device& dev, amd::Memory& owner);
-
- virtual ~Image();
-
- //! Create device memory according to OpenCL memory flag.
- virtual bool create();
-
- //! Create an image view
- bool createView(Image &image);
-
- virtual bool createInterop();
-
- //! Gets a pointer to a region of host-visible memory for use as the target
- //! of an indirect map for a given memory object
- virtual void* allocMapTarget(const amd::Coord3D& origin,
- const amd::Coord3D& region,
- uint mapFlags,
- size_t* rowPitch,
- size_t* slicePitch);
-
- size_t getDeviceRowPitchSize() { return deviceImageInfo_.rowPitchInBytes; }
- size_t getDeviceSlicePitchSize() { return deviceImageInfo_.slicePitchInBytes; }
- size_t getDeviceDataSize() { return deviceImageInfo_.imageSizeInBytes; }
- size_t getDeviceDataAlignment() { return deviceImageInfo_.imageAlignmentInBytes; }
-
- void* getHsaImageObjectAddress() { return &hsaImageObject_[0];}
- size_t getHsaImageObjectSizeInBytes() {return sizeof(hsaImageObject_); }
-
-private:
- //! Disable copy constructor
- Image(const Buffer&);
-
- //! Disable operator=
- Image& operator=(const Buffer&);
-
- // Free / deregister device memory.
- void destroy();
-
- void populateImageDescriptor();
-
- HsaImageDescriptor imageDescriptor_;
- HsaDeviceImageInfo deviceImageInfo_;
- uint8_t hsaImageObject_[HSA_IMAGE_OBJECT_SIZE];
-};
-
-}
-#endif
diff --git a/rocclr/runtime/device/hsa/hsaprogram.cpp b/rocclr/runtime/device/hsa/hsaprogram.cpp
deleted file mode 100644
index b2e93aaaa3..0000000000
--- a/rocclr/runtime/device/hsa/hsaprogram.cpp
+++ /dev/null
@@ -1,726 +0,0 @@
-//
-// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-
-#ifndef WITHOUT_FSA_BACKEND
-
-#include "device/hsa/hsaprogram.hpp"
-
-#include "compiler/lib/loaders/elf/elf.hpp"
-#include "compiler/lib/utils/options.hpp"
-#include "runtime/device/hsa/hsakernel.hpp"
-#include "runtime/device/hsa/hsacompilerlib.hpp"
-#include "runtime/device/hsa/oclhsa_common.hpp"
-#include "utils/bif_section_labels.hpp"
-#include "utils/libUtils.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-
-#endif // WITHOUT_FSA_BACKEND
-
-namespace oclhsa {
-#ifndef WITHOUT_FSA_BACKEND
- /* Temporary log function for the compiler library */
- static void logFunction(const char *msg, size_t size) {
- std::cout << "Compiler Library log :" << msg << std::endl;
- }
-
- FSAILProgram::~FSAILProgram() {
- unloadBrig();
- acl_error error;
- // Free the elf binary
- if (binaryElf_ != NULL) {
- error = g_complibApi._aclBinaryFini(binaryElf_);
- if (error != ACL_SUCCESS) {
- LogWarning( "Error while destroying the acl binary \n" );
- }
- }
- }
-
- FSAILProgram::FSAILProgram(oclhsa::NullDevice& device): device::Program(device),
- llvmBinary_(),
- binaryElf_(NULL),
- device_(device),
- isBrigLoaded_(false)
- {
- memset(&binOpts_, 0, sizeof(binOpts_));
- binOpts_.struct_size = sizeof(binOpts_);
- //binOpts_.elfclass = LP64_SWITCH( ELFCLASS32, ELFCLASS64 );
- //Setting as 32 bit because hsail64 returns an invalid aclTargetInfo
- //when aclGetTargetInfo is called - EPR# 377910
- binOpts_.elfclass = ELFCLASS32;
- binOpts_.bitness = ELFDATA2LSB;
- binOpts_.alloc = &::malloc;
- binOpts_.dealloc = &::free;
- }
-
- bool FSAILProgram::initClBinary(char *binaryIn, size_t size) { // Save the
- // original
- // binary that
- // isn't owned
- // by ClBinary
- clBinary()->saveOrigBinary(binaryIn, size);
-
- char *bin = binaryIn;
- size_t sz = size;
-
- int encryptCode;
-
- char *decryptedBin;
- size_t decryptedSize;
- if (!clBinary()->decryptElf(binaryIn, size,
- &decryptedBin, &decryptedSize, &encryptCode)) {
- return false;
- }
- if (decryptedBin != NULL) {
- // It is decrypted binary.
- bin = decryptedBin;
- sz = decryptedSize;
- }
-
- // Both 32-bit and 64-bit are allowed!
- if (!amd::isElfMagic(bin)) {
- // Invalid binary.
- if (decryptedBin != NULL) {
- delete[]decryptedBin;
- }
- return false;
- }
-
- clBinary()->setFlags(encryptCode);
-
- return clBinary()->setBinary(bin, sz, (decryptedBin != NULL));
- }
-
- bool FSAILProgram::initBuild(amd::option::Options *options) {
- if (!device::Program::initBuild(options)) {
- return false;
- }
-
- // Need to get device information from CAL !?!?
- // Needs the device pointer from CAL to send to options class
- //
- // Shreyas: Commenting this might cause a bug - keeping this fro now
- // options->setPerBuildInfo("hsa",
- // binary_.getEncryptCode()
- // );
-
- // Elf Binary setup
- std::string outFileName;
-
- // true means fsail required
- clBinary()->init(options, true);
- if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
- outFileName = options->getDumpFileName(".bin");
- }
-
- bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
- if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
- (outFileName.size() >
- 0) ? outFileName.c_str() : NULL)) {
- LogError("Setup elf out for gpu failed");
- return false;
- }
- return true;
- }
-
- // ! post-compile setup for GPU
- bool FSAILProgram::finiBuild(bool isBuildGood) {
- clBinary()->resetElfOut();
- clBinary()->resetElfIn();
-
- if (!isBuildGood) {
- // Prevent the encrypted binary form leaking out
- clBinary()->setBinary(NULL, 0);
-
- }
-
- return device::Program::finiBuild(isBuildGood);
- }
-
- static char *readFile(std::string source_filename, size_t &size) {
- FILE *fp = ::fopen(source_filename.c_str(), "rb");
- unsigned int length;
- size_t offset = 0;
- char *ptr;
-
- if (!fp) {
- return NULL;
- }
-
- // obtain file size.
- ::fseek(fp, 0, SEEK_END);
- length = ::ftell(fp);
- ::rewind(fp);
-
- ptr = reinterpret_cast(malloc(offset + length + 1));
- if (length != fread(&ptr[offset], 1, length, fp)) {
- free(ptr);
- return NULL;
- }
-
- ptr[offset + length] = '\0';
- size = offset + length;
- ::fclose(fp);
- return ptr;
- }
-
- aclType FSAILProgram::getNextCompilationStageFromBinary() {
- acl_error errorCode;
- size_t secSize = 0;
- aclType from = ACL_TYPE_DEFAULT;
- // Checking llvmir in .llvmir section
- bool isLlvmirText = true;
- const void *llvmirText = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclLLVMIR,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isLlvmirText = false;
- }
- // Checking compile & link options in .comment section
- bool isOpts = true;
- const void *opts = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclCOMMENT,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isOpts = false;
- }
- if (isLlvmirText) {
- from = ACL_TYPE_LLVMIR_BINARY;
- } else {
- if (!isLlvmirText) {
- buildLog_ +="Error while linking : \
- Invalid binary (Missing LLVMIR section)\n" ;
- }
- if (!isOpts) {
- buildLog_ +="Warning while linking : \
- Invalid binary (Missing COMMENT section)\n" ;
- }
- return ACL_TYPE_DEFAULT;
- }
- bool isHsailText = true;
- // Checking HSAIL in .cg section
- const void *hsailText = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclCODEGEN,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isHsailText = false;
- }
- // Checking BRIG STRTAB in .brig_strtab section
- bool isBrigStrtab = true;
- const void *brigStrtab = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclBRIGstrs,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isBrigStrtab = false;
- }
- // Checking BRIG CODE in .brig_code section
- bool isBrigCode = true;
- const void *brigCode = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclBRIGcode,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isBrigCode = false;
- }
- // Checking BRIG OPERANDS in .brig_operands section
- bool isBrigOps = true;
- const void *brigOps = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclBRIGoprs,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isBrigOps = false;
- }
- if (isHsailText && isBrigStrtab && isBrigCode && isBrigOps) {
- from = ACL_TYPE_HSAIL_BINARY;
- } else if (!isHsailText && !isBrigStrtab && !isBrigCode && !isBrigOps) {
- from = ACL_TYPE_LLVMIR_BINARY;
- } else {
- if (!isHsailText) {
- buildLog_ +="Error while linking : \
- Invalid binary (Missing CG section)\n" ;
- }
- if (!isBrigStrtab) {
- buildLog_ +="Error while linking : \
- Invalid binary (Missing BRIG_STRTAB section)\n" ;
- }
- if (!isBrigCode) {
- buildLog_ +="Error while linking : \
- Invalid binary (Missing BRIG_CODE section)\n" ;
- }
- if (!isBrigOps) {
- buildLog_ +="Error while linking : \
- Invalid binary (Missing BRIG_OPERANDS section)\n" ;
- }
- return ACL_TYPE_DEFAULT;
- }
- // Checking ISA in .text section
- bool isShaderIsa = true;
- const void *shaderIsa = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &secSize,
- aclTEXT,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- isShaderIsa = false;
- }
- if (isShaderIsa && from == ACL_TYPE_LLVMIR_BINARY) {
- from = ACL_TYPE_DEFAULT;
- }
- return from;
- }
- bool FSAILProgram::updateAclBinaryWithKernelIsaAndDebug(std::string kernelName){
- assert(brig_.loadmap_section != NULL);
- aclBinary * internalAclBinary = reinterpret_cast(brig_.loadmap_section);
-
- std::string openClKernelName("&__OpenCL_" + kernelName + "_kernel");
- const oclBIFSymbolStruct* isaSymbolStruct = findBIF30SymStruct(symISABinary);
- assert(isaSymbolStruct && "symbol not found");
- std::string kernelIsaSymbol = isaSymbolStruct->str[bif::PRE] +
- openClKernelName + isaSymbolStruct->str[bif::POST];
-
- const oclBIFSymbolStruct* debugSymbolStruct = findBIF30SymStruct(symDebugInfo);
- assert(debugSymbolStruct && "symbol not found");
- //For debug symbols, the PRE is used for BRIG debug and the POST is used for
- //ISA debug
- std::string kernelIsaDebugSymbol = debugSymbolStruct->str[bif::POST] + openClKernelName;
-
- //Extract the ISA section
- size_t symbolSize;
- acl_error errorCode;
- const void* isaSymbol = g_complibApi._aclExtractSymbol(device().compiler(),
- internalAclBinary,
- &symbolSize,
- aclTEXT,
- kernelIsaSymbol.c_str(),
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Failed to extract ISA for kernel";
- return false;
- }
- //Insert the ISA section
- errorCode = g_complibApi._aclInsertSymbol(device().compiler(),
- binaryElf_,
- isaSymbol,
- symbolSize,
- aclTEXT,
- kernelIsaSymbol.c_str());
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Failed to insert ISA for kernel";
- return false;
- }
- const void* debugSymbol = g_complibApi._aclExtractSymbol(device().compiler(),
- internalAclBinary,
- &symbolSize,
- aclHSADEBUG,
- kernelIsaDebugSymbol.c_str(),
- &errorCode);
- //If debug information is available
- if (errorCode == ACL_SUCCESS) {
- //Update binary with the debug section for the kernel
- errorCode = g_complibApi._aclInsertSymbol(device().compiler(),
- binaryElf_,
- debugSymbol,
- symbolSize,
- aclHSADEBUG,
- kernelIsaDebugSymbol.c_str());
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Failed to insert debug information for kernel";
- return false;
- }
- }
- return true;
- }
- bool FSAILProgram::ExtractSymbolAndCopy(aclSections id,
- const char *symbol_name,
- void** address_to_copy,
- size_t* symbol_size_bytes,
- bool verify) {
- acl_error error_code;
- *symbol_size_bytes = 0;
- const void* symbol_data = g_complibApi._aclExtractSymbol(
- device().compiler(),
- binaryElf_,
- symbol_size_bytes,
- id,
- symbol_name,
- &error_code);
- //If the section is not mandatory and the section does not exist
- //skip this section
- if (error_code != ACL_SUCCESS) {
- if (!verify) {
- return true;
- }
- std::string error = "Could not find Brig Directive in BIFF: ";
- error += symbol_name;
- LogError(error.c_str());
- buildLog_ += error;
- return false;
- }
- *address_to_copy = malloc(*symbol_size_bytes);
- if (*address_to_copy == NULL) {
- LogError(" Failed to allocate memory");
- return false;
- }
- memcpy(*address_to_copy, symbol_data, *symbol_size_bytes);
-
- return true;
- }
-
- bool FSAILProgram::saveBinaryAndSetType(type_t type) {
- //Write binary to memory
- void *rawBinary = NULL;
- size_t size;
- if (g_complibApi._aclWriteToMem(binaryElf_, &rawBinary, &size)
- != ACL_SUCCESS) {
- buildLog_ += "Failed to write binary to memory \n";
- return false;
- }
- clBinary()->saveBIFBinary((char*)rawBinary, size);
- //Set the type of binary
- setType(type);
- //Free memory containing rawBinary
- binaryElf_->binOpts.dealloc(rawBinary);
- return true;
- }
-
- bool FSAILProgram::linkImpl(const std::vector &inputPrograms,
- amd::option::Options *options,
- bool createLibrary) {
- std::vector::const_iterator it
- = inputPrograms.begin();
- std::vector::const_iterator itEnd
- = inputPrograms.end();
- acl_error errorCode;
-
- // For each program we need to extract the LLVMIR and create
- // aclBinary for each
- std::vector binaries_to_link;
-
- for (size_t i = 0; it != itEnd; ++it, ++i) {
- FSAILProgram *program = (FSAILProgram *)*it;
- // Check if the program was created with clCreateProgramWIthBinary
- binary_t binary = program->binary();
- if ((binary.first != NULL) && (binary.second > 0)) {
- // Binary already exists -- we can also check if there is no
- // opencl source code
- // Need to check if LLVMIR exists in the binary
- // If LLVMIR does not exist then is it valid
- // We need to pull out all the compiled kernels
- // We cannot do this at present because we need at least
- // Hsail text to pull the kernels oout
- void *mem = const_cast(binary.first);
- binaryElf_ = g_complibApi._aclReadFromMem(mem,
- binary.second,
- &errorCode);
-
- if (errorCode != ACL_SUCCESS) {
- LogWarning("Error while linking : Could not read from raw binary");
- return false;
- }
- }
- // At this stage each FSAILProgram contains a valid binary_elf
- // Check if LLVMIR is in the binary
- // @TODO - Memory leak , cannot free this buffer
- // need to fix this.. File EPR on compiler library
- size_t llvmirSize = 0;
- const void *llvmirText = g_complibApi._aclExtractSection(device().compiler(),
- binaryElf_,
- &llvmirSize,
- aclLLVMIR,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- buildLog_ +="Error while linking : \
- Invalid binary (Missing LLVMIR section)" ;
- return false;
- }
- // Create a new aclBinary for each LLVMIR and save it in a list
- aclBIFVersion ver = g_complibApi._aclBinaryVersion(binaryElf_);
- aclBinary *bin = g_complibApi._aclCreateFromBinary(binaryElf_, ver);
- binaries_to_link.push_back(bin);
- }
-
- // At this stage each FSAILProgram in the list has an aclBinary initialized
- // and contains LLVMIR
- // We can now go ahead and link them.
- if (binaries_to_link.size() > 1) {
- errorCode = g_complibApi._aclLink(device().compiler(),
- binaries_to_link[0],
- binaries_to_link.size() - 1,
- &binaries_to_link[1],
- ACL_TYPE_LLVMIR_BINARY,
- "-create-library",
- NULL);
- }
- else {
- errorCode = g_complibApi._aclLink(device().compiler(),
- binaries_to_link[0],
- 0,
- NULL,
- ACL_TYPE_LLVMIR_BINARY,
- "-create-library",
- NULL);
- }
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Failed to link programs";
- return false;
- }
- // Store the newly linked aclBinary for this program.
- binaryElf_ = binaries_to_link[0];
- // Free all the other aclBinaries
- for (size_t i = 1; i < binaries_to_link.size(); i++) {
- g_complibApi._aclBinaryFini(binaries_to_link[i]);
- }
- if (createLibrary) {
- saveBinaryAndSetType(TYPE_LIBRARY);
- return true;
- }
-
- // Now call linkImpl with the new options
- return linkImpl(options);
- }
-
- bool FSAILProgram::loadBrig() {
- //Copy all the sections into BRIG
- memset(&brig_, 0 ,sizeof(HsaBrig));
- bool codeStatus = ExtractSymbolAndCopy(aclBRIGcode,
- "__BRIG__code",
- &brig_.code_section,
- &brig_.code_section_byte_size,
- true
- );
- bool oprStatus = ExtractSymbolAndCopy(aclBRIGoprs,
- "__BRIG__operands",
- &brig_.operand_section,
- &brig_.operand_section_byte_size,
- true
- );
- bool strStatus = ExtractSymbolAndCopy(aclBRIGstrs,
- "__BRIG__strtab",
- &brig_.string_section,
- &brig_.string_section_byte_size,
- true
- );
- bool dbgStatus = ExtractSymbolAndCopy(aclHSADEBUG ,
- "__debug_brig__",
- &brig_.debug_section,
- &brig_.debug_section_byte_size,
- false
- );
- if (!codeStatus || !oprStatus || !strStatus || !dbgStatus) {
- LogError("Failed to Extract one or more BRIG sections");
- buildLog_ += "Error: Failed to Extract one or more BRIG sections";
- return false;
- }
- if(hsacoreapi->HsaLoadBrig(device_.getBackendDevice(), &brig_)
- != kHsaStatusSuccess){
- return false;
- }
- isBrigLoaded_ = true;
- return true;
- }
-
- bool FSAILProgram::unloadBrig() {
- if (isBrigLoaded_ == true) {
- HsaStatus status = hsacoreapi->HsaUnloadBrig(&brig_);
- if (status != kHsaStatusSuccess){
- return false;
- }
- //Destroy the BRIG
- free(brig_.code_section);
- free(brig_.operand_section);
- free(brig_.string_section);
- free(brig_.debug_section);
- }
- return true;
- }
-
- bool FSAILProgram::linkImpl(amd::option::Options *options) {
- acl_error errorCode;
- aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
- //If the binaryElf_ is not set then program must have been created
- // using clCreateProgramWithBinary
- if (!binaryElf_) {
- binary_t binary = this->binary();
- if ((binary.first != NULL) && (binary.second > 0)) {
- // Binary already exists -- we can also check if there is no
- // opencl source code
- // Need to check if LLVMIR exists in the binary
- // If LLVMIR does not exist then is it valid
- // We need to pull out all the compiled kernels
- // We cannot do this at present because we need at least
- // Hsail text to pull the kernels oout
- void *mem = const_cast(binary.first);
- binaryElf_ = g_complibApi._aclReadFromMem(mem,
- binary.second,
- &errorCode);
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Error while converting to BRIG: aclBinary init failure \n" ;
- LogWarning("aclBinaryInit failed");
- return false;
- }
- // Check that all needed section also exist in binaryElf_
- // No any validity checks here
- continueCompileFrom = getNextCompilationStageFromBinary();
- if (ACL_TYPE_DEFAULT == continueCompileFrom) {
- return false;
- }
- if (ACL_TYPE_HSAIL_BINARY == continueCompileFrom) {
- // Save binary in the interface class
- // Also load compile & link options from binary into Program class members:
- // compileOptions_ & linkOptions_
- setBinary(static_cast(mem), binary.second);
- // Compare options loaded from binary with current ones
- // If they differ then recompile from ACL_TYPE_LLVMIR_BINARY
- // @TODO It is needed to compare options taking into account that:
- // 1. options are order independent;
- // 2. (may be not trivial) compare only options that affect binary
- std::string curOptions = options->origOptionStr + hsailOptions();
- if (compileOptions_ + linkOptions_ != curOptions) {
- continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
- }
- }
- }
- }
- // Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
- // 1. if the program is not created with binary;
- // 2. if the program is created with binary and contains only .llvmir & .comment
- // 3. if the program is created with binary, contains all brig sections,
- // but the binary's compile & link options differ from current ones (recompilation);
- if (ACL_TYPE_LLVMIR_BINARY == continueCompileFrom) {
- std::string curOptions = options->origOptionStr + hsailOptions();
- errorCode = g_complibApi._aclCompile(device().compiler(),
- binaryElf_,
- curOptions.c_str(),
- ACL_TYPE_LLVMIR_BINARY,
- ACL_TYPE_CG,
- logFunction);
- }
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Error while converting to BRIG: Compiling LLVMIR to BRIG \n" ;
- return false;
- }
- //Stop compilation if it is an offline device - HSA runtime does not
- //support ISA compiled offline
- if (!dev().isOnline()) {
- return true;
- }
-
- const HsaDevice *hsaDevice = dev().getBackendDevice();
- if (!loadBrig()) {
- buildLog_ += "Error while loading BRIG" ;
- return false;
- }
-
- size_t kernelNamesSize = 0;
- errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Error while Finalization phase: kernel names query from the ELF failed\n";
- return false;
- }
- if (kernelNamesSize > 0) {
- char* kernelNames = new char[kernelNamesSize];
- errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
- if (errorCode != ACL_SUCCESS) {
- buildLog_ += "Error while Finalization phase: kernel's Metadata is corrupted in the ELF\n";
- delete kernelNames;
- return false;
- }
- std::vector vKernels = splitSpaceSeparatedString(kernelNames);
- delete kernelNames;
- std::vector::iterator it = vKernels.begin();
- bool dynamicParallelism = false;
- for (it; it != vKernels.end(); ++it) {
- std::string kernelName = *it;
- Kernel *aKernel = new oclhsa::Kernel(kernelName,
- this,
- &brig_,
- options->origOptionStr + hsailOptions());
- if (!aKernel->init() ) {
- return false;
- }
- aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
- // Update the binary in the FSAILProgram to save the ISA and debug information.
- // This is so the debugger and the profiler can use the a single aclBinary for all their needs.
- if (!updateAclBinaryWithKernelIsaAndDebug(kernelName)) {
- return false;
- }
- kernels()[kernelName] = aKernel;
- }
- }
- saveBinaryAndSetType(TYPE_EXECUTABLE);
- buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
- return true;
- }
-
- bool FSAILProgram::createBinary(amd::option::Options *options) {
- return false;
- }
-
- bool FSAILProgram::initClBinary() {
- if (clBinary_ == NULL) {
- clBinary_ = new ClBinary(static_cast(device()));
- if (clBinary_ == NULL) {
- return false;
- }
- }
- return true;
- }
-
- void FSAILProgram::releaseClBinary() {
- if (clBinary_ != NULL) {
- delete clBinary_;
- clBinary_ = NULL;
- }
- }
-
- std::string FSAILProgram::hsailOptions() {
- std::string hsailOptions;
- //Set options for the standard device specific options
- //This is just for legacy compiler code
- // All our devices support these options now
- hsailOptions.append(" -DFP_FAST_FMAF=1");
- hsailOptions.append(" -DFP_FAST_FMA=1");
- //TODO(sramalin) : Query the device for opencl version
- // and only set if -cl-std wasn't specified in
- // original build options (app)
- //hsailOptions.append(" -cl-std=CL1.2");
- //check if the host is 64 bit or 32 bit
- LP64_ONLY(hsailOptions.append(" -m64"));
- //Now append each extension supported by the device
- // one by one
- std::string token;
- std::istringstream iss("");
- iss.str(device().info().extensions_);
- while (getline(iss, token, ' ')) {
- if (!token.empty()) {
- hsailOptions.append(" -D");
- hsailOptions.append(token);
- hsailOptions.append("=1");
- }
- }
- return hsailOptions;
- }
-
-#endif // WITHOUT_FSA_BACKEND
-} // namespace hsa
-
diff --git a/rocclr/runtime/device/hsa/hsaprogram.hpp b/rocclr/runtime/device/hsa/hsaprogram.hpp
deleted file mode 100644
index e1d96f1515..0000000000
--- a/rocclr/runtime/device/hsa/hsaprogram.hpp
+++ /dev/null
@@ -1,160 +0,0 @@
-//
-// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef HSAPROGRAM_HPP_
-#define HSAPROGRAM_HPP_
-
-#ifndef WITHOUT_FSA_BACKEND
-
-#include "hsabinary.hpp"
-#include "hsacompilerlib.hpp"
-#include "services.h"
-#include "acl.h"
-#include "oclhsa_common.hpp"
-#include
-#include
-#include
-#include
-#include "hsadevice.hpp"
-
-//! \namespace oclhsa HSA Device Implementation
-namespace oclhsa {
-
- //! \class empty program
- class FSAILProgram : public device::Program
- {
- friend class ClBinary;
- public:
- //! Default constructor
- FSAILProgram(oclhsa::NullDevice& device);
- //! Default destructor
- ~FSAILProgram();
-
- // Initialize Binary for GPU (used only for clCreateProgramWithBinary()).
- virtual bool initClBinary(char *binaryIn, size_t size);
-
- //! Returns the aclBinary associated with the progrm
- const aclBinary* binaryElf() const {
- return static_cast(binaryElf_); }
-
- //! Returns the brig associated with the progrm
- const HsaBrig* brig() {
- return static_cast(&brig_); }
-
- const NullDevice& dev() const { return device_; }
- //! Returns the hsaBinary associated with the progrm
- const HsaDevice* hsaDevice() const {
- return dev().getBackendDevice();
- }
-
- protected:
- //! pre-compile setup for GPU
- virtual bool initBuild(amd::option::Options* options);
-
- //! post-compile setup for GPU
- virtual bool finiBuild(bool isBuildGood);
-
- /*! \brief Compiles GPU CL program to LLVM binary (compiler frontend)
- *
- * \return True if we successefully compiled a GPU program
- */
- virtual bool compileImpl(
- const std::string& sourceCode, //!< the program's source code
- const std::vector& headers,
- const char** headerIncludeNames,
- amd::option::Options* options //!< compile options's object
- );
-
- /*! \brief Compiles LLVM binary to FSAIL code (compiler backend: link+opt+codegen)
- *
- * \return The build error code
- */
- int compileBinaryToFSAIL(
- amd::option::Options* options //!< options for compilation
- );
-
-
- virtual bool linkImpl(amd::option::Options* options);
-
- //! Link the device programs.
- virtual bool linkImpl (const std::vector& inputPrograms,
- amd::option::Options* options,
- bool createLibrary);
-
- virtual bool createBinary(amd::option::Options* options);
-
- //! Initialize Binary
- virtual bool initClBinary();
-
- //! Release the Binary
- virtual void releaseClBinary();
-
- virtual const aclTargetInfo & info(const char * str = ""){
- return info_;
- }
-
- virtual bool isElf(const char* bin) const {
- return amd::isElfMagic(bin);
- //return false;
- }
-
- //! Returns the binary
- // This should ensure that the binary is updated with all the kernels
- // ClBinary& clBinary() { return binary_; }
- ClBinary* clBinary() {
- return static_cast(device::Program::clBinary());
- }
- const ClBinary* clBinary() const {
- return static_cast(device::Program::clBinary());
- }
-
- private:
-
- //! Extracts a symbol from the binaryElf_
- // and copies it to a buffer allocated
- // by the function
- bool ExtractSymbolAndCopy(aclSections id,
- const char *symbol_name,
- void** address_to_copy,
- size_t* symbol_size_bytes,
- bool verify);
- //! Extracts the aclBinary used internally within the brig
- // and pulls the debug and ISA section for a particular kernel
- // and inserts it into aclBinary contained in the program
- bool updateAclBinaryWithKernelIsaAndDebug(std::string kernelName);
- //! Checks the existence of sections in binaryElf_
- // and calculates the next stage of compilation;
- // if set of the section is impossible, then
- // binary is invalid and function returns ACL_TYPE_DEFAULT
- aclType getNextCompilationStageFromBinary();
- //! Loads the global variables for the BRIG
- bool loadBrig();
- //! Unloads the global variables for the BRIG
- bool unloadBrig();
- bool saveBinaryAndSetType(type_t type);
- //! Disable default copy constructor
- FSAILProgram(const FSAILProgram&);
-
- //! Disable operator=
- FSAILProgram& operator=(const FSAILProgram&);
-
- //! Returns all the options to be appended while passing to the
- //compiler library
- std::string hsailOptions();
-
- std::string openCLSource_; //!< Original OpenCL source
- std::string fsailProgram_; //!< FSAIL program after compilation.
- std::string llvmBinary_; //!< LLVM IR binary code
- //!< aclBinary and aclCompiler - for the compiler libray
- aclBinary* binaryElf_; //!
-#include
-
-namespace oclhsa {
-
-Timestamp::~Timestamp() {
- if (signal_ != 0) {
- hsacoreapi->HsaDestroySignal(signal_);
- }
-}
-
-HsaSignal Timestamp::createSignal() {
- start_ = 0;
- end_ = 0;
-
- HsaStatus status = hsacoreapi->HsaCreateSignal(&signal_);
- if (status != kHsaStatusSuccess) {
- LogError("HsaCreateSignal failed, could not create signal for timestamp");
- return 0;
- }
- return signal_;
-}
-
-void Timestamp::start() {
- start_ = amd::Os::timeNanos();
- signal_ = 0;
-}
-
-void Timestamp::end() {
- end_ = amd::Os::timeNanos();
-}
-
-/**
- * @brief Waits on an outstanding kernel without regard to how
- * it was dispatched - with or without a signal
- *
- * @return bool true if Wait returned successfully, false
- * otherwise
- */
-bool VirtualGPU::releaseGpuMemoryFence() {
-
- // Return if there is no pending dispatch
- if (!hasPendingDispatch_) {
- return false;
- }
-
- // Reset the wait on dispatch flag
- HsaStatus status;
- hasPendingDispatch_ = false;
-
- // This is the first call to wait on a kernel, issue
- // a End Of Pipe - Release_Mem command
- HsaQueue *hsaQueue;
- hsaQueue = (lastSubmitQueue_ == kHsaQueueTypeCompute) ?
- gpu_queue_ : interopQueue_;
- if (hsaQueue != NULL) {
- status = hsacoreapi->HsaAmdReleaseGpuFence(hsaQueue);
- if (status == kHsaStatusSuccess) {
- return true;
- }
- }
-
- LogError("Call to HsaAmdReleaseGpuFence() failed.\n");
- return false;
-}
-
-VirtualGPU::VirtualGPU(Device &device)
- : device::VirtualDevice(device), oclhsa_device_(device)
-{
- lastSubmitQueue_ = static_cast(0xFFFF);
- gpu_device_ = const_cast(device.getBackendDevice());
- interopQueue_ = NULL;
- timestamp_ = NULL;
-
- // Initialize the last signal and dispatch flags
- hasPendingDispatch_ = false;
-}
-
-VirtualGPU::~VirtualGPU()
-{
- if (timestamp_ != NULL) {
- delete timestamp_;
- timestamp_ = NULL;
- LogError("There was a timestamp that was not used; deleting.");
- }
-}
-
-/* profilingBegin, when profiling is enabled, creates a timestamp to save in
- * virtualgpu's timestamp_, and calls start() to get the current host
- * timestamp.
- */
-void VirtualGPU::profilingBegin(amd::Command &command, bool drmProfiling)
-{
- if (command.profilingInfo().enabled_) {
- if (timestamp_ != NULL) {
- LogWarning("Trying to create a second timestamp in VirtualGPU. \
- This could have unintended consequences.");
- return;
- }
- timestamp_ = new Timestamp;
- timestamp_->start();
- }
-}
-
-/* profilingEnd, when profiling is enabled, checks to see if a signal was
- * created for whatever command we are running and calls end() to get the
- * current host timestamp if no signal is available. It then saves the pointer
- * timestamp_ to the command's data.
- */
-void VirtualGPU::profilingEnd(amd::Command &command)
-{
- if (command.profilingInfo().enabled_) {
- if (timestamp_->getSignal() == 0) {
- timestamp_->end();
- }
- command.setData(reinterpret_cast(timestamp_));
- timestamp_ = NULL;
- }
-}
-
-bool VirtualGPU::profilingCollectResults(amd::Command *list)
-{
- uint32_t cmdType;
- HsaAmdProfileObject profileObj;
- Timestamp *ts = NULL;
- HsaStatus status;
-
- amd::Command* current = list;
- amd::Command* next = NULL;
-
- // If the command list is, empty then exit
- if (current == NULL) {
- return true;
- }
-
- // Determine profiling has been enabled.
- if (!current->profilingInfo().enabled_) {
- return false;
- }
-
- // This block gets the current device and system clock counters, and uses
- // the delta between the two to adjust the device clock to the host domain.
- uint64_t endTimeStampGPU = 0;
- uint64_t endTimeStamp = 0;
- // Device frequency
- double deviceNsPerTick = 0;
- HsaDeviceClockCounterInfo clockCounterInfo;
- if (kHsaStatusSuccess == hsacoreapi->HsaDeviceGetClockCounters(gpu_device_, &clockCounterInfo)) {
- // Device frequency
- deviceNsPerTick = 1000000000.0 /
- clockCounterInfo.device_clock_frequency_hz;
- endTimeStampGPU = clockCounterInfo.device_clock_counter * deviceNsPerTick;
- // keep this order of operations for accuracy
- endTimeStamp = clockCounterInfo.system_clock_counter *
- (1000000000.0 / clockCounterInfo.system_clock_frequency_hz);
- } else {
- LogWarning("Could not get device/system counters. Device times could be off.");
- endTimeStamp = amd::Os::timeNanos();
- }
-
- uint64_t startTimeStamp = endTimeStamp;
- uint64_t readjustTimeGPU = 0;
- if (endTimeStampGPU != 0) {
- readjustTimeGPU = endTimeStampGPU - endTimeStamp;
- }
-
- // This block gets the first valid timestamp from the first command that has
- // one. This timestamp is used below to mark any command that came before
- // it to start and end with this first valid start time.
- current = list;
- while (current != NULL) {
- cmdType = current->type();
- if (current->data() != NULL) {
- ts = reinterpret_cast(current->data());
- if (ts->getSignal() != 0) {
- status = hsacoreapi->HsaAmdGetProfileObject(ts->getSignal(), &profileObj);
- if (status != kHsaStatusSuccess) {
- LogError("Error reading profile data.");
- continue;
- }
- startTimeStamp = *profileObj.launch_time_ * deviceNsPerTick;
- startTimeStamp -= readjustTimeGPU;
- endTimeStamp = startTimeStamp;
- } else {
- startTimeStamp = ts->getStart();
- endTimeStamp = ts->getStart();
- }
- break;
- }
- current = current->getNext();
- }
-
- // Iterate through the list of commands, and set timestamps as appropriate
- // Note, if a command does not have a timestamp, it does one of two things:
- // - if the command (without a timestamp), A, precedes another command, C,
- // that _does_ contain a valid timestamp, command A will set RUNNING and
- // COMPLETE with the RUNNING (start) timestamp from command C. This would
- // also be true for command B, which is between A and C. These timestamps
- // are actually retrieved in the block above (startTimeStamp, endTimeStamp).
- // - if the command (without a timestamp), C, follows another command, A,
- // that has a valid timestamp, command C will be set RUNNING and COMPLETE
- // with the COMPLETE (end) timestamp of the previous command, A. This is
- // also true for any command B, which falls between A and C.
- current = list;
- while (current != NULL) {
- cmdType = current->type();
- if (current->data() != NULL) {
- // Since this is a valid command to get a timestamp, we use the
- // timestamp provided by the runtime (saved in the data())
- ts = reinterpret_cast(current->data());
- if (ts->getSignal() != 0) {
- status = hsacoreapi->HsaAmdGetProfileObject(ts->getSignal(), &profileObj);
- if (status != kHsaStatusSuccess) {
- LogError("Error reading profile data.");
- continue;
- }
- startTimeStamp = *profileObj.launch_time_ * deviceNsPerTick;
- endTimeStamp = *profileObj.completion_time_ * deviceNsPerTick;
- startTimeStamp -= readjustTimeGPU;
- endTimeStamp -= readjustTimeGPU;
- } else {
- startTimeStamp = ts->getStart();
- endTimeStamp = ts->getEnd();
- }
- delete ts;
- current->setData(NULL);
- } else {
- // If we don't have a command that contains a valid timestamp, we
- // simply use the end timestamp of the previous command.
- // Note, if this is a command before the first valid timestamp,
- // this will be equal to the start timestamp of the first valid
- // timestamp at this point.
- startTimeStamp = endTimeStamp;
- }
-
- if (current->status() == CL_SUBMITTED) {
- current->setStatus(CL_RUNNING, startTimeStamp);
- current->setStatus(CL_COMPLETE, endTimeStamp);
- }
- else if (current->status() != CL_COMPLETE) {
- LogPrintfError("Unexpected command status - %d.", current->status());
- }
-
- next = current->getNext();
- current->release();
- current = next;
- }
-
- // Release the memory blocks allocated for the various
- // struct arguments of one or more kernel submissions
- std::for_each(kernelArgList_.begin(),
- kernelArgList_.end(),
- std::ptr_fun(servicesapi->HsaFreeSystemMemory));
- kernelArgList_.clear();
-
- // Reset the queue parameter
- lastSubmitQueue_ = static_cast(0xFFFF);
-
- // Return True so that OpenCL commands are
- // not processed again
- return true;
-}
-
-bool
-VirtualGPU::create(HsaQueueType queueType)
-{
- //context was created with d3d11 or d3d10 or gl
- //extension enabled, RT still needs to create
- //two queues even for an interop application.
- bool isInterop = (queueType == kHsaQueueTypeInterop);
- if (kHsaStatusSuccess !=
- hsacoreapi->HsaCreateUserModeQueue(gpu_device_,
- NULL,
- 0,
- kHsaQueueTypeCompute,
- kHsaQueuePriorityMaximum,
- kHsaQueueFractionTen,
- &gpu_queue_)) {
- LogError("Error creating hsa queue");
- return false;
- }
-
- if ((dev().settings().enableLocalMemory_ || isInterop) &&
- kHsaStatusSuccess !=
- hsacoreapi->HsaCreateUserModeQueue(gpu_device_,
- NULL,
- 0,
- kHsaQueueTypeInterop,
- kHsaQueuePriorityMaximum,
- kHsaQueueFractionTen,
- &interopQueue_)) {
- LogError("Error creating hsa interop queue");
- return false;
- }
-
- device::BlitManager::Setup blitSetup;
- blitMgr_ = new KernelBlitManager(*this, blitSetup);
- if ((NULL == blitMgr_) || !blitMgr_->create(oclhsa_device_)) {
- LogError("Could not create BlitManager!");
- return false;
- }
-
- return true;
-}
-
-bool
-VirtualGPU::terminate()
-{
- delete blitMgr_;
-
- // Release the resources of signal
- releaseGpuMemoryFence();
-
- // Close the user mode queue
- if (interopQueue_) {
- hsacoreapi->HsaDestroyUserModeQueue(interopQueue_);
- }
- hsacoreapi->HsaDestroyUserModeQueue(gpu_queue_);
-
- return true;
-}
-
-void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand &cmd)
-{
- device::Memory *devMem = cmd.source().getDeviceMemory(dev());
- void *dst = cmd.destination();
- amd::Coord3D size = cmd.size();
-
- //! @todo: add multi-devices synchronization when supported.
-
- cl_command_type type = cmd.type();
- bool result = false;
- bool imageBuffer = false;
-
- // Force buffer read for IMAGE1D_BUFFER
- if ((type == CL_COMMAND_READ_IMAGE) &&
- (cmd.source().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
- type = CL_COMMAND_READ_BUFFER;
- imageBuffer = true;
- }
-
- profilingBegin(cmd);
-
- switch (type) {
- case CL_COMMAND_READ_BUFFER: {
- amd::Coord3D origin(cmd.origin()[0]);
- if (imageBuffer) {
- size_t elemSize =
- cmd.source().asImage()->getImageFormat().getElementSize();
- origin.c[0] *= elemSize;
- size.c[0] *= elemSize;
- }
- result = blitMgr().readBuffer(
- *devMem, dst, origin, size,
- cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_READ_BUFFER_RECT: {
- result = blitMgr().readBufferRect(
- *devMem, dst, cmd.bufRect(), cmd.hostRect(), size,
- cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_READ_IMAGE: {
- result = blitMgr().readImage(
- *devMem, dst, cmd.origin(), size, cmd.rowPitch(),
- cmd.slicePitch(), cmd.isEntireMemory());
- break;
- }
- default:
- ShouldNotReachHere();
- break;
- }
-
- profilingEnd(cmd);
-
- if (!result) {
- LogError("submitReadMemory failed!");
- cmd.setStatus(CL_OUT_OF_RESOURCES);
- }
-}
-
-void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand &cmd)
-{
- device::Memory *devMem = cmd.destination().getDeviceMemory(dev());
- const char *src = static_cast(cmd.source());
- amd::Coord3D size = cmd.size();
-
- //! @todo add multi-devices synchronization when supported.
-
- cl_command_type type = cmd.type();
- bool result = false;
- bool imageBuffer = false;
-
- // Force buffer write for IMAGE1D_BUFFER
- if ((type == CL_COMMAND_WRITE_IMAGE) &&
- (cmd.destination().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
- type = CL_COMMAND_WRITE_BUFFER;
- imageBuffer = true;
- }
-
- profilingBegin(cmd);
-
- switch (type) {
- case CL_COMMAND_WRITE_BUFFER: {
- amd::Coord3D origin(cmd.origin()[0]);
- if (imageBuffer) {
- size_t elemSize =
- cmd.destination().asImage()->getImageFormat().getElementSize();
- origin.c[0] *= elemSize;
- size.c[0] *= elemSize;
- }
- result = blitMgr().writeBuffer(
- src, *devMem , origin, size,
- cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_WRITE_BUFFER_RECT: {
- result = blitMgr().writeBufferRect(
- src, *devMem, cmd.hostRect(), cmd.bufRect(), size,
- cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_WRITE_IMAGE: {
- result = blitMgr().writeImage(
- src, *devMem, cmd.origin(), size, cmd.rowPitch(),
- cmd.slicePitch(), cmd.isEntireMemory());
- break;
- }
- default:
- ShouldNotReachHere();
- break;
- }
-
- if (!result) {
- LogError("submitWriteMemory failed!");
- cmd.setStatus(CL_OUT_OF_RESOURCES);
- }
- else {
- cmd.destination().signalWrite(&dev());
- }
-
- profilingEnd(cmd);
-}
-
-void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand &cmd)
-{
- device::Memory *srcDevMem = cmd.source().getDeviceMemory(dev());
- device::Memory *destDevMem = cmd.destination().getDeviceMemory(dev());
- amd::Coord3D size = cmd.size();
-
- //! @todo add multi-devices synchronization when supported.
-
- cl_command_type type = cmd.type();
- bool result = false;
- bool srcImageBuffer = false;
- bool dstImageBuffer = false;
-
- // Force buffer copy for IMAGE1D_BUFFER
- if (cmd.source().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- srcImageBuffer = true;
- type = CL_COMMAND_COPY_BUFFER;
- }
- if (cmd.destination().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- dstImageBuffer = true;
- type = CL_COMMAND_COPY_BUFFER;
- }
-
- profilingBegin(cmd);
-
- switch (cmd.type()) {
- case CL_COMMAND_COPY_BUFFER: {
- amd::Coord3D srcOrigin(cmd.srcOrigin()[0]);
- amd::Coord3D dstOrigin(cmd.dstOrigin()[0]);
-
- if (srcImageBuffer) {
- const size_t elemSize =
- cmd.source().asImage()->getImageFormat().getElementSize();
- srcOrigin.c[0] *= elemSize;
- if (dstImageBuffer) {
- dstOrigin.c[0] *= elemSize;
- }
- size.c[0] *= elemSize;
- }
- else if (dstImageBuffer) {
- const size_t elemSize =
- cmd.destination().asImage()->getImageFormat().getElementSize();
- dstOrigin.c[0] *= elemSize;
- size.c[0] *= elemSize;
- }
-
- result = blitMgr().copyBuffer(
- *srcDevMem, *destDevMem, srcOrigin,
- dstOrigin, size, cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_COPY_BUFFER_RECT: {
- result = blitMgr().copyBufferRect(
- *srcDevMem, *destDevMem, cmd.srcRect(),
- cmd.dstRect(), size, cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_COPY_IMAGE: {
- result = blitMgr().copyImage(
- *srcDevMem, *destDevMem, cmd.srcOrigin(),
- cmd.dstOrigin(), size, cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_COPY_IMAGE_TO_BUFFER: {
- result = blitMgr().copyImageToBuffer(
- *srcDevMem, *destDevMem, cmd.srcOrigin(),
- cmd.dstOrigin(), size, cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_COPY_BUFFER_TO_IMAGE: {
- result = blitMgr().copyBufferToImage(
- *srcDevMem, *destDevMem, cmd.srcOrigin(),
- cmd.dstOrigin(), size, cmd.isEntireMemory());
- break;
- }
- default:
- ShouldNotReachHere();
- break;
- }
-
- if (!result) {
- LogError("submitCopyMemory failed!");
- cmd.setStatus(CL_OUT_OF_RESOURCES);
- }
-
- profilingEnd(cmd);
-
- cmd.destination().signalWrite(&dev());
-}
-
-void VirtualGPU::submitMapMemory(amd::MapMemoryCommand &cmd)
-{
- //! @todo add multi-devices synchronization when supported.
-
- profilingBegin(cmd);
-
- device::Memory *devMemory = cmd.memory().getDeviceMemory(dev(), false);
-
- cl_command_type type = cmd.type();
- bool imageBuffer = false;
-
- // Force buffer read for IMAGE1D_BUFFER
- if ((type == CL_COMMAND_MAP_IMAGE) &&
- (cmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
- type = CL_COMMAND_MAP_BUFFER;
- imageBuffer = true;
- }
-
- cl_map_flags mapFlag = cmd.mapFlags();
-
- // Treat no map flag as read-write.
- if (mapFlag == 0) {
- mapFlag = CL_MAP_READ | CL_MAP_WRITE;
- }
-
- // Save map write requirement.
- if (mapFlag & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) {
- devMemory->saveMapInfo(cmd.origin(), cmd.size(),
- mapFlag, cmd.isEntireMemory());
- }
-
- // Sync to the map target.
- if ((!devMemory->isHostMemDirectAccess()) &&
- (mapFlag & (CL_MAP_READ | CL_MAP_WRITE))) {
- bool result = false;
-
- oclhsa::Memory *hsaMemory = static_cast(devMemory);
-
- amd::Memory* mapMemory = hsaMemory->mapMemory();
- void *hostPtr = mapMemory == NULL ?
- hsaMemory->owner()->getHostMem() :
- mapMemory->getHostMem();
-
- if (type == CL_COMMAND_MAP_BUFFER) {
- amd::Coord3D origin(cmd.origin()[0]);
- amd::Coord3D size(cmd.size()[0]);
- if (imageBuffer) {
- size_t elemSize =
- cmd.memory().asImage()->getImageFormat().getElementSize();
- origin.c[0] *= elemSize;
- size.c[0] *= elemSize;
- }
- result = blitMgr().readBuffer(
- *hsaMemory,
- static_cast(hostPtr) + origin[0],
- origin,
- size,
- cmd.isEntireMemory());
- }
- else if (type == CL_COMMAND_MAP_IMAGE) {
- amd::Image* image = cmd.memory().asImage();
- result = blitMgr().readImage(
- *hsaMemory, hostPtr, amd::Coord3D(0),
- image->getRegion(), image->getRowPitch(),
- image->getSlicePitch(), true);
- }
- else {
- ShouldNotReachHere();
- }
-
- if (!result) {
- LogError("submitMapMemory failed!");
- cmd.setStatus(CL_OUT_OF_RESOURCES);
- }
- }
-
- profilingEnd(cmd);
-}
-
-void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand &cmd)
-{
- profilingBegin(cmd);
-
- device::Memory *devMemory = cmd.memory().getDeviceMemory(dev(), false);
-
- // Force buffer write for IMAGE1D_BUFFER
- bool imageBuffer =
- (cmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER);
-
- if (devMemory->isUnmapWrite()) {
- // Commit the changes made by the user.
- if (!devMemory->isHostMemDirectAccess()) {
- bool result = false;
-
- if (cmd.memory().asImage() && !imageBuffer) {
- amd::Image *image = cmd.memory().asImage();
- result = blitMgr().writeImage(
- cmd.mapPtr(), *devMemory,
- devMemory->writeMapInfo()->origin_,
- devMemory->writeMapInfo()->region_,
- image->getRowPitch(), image->getSlicePitch());
- }
- else {
- amd::Coord3D origin(devMemory->writeMapInfo()->origin_[0]);
- amd::Coord3D size(devMemory->writeMapInfo()->region_[0]);
- if (imageBuffer) {
- size_t elemSize =
- cmd.memory().asImage()->getImageFormat().getElementSize();
- origin.c[0] *= elemSize;
- size.c[0] *= elemSize;
- }
- result = blitMgr().writeBuffer(
- cmd.mapPtr(), *devMemory,
- origin,
- size);
- }
-
- if (!result) {
- LogError("submitMapMemory failed!");
- cmd.setStatus(CL_OUT_OF_RESOURCES);
- }
- }
-
- devMemory->clearUnmapFlags();
-
- cmd.memory().signalWrite(&dev());
- }
-
- profilingEnd(cmd);
-}
-
-void VirtualGPU::submitFillMemory(amd::FillMemoryCommand &cmd)
-{
- device::Memory *devMemory = cmd.memory().getDeviceMemory(dev(), false);
-
- //! @todo add multi-devices synchronization when supported.
-
- cl_command_type type = cmd.type();
- bool result = false;
- bool imageBuffer = false;
- float fillValue[4];
-
- // Force fill buffer for IMAGE1D_BUFFER
- if ((type == CL_COMMAND_FILL_IMAGE) &&
- (cmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
- type = CL_COMMAND_FILL_BUFFER;
- imageBuffer = true;
- }
-
- profilingBegin(cmd);
-
- // Find the the right fill operation
- switch (type) {
- case CL_COMMAND_FILL_BUFFER: {
- const void* pattern = cmd.pattern();
- size_t patternSize = cmd.patternSize();
- amd::Coord3D origin(cmd.origin()[0]);
- amd::Coord3D size(cmd.size()[0]);
- // Reprogram fill parameters if it's an IMAGE1D_BUFFER object
- if (imageBuffer) {
- size_t elemSize =
- cmd.memory().asImage()->getImageFormat().getElementSize();
- origin.c[0] *= elemSize;
- size.c[0] *= elemSize;
- memset(fillValue, 0, sizeof(fillValue));
- cmd.memory().asImage()->getImageFormat().formatColor(pattern, fillValue);
- pattern = fillValue;
- patternSize = elemSize;
- }
- result = blitMgr().fillBuffer(
- *devMemory, pattern, patternSize, origin, size,
- cmd.isEntireMemory());
- break;
- }
- case CL_COMMAND_FILL_IMAGE: {
- result = blitMgr().fillImage(
- *devMemory, cmd.pattern(), cmd.origin(), cmd.size(),
- cmd.isEntireMemory());
- break;
- }
- default:
- ShouldNotReachHere();
- break;
- }
-
- if (!result) {
- LogError("submitFillMemory failed!");
- cmd.setStatus(CL_OUT_OF_RESOURCES);
- }
-
- cmd.memory().signalWrite(&dev());
-
- profilingEnd(cmd);
-}
-
-void VirtualGPU::submitMigrateMemObjects(amd::MigrateMemObjectsCommand &vcmd)
-{
- // Wait on a kernel if one is outstanding
- releaseGpuMemoryFence();
-
- profilingBegin(vcmd);
-
- std::vector::const_iterator itr;
-
- for (itr = vcmd.memObjects().begin();
- itr != vcmd.memObjects().end();
- itr++) {
- // Find device memory
- device::Memory *m = (*itr)->getDeviceMemory(dev());
- oclhsa::Memory *memory = static_cast(m);
-
- if (vcmd.migrationFlags() & CL_MIGRATE_MEM_OBJECT_HOST) {
- //! @todo revisit this when multi devices is supported.
- } else if (vcmd.migrationFlags() &
- CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
- //! @todo revisit this when multi devices is supported.
- } else {
- LogWarning("Unknown operation for memory migration!");
- }
- }
-
- profilingEnd(vcmd);
-}
-
-HsaStatus VirtualGPU::getDispatchConfig(uint32_t lds_size,
- bool profile_enable,
- HsaDispatchConfig* config,
- const amd::NDRangeContainer& sizes,
- const amd::Kernel& kernel)
-{
- uint32_t idx;
- uint32_t dimensions;
-
- //Used to detect whether runtime implemetation should
- //set up the work group size
- bool overrideLwgSize = true;
-
- device::Kernel *devKernel = const_cast
- (kernel.getDeviceKernel(dev()));
-
- // Initialize the work grid parameter
- for (idx = 0; idx < 3; idx++) {
- config->local_work_size.dimension[idx] = 1;
- config->global_work_size.dimension[idx] = 1;
- config->global_work_offset.dimension[idx] = 0;
- }
-
- // Retrieve user provided work grid values
- dimensions = sizes.dimensions();
- amd::NDRange local(sizes.local());
- amd::NDRange global(sizes.global());
- amd::NDRange offset(sizes.offset());
-
- // Update the work grid with user provided values
- for (idx = 0; idx < dimensions; idx++) {
- config->global_work_size.dimension[idx] = global[idx];
-
- config->global_work_offset.dimension[idx] = offset[idx];
-
- //if reqd_work_group_size is set use that
- //otherwise use the ones passed into NDRange
- //In both cases, no need to further override work group size
- if (devKernel->workGroupInfo()->compileSize_[idx]) {
- config->local_work_size.dimension[idx] =
- devKernel->workGroupInfo()->compileSize_[idx];
- overrideLwgSize = false;
- }
- else if (local[idx]) {
- config->local_work_size.dimension[idx] = local[idx];
- overrideLwgSize = false;
- }
- }
-
- //If true, set work group sizes
- if (overrideLwgSize) {
- if (dimensions == 1) {
- config->local_work_size.dimension[0] =
- dev().settings().maxWorkGroupSize_;
- }
- else if (dimensions == 2) {
- config->local_work_size.dimension[0] =
- dev().settings().maxWorkGroupSize2DX_;
- config->local_work_size.dimension[1] =
- dev().settings().maxWorkGroupSize2DY_;
- }
- else if (dimensions == 3) {
- config->local_work_size.dimension[0] =
- dev().settings().maxWorkGroupSize3DX_;
- config->local_work_size.dimension[1] =
- dev().settings().maxWorkGroupSize3DY_;
- config->local_work_size.dimension[2] =
- dev().settings().maxWorkGroupSize3DZ_;
- }
- else {
- assert("Invalid Work Dimensions");
- }
- }
- // Update Local Data Store and Profiling parameters
- config->lds_size = lds_size;
- config->work_dimensions = dimensions;
- config->profile = profile_enable;
- return kHsaStatusSuccess;
-}
-
-HsaStatus VirtualGPU::synchronizeInterQueueKernels(HsaQueue *dispatchQueue) {
-
- // Determine current kernel type based on queue used to submit
- HsaQueueType currQueue = (dispatchQueue == gpu_queue_) ?
- kHsaQueueTypeCompute : kHsaQueueTypeInterop;
-
- // An outstanding kernel exists, a new one can be submitted
- // as long as it belongs to the same class of queue type
- if (lastSubmitQueue_ == currQueue) {
- return kHsaStatusSuccess;
- }
-
- // If there is no outstanding kernel, a new one can be
- // submitted unconditionally
- if (lastSubmitQueue_ == 0xFFFF) {
- lastSubmitQueue_ = currQueue;
- return kHsaStatusSuccess;
- }
-
- // Current kernel submit cannot occur until all outstanding
- // kernels on the queue type have completed.
- releaseGpuMemoryFence();
- lastSubmitQueue_ = currQueue;
- return kHsaStatusSuccess;
-}
-
-/*! \brief Writes to the buffer and incrememts the write pointer to the
- * buffer. Also, ensures that the argument is written to an
- * aligned memory as specified
- *
- * @param dst The write pointer to the buffer
- * @param src The source pointer
- * @param size The size in bytes to copy
- * @param alignment The alignment to follow while writing to the buffer
- */
-static void
-addArg(unsigned char** dst, const void* src,
- size_t size, uint32_t alignment)
-{
- *dst = amd::alignUp(*dst, alignment);
- memcpy(*dst, src, size);
- *dst += size;
-}
-
-static inline void
-addArg(unsigned char** dst, const void* src, size_t size)
-{
- assert(size < UINT32_MAX);
- addArg(dst, src, size, size);
-}
-
-static void
-fillSampleDescriptor(HsaSamplerDescriptor& samplerDescriptor,
- const amd::Sampler& sampler)
-{
- samplerDescriptor.filterType = sampler.filterMode() == CL_FILTER_NEAREST ?
- HSA_SAMP_FILTER_NEAREST : HSA_SAMP_FILTER_LINEAR;
- samplerDescriptor.coordinateMode = sampler.normalizedCoords() ?
- HSA_SAMP_COORDINATE_NORMALIZED : HSA_SAMP_COORDINATE_UNNORMALIZED;
- HsaSamplerAddressMode mode = HSA_SAMP_ADDRESS_NONE;
- switch (sampler.addressingMode()) {
- case CL_ADDRESS_CLAMP_TO_EDGE:
- mode = HSA_SAMP_ADDRESS_CLAMPEDGE;
- break;
- case CL_ADDRESS_REPEAT:
- mode = HSA_SAMP_ADDRESS_WRAP;
- break;
- case CL_ADDRESS_CLAMP:
- mode = HSA_SAMP_ADDRESS_CLAMPBORDER;
- break;
- case CL_ADDRESS_MIRRORED_REPEAT:
- mode = HSA_SAMP_ADDRESS_MIRROR;
- break;
- case CL_ADDRESS_NONE:
- mode = HSA_SAMP_ADDRESS_MIRRORONCE;
- break;
- default:
- return;
- }
- samplerDescriptor.addressModeX = mode;
- samplerDescriptor.addressModeY = mode;
- samplerDescriptor.addressModeZ = mode;
-}
-
-bool
-VirtualGPU::submitKernelInternal(
- const amd::NDRangeContainer& sizes,
- const amd::Kernel& kernel,
- const_address parameters,
- void *eventHandle)
-{
- device::Kernel *devKernel = const_cast
- (kernel.getDeviceKernel(dev()));
- Kernel &gpuKernel = static_cast(*devKernel);
- HsaKernelCode *kernelCode = const_cast(gpuKernel.kernelCode());
- const size_t compilerLdsUsage = kernelCode->workgroup_group_segment_byte_size;
- size_t ldsUsage = compilerLdsUsage;
- bool useInteropQueue = false;
-
- // Allocate buffer to hold kernel arguments
- address argBuffer = NULL;
- HsaStatus status = servicesapi->HsaAllocateSystemMemory(
- kernelCode->kernarg_segment_byte_size, 256,
- kHsaSystemMemoryTypeUncached, reinterpret_cast(&argBuffer));
- if (status != kHsaStatusSuccess) {
- LogError("Out of memory");
- return false;
- }
- kernelArgList_.push_back(argBuffer);
- address argPtr = argBuffer;
-
- // The HLC generates 3 additional arguments for the global offsets
- for (uint j = 0; j < Kernel::ExtraArguments; ++j) {
- const size_t offset = j < sizes.dimensions() ? sizes.offset()[j] : 0;
- addArg(&argPtr, &offset, sizeof(size_t));
- }
-
- const amd::KernelSignature& signature = kernel.signature();
- const amd::KernelParameters& kernelParams = kernel.parameters();
-
- // Find all parameters for the current kernel
- for (uint i = 0; i != signature.numParameters(); ++i) {
- const HsailKernelArg* arg = gpuKernel.hsailArgAt(i);
- const_address srcArgPtr = parameters + signature.at(i).offset_;
-
- if (arg->type_ == HSAIL_ARGTYPE_POINTER ) {
- const size_t size = sizeof(size_t);
- if (arg->addrQual_ == HSAIL_ADDRESS_LOCAL) {
- ldsUsage = amd::alignUp(ldsUsage, arg->alignment_); //!< do we need this?
- addArg(&argPtr, &ldsUsage, size);
- ldsUsage += *reinterpret_cast(srcArgPtr);
- continue;
- }
- assert((arg->addrQual_ == HSAIL_ADDRESS_GLOBAL) &&
- "Unsupported address qualifier");
- if (kernelParams.boundToSvmPointer(dev(), parameters, i)) {
- addArg(&argPtr, srcArgPtr, size);
- continue;
- }
- amd::Memory* mem = *reinterpret_cast(srcArgPtr);
- if (mem == NULL) {
- addArg(&argPtr, srcArgPtr, size);
- continue;
- }
-
- Memory *devMem = static_cast(mem->getDeviceMemory(dev()));
- //! @todo add multi-devices synchronization when supported.
- void* globalAddress = devMem->getDeviceMemory();
- addArg(&argPtr, &globalAddress, size);
-
- //! @todo Compiler has to return read/write attributes
- const cl_mem_flags flags = mem->getMemFlags();
- if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
- mem->signalWrite(&dev());
- }
-
- useInteropQueue |= devMem->isHsaLocalMemory();
- }
- else if (arg->type_ == HSAIL_ARGTYPE_VALUE) {
- if (arg->dataType_ == HSAIL_DATATYPE_STRUCT) {
- void *mem = NULL;
- if (kHsaStatusSuccess != servicesapi->HsaAllocateSystemMemory(
- arg->size_, 0, kHsaSystemMemoryTypeUncached, &mem)) {
- LogError("Out of memory");
- return false;
- }
- memcpy(mem, srcArgPtr, arg->size_);
- addArg(&argPtr, &mem, sizeof(void*));
- kernelArgList_.push_back(mem);
- continue;
- }
- for (uint e = 0; e < arg->numElem_; ++e) {
- addArg(&argPtr, srcArgPtr, arg->size_);
- srcArgPtr += arg->size_;
- }
- }
- else if (arg->type_ == HSAIL_ARGTYPE_IMAGE) {
- amd::Memory* mem = *reinterpret_cast(srcArgPtr);
- Image* image = static_cast(mem->getDeviceMemory(dev()));
- if (image == NULL) {
- LogError( "Kernel image argument is not an image object");
- return false;
- }
-
- // Image arguments are of size 48 bytes and are aligned to 16 bytes
- addArg(&argPtr, image->getHsaImageObjectAddress(),
- HSA_IMAGE_OBJECT_SIZE, HSA_IMAGE_OBJECT_ALIGNMENT);
-
- //! @todo Compiler has to return read/write attributes
- const cl_mem_flags flags = mem->getMemFlags();
- if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
- mem->signalWrite(&dev());
- }
-
- useInteropQueue |= image->isHsaLocalMemory();
- }
- else {
- assert((arg->type_ == HSAIL_ARGTYPE_SAMPLER) &&
- "Unsupported address type");
- amd::Sampler* sampler = *reinterpret_cast(srcArgPtr);
- if (sampler == NULL) {
- LogError("Kernel sampler argument is not an sampler object");
- return false;
- }
-
- HsaSamplerDescriptor samplerDescriptor;
- fillSampleDescriptor(samplerDescriptor, *sampler);
-
- argPtr = amd::alignUp(argPtr, HSA_SAMPLER_OBJECT_ALIGNMENT);
- status = hsacoreapi->HsaCreateDeviceSampler(dev().getBackendDevice(),
- &samplerDescriptor, argPtr);
- if (status != kHsaStatusSuccess) {
- LogError("Error creating device sampler object!");
- return false;
- }
- argPtr += HSA_SAMPLER_OBJECT_SIZE;
- }
- }
-
- // Check there is no arguments' buffer overflow
- assert(argPtr <= argBuffer + kernelCode->kernarg_segment_byte_size);
-
- // Check for group memory overflow
- //! @todo Check should be in HSA - here we should have at most an assert
- if (ldsUsage > gpu_device_->group_memory_size) {
- LogError("No local memory available\n");
- return false;
- }
-
- HsaQueue *queue = useInteropQueue ? interopQueue_ : gpu_queue_;
-
- // Set the acl_binary and ocl event for possible debugger use
- if (eventHandle != NULL) {
- const HsaDevice *device = queue->device;
- servicesapi->HsaDebuggerCorrelationHandler(device, eventHandle);
- assert(gpuKernel.brig()->loadmap_section != NULL);
- void * acl_binary =
- reinterpret_cast(gpuKernel.brig()->loadmap_section);
- servicesapi->HsaSetAclBinary(device,
- const_cast(gpuKernel.program()->binaryElf()));
- }
-
- // Obtain handle to an instance of Dispatch configuration object
- HsaDispatchConfig config;
- bool profilingEnable = timestamp_ != NULL;
- status = getDispatchConfig(ldsUsage - compilerLdsUsage, profilingEnable,
- &config, sizes, kernel);
- if (status != kHsaStatusSuccess) {
- LogError("Call to HsaPopulateDispatchConfig failed.\n");
- return false;
- }
-
- // Determine if enqueue must wait on last kernel submit
- status = synchronizeInterQueueKernels(queue);
- if (status != kHsaStatusSuccess) {
- LogError("synchronizeInterQueueKernels failed");
- return false;
- }
-
- // Create a signal object to monitor kernel completion when needed
- HsaSignal signal = profilingEnable ? timestamp_->createSignal() : 0;
- status = servicesapi->HsaDispatchKernel(queue, signal, kernelCode, &config,
- (uint64_t*)argBuffer, 1);
- if (status != kHsaStatusSuccess) {
- LogError("Call to HsaDispatchKernel failed.\n");
- return false;
- }
-
- // Mark the flag indicating if a dispatch is outstanding
- hasPendingDispatch_ = true;
- return true;
-}
-/**
- * @brief Api to dispatch a kernel for execution. The implementation
- * parses the input object, an instance of virtual command to obtain
- * the parameters of global size, work group size, offsets of work
- * items, enable/disable profiling, etc.
- *
- * It also parses the kernel arguments buffer to inject into Hsa Runtime
- * the list of kernel parameters.
- */
-void VirtualGPU::submitKernel(amd::NDRangeKernelCommand &vcmd) {
- profilingBegin(vcmd);
-
- // Submit kernel to HW
- if (!submitKernelInternal(
- vcmd.sizes(), vcmd.kernel(), vcmd.parameters(),
- static_cast(as_cl(&vcmd.event())))) {
- vcmd.setStatus(CL_INVALID_OPERATION);
- }
-
- profilingEnd(vcmd);
-}
-
-void VirtualGPU::submitNativeFn(amd::NativeFnCommand &cmd) {
- // std::cout<<__FUNCTION__<<" not implemented"<<"*********"<::const_iterator it = vcmd.getMemList().begin();
- amd::InteropObject *interop;
- std::vector d3d10Resources;
- std::vector d3d11Resources;
- amd::D3D10Object *d3d10Obj;
- amd::D3D11Object *d3d11Obj;
-
- for (std::vector::const_iterator it =
- vcmd.getMemList().begin();
- it != vcmd.getMemList().end(); it++) {
- // amd::Memory object should never be NULL
- assert(*it && "Memory object for interop is NULL");
-
- device::Memory *m = (*it)->getDeviceMemory(dev());
- oclhsa::Memory *memory = static_cast(m);
-
- interop = (*it)->getInteropObj();
- // [TODO]: Check if this is need in case of HSA.
-
- if (interop) {
- d3d10Obj = interop->asD3D10Object();
- if (d3d10Obj != NULL) {
- if (d3d10Obj->getD3D10ResOrig() != NULL) {
- // Resource is a shared copy of original resource
- // Need to copy data from original resource
- d3d10Obj->copyOrigToShared();
- }
- assert(d3d10Obj->getD3D10Resource() != NULL);
- d3d10Resources.push_back(d3d10Obj->getD3D10Resource());
- }
-
- d3d11Obj = interop->asD3D11Object();
- if (d3d11Obj != NULL) {
- if (d3d11Obj->getD3D11ResOrig() != NULL) {
- // Resource is a shared copy of original resource
- // Need to copy data from original resource
- d3d11Obj->copyOrigToShared();
- }
- assert(d3d11Obj->getD3D11Resource() != NULL);
- d3d11Resources.push_back(d3d11Obj->getD3D11Resource());
- }
- }
-
- } //end of for loop
-
- if (!d3d10Resources.empty()) {
- HsaStatus status = hsacoreapi->HsaAcquireD3D10Resources(gpu_device_,
- &d3d10Resources[0],
- d3d10Resources.size());
- if (status != kHsaStatusSuccess) {
- LogError("HsaAcquireD3D10Resources - failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
-
- if (!d3d11Resources.empty()) {
- HsaStatus status = hsacoreapi->HsaAcquireD3D11Resources(gpu_device_,
- &d3d11Resources[0],
- d3d11Resources.size());
- if (status != kHsaStatusSuccess) {
- LogError("HsaAcquireD3D11Resources - failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
-#endif
-
- profilingEnd(vcmd);
-}
-
-void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand &vcmd) {
-
- // Wait on a kernel if one is outstanding
- releaseGpuMemoryFence();
-
- profilingBegin(vcmd);
- std::vector::const_iterator it = vcmd.getMemList().begin();
-
- amd::InteropObject *interop;
-
-#ifdef _WIN32
- std::vector d3d10Resources;
- std::vector d3d11Resources;
-
- amd::D3D10Object *d3d10Obj;
- amd::D3D11Object *d3d11Obj;
-
- for (std::vector::const_iterator it =
- vcmd.getMemList().begin();
- it != vcmd.getMemList().end(); it++) {
- // amd::Memory object should never be NULL
- assert(*it && "Memory object for interop is NULL");
-
- device::Memory *m = (*it)->getDeviceMemory(dev());
- oclhsa::Memory *memory = static_cast(m);
- interop = (*it)->getInteropObj();
-
- if (interop) {
- d3d10Obj = interop->asD3D10Object();
- if (d3d10Obj != NULL) {
- if (d3d10Obj->getD3D10ResOrig() != NULL) {
- // Resource is a shared copy of original resource
- // Need to copy data from original resource
- d3d10Obj->copySharedToOrig();
- }
- assert(d3d10Obj->getD3D10Resource() != NULL);
- d3d10Resources.push_back(d3d10Obj->getD3D10Resource());
- }
-
- d3d11Obj = interop->asD3D11Object();
- if (d3d11Obj != NULL) {
- if (d3d11Obj->getD3D11ResOrig() != NULL) {
- // Resource is a shared copy of original resource
- // Need to copy data from original resource
- d3d11Obj->copySharedToOrig();
- }
- assert(d3d11Obj->getD3D11Resource() != NULL);
- d3d11Resources.push_back(d3d11Obj->getD3D11Resource());
- }
- }
- }
-
- if (!d3d10Resources.empty()) {
- HsaStatus status = hsacoreapi->HsaReleaseD3D10Resources(gpu_device_,
- &d3d10Resources[0],
- d3d10Resources.size());
- if (status != kHsaStatusSuccess) {
- LogError("HsaReleaseD3D10Resources - failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
-
- if (!d3d11Resources.empty()) {
- HsaStatus status = hsacoreapi->HsaReleaseD3D11Resources(gpu_device_,
- &d3d11Resources[0],
- d3d11Resources.size());
- if (status != kHsaStatusSuccess) {
- LogError("HsaReleaseD3D11Resources - failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
-#endif // _WIN32
-
- profilingEnd(vcmd);
-}
-
-void
-VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd)
-{
- // in-order semantics: previous commands need to be done before we start
- releaseGpuMemoryFence();
-
- profilingBegin(cmd);
- const std::vector& svmPointers = cmd.svmPointers();
- if (cmd.pfnFreeFunc() == NULL) {
- // pointers allocated using clSVMAlloc
- for (cl_uint i = 0; i < svmPointers.size(); i++) {
- amd::SvmBuffer::free(cmd.context(), svmPointers[i]);
- }
- }
- else {
- cmd.pfnFreeFunc()(as_cl(cmd.queue()->asCommandQueue()), svmPointers.size(),
- (void**) (&(svmPointers[0])), cmd.userData());
- }
- profilingEnd(cmd);
-}
-
-void
-VirtualGPU::submitSvmCopyMemory(amd::SvmCopyMemoryCommand& cmd)
-{
- releaseGpuMemoryFence();
- profilingBegin(cmd);
- SvmBuffer::memFill(cmd.dst(), cmd.src(), cmd.srcSize(), 1);
- profilingEnd(cmd);
-}
-
-void
-VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd)
-{
- releaseGpuMemoryFence();
- profilingBegin(cmd);
- SvmBuffer::memFill(cmd.dst(), cmd.pattern(), cmd.patternSize(), cmd.times());
- profilingEnd(cmd);
-}
-
-void
-VirtualGPU::submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd)
-{
- // no fence is needed since this is a no-op: the command will be completed
- // only after all the previous commands are complete
- profilingBegin(cmd);
- profilingEnd(cmd);
-}
-
-void
-VirtualGPU::submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd)
-{
- // no fence is needed since this is a no-op: the command will be completed
- // only after all the previous commands are complete
- profilingBegin(cmd);
- profilingEnd(cmd);
-}
-
-void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand &vcmd) {
-
- // Wait on a kernel if one is outstanding
- releaseGpuMemoryFence();
-
- HsaPmu hsaPmu = NULL;
- HsaStatus status;
- const amd::PerfCounterCommand::PerfCounterList counters = vcmd.getCounters();
- for (uint i = 0; i < vcmd.getNumCounters(); ++i) {
- amd::PerfCounter* amdCounter =
- static_cast(counters[i]);
- const PerfCounter* counter =
- reinterpret_cast(amdCounter->getDeviceCounter());
-
- // Make sure we have a valid gpu performance counter
- if (NULL == counter) {
- if (hsaPmu == NULL) {
- status = servicesapi->HsaCreatePmu(gpu_device_, &hsaPmu);
- if (status != kHsaStatusSuccess) {
- LogError("HsaCreatePmu - failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
-
- amd::PerfCounter::Properties prop = amdCounter->properties();
- PerfCounter* hsaCounter = new PerfCounter(
- gpu_device_,
- *this,
- prop[CL_PERFCOUNTER_GPU_BLOCK_INDEX],
- prop[CL_PERFCOUNTER_GPU_COUNTER_INDEX],
- prop[CL_PERFCOUNTER_GPU_EVENT_INDEX]);
- if (NULL == hsaCounter) {
- LogError("We failed to allocate memory for the GPU perfcounter");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- else if (hsaCounter->create(hsaPmu)) {
- amdCounter->setDeviceCounter(hsaCounter);
- }
- else {
- LogPrintfError("We failed to allocate a perfcounter in Hsa.\
- Block: %d, counter: #d, event: %d",
- hsaCounter->info()->blockIndex_,
- hsaCounter->info()->counterIndex_,
- hsaCounter->info()->eventIndex_);
- delete hsaCounter;
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- counter = NULL;
- }
- }
-
- if (vcmd.getState() == amd::PerfCounterCommand::Begin) {
- hsaPmu = NULL;
- for (uint i = 0; i < vcmd.getNumCounters(); ++i) {
- amd::PerfCounter* amdCounter =
- static_cast(counters[i]);
- const PerfCounter* counter =
- static_cast(amdCounter->getDeviceCounter());
-
- if (hsaPmu != counter->getCounterPmu()) {
- hsaPmu = counter->getCounterPmu();
- status = servicesapi->HsaPmuBegin(hsaPmu, gpu_queue_, true);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuBegin failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
- }
- }
- else if (vcmd.getState() == amd::PerfCounterCommand::End) {
- hsaPmu = NULL;
- for (uint i = 0; i < vcmd.getNumCounters(); ++i) {
- amd::PerfCounter* amdCounter =
- static_cast(counters[i]);
- const PerfCounter* counter =
- static_cast(amdCounter->getDeviceCounter());
-
- if (hsaPmu != counter->getCounterPmu()) {
- hsaPmu = counter->getCounterPmu();
- status = servicesapi->HsaPmuEnd(hsaPmu, gpu_queue_);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuEnd failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
-
- status = servicesapi->HsaPmuWaitForCompletion(hsaPmu, HSA_TIMEOUT_INFINITE);
- if (status != kHsaStatusSuccess) {
- LogError("HsaPmuWaitForCompletion failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
- }
- }
- }
- else {
- LogError("Unsupported performance counter state");
- vcmd.setStatus(CL_INVALID_OPERATION);
- return;
- }
-}
-
-void VirtualGPU::flush(amd::Command *list, bool wait) {
-
- /**
- * VT TODO temporarily setting the status complete at flush
- * This is not the correct way of handling completion, the
- * correct way is to either register a callback that sets
- * command status or tie-in event from higher levels to HSA
- * Event. There are no known thread safety issues if an HSA
- * event is exposed to OCL level and mapped to its event
- *
- * list->setStatus(CL_COMPLETE);
- */
- amd::Command *current = list;
-
- // Query the status of openCL kernel task i.e. is still
- // running or has completed.
- releaseGpuMemoryFence();
-
- // If profiling is enabled collect the results
- if (profilingCollectResults(list)) {
- return;
- }
-
- // The openCL task has completed successfully
- while (current != NULL) {
-
- // @note: Currently Commands coming into Hsa Runtime
- // already have their status set as CL_SUBMITTED
- // SUBMITTED -> RUNNING -> COMPLETE
- if (current->status() == CL_SUBMITTED) {
- current->setStatus(CL_RUNNING);
- current->setStatus(CL_COMPLETE);
- }
- else if (current->status() == CL_RUNNING) {
- current->setStatus(CL_COMPLETE);
- }
-
- // Get the next command in the list for updates and free current.
- amd::Command *next = current->getNext();
- current->release();
- current = next;
- }
-
- // Release the memory blocks allocated for the various
- // struct arguments of one or more kernel submissions
- std::for_each(kernelArgList_.begin(),
- kernelArgList_.end(),
- std::ptr_fun(servicesapi->HsaFreeSystemMemory));
- kernelArgList_.clear();
-
- // Reset the queue parameter
- lastSubmitQueue_ = static_cast(0xFFFF);
-}
-} // End of oclhsa namespace
diff --git a/rocclr/runtime/device/hsa/hsavirtual.hpp b/rocclr/runtime/device/hsa/hsavirtual.hpp
deleted file mode 100644
index 8ab98c05c3..0000000000
--- a/rocclr/runtime/device/hsa/hsavirtual.hpp
+++ /dev/null
@@ -1,181 +0,0 @@
-//
-// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef HSAVIRTUAL_HPP_
-#define HSAVIRTUAL_HPP_
-#include "hsadevice.hpp"
-#include "services.h"
-#include "utils/util.hpp"
-
-namespace oclhsa {
-class Device;
-
-// Timestamp for keeping track of some profiling information for various events
-// including EnqueueNDRangeKernel and clEnqueueCopyBuffer.
-class Timestamp {
-private:
- HsaSignal signal_;
- uint64_t start_;
- uint64_t end_;
-
-public:
- // get-ers
- uint64_t getStart() const { return start_; }
- uint64_t getEnd() const { return end_; }
- HsaSignal getSignal() const { return signal_; }
-
- // Default constructor
- Timestamp()
- : signal_(0),
- start_(0),
- end_(0) {}
-
- // Deconstructor, which will delete the signal if we created one
- ~Timestamp();
-
- // Creates a signal for the timestamp, saves it, and returns it
- HsaSignal createSignal();
-
- // Start a timestamp (get timestamp from OS)
- void start();
-
- // End a timestamp (get timestamp from OS)
- void end();
-};
-
-class VirtualGPU : public device::VirtualDevice {
-public:
- VirtualGPU(Device &device);
- ~VirtualGPU();
-
- bool create(HsaQueueType queueType);
- bool terminate();
-
- void profilingBegin(amd::Command &command, bool drmProfiling = false);
- const Device& dev() const { return oclhsa_device_; }
- //! End the command profiling
- void profilingEnd(amd::Command &command);
-
- //! Collect the profiling results
- bool profilingCollectResults(
- amd::Command* list //!< List of all commands in the batch.
- );
- void submitReadMemory(amd::ReadMemoryCommand& cmd);
- void submitWriteMemory(amd::WriteMemoryCommand& cmd);
- void submitCopyMemory(amd::CopyMemoryCommand& cmd);
- void submitMapMemory(amd::MapMemoryCommand& cmd);
- void submitUnmapMemory(amd::UnmapMemoryCommand& cmd);
- void submitKernel(amd::NDRangeKernelCommand& cmd);
- bool submitKernelInternal(
- const amd::NDRangeContainer& sizes, //!< Workload sizes
- const amd::Kernel& kernel, //!< Kernel for execution
- const_address parameters, //!< Parameters for the kernel
- void *event_handle //!< Handle to OCL event for debugging
- );
- void submitNativeFn(amd::NativeFnCommand& cmd);
- void submitMarker(amd::Marker& cmd);
- void submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd);
- void submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& cmd);
- void submitPerfCounter(amd::PerfCounterCommand& cmd);
- void flush(amd::Command* list = NULL, bool wait = false);
- void submitFillMemory(amd::FillMemoryCommand& cmd);
- void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
-
-// { oclhsa OpenCL integration
-// Added these stub (no-ops) implementation of pure virtual methods,
-// when integrating HSA and OpenCL branches.
-// TODO: After inegration, whoever is working on VirtualGPU should write
-// actual implemention.
- virtual void submitSignal(amd::SignalCommand &cmd) {}
- virtual void submitMakeBuffersResident(amd::MakeBuffersResidentCommand &cmd) {}
- virtual void submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd);
- virtual void submitSvmCopyMemory(amd::SvmCopyMemoryCommand& cmd);
- virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd);
- virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd);
- virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
- void submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand &cmd) {}
- void submitThreadTrace(amd::ThreadTraceCommand &vcmd) {}
-
- /**
- * @brief Waits on an outstanding kernel without regard to how
- * it was dispatched - with or without a signal
- *
- * @return bool true if Wait returned successfully, false
- * otherwise
- */
- bool releaseGpuMemoryFence();
-// } oclhsa OpenCL integration
-private:
- /**
- * @brief Retrieves the various configuration parameters that could
- * be used to execute a kernel - Enable Profiling, Sizes of Global,
- * Local work spaces, offsets for global Id, etc.
- *
- * @note: The implementation currently does not verify if the input
- * parameters for global, local and offset arrays are valid. For
- * example, it assumes that the values that are passed in conform to
- * openCL properties such as: CL_DEVICE_MAX_WORK_ITEM_SIZES,
- * CL_DEVICE_MAX_WORK_GROUP_SIZE, etc
- *
- * @param lds_size The amount of LDS memory used in the kernel.
- *
- * @param profile_enable Flag to enable kernel profiling.
- *
- * @param config Output parameter updated with various execution
- * policy paramters.
- *
- * @param sizes The work item and work group size.
- *
- * @return HsaStatus ::kHsaStatusSuccess or ::kHsaStatusError
- */
- HsaStatus getDispatchConfig(
- uint32_t lds_size,
- bool profile_enable,
- HsaDispatchConfig* config,
- const amd::NDRangeContainer& sizes,
- const amd::Kernel& kernel);
-
- /**
- * @brief Synchronize kernel submits across different queue types
- * i.e. a submit to compute kernel should determine that there is no
- * outstanding kernel to another queue type, e.g. interop queue.
- * The same applies for submits to interop queues or queues of
- * another type.
- *
- * @param dispatch_queue Queue object into which the current kernel
- * would be submitted.
- *
- * @return HsaStatus ::kHsaStatusSuccess or ::kHsaStatusError
- */
- HsaStatus synchronizeInterQueueKernels(HsaQueue *dispatchQueue);
-
- /**
- * @brief Maintains the list of memory blocks allocated
- * for one or more kernel submissions
- */
- std::vector kernelArgList_;
-
- /**
- * @brief Indicates if a kernel dispatch is outstanding. This flag is
- * used to synchronized on kernel outputs.
- */
- bool hasPendingDispatch_;
-
- /**
- * @brief Maintains the queue type of the last kernel submit.
- * Submission of kernels across queue types must be coordinated
- * i.e. all outstanding kernels on one queue type must be finished
- * before kernels can be submitted onto a different queue type.
- */
- HsaQueueType lastSubmitQueue_;
-
- Timestamp* timestamp_;
- HsaDevice* gpu_device_; //!< Physical device
- HsaQueue* gpu_queue_; //!< Queue associated with a gpu
- HsaQueue* interopQueue_; //!< Interop queue associated with a gpu
- uint32_t dispatch_id_; //!< This variable must be updated atomically.
- Device& oclhsa_device_; //!< oclhsa device object
-};
-}
-#endif
diff --git a/rocclr/runtime/device/hsa/oclhsa.def b/rocclr/runtime/device/hsa/oclhsa.def
deleted file mode 100644
index ad704b16e8..0000000000
--- a/rocclr/runtime/device/hsa/oclhsa.def
+++ /dev/null
@@ -1,3 +0,0 @@
-LIBRARY OCLHSA
-EXPORTS
-
diff --git a/rocclr/runtime/device/hsa/oclhsa_common.hpp b/rocclr/runtime/device/hsa/oclhsa_common.hpp
deleted file mode 100644
index 741cebce5b..0000000000
--- a/rocclr/runtime/device/hsa/oclhsa_common.hpp
+++ /dev/null
@@ -1,26 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef _OPENCL_RUNTIME_DEVICE_HSA_OCLHSA_COMMON_HPP_
-#define _OPENCL_RUNTIME_DEVICE_HSA_OCLHSA_COMMON_HPP_
-
-#include "hsacore_symbol_loader.hpp"
-#include "services_symbol_loader.hpp"
-
-#include "hsacoreagent.h"
-#include "hsaagent.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern const HsaCoreApiTable *hsacoreapi;
-extern const HsaServicesApiTable *servicesapi;
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // header guard
diff --git a/rocclr/runtime/device/hsa/services_symbol_loader.cpp b/rocclr/runtime/device/hsa/services_symbol_loader.cpp
deleted file mode 100644
index 308a9a3b79..0000000000
--- a/rocclr/runtime/device/hsa/services_symbol_loader.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-// Implementation of the the loading of dll and loading of all the exported
-// function symbols.
-
-#include "device/hsa/services_symbol_loader.hpp"
-
-#include "runtime/thread/thread.hpp"
-#include "runtime/utils/debug.hpp"
-#include "runtime/os/os.hpp"
-
-#include
-
-#include
-
-ServicesApiSymbols* ServicesApiSymbols::instance_ = NULL;
-// services_dll_handle_ is defined in ServicesApiSymbols class.
-// This macro must be used only in member functions of ServicesApiSymbols
-// class.
-#define LOADSYMBOL(api) \
- api = (pfn_ ## api) amd::Os::getSymbol(services_dll_handle_, # api); \
- if (api == NULL) { \
- amd::log_printf(amd::LOG_ERROR, __FILE__, __LINE__, \
- "amd::Os::getSymbol() for exported func " # api " failed."); \
- amd::Os::unloadLibrary(services_dll_handle_); \
- abort(); \
- }
-
-ServicesApiSymbols::ServicesApiSymbols()
- : services_dll_name_(SERVICES_DLL_NAME) {
- services_dll_handle_ = amd::Os::loadLibrary(services_dll_name_.c_str());
- if (services_dll_handle_ == NULL) {
-// Do not print, otherwise tests fail when HSA core and services DLLs are
-// not installed, in which case only ORCA stack is initialized and it is
-// not an error
-// amd::log_printf(amd::LOG_INFO, __FILE__, __LINE__,
-//"Cannot load hsa servicese dll. HSA DLLs may not be installed on the machine."
-//" OpenCL requirement, returning without error.");
- return;
- }
-
- LOADSYMBOL(HsaGetServicesApiTable)
-}
-
-ServicesApiSymbols::~ServicesApiSymbols() {
- if (services_dll_handle_) {
- amd::Os::unloadLibrary(services_dll_handle_);
- services_dll_handle_ = NULL;
- }
-}
diff --git a/rocclr/runtime/device/hsa/services_symbol_loader.hpp b/rocclr/runtime/device/hsa/services_symbol_loader.hpp
deleted file mode 100644
index 9125d67215..0000000000
--- a/rocclr/runtime/device/hsa/services_symbol_loader.hpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-#ifndef _OPENCL_RUNTIME_DEVICE_HSA_SERVICES_SYMBOL_LOADER_HPP_
-#define _OPENCL_RUNTIME_DEVICE_HSA_SERVICES_SYMBOL_LOADER_HPP_
-
-// File: services_symbol_loader.hpp
-// The main purpose of this file (class ServicesApiSymbols), is to load the HSA
-// API function symbol HsaGetServicesApiTable() from hsaservices DLL/so module.
-// This function outputs HsaServicesApiTable which has pointers to the rest of the
-// hsaservices API functions, which should be used to invoke the API functions.
-
-#include "services.h"
-#include "hsainterop.h"
-#include "hsaagent.h"
-
-#include
-
-// In case of change in the name of hsaservices dll name, change the
-// #define SERVICES_DLL_NAME value. this is the only place the DLL name should
-// be changed or referred to.
-#define SERVICES_DLL_NAME "hsaservices" LP64_ONLY("64")
-
-// Convention: The typedefed function name must be prefixed with pfn_ indicating
-// it as pointer-to-function.
-typedef HsaStatus (*pfn_HsaGetServicesApiTable)(const HsaServicesApiTable **api_table);
-
-// Singleton ServicesApiSymbols class contains the module handle and loaded
-// symbols of one accessor API accessor function.
-// To call hsaservices API funciton, instance of this class must be used.
-// Example:
-// // In initialization code
-// const HsaServicesApiTable *servicesapi = NULL;
-// ServicesApiSymbols::Instance().HsaGetServicesApiTable(&servicesapi);
-// ...
-// ...
-// // Calling the services api.
-// servicesapi->HsaGetDevices(...);
-// servicesapi->HsaRegisterMemory(...);
-class ServicesApiSymbols {
- public:
- // Only the access function symbol is loaded, which in turn has pointers to
- // rest of the hsaservices api.
- pfn_HsaGetServicesApiTable HsaGetServicesApiTable;
- static ServicesApiSymbols& Instance() {
- if (instance_ == NULL) {
- instance_ = new ServicesApiSymbols();
- }
-
- return *instance_;
- }
- static void teardown(){
- if (instance_ != NULL){
- delete instance_;
- }
- }
- static bool IsDllLoaded(){
- return Instance().services_dll_handle_ ? true : false;
- };
-
-
- private:
-
- static ServicesApiSymbols* instance_;
- // Force singleton pattern.
- explicit ServicesApiSymbols();
- ~ServicesApiSymbols();
- ServicesApiSymbols(const ServicesApiSymbols &) {}
- const ServicesApiSymbols &operator=(const ServicesApiSymbols &) {
- return *this;
- }
-
- // Data.
- void *services_dll_handle_;
- const std::string services_dll_name_;
-};
-#endif // _OPENCL_RUNTIME_DEVICE_HSA_SERVICES_SYMBOL_LOADER_HPP_
diff --git a/rocclr/runtime/device/hsa/system_memory.h b/rocclr/runtime/device/hsa/system_memory.h
deleted file mode 100644
index 55602044db..0000000000
--- a/rocclr/runtime/device/hsa/system_memory.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//
-// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
-//
-
-/** @file */
-
-#ifndef _OPENCL_RUNTIME_DEVICE_HSA_SYSTEM_MEMORY_H_
-#define _OPENCL_RUNTIME_DEVICE_HSA_SYSTEM_MEMORY_H_
-
-#include "newcore.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-/**
- *******************************************************************************
- * @brief System memory types.
- * @details The memory option enumerations are used for specifying the various
- * configurable global system memory allocation options.
- *******************************************************************************
- */
-typedef enum {
- /**
- * Memory option used for requesting cacheable system memory.
- */
- kHsaAmdSystemMemoryTypeDefault = 0,
-
- /**
- * Memory option used for requesting system memory with caching disabled.
- */
- kHsaAmdSystemMemoryTypeUncached = 1,
-
- /**
- * Memory option used for requesting write-combined system memory.
- */
- kHsaAmdSystemMemoryTypeWriteCombined = 2,
-
- /**
- * Shortcut to get the number of supported memory type.
- */
- kHsaAmdSystemMemoryTypeCount = 3
-} HsaAmdSystemMemoryType;
-
-/**
- ****************************************************************************
- * @brief Allocate system memory accessible by all AMD devices in the platform.
- * @details The HsaAmdAllocateSystemMemory() interface is used for allocating
- * global system memory accessible (read and write) by the host and all AMD
- * devices in the platform.
- *
- * @param size The allocation size in bytes.
- * @param alignment The alignment size in bytes for the address of resulting
- * allocation. If the value is zero, no particular alignment will be applied.
- * If the value is not zero, it needs to be a power of two and minimum of
- * sizeof(void*).
- * @param type Type of system memory.
- * @param address A pointer to the location of where to return the pointer to
- * the base of the allocated region of memory.
- *
- * @return HsaStatus
- * @retval kHsaStatusSuccess The requested amount of memory was successfully
- * allocated.
- * @retval kHsaStatusOutOfMemory The implementation was unable to allocate the
- * requested amount of device memory due to memory constraints.
- * @retval kHsaStatusInvalidArgument An address of NULL was specified, the size
- * is 0 or the alignment is invalid.
- *
- * @see HsaAmdFreeSystemMemory, HsaAmdSystemMemoryType
- **************************************************************************/
-COREAPI HsaStatus HsaAmdAllocateSystemMemory(size_t size,
- size_t alignment,
- HsaAmdSystemMemoryType type,
- void **address);
-
-/**
- ****************************************************************************
- * @brief Deallocate system memory.
- * @details The HsaAmdFreeSystemMemory() interface is used for
- * deallocating global system memory that was previously allocated with
- * HsaAmdAllocateSystemMemory().
- *
- * @param address A pointer to the address to be deallocated.
- *
- * @return HsaStatus
- * @retval kHsaStatusSuccess The requested memory was successfully deallocated.
- * @retval kHsaStatusInvalidArguement An address of NULL was specified.
- *
- * @see HsaAmdAllocateSystemMemory
- ***************************************************************************
- */
-COREAPI HsaStatus HsaAmdFreeSystemMemory(void *address);
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-#endif // header guard