P4 to Git Change 1208254 by nhaustov@nhaustov_hsa on 2015/11/06 03:25:21
SWDEV-77584 - Remove old OpenCL hsa device and loader. Reviewed by: Evgeniy Mankov Testing: pre-checkin Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#128 edit ... //depot/stg/opencl/drivers/opencl/compiler/loader/Makefile#2 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/Makefile#2 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/build/Makefile#3 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/build/Makefile.libloader#11 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/hsacore_symbol_loader.cpp#3 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/hsacore_symbol_loader.hpp#3 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/loader.cpp#14 delete ... //depot/stg/opencl/drivers/opencl/compiler/loader/libloader/loader.hpp#6 delete ... //depot/stg/opencl/drivers/opencl/runtime/Makefile#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/build/Makefile.runtime#61 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#190 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/Makefile#8 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/build/Makefile#5 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/build/Makefile.oclhsa#23 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaappprofile.cpp#4 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaappprofile.hpp#4 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsabinary.cpp#8 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsabinary.hpp#5 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsablit.cpp#10 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsablit.hpp#3 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacompiler.cpp#27 delete ... 
//depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacompilerlib.cpp#13 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacompilerlib.hpp#10 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacore_symbol_loader.cpp#8 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacore_symbol_loader.hpp#8 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacounters.cpp#5 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsacounters.hpp#3 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadefs.hpp#5 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.cpp#95 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsadevice.hpp#51 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsakernel.cpp#27 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsakernel.hpp#20 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.cpp#43 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsamemory.hpp#28 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.cpp#39 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsaprogram.hpp#20 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.cpp#40 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.hpp#13 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.cpp#99 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsavirtual.hpp#29 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/oclhsa.def#2 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/oclhsa_common.hpp#4 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/services_symbol_loader.cpp#10 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/services_symbol_loader.hpp#11 delete ... 
//depot/stg/opencl/drivers/opencl/runtime/device/hsa/system_memory.h#2 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsaappprofile.cpp#2 edit
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -7,7 +7,7 @@
|
||||
#include "thread/monitor.hpp"
|
||||
|
||||
#if defined(WITH_HSA_DEVICE)
|
||||
#include "device/hsa/hsadevice.hpp"
|
||||
#include "device/hsa_foundation/hsadevice.hpp"
|
||||
extern amd::AppProfile* oclhsaCreateAppProfile();
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/appprofile.hpp"
|
||||
#include "device/hsa/hsaappprofile.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
//! Factory for the HSA application profile.
//!
//! Allocates an oclhsa::AppProfile and runs its init() step.
//!
//! \return the initialized profile (ownership passes to the caller), or NULL
//!         when the allocation yields NULL or init() reports failure.
amd::AppProfile* oclhsaCreateAppProfile()
{
    // Hold the concrete type so the failure path can destroy the object
    // without depending on the base-class destructor being virtual.
    oclhsa::AppProfile* appProfile = new oclhsa::AppProfile;

    if (appProfile == NULL) {
        return NULL;
    }

    if (!appProfile->init()) {
        // Release the half-initialized profile instead of leaking it.
        delete appProfile;
        return NULL;
    }

    return appProfile;
}
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
bool AppProfile::ParseApplicationProfile()
|
||||
{
|
||||
std::string appName("Explorer");
|
||||
|
||||
std::transform(appName.begin(), appName.end(), appName.begin(), ::tolower);
|
||||
std::transform(appFileName_.begin(), appFileName_.end(), appFileName_.begin(), ::tolower);
|
||||
|
||||
if (appFileName_.compare(appName) == 0 ) {
|
||||
hsaDeviceHint_ = CL_HSA_DISABLED_AMD;
|
||||
gpuvmHighAddr_ = false;
|
||||
noHsaInit_ = true;
|
||||
profileOverridesAllSettings_ = true;
|
||||
}
|
||||
|
||||
// Setting both bits is invalid, make it niether.
|
||||
if (hsaDeviceHint_ & CL_HSA_ENABLED_AMD
|
||||
&& hsaDeviceHint_ & CL_HSA_DISABLED_AMD) {
|
||||
hsaDeviceHint_ = 0;
|
||||
}
|
||||
|
||||
if (noHsaInit_) {
|
||||
// If no HSA initialization, then force hint flag to non-HSA device.
|
||||
// Even if this is not forced, the device selection logic will endure it.
|
||||
// After all hint flags are treated as hint only - depending on
|
||||
// availibility.
|
||||
hsaDeviceHint_ = CL_HSA_DISABLED_AMD;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,27 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef HSAAPPPROFILE_HPP_
|
||||
#define HSAAPPPROFILE_HPP_
|
||||
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
class AppProfile : public amd::AppProfile
|
||||
{
|
||||
public:
|
||||
AppProfile(): amd::AppProfile() {}
|
||||
|
||||
protected:
|
||||
//! parse application profile based on application file name
|
||||
virtual bool ParseApplicationProfile();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
|
||||
#include "hsabinary.hpp"
|
||||
#include "hsaprogram.hpp"
|
||||
#include "hsakernel.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include "os/os.hpp"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
|
||||
namespace oclhsa {
|
||||
/*
|
||||
bool
|
||||
ClBinary::loadKernels(FSAILProgram& program, NameKernelMap &kernels)
|
||||
{
|
||||
return true;
|
||||
|
||||
|
||||
const char _kernel[] = "_kernel";
|
||||
const char __FSA_[] = "__FSA_";
|
||||
const char _header[] = "_header";
|
||||
const char _fsail[] = "_fsail";
|
||||
bool hasKernels = false;
|
||||
|
||||
// TODO : jugu
|
||||
// Target should be 15 bit maximum. Should check this somewhere.
|
||||
uint32_t target = static_cast<uint32_t>(21);//dev().calTarget());
|
||||
uint16_t elf_target;
|
||||
amd::OclElf::oclElfPlatform platform;
|
||||
if (!elfIn()->getTarget(elf_target, platform) ||
|
||||
(platform != amd::OclElf::CAL_PLATFORM) ||
|
||||
((uint32_t)target != elf_target)) {
|
||||
// warning !
|
||||
// LogError("The OCL binary image loading failed: different target");
|
||||
|
||||
// LHOWES TODO: target in kannan's elf is wrong so skip this for now
|
||||
// We may want a special HSA target or a similar more substantial change.
|
||||
// return false;
|
||||
}
|
||||
|
||||
for (amd::Sym_Handle sym = elfIn()->nextSymbol(NULL);
|
||||
sym != NULL;
|
||||
sym = elfIn()->nextSymbol(sym)) {
|
||||
amd::OclElf::SymbolInfo symInfo;
|
||||
if (!elfIn()->getSymbolInfo(sym, &symInfo)) {
|
||||
LogError("LoadKernelFromElf: getSymbolInfo() fails");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string elfSymName(symInfo.sym_name);
|
||||
|
||||
const size_t offset = sizeof(__FSA_) - 1;
|
||||
if (elfSymName.compare(0, offset, __FSA_) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Assume this elfSymName is associated with a kernel name. The following code will adjust
|
||||
// If it isn't.
|
||||
const size_t suffixPos = elfSymName.rfind('_');
|
||||
bool isKernel = true; // assume it is a kernel
|
||||
std::string functionName = elfSymName.substr(sizeof(__FSA_)-1, suffixPos-(sizeof(__FSA_)-1));
|
||||
//"__OpenCL_";
|
||||
//functionName.append(elfSymName.substr(sizeof(__FSA_)-1, suffixPos-(sizeof(__FSA_)-1)));
|
||||
//functionName.append("_kernel"); // make the kernel's linkage name
|
||||
|
||||
// Find kernel in map and get its kernel representation
|
||||
NameKernelMap::iterator searchIterator = kernels.find(functionName);
|
||||
Kernel *currentKernel = 0;
|
||||
if( searchIterator == kernels.end() ) {
|
||||
// TODO: note, this will need to be decided based on the device type. As we have no CPU yet...
|
||||
//currentKernel = new Kernel(functionName);
|
||||
//kernels[functionName] = currentKernel;
|
||||
} else {
|
||||
currentKernel = static_cast<oclhsa::Kernel*>(searchIterator->second);
|
||||
}
|
||||
|
||||
|
||||
// Add info for this elf symbol into tempobj's functionNameMap[]
|
||||
if (elfSymName.compare(suffixPos, sizeof(_fsail) - 1, _fsail) == 0) {
|
||||
|
||||
assert (currentKernel->hasFSAIL() &&
|
||||
"More than one fsail symbol for a kernel");
|
||||
// LHOWES TODO: Currently this is using the section address and size because
|
||||
// we only have a single kernel and there is a bug in the current AMP compiler.
|
||||
// Kannan is working on fixing this and once we have the symbol address and size
|
||||
// correct in the metadata then we can change this and it'll work properly for
|
||||
// multiple kernels.
|
||||
std::string options("");
|
||||
std::string fsailString(symInfo.sec_addr, symInfo.sec_addr + symInfo.sec_size);
|
||||
currentKernel->setFSAIL(fsailString);
|
||||
//currentKernel->compile(options);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// LHOWES
|
||||
// Hack to assume that this is the AMP path for now
|
||||
// until we have kernel metadata we need a way to generate the parameter list.
|
||||
{
|
||||
device::Kernel::parameters_t parameterList;
|
||||
// Is AMP code
|
||||
|
||||
amd::KernelParameterDescriptor desc;
|
||||
desc.name_ = "Functor";
|
||||
desc.type_ = T_POINTER;
|
||||
|
||||
desc.size_ = sizeof(void*);
|
||||
desc.offset_ = 0;
|
||||
|
||||
// BKENDALL HACK
|
||||
desc.typeName_ = "";
|
||||
desc.typeQualifier_ = 0;
|
||||
desc.accessQualifier_ = 0;
|
||||
desc.addressQualifier_ = 0;
|
||||
// !BKENDALL HACK
|
||||
|
||||
parameterList.push_back(desc);
|
||||
// oclhsa OpenCL integration
|
||||
}
|
||||
|
||||
hasKernels = true;
|
||||
}
|
||||
|
||||
|
||||
return hasKernels;
|
||||
|
||||
}
|
||||
*/
|
||||
/*
|
||||
bool
|
||||
ClBinary::clearElfOut()
|
||||
{
|
||||
// Recreate libelf elf object
|
||||
if (!elfOut()->Clear()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Need to re-setup target
|
||||
return setElfTarget();
|
||||
}
|
||||
*/
|
||||
} // namespace oclhsa
|
||||
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
@@ -1,56 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef HSABINARY_HPP_
|
||||
#define HSABINARY_HPP_
|
||||
|
||||
#include "top.hpp"
|
||||
#include "hsadevice.hpp"
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
|
||||
typedef std::map<std::string, device::Kernel*> NameKernelMap;
|
||||
|
||||
class FSAILProgram;
|
||||
|
||||
class ClBinary : public device::ClBinary
|
||||
{
|
||||
public:
|
||||
ClBinary(const Device& dev, BinaryImageFormat bifVer = BIF_VERSION3)
|
||||
: device::ClBinary(dev, bifVer)
|
||||
{}
|
||||
|
||||
//! Destructor
|
||||
~ClBinary() {}
|
||||
|
||||
|
||||
protected:
|
||||
bool setElfTarget() {
|
||||
uint32_t target = static_cast<uint32_t>(21);//dev().calTarget());
|
||||
assert (((0xFFFF8000 & target) == 0) && "ASIC target ID >= 2^15");
|
||||
uint16_t elf_target = (uint16_t)(0x7FFF & target);
|
||||
return elfOut()->setTarget(elf_target, amd::OclElf::CAL_PLATFORM);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
ClBinary(const ClBinary&);
|
||||
|
||||
//! Disable default operator=
|
||||
ClBinary& operator=(const ClBinary&);
|
||||
|
||||
//! Returns the HSA device for this object
|
||||
const Device& dev() const { return static_cast<const Device&>(dev_); }
|
||||
|
||||
};
|
||||
|
||||
} // namespace oclhsa
|
||||
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
|
||||
#endif // HSABINARY_HPP_
|
||||
|
||||
Το diff αρχείου καταστέλλεται επειδή είναι πολύ μεγάλο
Φόρτωση Διαφορών
@@ -1,401 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef HSABLIT_HPP_
|
||||
#define HSABLIT_HPP_
|
||||
|
||||
#include "top.hpp"
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/blit.hpp"
|
||||
|
||||
/*! \addtogroup HSA Blit Implementation
|
||||
* @{
|
||||
*/
|
||||
|
||||
//! HSA Blit Manager Implementation
|
||||
namespace oclhsa {
|
||||
|
||||
class Device;
|
||||
class Kernel;
|
||||
class Memory;
|
||||
class VirtualGPU;
|
||||
|
||||
//! DMA Blit Manager
|
||||
class HsaBlitManager : public device::HostBlitManager
|
||||
{
|
||||
public:
|
||||
//! Constructor
|
||||
HsaBlitManager(
|
||||
device::VirtualDevice& vdev, //!< Virtual GPU to be used for blits
|
||||
Setup setup = Setup() //!< Specifies HW accelerated blits
|
||||
);
|
||||
|
||||
//! Destructor
|
||||
virtual ~HsaBlitManager() { }
|
||||
|
||||
//! Creates HostBlitManager object
|
||||
virtual bool create(amd::Device& device) { return true; }
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destinaiton host memory
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
virtual bool readImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBuffer(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBufferRect(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
virtual bool writeImage(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
virtual bool copyImageToBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
virtual bool copyBufferToImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
virtual bool copyImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer(
|
||||
device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills an image memory with a pattern data
|
||||
virtual bool fillImage(
|
||||
device::Memory& dstMemory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
protected:
|
||||
//! Returns the virtual GPU object
|
||||
VirtualGPU& gpu() const { return static_cast<VirtualGPU&>(vDev_); }
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
HsaBlitManager(const HsaBlitManager&);
|
||||
|
||||
//! Disable operator=
|
||||
HsaBlitManager& operator=(const HsaBlitManager&);
|
||||
|
||||
bool importExportImage(
|
||||
uint8_t* dst,
|
||||
const uint8_t* src,
|
||||
const amd::Coord3D& dstOffset,
|
||||
size_t dstRowPitch,
|
||||
size_t dstSlicePitch,
|
||||
const amd::Coord3D& srcOffset,
|
||||
size_t srcRowPitch,
|
||||
size_t srcSlicePitch,
|
||||
const amd::Coord3D& sizeToCopy,
|
||||
size_t elementSize) const;
|
||||
};
|
||||
|
||||
//! Kernel Blit Manager
|
||||
class KernelBlitManager : public HsaBlitManager
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
BlitCopyImage = 0,
|
||||
BlitCopyImage1DA,
|
||||
BlitCopyImageToBuffer,
|
||||
BlitCopyBufferToImage,
|
||||
BlitCopyBufferRect,
|
||||
BlitCopyBufferRectAligned,
|
||||
BlitCopyBuffer,
|
||||
BlitCopyBufferAligned,
|
||||
FillBuffer,
|
||||
FillImage,
|
||||
BlitTotal
|
||||
};
|
||||
|
||||
//! Constructor
|
||||
KernelBlitManager(
|
||||
device::VirtualDevice& vdev, //!< Virtual GPU to be used for blits
|
||||
Setup setup = Setup() //!< Specifies HW accelerated blits
|
||||
);
|
||||
|
||||
//! Destructor
|
||||
virtual ~KernelBlitManager();
|
||||
|
||||
//! Creates HostBlitManager object
|
||||
virtual bool create(amd::Device& device);
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destinaiton host memory
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
virtual bool readImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBuffer(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBufferRect(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
virtual bool writeImage(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
virtual bool copyImageToBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
virtual bool copyBufferToImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
virtual bool copyImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer(
|
||||
device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills an image memory with a pattern data
|
||||
virtual bool fillImage(
|
||||
device::Memory& dstMemory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
KernelBlitManager(const KernelBlitManager&);
|
||||
|
||||
//! Disable operator=
|
||||
KernelBlitManager& operator=(const KernelBlitManager&);
|
||||
|
||||
//! Creates a program for all blit operations
|
||||
bool createProgram(
|
||||
Device& device //!< Device object
|
||||
);
|
||||
|
||||
amd::Image::Format filterFormat(amd::Image::Format oldFormat) const;
|
||||
|
||||
device::Memory *createImageView(
|
||||
device::Memory &parent,
|
||||
amd::Image::Format newFormat) const;
|
||||
|
||||
amd::Context *context_; //!< A dummy context
|
||||
amd::Program *program_; //!< GPU program obejct
|
||||
amd::Kernel *kernels_[BlitTotal]; //!< GPU kernels for blit
|
||||
};
|
||||
|
||||
//! Names of the blit operations, one per KernelBlitManager enumerator and in
//! the same order (presumably the kernel names looked up in the blit program).
static const char* BlitName[KernelBlitManager::BlitTotal] = {
    "copyImage",              // BlitCopyImage
    "copyImage1DA",           // BlitCopyImage1DA
    "copyImageToBuffer",      // BlitCopyImageToBuffer
    "copyBufferToImage",      // BlitCopyBufferToImage
    "copyBufferRect",         // BlitCopyBufferRect
    "copyBufferRectAligned",  // BlitCopyBufferRectAligned
    "copyBuffer",             // BlitCopyBuffer
    "copyBufferAligned",      // BlitCopyBufferAligned
    "fillBuffer",             // FillBuffer
    "fillImage"               // FillImage
};
|
||||
|
||||
/*@}*/
|
||||
} // namespace oclhsa
|
||||
|
||||
#endif /*HSABLIT_HPP_*/
|
||||
@@ -1,163 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "os/os.hpp"
|
||||
#include "hsadevice.hpp"
|
||||
#include "hsaprogram.hpp"
|
||||
#include "hsacompilerlib.hpp"
|
||||
//#include "gpukernel.hpp"
|
||||
//#include "compiler/compiler.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include <cstdio>
|
||||
|
||||
//CLC_IN_PROCESS_CHANGE
|
||||
extern int openclFrontEnd(const char* cmdline, std::string*, std::string* typeInfo = NULL);
|
||||
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
|
||||
/* Temporary log function for the compiler library.
 *
 * Prints msg to standard output, prefixed with "Compiler Log: ".
 *
 * msg  - NUL-terminated message text; NULL is printed as an empty message
 * size - length supplied by the caller; not used, msg is printed as a C string
 */
static void logFunction(const char* msg, size_t size)
{
    (void)size;
    // Inserting a NULL char pointer into a stream is undefined behaviour,
    // so substitute an empty string.
    const char* text = (msg != NULL) ? msg : "";
    std::cout << "Compiler Log: " << text << std::endl;
}
|
||||
|
||||
static int programsCount = 0;
|
||||
|
||||
|
||||
bool
|
||||
FSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options)
|
||||
{
|
||||
|
||||
acl_error errorCode;
|
||||
aclTargetInfo target;
|
||||
target = g_complibApi._aclGetTargetInfo(LP64_SWITCH("hsail","hsail-64"),
|
||||
dev().deviceInfo().targetName_,
|
||||
&errorCode);
|
||||
|
||||
//end if asic info is ready
|
||||
// We dump the source code for each program (param: headers)
|
||||
// into their filenames (headerIncludeNames) into the TEMP
|
||||
// folder specific to the OS and add the include path while
|
||||
// compiling
|
||||
|
||||
//Find the temp folder for the OS
|
||||
std::string tempFolder = amd::Os::getEnvironment("TEMP");
|
||||
if (tempFolder.empty()) {
|
||||
tempFolder = amd::Os::getEnvironment("TMP");
|
||||
if (tempFolder.empty()) {
|
||||
tempFolder = WINDOWS_SWITCH(".","/tmp");;
|
||||
}
|
||||
}
|
||||
//Iterate through each source code and dump it into tmp
|
||||
std::fstream f;
|
||||
std::vector<std::string> headerFileNames(headers.size());
|
||||
std::vector<std::string> newDirs;
|
||||
for (size_t i = 0; i < headers.size(); ++i) {
|
||||
std::string headerPath = tempFolder;
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if ( amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(),
|
||||
end = headerIncludeName.end();
|
||||
it != end;
|
||||
++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
if (pos != std::string::npos) {
|
||||
headerPath += amd::Os::fileSeparator();
|
||||
headerPath += headerIncludeName.substr(0, pos);
|
||||
headerIncludeName = headerIncludeName.substr(pos+1);
|
||||
}
|
||||
if (!amd::Os::pathExists(headerPath)) {
|
||||
bool ret = amd::Os::createPath(headerPath);
|
||||
assert(ret && "failed creating path!");
|
||||
newDirs.push_back(headerPath);
|
||||
}
|
||||
std::string headerFullName
|
||||
= headerPath + amd::Os::fileSeparator() + headerIncludeName;
|
||||
headerFileNames[i] = headerFullName;
|
||||
f.open(headerFullName.c_str(), std::fstream::out);
|
||||
//Should we allow asserts
|
||||
assert(!f.fail() && "failed creating header file!");
|
||||
f.write(headers[i]->c_str(), headers[i]->length());
|
||||
f.close();
|
||||
}
|
||||
|
||||
//Create Binary
|
||||
binaryElf_ = g_complibApi._aclBinaryInit(sizeof(aclBinary),
|
||||
&target,
|
||||
&binOpts_,
|
||||
&errorCode);
|
||||
|
||||
if( errorCode!=ACL_SUCCESS ) {
|
||||
buildLog_ += "Error while compiling opencl source:\
|
||||
aclBinary init failure \n";
|
||||
LogWarning("aclBinaryInit failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
//Insert opencl into binary
|
||||
errorCode = g_complibApi._aclInsertSection(device().compiler(),
|
||||
binaryElf_,
|
||||
sourceCode.c_str(),
|
||||
strlen(sourceCode.c_str()),
|
||||
aclSOURCE);
|
||||
|
||||
if ( errorCode != ACL_SUCCESS ) {
|
||||
buildLog_ += "Error while converting to BRIG: \
|
||||
Inserting openCl Source \n";
|
||||
}
|
||||
|
||||
|
||||
//Set the options for the compiler
|
||||
//Set the include path for the temp folder that contains the includes
|
||||
if(!headers.empty()) {
|
||||
this->compileOptions_.append(" -I");
|
||||
this->compileOptions_.append(tempFolder);
|
||||
}
|
||||
|
||||
//Add only for CL2.0 and later
|
||||
if (options->oVariables->CLStd[2] >= '2') {
|
||||
std::stringstream opts;
|
||||
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
|
||||
<< device().info().maxGlobalVariableSize_;
|
||||
compileOptions_.append(opts.str());
|
||||
}
|
||||
|
||||
//Compile source to IR
|
||||
this->compileOptions_.append(hsailOptions());
|
||||
errorCode = g_complibApi._aclCompile(device().compiler(),
|
||||
binaryElf_,
|
||||
//"-Wf,--support_all_extensions",
|
||||
this->compileOptions_.c_str(),
|
||||
ACL_TYPE_OPENCL,
|
||||
ACL_TYPE_LLVMIR_BINARY,
|
||||
logFunction);
|
||||
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
|
||||
if( errorCode!=ACL_SUCCESS ) {
|
||||
LogWarning("aclCompile failed");
|
||||
buildLog_ += "Error while compiling \
|
||||
opencl source: Compiling CL to IR";
|
||||
return false;
|
||||
}
|
||||
// Save the binary in the interface class
|
||||
saveBinaryAndSetType(TYPE_COMPILED);
|
||||
return true;
|
||||
|
||||
}
|
||||
}
|
||||
#endif // WITHOUT_GPU_BACKEND
|
||||
@@ -1,67 +0,0 @@
|
||||
#include "hsacompilerlib.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
|
||||
#include "acl.h"
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
void* g_complibModule = NULL;
|
||||
struct CompLibApi g_complibApi;
|
||||
|
||||
|
||||
//
// Resolves one exported compiler-library function into g_complibApi._<api>.
// If the symbol cannot be found, the error is logged, the library is
// unloaded, g_complibModule is reset to NULL so that no stale handle is left
// behind, and the enclosing function returns false.
// This macro must be used only in the LoadCompLib() function.
//
#define LOADSYMBOL(api) \
    g_complibApi._##api = (pfn_##api) amd::Os::getSymbol(g_complibModule, #api); \
    if( g_complibApi._##api == NULL ) { \
        LogError ("amd::Os::getSymbol() for exported func " #api " failed."); \
        amd::Os::unloadLibrary(g_complibModule); \
        g_complibModule = NULL; \
        return false; \
    }
|
||||
|
||||
|
||||
bool LoadCompLib(bool offline)
|
||||
{
|
||||
g_complibModule = amd::Os::loadLibrary("amdhsacl" LP64_SWITCH(LINUX_SWITCH("32",""), "64"));
|
||||
if( g_complibModule == NULL ) {
|
||||
if (!offline) {
|
||||
LogError( "amd::Os::loadLibrary() for loading of amdhsacl.dll failed.");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
LOADSYMBOL(aclCompilerInit)
|
||||
LOADSYMBOL(aclGetTargetInfo)
|
||||
LOADSYMBOL(aclBinaryInit)
|
||||
LOADSYMBOL(aclInsertSection)
|
||||
LOADSYMBOL(aclCompile)
|
||||
LOADSYMBOL(aclCompilerFini)
|
||||
LOADSYMBOL(aclBinaryFini)
|
||||
LOADSYMBOL(aclExtractSection)
|
||||
LOADSYMBOL(aclWriteToMem)
|
||||
LOADSYMBOL(aclQueryInfo)
|
||||
LOADSYMBOL(aclGetDeviceBinary)
|
||||
LOADSYMBOL(aclExtractSymbol)
|
||||
LOADSYMBOL(aclGetCompilerLog)
|
||||
LOADSYMBOL(aclCreateFromBinary)
|
||||
LOADSYMBOL(aclReadFromMem)
|
||||
|
||||
LOADSYMBOL(aclRemoveSymbol)
|
||||
LOADSYMBOL(aclInsertSymbol)
|
||||
LOADSYMBOL(aclWriteToFile)
|
||||
LOADSYMBOL(aclBinaryVersion)
|
||||
LOADSYMBOL(aclLink)
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void UnloadCompLib()
|
||||
{
|
||||
if( g_complibModule )
|
||||
{
|
||||
amd::Os::unloadLibrary(g_complibModule);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace oclhsa
|
||||
@@ -1,92 +0,0 @@
|
||||
#ifndef HSACOMPILERLIB_HPP_
|
||||
#define HSACOMPILERLIB_HPP_
|
||||
|
||||
//
|
||||
// This file has the code for explicitly loading amdoclcl.dll.
|
||||
// Exported functions from amdoclcl.dll can be added for usage as need-basis.
|
||||
// With explicit/dynamic loading oclhsa will not have any linkage to amdoclcl.lib.
|
||||
//
|
||||
|
||||
#include "thread/thread.hpp"
|
||||
#include "acl.h"
|
||||
#include "utils/debug.hpp"
|
||||
|
||||
using namespace amd;
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
//
|
||||
// To use any new exported function from amdhsacl.dll please add/make that function specific changes
|
||||
// in typedef below, struct CompLibApi and in hsacompilerLib.cpp::LoadCompLib() function.
|
||||
//
|
||||
|
||||
//
|
||||
// Convention: The typedefed function name must be prefixed with pfn_
|
||||
//
|
||||
typedef aclCompiler* (ACL_API_ENTRY *pfn_aclCompilerInit) (aclCompilerOptions *opts, acl_error *error_code);
|
||||
typedef aclTargetInfo (ACL_API_ENTRY *pfn_aclGetTargetInfo) (const char*, const char*, acl_error*);
|
||||
typedef aclBinary* (ACL_API_ENTRY *pfn_aclBinaryInit) (size_t, const aclTargetInfo*, const aclBinaryOptions*, acl_error*);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclInsertSection) (aclCompiler *cl, aclBinary *binary, const void *data, size_t data_size, aclSections id);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclCompile) (aclCompiler *cl, aclBinary *bin, const char *options, aclType from, aclType to, aclLogFunction compile_callback);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclCompilerFini) (aclCompiler *cl);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclBinaryFini) (aclBinary *bin);
|
||||
typedef const void* (ACL_API_ENTRY *pfn_aclExtractSection) (aclCompiler *cl, const aclBinary *binary, size_t *size, aclSections id, acl_error *error_code);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclWriteToMem) (aclBinary *bin,void **mem, size_t *size);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclQueryInfo) (aclCompiler *cl, const aclBinary *binary, aclQueryType query, const char *kernel, void *data_ptr, size_t *ptr_size);
|
||||
|
||||
|
||||
typedef const void* (ACL_API_ENTRY *pfn_aclGetDeviceBinary) (aclCompiler *cl,const aclBinary *bin,const char *kernel,size_t *size,acl_error *error_code);
|
||||
typedef const void* (ACL_API_ENTRY *pfn_aclExtractSymbol) (aclCompiler *cl,const aclBinary *binary,size_t *size,aclSections id,const char *symbol,acl_error *error_code);
|
||||
typedef aclBinary* (ACL_API_ENTRY *pfn_aclReadFromMem) (void *mem,size_t size, acl_error *error_code);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclRemoveSymbol) (aclCompiler *cl, aclBinary *binary, aclSections id, const char *symbol);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclInsertSymbol) (aclCompiler *cl, aclBinary *binary, const void *data, size_t data_size, aclSections id, const char *symbol);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclWriteToFile) (aclBinary *bin, const char *str);
|
||||
|
||||
|
||||
typedef char* (ACL_API_ENTRY *pfn_aclGetCompilerLog) (aclCompiler* cl);
|
||||
typedef aclBinary* (ACL_API_ENTRY *pfn_aclCreateFromBinary) (const aclBinary *binary,aclBIFVersion version);
|
||||
typedef aclBIFVersion (ACL_API_ENTRY *pfn_aclBinaryVersion) (const aclBinary *binary);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclLink) (aclCompiler* cl, aclBinary *src_bin, unsigned int num_libs, aclBinary **libs, aclType link_mode,const char* options, aclLogFunction link_callback);
|
||||
//
|
||||
// Convention: prefix struct member variable with with underscore '_'
|
||||
// would be nice if there was no underscore prfix, but on Linux the token
|
||||
// pasting in the macro is srtict and his is the workaround.
|
||||
//
|
||||
struct CompLibApi
|
||||
{
|
||||
pfn_aclCompilerInit _aclCompilerInit;
|
||||
pfn_aclGetTargetInfo _aclGetTargetInfo;
|
||||
pfn_aclBinaryInit _aclBinaryInit;
|
||||
pfn_aclInsertSection _aclInsertSection;
|
||||
pfn_aclCompile _aclCompile;
|
||||
pfn_aclCompilerFini _aclCompilerFini;
|
||||
pfn_aclBinaryFini _aclBinaryFini;
|
||||
pfn_aclExtractSection _aclExtractSection;
|
||||
pfn_aclWriteToMem _aclWriteToMem;
|
||||
pfn_aclQueryInfo _aclQueryInfo;
|
||||
pfn_aclGetDeviceBinary _aclGetDeviceBinary;
|
||||
pfn_aclExtractSymbol _aclExtractSymbol;
|
||||
pfn_aclReadFromMem _aclReadFromMem;
|
||||
pfn_aclRemoveSymbol _aclRemoveSymbol;
|
||||
pfn_aclInsertSymbol _aclInsertSymbol;
|
||||
pfn_aclWriteToFile _aclWriteToFile;
|
||||
pfn_aclGetCompilerLog _aclGetCompilerLog;
|
||||
pfn_aclCreateFromBinary _aclCreateFromBinary;
|
||||
pfn_aclBinaryVersion _aclBinaryVersion;
|
||||
pfn_aclLink _aclLink;
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// Use g_ prefix for all global variables.
|
||||
//
|
||||
extern void* g_complibModule;
|
||||
extern CompLibApi g_complibApi;
|
||||
|
||||
// Note: initializes global variable g_complibApi.
|
||||
// Not sure what error values we have, for now returning false on failure.
|
||||
bool LoadCompLib(bool isOfflineDevice=false);
|
||||
void UnloadCompLib();
|
||||
|
||||
} // namespace oclhsa
|
||||
#endif
|
||||
@@ -1,53 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
// Implementation of the loading of the DLL and of all the exported
|
||||
// function symbols.
|
||||
|
||||
|
||||
#include "runtime/device/hsa/hsacore_symbol_loader.hpp"
|
||||
|
||||
#include "runtime/thread/thread.hpp"
|
||||
#include "runtime/utils/debug.hpp"
|
||||
#include "runtime/os/os.hpp"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
|
||||
HsacoreApiSymbols* HsacoreApiSymbols::instance_ = NULL;
|
||||
// hsacore_dll_handle_ is defined in HsacoreApiSymbols class.
|
||||
// This macro must be used only in member functions of HsacoreApiSymbols
|
||||
// class.
|
||||
// Resolves the exported function <api> from hsacore_dll_handle_ into the
// member of the same name. A missing symbol is logged, the library is
// unloaded and the process is aborted.
#define LOADSYMBOL(api)                                                      \
  api = (pfn_ ## api) amd::Os::getSymbol(hsacore_dll_handle_, # api);        \
  if (api == NULL) {                                                         \
    amd::log_printf(amd::LOG_ERROR, __FILE__, __LINE__,                      \
                    "amd::Os::getSymbol() for exported func " # api " failed."); \
    amd::Os::unloadLibrary(hsacore_dll_handle_);                             \
    abort();                                                                 \
  }
|
||||
|
||||
// Loads the hsacore library and resolves HsaGetCoreApiTable from it.
// If the library cannot be loaded, the object is left with a NULL handle and
// a NULL HsaGetCoreApiTable pointer (both are initialised up front so that
// neither is ever indeterminate). A missing symbol aborts via LOADSYMBOL.
HsacoreApiSymbols::HsacoreApiSymbols()
    : HsaGetCoreApiTable(NULL),
      hsacore_dll_handle_(NULL),
      hsacore_dll_name_(HSACORE_DLL_NAME) {
  hsacore_dll_handle_ = amd::Os::loadLibrary(hsacore_dll_name_.c_str());
  if (hsacore_dll_handle_ == NULL) {
    // Do not print, otherwise tests fail when HSA core and services DLLs are
    // not installed, in which case only ORCA stack is initialized and it is
    // not an error.
    return;
  }

  LOADSYMBOL(HsaGetCoreApiTable)
}
|
||||
|
||||
// Unloads the hsacore library when one was loaded and clears the handle.
HsacoreApiSymbols::~HsacoreApiSymbols() {
  if (hsacore_dll_handle_ == NULL) {
    return;
  }
  amd::Os::unloadLibrary(hsacore_dll_handle_);
  hsacore_dll_handle_ = NULL;
}
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef _OPENCL_RUNTIME_DEVICE_HSA_HSACORE_SYMBOL_LOADER_HPP_
|
||||
#define _OPENCL_RUNTIME_DEVICE_HSA_HSACORE_SYMBOL_LOADER_HPP_
|
||||
|
||||
// File: hsacore_symbol_loader.hpp
|
||||
// The main purpose of this file (class HsacoreApiSymbols), is to load the HSA
|
||||
// API function symbol HsaGetCoreApiTable() from hsacore DLL/so module.
|
||||
// This function outputs HsaCoreApiTable which has pointers to the rest of the
|
||||
// hsacore API functions, which should be used to invoke the API functions.
|
||||
|
||||
#include "newcore.h"
|
||||
#include "hsacoreagent.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
// In case of change in the name of hsacore dll name, change the
|
||||
// #define HSACORE_DLL_NAME value. this is the only place the DLL name should
|
||||
// be changed or referred to.
|
||||
#define HSACORE_DLL_NAME "newhsacore" LP64_ONLY("64")
|
||||
|
||||
// Convention: The typedefed function name must be prefixed with pfn_ indicating
|
||||
// it as pointer-to-function.
|
||||
typedef HsaStatus (*pfn_HsaGetCoreApiTable)(const HsaCoreApiTable **api_table);
|
||||
|
||||
|
||||
// Singleton HsacoreApiSymbols class contains the module handle and loaded
|
||||
// symbol of the single API accessor function.
|
||||
// To call an hsacore API function, an instance of this class must be used.
|
||||
// Example:
|
||||
// // In initialization code
|
||||
// const HsaCoreApiTable *hsacoreapi = NULL;
|
||||
// HsacoreApiSymbols::Instance().HsaGetCoreApiTable(&hsacoreapi);
|
||||
// ...
|
||||
// ...
|
||||
// // Calling the core api.
|
||||
// hsacoreapi->HsaGetDevices(...);
|
||||
// hsacoreapi->HsaRegisterMemory(...);
|
||||
class HsacoreApiSymbols {
|
||||
public:
|
||||
// Only the access function symbol is loaded, which in turn has pointers to
|
||||
// rest of the hsacore api.
|
||||
pfn_HsaGetCoreApiTable HsaGetCoreApiTable;
|
||||
|
||||
static HsacoreApiSymbols &Instance() {
|
||||
if (instance_ == NULL) {
|
||||
instance_ = new HsacoreApiSymbols();
|
||||
}
|
||||
return *instance_;
|
||||
}
|
||||
static void teardown(){
|
||||
if (instance_ != NULL){
|
||||
delete instance_;
|
||||
}
|
||||
}
|
||||
static bool IsDllLoaded() {
|
||||
return Instance().hsacore_dll_handle_ ? true : false;
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
static HsacoreApiSymbols* instance_;
|
||||
// Force singleton pattern.export LD_LIBRAR
|
||||
explicit HsacoreApiSymbols();
|
||||
~HsacoreApiSymbols();
|
||||
HsacoreApiSymbols(const HsacoreApiSymbols &) {}
|
||||
const HsacoreApiSymbols &operator=(const HsacoreApiSymbols &) {return *this; }
|
||||
|
||||
// Data.
|
||||
void *hsacore_dll_handle_;
|
||||
const std::string hsacore_dll_name_;
|
||||
};
|
||||
#endif // header guard
|
||||
@@ -1,144 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#include "device/hsa/oclhsa_common.hpp"
|
||||
#include "device/hsa/hsacounters.hpp"
|
||||
#include "device/hsa/hsavirtual.hpp"
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
// Destroys the HSA counter and, when no enabled counter remains on the PMU,
// releases the PMU as well. Failures are logged and end the cleanup early.
PerfCounter::~PerfCounter()
{
    // hsaPmu_ is NULL until create() stores a PMU in it. In that case no
    // counter was ever created and counter_block_ was never assigned, so
    // there is nothing valid to hand to the HSA services API.
    if (hsaPmu_ == NULL) {
        return;
    }

    // Destroy the corresponding HSA counter object
    HsaStatus status;
    status = servicesapi->HsaPmuDestroyCounter(counter_block_, counter_);
    if (status != kHsaStatusSuccess) {
        LogError("Destroy counter failed");
        return;
    }

    // If no enabled counter corresponding to the PMU,
    // Release the PMU
    uint32_t counter_num;
    if (!getEnabledCounterNum(counter_num)) {
        LogError("getEnabledCounterNum failed");
        return;
    }

    if (counter_num == 0) {
        status = servicesapi->HsaReleasePmu(hsaPmu_);
        if (status != kHsaStatusSuccess) {
            LogError("Destroy pmu failed");
            return;
        }
    }
}
|
||||
|
||||
bool
|
||||
PerfCounter::create(HsaPmu hsaPmu)
|
||||
{
|
||||
HsaStatus status;
|
||||
hsaPmu_ = hsaPmu;
|
||||
uint32_t blockIndex = static_cast<uint32_t>(info()->blockIndex_);
|
||||
status = servicesapi->HsaPmuGetCounterBlockById(hsaPmu_, blockIndex, &counter_block_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("HsaPmuGetCounterBlockById, failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
status = servicesapi->HsaPmuCreateCounter(counter_block_, &counter_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogPrintfError("HsaPmuCreateCounter, failed.\
|
||||
Block: %d, counter: #d, event: %d",
|
||||
info()->blockIndex_,
|
||||
info()->counterIndex_,
|
||||
info()->eventIndex_);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
status = servicesapi->HsaPmuCounterSetEnabled(counter_, true);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("HsaPmuCounterSetEnabled, failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t eventIndex = static_cast<uint32_t>(info()->eventIndex_);
|
||||
status = servicesapi->HsaPmuCounterSetParameter(counter_,
|
||||
kHsaCounterParameterEventIndex,
|
||||
sizeof(uint32_t), (void *)&eventIndex);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("HsaPmuCounterSetParameter, failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
PerfCounter::getInfo(uint64_t infoType) const
|
||||
{
|
||||
switch (infoType) {
|
||||
case CL_PERFCOUNTER_GPU_BLOCK_INDEX: {
|
||||
// Return the GPU block index
|
||||
return info()->blockIndex_;
|
||||
}
|
||||
case CL_PERFCOUNTER_GPU_COUNTER_INDEX: {
|
||||
// Return the GPU counter index
|
||||
return info()->counterIndex_;
|
||||
}
|
||||
case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
|
||||
// Return the GPU event index
|
||||
return info()->eventIndex_;
|
||||
}
|
||||
case CL_PERFCOUNTER_DATA: {
|
||||
HsaStatus status;
|
||||
uint64_t counterValue;
|
||||
status = servicesapi->HsaPmuCounterGetResult(counter_, &counterValue);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("HsaPmuCounterGetResult, failed");
|
||||
}
|
||||
return counterValue;
|
||||
}
|
||||
default:
|
||||
LogError("Wrong PerfCounter::getInfo parameter");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
PerfCounter::getEnabledCounterNum(uint32_t &counter_num)
|
||||
{
|
||||
// Collect all the program counter blocks
|
||||
uint32_t counterblock_num, num;
|
||||
uint32_t i;
|
||||
HsaStatus status;
|
||||
HsaCounter *pp_counters;
|
||||
HsaCounterBlock *pp_counterblocks;
|
||||
status = servicesapi->HsaPmuGetAllCounterBlocks(hsaPmu_,
|
||||
&pp_counterblocks,
|
||||
&counterblock_num);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("HsaPmuGetAllCounterBlocks, failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
counter_num = 0;
|
||||
for (i = 0; i < counterblock_num; i++) {
|
||||
// Retrieve all enabled pp_counters in each counter block
|
||||
status = servicesapi->HsaPmuGetEnabledCounters(pp_counterblocks[i],
|
||||
&pp_counters, &num);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("HsaPmuGetEnabledCounters, failed");
|
||||
return false;
|
||||
}
|
||||
counter_num += num;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // namespace oclhsa
|
||||
@@ -1,103 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef HSACOUNTERS_HPP_
|
||||
#define HSACOUNTERS_HPP_
|
||||
|
||||
#include "top.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/hsa/hsadevice.hpp"
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
class VirtualGPU;
|
||||
|
||||
//! Performance counter implementation on GPU
|
||||
class PerfCounter : public device::PerfCounter
|
||||
{
|
||||
public:
|
||||
//! The performance counter info
|
||||
struct Info : public amd::EmbeddedObject
|
||||
{
|
||||
uint blockIndex_; //!< Index of the block to configure
|
||||
uint counterIndex_; //!< Index of the hardware counter
|
||||
uint eventIndex_; //!< Event you wish to count with the counter
|
||||
};
|
||||
|
||||
//! The PerfCounter flags
|
||||
enum Flags
|
||||
{
|
||||
BeginIssued = 0x00000001,
|
||||
EndIssued = 0x00000002,
|
||||
ResultReady = 0x00000004
|
||||
};
|
||||
|
||||
//! Constructor for the GPU PerfCounter object
|
||||
PerfCounter(
|
||||
const HsaDevice *device, //!< A GPU device object
|
||||
const VirtualGPU& gpu, //!< Virtual GPU device object
|
||||
cl_uint blockIndex, //!< HW block index
|
||||
cl_uint counterIndex, //!< Counter index within the block
|
||||
cl_uint eventIndex) //!< Event index for profiling
|
||||
: gpuDevice_(device)
|
||||
, gpu_(gpu)
|
||||
, hsaPmu_(NULL)
|
||||
, flags_(0)
|
||||
, counter_(0)
|
||||
, index_(0)
|
||||
{
|
||||
info_.blockIndex_ = blockIndex;
|
||||
info_.counterIndex_ = counterIndex;
|
||||
info_.eventIndex_ = eventIndex;
|
||||
}
|
||||
|
||||
//! Destructor for the GPU PerfCounter object
|
||||
virtual ~PerfCounter();
|
||||
|
||||
//! Creates the counter object
|
||||
bool create(
|
||||
HsaPmu hsaPmu //!< Reference counter
|
||||
);
|
||||
|
||||
//! Returns the specific information about the counter
|
||||
uint64_t getInfo(
|
||||
uint64_t infoType //!< The type of returned information
|
||||
) const;
|
||||
|
||||
//! Returns the GPU device, associated with the current object
|
||||
const HsaDevice * dev() const { return gpuDevice_; }
|
||||
|
||||
//! Returns the virtual GPU device
|
||||
const VirtualGPU& gpu() const { return gpu_; }
|
||||
|
||||
//! Returns the CAL performance counter descriptor
|
||||
const Info* info() const { return &info_; }
|
||||
|
||||
//! Returns the Info structure for performance counter
|
||||
HsaPmu getCounterPmu() const { return hsaPmu_; }
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
PerfCounter(const PerfCounter&);
|
||||
|
||||
//! Disable default operator=
|
||||
PerfCounter& operator=(const PerfCounter&);
|
||||
|
||||
//! Get enabled counter number
|
||||
bool getEnabledCounterNum(uint32_t &counter_num);
|
||||
|
||||
const HsaDevice *gpuDevice_; //!< The backend device
|
||||
const VirtualGPU& gpu_; //!< The virtual GPU device object
|
||||
|
||||
HsaPmu hsaPmu_; //!< Hsa pmu
|
||||
uint flags_; //!< The perfcounter object state
|
||||
Info info_; //!< The info structure for perfcounter
|
||||
HsaCounter counter_; //!< HSA counter object
|
||||
HsaCounterBlock counter_block_; //!< counter block that the counter belongs to
|
||||
uint index_; //!< Counter index in the CAL container
|
||||
};
|
||||
|
||||
} // namespace oclhsa
|
||||
|
||||
#endif // HSACOUNTERS_HPP_
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
#ifndef _OPENCL_RUNTIME_DEVICE_HSA_HSADEFS_HPP_
|
||||
#define _OPENCL_RUNTIME_DEVICE_HSA_HSADEFS_HPP_
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
typedef uint HsaDeviceId;
|
||||
|
||||
struct AMDDeviceInfo {
|
||||
HsaDeviceId hsaDeviceId_; //!< Machine id
|
||||
const char* targetName_; //!< Target name for compilation
|
||||
const char* machineTarget_; //!< Machine target
|
||||
uint simdPerCU_; //!< Number of SIMDs per CU
|
||||
uint simdWidth_; //!< Number of workitems processed per SIMD
|
||||
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemSizePerCU_; //!< Local memory size per CU
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
};
|
||||
|
||||
//The device ID must match with the device's index into DeviceInfo
|
||||
const HsaDeviceId HSA_SPECTRE_ID = 0;
|
||||
const HsaDeviceId HSA_SPOOKY_ID = 1;
|
||||
const HsaDeviceId HSA_TONGA_ID = 2;
|
||||
const HsaDeviceId HSA_CARRIZO_ID = 3;
|
||||
const HsaDeviceId HSA_ICELAND_ID = 4;
|
||||
const HsaDeviceId HSA_INVALID_DEVICE_ID = -1;
|
||||
|
||||
static const AMDDeviceInfo DeviceInfoTable[] = {
|
||||
// targetName machineTarget
|
||||
/* TARGET_KAVERI_SPECTRE */ {HSA_SPECTRE_ID, "Spectre", "Spectre", 4, 16, 1, 256, 64 * Ki, 32 },
|
||||
/* TARGET_KAVERI_SPOOKY */ {HSA_SPOOKY_ID, "Spooky", "Spooky", 4, 16, 1, 256, 64 * Ki, 32 },
|
||||
/* TARGET_TONGA */ {HSA_TONGA_ID, "Tonga", "Tonga", 4, 16, 1, 256, 64 * Ki, 32},
|
||||
/* TARGET_CARRIZO */ {HSA_CARRIZO_ID, "Carrizo", "Carrizo", 4, 16, 1, 256, 64 * Ki, 32},
|
||||
/* TARGET_ICELAND */ {HSA_ICELAND_ID, "Topaz", "Topaz", 4, 16, 1, 256, 64 * Ki, 32}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@@ -1,896 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/kernel.hpp"
|
||||
#include "os/os.hpp"
|
||||
#include "utils/debug.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "utils/versions.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
#include "newcore.h"
|
||||
|
||||
#include "amdocl/cl_common.hpp"
|
||||
#include "device/hsa/hsadevice.hpp"
|
||||
#include "device/hsa/hsavirtual.hpp"
|
||||
#include "device/hsa/hsaprogram.hpp"
|
||||
#include "device/hsa/hsablit.hpp"
|
||||
#include "device/hsa/hsacompilerlib.hpp"
|
||||
#include "device/hsa/hsamemory.hpp"
|
||||
#include "hsacore_symbol_loader.hpp"
|
||||
#include "device/hsa/oclhsa_common.hpp"
|
||||
#include "kv_id.h"
|
||||
#include "vi_id.h"
|
||||
#include "cz_id.h"
|
||||
#include "hsainterop.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glext.h>
|
||||
#include "CL/cl_gl.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "CL/cl_d3d10.h"
|
||||
#endif // _WIN32
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
|
||||
const HsaCoreApiTable *hsacoreapi = NULL;
|
||||
const HsaServicesApiTable *servicesapi = NULL;
|
||||
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
namespace device {
|
||||
extern const char* BlitSourceCode;
|
||||
}
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
aclCompiler* NullDevice::compilerHandle_;
|
||||
bool oclhsa::Device::isHsaInitialized_ = false;
|
||||
const bool oclhsa::Device::offlineDevice_ = false;
|
||||
const bool oclhsa::NullDevice::offlineDevice_= true;
|
||||
|
||||
// Maps the device_id of an HSA device to the HsaDeviceId of its ASIC family.
// Returns HSA_INVALID_DEVICE_ID for any device_id not listed below.
static HsaDeviceId getHsaDeviceId(const HsaDevice *device) {
    HsaDeviceId family = HSA_INVALID_DEVICE_ID;

    switch (device->device_id) {
    // Spectre
    case DEVICE_ID_SPECTRE_MOBILE:
    case DEVICE_ID_SPECTRE_DESKTOP:
    case DEVICE_ID_SPECTRE_LITE_MOBILE_1309:
    case DEVICE_ID_SPECTRE_LITE_MOBILE_130A:
    case DEVICE_ID_SPECTRE_SL_MOBILE_130B:
    case DEVICE_ID_SPECTRE_MOBILE_130C:
    case DEVICE_ID_SPECTRE_LITE_MOBILE_130D:
    case DEVICE_ID_SPECTRE_SL_MOBILE_130E:
    case DEVICE_ID_SPECTRE_DESKTOP_130F:
    case DEVICE_ID_SPECTRE_WORKSTATION_1310:
    case DEVICE_ID_SPECTRE_WORKSTATION_1311:
    case DEVICE_ID_SPECTRE_LITE_DESKTOP_1313:
    case DEVICE_ID_SPECTRE_SL_DESKTOP_1315:
    case DEVICE_ID_SPECTRE_SL_MOBILE_1318:
    case DEVICE_ID_SPECTRE_SL_EMBEDDED_131B:
    case DEVICE_ID_SPECTRE_EMBEDDED_131C:
    case DEVICE_ID_SPECTRE_LITE_EMBEDDED_131D:
        family = HSA_SPECTRE_ID;
        break;

    // Spooky
    case DEVICE_ID_SPOOKY_MOBILE:
    case DEVICE_ID_SPOOKY_DESKTOP:
    case DEVICE_ID_SPOOKY_DESKTOP_1312:
    case DEVICE_ID_SPOOKY_DESKTOP_1316:
    case DEVICE_ID_SPOOKY_MOBILE_1317:
        family = HSA_SPOOKY_ID;
        break;

    // Tonga
    case DEVICE_ID_VI_TONGA_P_6920:
    case DEVICE_ID_VI_TONGA_P_6921:
    case DEVICE_ID_VI_TONGA_P_6928:
    case DEVICE_ID_VI_TONGA_P_692B:
    case DEVICE_ID_VI_TONGA_P_692F:
    case DEVICE_ID_VI_TONGA_P_6938:
    case DEVICE_ID_VI_TONGA_P_6939:
        family = HSA_TONGA_ID;
        break;

    // Carrizo
    case DEVICE_ID_CZ_9870:
    case DEVICE_ID_CZ_9874:
    case DEVICE_ID_CZ_9875:
    case DEVICE_ID_CZ_9876:
    case DEVICE_ID_CZ_9877:
        family = HSA_CARRIZO_ID;
        break;

    // Iceland
    case DEVICE_ID_VI_ICELAND_M_6900:
    case DEVICE_ID_VI_ICELAND_M_6901:
    case DEVICE_ID_VI_ICELAND_M_6902:
    case DEVICE_ID_VI_ICELAND_M_6903:
    case DEVICE_ID_VI_ICELAND_M_6907:
        family = HSA_ICELAND_ID;
        break;

    default:
        break;
    }

    return family;
}
|
||||
bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
|
||||
online_ = false;
|
||||
deviceInfo_ = deviceInfo;
|
||||
// Mark the device as GPU type
|
||||
info_.type_ = CL_DEVICE_TYPE_GPU | CL_HSA_ENABLED_AMD;
|
||||
info_.vendorId_ = 0x1002;
|
||||
|
||||
settings_ = new Settings();
|
||||
oclhsa::Settings* hsaSettings = static_cast<oclhsa::Settings*>(settings_);
|
||||
if ((hsaSettings == NULL) ||
|
||||
// @Todo sramalin Use double precision from constsant
|
||||
!hsaSettings->create((true) & 0x1)) {
|
||||
LogError("Error creating settings for NULL HSA device");
|
||||
return false;
|
||||
}
|
||||
// Report the device name
|
||||
::strcpy(info_.name_, deviceInfo_.machineTarget_);
|
||||
info_.extensions_ = getExtensionString();
|
||||
info_.maxWorkGroupSize_ = hsaSettings->maxWorkGroupSize_;
|
||||
::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
|
||||
info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
|
||||
std::string driverVersion = AMD_BUILD_STRING;
|
||||
driverVersion.append(" (HSA)");
|
||||
strcpy(info_.driverVersion_, driverVersion.c_str());
|
||||
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stores the backend device and starts with no context and no transfer
// queue.
// NOTE(review): blitProgram_ is tested in ~Device() but is not initialised
// here — confirm it is initialised elsewhere.
Device::Device(const HsaDevice *bkendDevice)
    : _bkendDevice(bkendDevice)
    , context_(NULL)
    , xferQueue_(NULL)
{
}
|
||||
|
||||
// Releases what the device owns: the transfer queue (only if it terminates
// successfully), the blit program, the context reference, the extension
// string and the settings object.
Device::~Device()
{
    // Destroy transfer queue
    if (xferQueue_ != NULL) {
        if (xferQueue_->terminate()) {
            delete xferQueue_;
            xferQueue_ = NULL;
        }
    }

    // delete / delete[] on a null pointer is a no-op, so no guards are needed
    delete blitProgram_;
    blitProgram_ = NULL;

    if (context_ != NULL) {
        context_->release();
    }

    delete[] info_.extensions_;
    info_.extensions_ = NULL;

    delete settings_;
    settings_ = NULL;
}
|
||||
bool NullDevice::initCompiler(bool isOffline) {
|
||||
// Initializes g_complibModule and g_complibApi if they were not initialized
|
||||
if( g_complibModule == NULL ){
|
||||
if (!LoadCompLib(isOffline)) {
|
||||
if (!isOffline) {
|
||||
LogError("Error - could not find the compiler library");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
//Initialize the compiler handle if has already not been initialized
|
||||
//This is destroyed in Device::teardown
|
||||
acl_error error;
|
||||
if (!compilerHandle_) {
|
||||
compilerHandle_ = g_complibApi._aclCompilerInit(NULL, &error);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Error initializing the compiler handle");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NullDevice::destroyCompiler() {
|
||||
if (compilerHandle_ != NULL) {
|
||||
acl_error error = g_complibApi._aclCompilerFini(compilerHandle_);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Error closing the compiler");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if( g_complibModule != NULL ){
|
||||
UnloadCompLib();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Shuts down the compiler; the result of destroyCompiler() is ignored.
void NullDevice::tearDown() {
    (void)destroyCompiler();
}
|
||||
bool NullDevice::init() {
|
||||
//Initialize the compiler
|
||||
if (!initCompiler(offlineDevice_)){
|
||||
return false;
|
||||
}
|
||||
//If there is an HSA enabled device online then skip any offline device
|
||||
std::vector<Device*> devices;
|
||||
devices = getDevices(CL_DEVICE_TYPE_GPU | CL_HSA_ENABLED_AMD, false);
|
||||
|
||||
//Load the offline devices
|
||||
//Iterate through the set of available offline devices
|
||||
for (uint id = 0; id < sizeof(DeviceInfoTable)/sizeof(AMDDeviceInfo); id++) {
|
||||
bool isOnline = false;
|
||||
//Check if the particular device is online
|
||||
for (unsigned int i=0; i< devices.size(); i++) {
|
||||
if (static_cast<NullDevice*>(devices[i])->deviceInfo_.hsaDeviceId_ ==
|
||||
DeviceInfoTable[id].hsaDeviceId_){
|
||||
isOnline = true;
|
||||
}
|
||||
}
|
||||
if (isOnline) {
|
||||
continue;
|
||||
}
|
||||
NullDevice* nullDevice = new NullDevice();
|
||||
if (!nullDevice->create(DeviceInfoTable[id])) {
|
||||
LogError("Error creating new instance of Device.");
|
||||
delete nullDevice;
|
||||
return false;
|
||||
}
|
||||
nullDevice->registerDevice();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
//! Destroys the offline device: frees the extension string and the settings
//! object, resetting both pointers to NULL.
NullDevice::~NullDevice() {
    if (info_.extensions_ != NULL) {
        delete [] info_.extensions_;
        info_.extensions_ = NULL;
    }

    if (settings_ != NULL) {
        delete settings_;
        settings_ = NULL;
    }
}
|
||||
bool Device::init() {
|
||||
// Assumption: init() will be called by ocl only once at the start of program
|
||||
// with a matching tearDown() when program exits.
|
||||
// TODO(papte) Check if init(),
|
||||
// tearDown(), init(), tearDown() repeat sequence is possible in one session
|
||||
// (process lifetime). If so we will be calling LoadLibrary() and
|
||||
// FreeLibrary() ifcn the similar repeat sequence. Investigate the effect of
|
||||
// this on the HSA Device and Core runtime's initialzers, where the device list
|
||||
// is generated in the runtime.
|
||||
#ifdef BUILD_STATIC_HSA
|
||||
HsaGetCoreApiTable(&hsacoreapi);
|
||||
HsaGetServicesApiTable(&servicesapi);
|
||||
#else
|
||||
bool core_dll_loaded = HsacoreApiSymbols::Instance().IsDllLoaded();
|
||||
bool service_dll_loaded = ServicesApiSymbols::Instance().IsDllLoaded();
|
||||
|
||||
if (!core_dll_loaded && !service_dll_loaded ) {
|
||||
// Both DLLs are not loaded, assume HSA not installed on a non-HSA
|
||||
// machine, returning true.
|
||||
LogInfo("HSA stack not available.");
|
||||
return true; // Return true, indicating nothing is wrong and
|
||||
// assuming HSA not installed.
|
||||
} else if (core_dll_loaded ^ service_dll_loaded) {
|
||||
// If Only one of the two HSA DLLs failed, then its an ERROR.
|
||||
LogError("One of the HSA libraies, core or services failed to load.\n");
|
||||
return false;
|
||||
} else {
|
||||
// Both DLLs loaded, continue initializing HSA stack.
|
||||
LogInfo("Initializing HSA stack.");
|
||||
}
|
||||
|
||||
// First thing first, initialize hsacoreapi and servicesapi to call core and
|
||||
// services API respectively.
|
||||
HsacoreApiSymbols::Instance().HsaGetCoreApiTable(&hsacoreapi);
|
||||
ServicesApiSymbols::Instance().HsaGetServicesApiTable(&servicesapi);
|
||||
#endif
|
||||
isHsaInitialized_ = false;
|
||||
if (hsacoreapi->HsaAmdInitialize() != kHsaStatusSuccess) {
|
||||
// Either an error in HSA core initialization or
|
||||
// KFD not installed on the machine.
|
||||
// Return without error, so OpenCL can continue without HSA stack.
|
||||
return true;
|
||||
}
|
||||
isHsaInitialized_ = true;
|
||||
|
||||
// Initialize the structure used to configure the
|
||||
// behavior of Hsa Runtime
|
||||
// TODO (PA) : verify if this ito be called or not.
|
||||
// Latest code does not call.
|
||||
// SetHsaEnvConfig();
|
||||
|
||||
//Initialize the compiler
|
||||
if (!initCompiler(offlineDevice_)){
|
||||
return false;
|
||||
}
|
||||
|
||||
const HsaDevice *devices = NULL;
|
||||
unsigned num_devices = 0;
|
||||
|
||||
// Initialize the Hsa Service layer
|
||||
servicesapi->HsaInitServices(128);
|
||||
|
||||
HsaStatus status = hsacoreapi->HsaGetDevices(&num_devices, &devices);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogPrintfError(
|
||||
"in %s(), Call to newcore HsaGetDevices() failed, HsaStatus: %d",
|
||||
__FUNCTION__, status);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < num_devices; i++) {
|
||||
Device *oclhsa_device = new Device(&devices[i]);
|
||||
if (!oclhsa_device) {
|
||||
LogError("Error creating new instance of Device on then heap.");
|
||||
return false;
|
||||
}
|
||||
HsaDeviceId deviceId = getHsaDeviceId(&devices[i]);
|
||||
if (deviceId == HSA_INVALID_DEVICE_ID) {
|
||||
LogError(" Invalid HSA device");
|
||||
return false;
|
||||
}
|
||||
//Find device id in the table
|
||||
unsigned sizeOfTable = sizeof(DeviceInfoTable)/sizeof(AMDDeviceInfo);
|
||||
uint id;
|
||||
for (id = 0; id < sizeOfTable; id++) {
|
||||
if (DeviceInfoTable[id].hsaDeviceId_ == deviceId){
|
||||
break;
|
||||
}
|
||||
}
|
||||
//If the AmdDeviceInfo for the HsaDevice Id could not be found return false
|
||||
if (id == sizeOfTable) {
|
||||
return false;
|
||||
}
|
||||
oclhsa_device->deviceInfo_ = DeviceInfoTable[id];
|
||||
|
||||
if (!oclhsa_device->mapHSADeviceToOpenCLDevice(&devices[i])) {
|
||||
LogError("Failed mapping of HsaDevice to Device.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!oclhsa_device->create()) {
|
||||
LogError("Error creating new instance of Device.");
|
||||
return false;
|
||||
}
|
||||
oclhsa_device->registerDevice(); // no return code for this function
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Device::tearDown()
|
||||
{
|
||||
if (isHsaInitialized_) {
|
||||
if (servicesapi != NULL && servicesapi->HsaDestroyServices != NULL) {
|
||||
servicesapi->HsaDestroyServices();
|
||||
}
|
||||
hsacoreapi->HsaAmdShutdown();
|
||||
}
|
||||
NullDevice::tearDown();
|
||||
HsacoreApiSymbols::teardown();
|
||||
ServicesApiSymbols::teardown();
|
||||
}
|
||||
|
||||
bool
|
||||
Device::create()
|
||||
{
|
||||
amd::Context::Info info = {0};
|
||||
std::vector<amd::Device*> devices;
|
||||
devices.push_back(this);
|
||||
|
||||
// Create a dummy context
|
||||
context_ = new amd::Context(devices, info);
|
||||
if (context_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
blitProgram_ = new BlitProgram(context_);
|
||||
// Create blit programs
|
||||
if (blitProgram_ == NULL || !blitProgram_->create(this)) {
|
||||
delete blitProgram_;
|
||||
blitProgram_ = NULL;
|
||||
LogError("Couldn't create blit kernels!");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Returns the OCLHSA device memory object that backs \p mem on this device.
//! The device memory obtained from \p mem is downcast to oclhsa::Memory.
oclhsa::Memory* Device::getOclHsaMemory(amd::Memory* mem) const
{
    return static_cast<oclhsa::Memory*>(
        mem->getDeviceMemory(*this));
}
|
||||
|
||||
//! Creates a new FSAIL program object bound to this offline device.
//! The \p hsail argument is not used by this implementation.
device::Program* NullDevice::createProgram(bool hsail) {
    oclhsa::FSAILProgram* program = new oclhsa::FSAILProgram(*this);
    return program;
}
|
||||
|
||||
//! Creates a new FSAIL program object bound to this HSA device.
//! The \p hsail argument is not used by this implementation.
device::Program* Device::createProgram(bool hsail) {
    oclhsa::FSAILProgram* program = new oclhsa::FSAILProgram(*this);
    return program;
}
|
||||
|
||||
//! Computes the SVM capability bits reported for \p device.
//!
//! Devices whose id lies in the SPECTRE range get coarse-grain buffer,
//! fine-grain buffer and fine-grain system support, plus atomics unless the
//! build is 32-bit and 32-bit SVM atomics are not enabled in the settings.
//! All other devices get coarse- and fine-grain buffer support only.
cl_device_svm_capabilities
Device::getSvmCapabilities(const HsaDevice* device)
{
    // Every device supports at least the buffer flavours of SVM. Devices such
    // as Bonaire enable some HSA features but include neither
    // CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (because of addresses above 2^40) nor
    // CL_DEVICE_SVM_ATOMICS.
    cl_device_svm_capabilities caps =
        CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER;

    const bool isSpectre =
        (device->device_id >= DEVICE_ID_SPECTRE_MOBILE) &&
        (device->device_id <= DEVICE_ID_SPECTRE_EMBEDDED_131C);
    if (!isSpectre) {
        return caps;
    }

    // KV supports all types of SVM
    caps |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;

    // Atomics are allowed in 32 bits only if the corresponding setting is on
    if (!Is32Bits() || settings().enableSvm32BitsAtomics_) {
        caps |= CL_DEVICE_SVM_ATOMICS;
    }
    return caps;
}
|
||||
|
||||
bool
|
||||
Device::mapHSADeviceToOpenCLDevice(const HsaDevice *dev)
|
||||
{
|
||||
// Create HSA settings
|
||||
settings_ = new Settings();
|
||||
oclhsa::Settings* hsaSettings = static_cast<oclhsa::Settings*>(settings_);
|
||||
if ((hsaSettings == NULL) ||
|
||||
!hsaSettings->create((dev->is_double_precision) & 0x1)) {
|
||||
return false;
|
||||
}
|
||||
// Report the device name
|
||||
::strcpy(info_.name_, deviceInfo_.machineTarget_);
|
||||
strcpy(info_.boardName_, dev->device_name);
|
||||
|
||||
if (dev->number_cache_descriptors != 0) {
|
||||
HsaCacheDescriptor* cacheDesc = dev->cache_descriptors;
|
||||
info_.globalMemCacheLineSize_ = cacheDesc->cache_line_size;
|
||||
info_.globalMemCacheSize_ = cacheDesc->cache_size * Ki;
|
||||
|
||||
info_.globalMemCacheType_ = (cacheDesc->cache_type.value == 0) ?
|
||||
CL_NONE : CL_READ_WRITE_CACHE;
|
||||
}
|
||||
else {
|
||||
info_.globalMemCacheType_ = CL_NONE;
|
||||
info_.globalMemCacheLineSize_ = 0;
|
||||
info_.globalMemCacheSize_ = 0;
|
||||
}
|
||||
|
||||
// Map HSA device types to OCL device types.
|
||||
// if (dev->device_type == kHsaDeviceTypeThroughput)
|
||||
info_.type_ = CL_DEVICE_TYPE_GPU | CL_HSA_ENABLED_AMD;
|
||||
|
||||
info_.maxComputeUnits_ = dev->number_compute_units;
|
||||
info_.deviceTopology_.pcie.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;
|
||||
info_.deviceTopology_.pcie.bus = (dev->location_id&(0xFF<<8))>>8;
|
||||
info_.deviceTopology_.pcie.device = (dev->location_id&(0x1F<<3))>>3;
|
||||
info_.deviceTopology_.pcie.function = (dev->location_id&0x07);
|
||||
info_.extensions_ = getExtensionString();
|
||||
info_.nativeVectorWidthDouble_ =
|
||||
info_.preferredVectorWidthDouble_ = (settings().doublePrecision_) ? 1 : 0;
|
||||
|
||||
info_.maxWorkGroupSize_ = dev->wave_front_size * dev->max_waves_per_simd;
|
||||
info_.maxClockFrequency_ = dev->max_clock_rate_of_f_compute;
|
||||
//info_.imageSupport_ = dev->is_image_support;
|
||||
info_.imageSupport_ = false;
|
||||
|
||||
info_.localMemSizePerCU_ = dev->group_memory_size;
|
||||
|
||||
if (populateOCLDeviceConstants() == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Populate the single config setting.
|
||||
info_.singleFPConfig_ = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
|
||||
CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_FMA;
|
||||
|
||||
if (hsaSettings->doublePrecision_) {
|
||||
info_.doubleFPConfig_ = info_.singleFPConfig_ | CL_FP_DENORM;
|
||||
info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
|
||||
}
|
||||
|
||||
info_.svmCapabilities_ = getSvmCapabilities(dev);
|
||||
info_.preferredPlatformAtomicAlignment_ = 0;
|
||||
info_.preferredGlobalAtomicAlignment_ = 0;
|
||||
info_.preferredLocalAtomicAlignment_ = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
isFrameBufferDescriptor(HsaMemoryDescriptor &desc)
|
||||
{
|
||||
return (desc.heap_type == kHsaHeapTypeFrameBufferPrivate);
|
||||
}
|
||||
|
||||
bool
|
||||
Device::populateOCLDeviceConstants()
|
||||
{
|
||||
info_.available_ = true;
|
||||
/*info_.maxWorkGroupSize_ = 256;*/
|
||||
info_.maxWorkItemDimensions_ = 3;
|
||||
|
||||
// Get frame buffer memory descriptor.
|
||||
HsaMemoryDescriptor *memDescBegin = _bkendDevice->memory_descriptors;
|
||||
HsaMemoryDescriptor *memDescEnd =
|
||||
memDescBegin + _bkendDevice->number_memory_descriptors;
|
||||
HsaMemoryDescriptor *hsaFbDesc =
|
||||
std::find_if(memDescBegin, memDescEnd, isFrameBufferDescriptor);
|
||||
|
||||
if ((hsaFbDesc != memDescEnd) && (hsaFbDesc->size_in_bytes > 0)) {
|
||||
// Device local memory exists. Populate OpenCL info field with
|
||||
// attributes of HSA GPU local memory descriptor.
|
||||
info_.globalMemSize_ = hsaFbDesc->size_in_bytes;
|
||||
|
||||
info_.maxMemAllocSize_ =
|
||||
std::max(std::min(cl_ulong(1 * Gi), info_.globalMemSize_ / 4),
|
||||
cl_ulong(128 * Mi));
|
||||
|
||||
// Make sure the max allocation size is not larger than the available
|
||||
// memory size.
|
||||
info_.maxMemAllocSize_ =
|
||||
std::min(info_.maxMemAllocSize_, info_.globalMemSize_);
|
||||
}
|
||||
else {
|
||||
// The HSA device backend does not have local memory, so we use system
|
||||
// memory as default.
|
||||
info_.globalMemSize_ = Os::getPhysicalMemSize();
|
||||
if (info_.globalMemSize_ == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Cap global memory
|
||||
#if defined (_LP64)
|
||||
// Cap at 8TiB for 64-bit
|
||||
const cl_ulong maxGlobalMemSize = 8ULL * Ki * Gi;
|
||||
#elif defined (_WIN32)
|
||||
// Cap at 2GiB (see http://msdn.microsoft.com/en-us/library/aa366778.aspx)
|
||||
const cl_ulong maxGlobalMemSize = 2ULL * Gi;
|
||||
#else // linux
|
||||
// Cap at 3.5GiB
|
||||
const cl_ulong maxGlobalMemSize = 3584ULL * Mi;
|
||||
#endif
|
||||
info_.globalMemSize_ = std::min(info_.globalMemSize_, maxGlobalMemSize);
|
||||
|
||||
info_.maxMemAllocSize_ =
|
||||
info_.globalMemSize_ * CPU_MAX_ALLOC_PERCENT / 100;
|
||||
if (flagIsDefault(CPU_MAX_ALLOC_PERCENT)) {
|
||||
const cl_ulong minAllocSize = LP64_SWITCH(1ULL * Gi, 2ULL * Gi);
|
||||
info_.maxMemAllocSize_ = std::max(info_.maxMemAllocSize_,
|
||||
std::min(info_.globalMemSize_, minAllocSize));
|
||||
}
|
||||
}
|
||||
|
||||
/*make sure we don't run anything over 8 params for now*/
|
||||
info_.maxParameterSize_ = 1024; // [TODO]: CAL stack values: 1024*
|
||||
// constant
|
||||
info_.maxWorkItemSizes_[0] = 256;
|
||||
info_.maxWorkItemSizes_[1] = 256;
|
||||
info_.maxWorkItemSizes_[2] = 256;
|
||||
|
||||
info_.nativeVectorWidthChar_ = info_.preferredVectorWidthChar_ = 4;
|
||||
info_.nativeVectorWidthShort_ = info_.preferredVectorWidthShort_ = 2;
|
||||
info_.nativeVectorWidthInt_ = info_.preferredVectorWidthInt_ = 1;
|
||||
info_.nativeVectorWidthLong_ = info_.preferredVectorWidthLong_ = 1;
|
||||
info_.nativeVectorWidthFloat_ = info_.preferredVectorWidthFloat_ = 1;
|
||||
|
||||
info_.localMemSize_ = 32 * 1024;
|
||||
info_.hostUnifiedMemory_ = CL_TRUE;
|
||||
info_.memBaseAddrAlign_ = 8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ?
|
||||
sizeof(cl_long16) : MEMOBJ_BASE_ADDR_ALIGN);
|
||||
info_.minDataTypeAlignSize_ = sizeof(cl_long16);
|
||||
|
||||
info_.maxConstantArgs_ = 8;
|
||||
info_.maxConstantBufferSize_ = 64 * 1024;
|
||||
info_.localMemType_ = CL_LOCAL;
|
||||
info_.errorCorrectionSupport_ = false;
|
||||
info_.profilingTimerResolution_ = 1;
|
||||
info_.littleEndian_ = true;
|
||||
info_.compilerAvailable_ = true;
|
||||
info_.executionCapabilities_ = CL_EXEC_KERNEL;
|
||||
info_.queueProperties_ = CL_QUEUE_PROFILING_ENABLE;
|
||||
info_.platform_ = AMD_PLATFORM;
|
||||
info_.profile_ = "FULL_PROFILE";
|
||||
strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
|
||||
|
||||
info_.addressBits_ = LP64_SWITCH(32, 64);
|
||||
info_.maxSamplers_ = 16;
|
||||
info_.maxReadImageArgs_ = 128;
|
||||
info_.maxWriteImageArgs_ = 8;
|
||||
info_.maxReadWriteImageArgs_ = 64;
|
||||
info_.image2DMaxWidth_ = 16 * 1024;
|
||||
info_.image2DMaxHeight_ = 16 * 1024;
|
||||
info_.image3DMaxWidth_ = 2 * 1024;
|
||||
info_.image3DMaxHeight_ = 2 * 1024;
|
||||
info_.image3DMaxDepth_ = 2 * 1024;
|
||||
info_.imageMaxArraySize_ = 2 * 1024;
|
||||
info_.imageMaxBufferSize_ = 64 * 1024;
|
||||
info_.imagePitchAlignment_ = 256;
|
||||
info_.imageBaseAddressAlignment_ = 256;
|
||||
info_.imageMaxArraySize_ = 2048;
|
||||
info_.imageMaxBufferSize_ = 65536;
|
||||
info_.bufferFromImageSupport_ = CL_TRUE;
|
||||
info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
|
||||
std::string driverVersion = AMD_BUILD_STRING;
|
||||
driverVersion.append(" (HSA)");
|
||||
strcpy(info_.driverVersion_, driverVersion.c_str());
|
||||
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
|
||||
|
||||
info_.builtInKernels_ = "";
|
||||
info_.linkerAvailable_ = true;
|
||||
info_.preferredInteropUserSync_ = true;
|
||||
info_.printfBufferSize_ = 1000 * 1024;
|
||||
info_.vendorId_ = 0x1002; // from gpudevice
|
||||
|
||||
info_.maxGlobalVariableSize_ = static_cast<size_t>(info_.maxMemAllocSize_);
|
||||
info_.globalVariablePreferredTotalSize_ =
|
||||
static_cast<size_t>(info_.globalMemSize_);
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Instantiates a new virtual device (VirtualGPU) for \p queue.
//!
//! An interop HSA queue is requested when \p queue belongs to a context that
//! was created with a GL, D3D10 or D3D11 device; otherwise (including a NULL
//! \p queue) a compute queue is requested.
//!
//! \return the new virtual device, or NULL when its creation failed.
device::VirtualDevice*
Device::createVirtualDevice(amd::CommandQueue* queue)
{
    // Initialization of heap and other resources occur during the command
    // queue creation time.
    HsaQueueType queueType = kHsaQueueTypeCompute;
    if (queue != NULL) {
        if (0 != (queue->context().info().flags_ &
                  (amd::Context::GLDeviceKhr |
                   amd::Context::D3D10DeviceKhr |
                   amd::Context::D3D11DeviceKhr))) {
            queueType = kHsaQueueTypeInterop;
        }
    }

    VirtualGPU *gpu = new VirtualGPU(*this);
    if (gpu->create(queueType)) {
        return gpu;
    }

    delete gpu;
    return NULL;
}
|
||||
|
||||
bool
|
||||
Device::globalFreeMemory(size_t *freeMemory) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
Device::bindExternalDevice(
|
||||
intptr_t type,
|
||||
void* gfxDevice,
|
||||
void* gfxContext,
|
||||
bool validateOnly)
|
||||
{
|
||||
switch (type) {
|
||||
#ifdef _WIN32
|
||||
case CL_CONTEXT_D3D10_DEVICE_KHR:
|
||||
if (kHsaStatusSuccess != hsacoreapi->HsaBeginD3D10Interop(
|
||||
_bkendDevice, reinterpret_cast<ID3D10Device *>(gfxDevice))) {
|
||||
LogError("Failed HsaBeginD3D10Interop()");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case CL_CONTEXT_D3D11_DEVICE_KHR:
|
||||
if (kHsaStatusSuccess != hsacoreapi->HsaBeginD3D11Interop(
|
||||
_bkendDevice, reinterpret_cast<ID3D11Device *>(gfxDevice))) {
|
||||
LogError("Failed HsaBeginD3D11Interop()");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
#endif // _WIN32
|
||||
case CL_GL_CONTEXT_KHR:
|
||||
if (kHsaStatusSuccess != hsacoreapi->HsaBeginGLInterop(
|
||||
_bkendDevice, reinterpret_cast<GLvoid *>(gfxContext))) {
|
||||
LogError("Failed HsaBeginGLInterop()");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LogError("Unknown external device!");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (validateOnly) {
|
||||
return unbindExternalDevice(type, gfxDevice, gfxContext, validateOnly);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Device::unbindExternalDevice(
|
||||
intptr_t type,
|
||||
void* gfxDevice,
|
||||
void* gfxContext,
|
||||
bool validateOnly)
|
||||
{
|
||||
switch (type) {
|
||||
#ifdef _WIN32
|
||||
case CL_CONTEXT_D3D10_DEVICE_KHR:
|
||||
if (kHsaStatusSuccess != hsacoreapi->HsaEndD3D10Interop(
|
||||
_bkendDevice, reinterpret_cast<ID3D10Device *>(gfxDevice))) {
|
||||
LogError("Failed HsaEndD3D10Interop()");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case CL_CONTEXT_D3D11_DEVICE_KHR:
|
||||
if (kHsaStatusSuccess != hsacoreapi->HsaEndD3D11Interop(
|
||||
_bkendDevice, reinterpret_cast<ID3D11Device *>(gfxDevice))) {
|
||||
LogError("Failed HsaEndD3D11Interop()");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
#endif // _WIN32
|
||||
case CL_GL_CONTEXT_KHR:
|
||||
if (kHsaStatusSuccess != hsacoreapi->HsaEndGLInterop(
|
||||
_bkendDevice, reinterpret_cast<GLvoid *>(gfxContext))) {
|
||||
LogError("Failed HsaEndGLInterop()");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LogError("Unknown external device!");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Creates the device memory object that backs \p owner on this device.
//!
//! A Buffer or an Image device object is created depending on the kind of
//! \p owner. Interop objects without a parent are created through
//! createInterop(), everything else through create(). For a parentless image
//! that is not directly host accessible and that was created with
//! CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR, the host data is uploaded
//! through a temporary view of the image.
//!
//! \return the new device memory object, or NULL on any failure. On failure
//!         every object allocated here is freed again.
device::Memory*
Device::createMemory(amd::Memory &owner) const
{
    oclhsa::Memory* memory = NULL;

    if (owner.asBuffer()) {
        memory = new oclhsa::Buffer(*this, owner);
    }
    else if (owner.asImage()) {
        memory = new oclhsa::Image(*this, owner);
    }
    else {
        LogError("Unknown memory type");
    }

    if (memory == NULL) {
        return NULL;
    }

    bool result = false;
    if (owner.isInterop() && (owner.parent() == NULL)) {
        result = memory->createInterop();
    }
    else {
        result = memory->create();
    }

    if (!result) {
        delete memory;
        return NULL;
    }

    if (!memory->isHostMemDirectAccess() && owner.asImage() &&
        owner.parent() == NULL &&
        (owner.getMemFlags() &
         (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR))) {
        // To avoid a recursive call to Device::createMemory, we perform the
        // data transfer to a view of the image.
        amd::Image *imageView =
            owner.asImage()->createView(
                owner.getContext(), owner.asImage()->getImageFormat(),
                xferQueue());

        if (imageView == NULL) {
            LogError("[OCL] Fail to allocate view of image object");
            // 'memory' is not owned by anyone else yet: free it instead of
            // leaking it.
            delete memory;
            return NULL;
        }

        Image* devImageView =
            new oclhsa::Image(static_cast<const Device &>(*this), *imageView);
        if (devImageView == NULL) {
            LogError("[OCL] Fail to allocate device mem object for the view");
            imageView->release();
            delete memory;
            return NULL;
        }

        if (!devImageView->createView(static_cast<oclhsa::Image &>(*memory))) {
            LogError("[OCL] Fail to create device mem object for the view");
            // The view goes first, then the memory it was meant to view.
            delete devImageView;
            imageView->release();
            delete memory;
            return NULL;
        }

        imageView->replaceDeviceMemory(this, devImageView);

        result = xferMgr().writeImage(
            owner.getHostMem(),
            *devImageView,
            amd::Coord3D(0),
            imageView->getRegion(),
            imageView->getRowPitch(),
            imageView->getSlicePitch(),
            true);

        imageView->release();
    }

    if (!result) {
        delete memory;
        return NULL;
    }

    return memory;
}
|
||||
|
||||
void*
|
||||
Device::hostAlloc(size_t size, size_t alignment, bool atomics) const
|
||||
{
|
||||
void* ret;
|
||||
alignment = std::max(alignment, static_cast<size_t>(info_.memBaseAddrAlign_));
|
||||
assert(amd::isMultipleOf(alignment, info_.memBaseAddrAlign_));
|
||||
HsaAmdSystemMemoryType type = amd::Is64Bits() && atomics
|
||||
? kHsaAmdSystemMemoryTypeCoherent : kHsaAmdSystemMemoryTypeDefault;
|
||||
hsacoreapi->HsaAmdAllocateSystemMemory(size, alignment, type, &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
Device::hostFree(void* ptr, size_t size) const
|
||||
{
|
||||
hsacoreapi->HsaAmdFreeSystemMemory(ptr);
|
||||
}
|
||||
|
||||
void*
|
||||
Device::svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const
|
||||
{
|
||||
bool atomics = (flags & CL_MEM_SVM_ATOMICS) != 0;
|
||||
return hostAlloc(size, alignment, atomics);
|
||||
}
|
||||
|
||||
void
|
||||
Device::svmFree(void* ptr) const
|
||||
{
|
||||
hostFree(ptr);
|
||||
}
|
||||
|
||||
//! Returns the virtual device used for internal memory transfers, creating
//! it on first use.
//!
//! \return the transfer queue, or NULL (after logging an error) when it
//!         could not be created.
VirtualGPU*
Device::xferQueue() const
{
    if (xferQueue_ == NULL) {
        // Create virtual device for internal memory transfer. The queue is
        // created lazily from a const method, hence the const_cast.
        Device* thisDevice = const_cast<Device*>(this);
        // createVirtualDevice() hands back the VirtualGPU through its base
        // class pointer; static_cast is the cast that performs a proper
        // base-to-derived conversion (reinterpret_cast only reinterprets the
        // address bits).
        thisDevice->xferQueue_ = static_cast<VirtualGPU*>(
            thisDevice->createVirtualDevice());
        if (xferQueue_ == NULL) {
            LogError("Couldn't create the device transfer manager!");
        }
    }
    return xferQueue_;
}
|
||||
|
||||
}
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
@@ -1,334 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef _OPENCL_RUNTIME_DEVICE_HSA_HSADEVICE_HPP_
|
||||
#define _OPENCL_RUNTIME_DEVICE_HSA_HSADEVICE_HPP_
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/perfctr.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "utils/concurrent.hpp"
|
||||
#include "thread/thread.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include "utils/versions.hpp"
|
||||
#include "aclTypes.h"
|
||||
|
||||
#include "device/hsa/hsasettings.hpp"
|
||||
#include "device/hsa/hsavirtual.hpp"
|
||||
#include "device/hsa/hsadefs.hpp"
|
||||
|
||||
#include "newcore.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// extern hsa::Runtime* g_hsaruntime;
|
||||
|
||||
/*! \addtogroup HSA
|
||||
* @{
|
||||
*/
|
||||
|
||||
//! HSA Device Implementation
|
||||
namespace oclhsa {
|
||||
|
||||
/**
|
||||
* @brief List of environment variables that could be used to
|
||||
* configure the behavior of Hsa Runtime
|
||||
*/
|
||||
#define ENVVAR_HSA_POLL_KERNEL_COMPLETION "HSA_POLL_COMPLETION"
|
||||
|
||||
//! Forward declarations
|
||||
class Command;
|
||||
class Device;
|
||||
class GpuCommand;
|
||||
class Heap;
|
||||
class HeapBlock;
|
||||
class Program;
|
||||
class Kernel;
|
||||
class Memory;
|
||||
class Resource;
|
||||
class VirtualDevice;
|
||||
class PrintfDbg;
|
||||
|
||||
//A NULL Device type used only for offline compilation
|
||||
// Only functions that are used for compilation will be in this device
|
||||
class NullDevice : public amd::Device {
|
||||
public:
|
||||
//! constructor
|
||||
NullDevice(){};
|
||||
|
||||
//!create the device
|
||||
bool create(const AMDDeviceInfo& deviceInfo);
|
||||
|
||||
//! Initialise all the offline devices that can be used for compilation
|
||||
static bool init();
|
||||
//! Teardown for offline devices
|
||||
static void tearDown();
|
||||
|
||||
//! Destructor for the Null device
|
||||
virtual ~NullDevice();
|
||||
|
||||
aclCompiler *compiler() const { return compilerHandle_; }
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
virtual device::Program *createProgram(bool hsail = false);
|
||||
|
||||
const AMDDeviceInfo& deviceInfo() const {
|
||||
return deviceInfo_;
|
||||
}
|
||||
//! Gets the backend device for the NULL device type
|
||||
virtual const HsaDevice* getBackendDevice() const {
|
||||
ShouldNotReachHere();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//List of dummy functions which are disabled for NullDevice
|
||||
|
||||
//! Create sub-devices according to the given partition scheme.
|
||||
virtual cl_int createSubDevices(
|
||||
device::CreateSubDevicesInfo& create_info,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) {
|
||||
ShouldNotReachHere();
|
||||
return CL_INVALID_VALUE; };
|
||||
|
||||
//! Create a new virtual device environment.
|
||||
virtual device::VirtualDevice* createVirtualDevice(
|
||||
amd::CommandQueue* queue = NULL) { return NULL; }
|
||||
|
||||
virtual bool registerSvmMemory(void* ptr, size_t size) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void deregisterSvmMemory(void* ptr) const {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory* createMemory(amd::Memory& owner) const {
|
||||
ShouldNotReachHere();
|
||||
return NULL; }
|
||||
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(
|
||||
const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
) const
|
||||
{
|
||||
ShouldNotReachHere();
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory* createView(
|
||||
amd::Memory& owner, //!< Owner memory object
|
||||
const device::Memory& parent //!< Parent device memory object for the view
|
||||
) const {
|
||||
ShouldNotReachHere();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual void* svmAlloc(
|
||||
amd::Context& context, //!< The context used to create a buffer
|
||||
size_t size, //!< size of svm spaces
|
||||
size_t alignment, //!< alignment requirement of svm spaces
|
||||
cl_svm_mem_flags flags, //!< flags of creation svm spaces
|
||||
void* svmPtr //!< existing svm pointer for mGPU case
|
||||
) const {
|
||||
ShouldNotReachHere();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual void svmFree(
|
||||
void* ptr //!< svm pointer needed to be freed
|
||||
) const {
|
||||
ShouldNotReachHere();
|
||||
return;
|
||||
}
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
|
||||
virtual bool bindExternalDevice(
|
||||
intptr_t type, void* pDevice, void* pContext, bool validateOnly) {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool unbindExternalDevice(
|
||||
intptr_t type, void* pDevice, void* pContext, bool validateOnly) {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Releases non-blocking map target memory
|
||||
virtual void freeMapTarget(amd::Memory& mem, void* target) { ShouldNotReachHere();}
|
||||
|
||||
//! Empty implementation on Null device
|
||||
virtual bool globalFreeMemory(size_t* freeMemory) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
//! Initialize compiler instance and handle
|
||||
static bool initCompiler(bool isOffline);
|
||||
//! destroy compiler instance and handle
|
||||
static bool destroyCompiler();
|
||||
//! Handle to the the compiler
|
||||
static aclCompiler* compilerHandle_;
|
||||
//! Device Id for an HsaDevice
|
||||
AMDDeviceInfo deviceInfo_;
|
||||
private:
|
||||
static const bool offlineDevice_;
|
||||
};
|
||||
|
||||
//! A HSA device ordinal (physical HSA device)
|
||||
class Device : public NullDevice {
|
||||
public:
|
||||
//! Initialise the whole HSA device subsystem (CAL init, device enumeration, etc).
|
||||
static bool init();
|
||||
static void tearDown();
|
||||
|
||||
static bool loadHsaModules();
|
||||
|
||||
bool create();
|
||||
|
||||
//! Construct a new physical HSA device
|
||||
Device(const HsaDevice *bkendDevice);
|
||||
virtual const HsaDevice *getBackendDevice() const
|
||||
{
|
||||
return (_bkendDevice);
|
||||
}
|
||||
|
||||
//! Destructor for the physical HSA device
|
||||
virtual ~Device();
|
||||
|
||||
bool mapHSADeviceToOpenCLDevice(const HsaDevice *hsadevice);
|
||||
|
||||
// Temporary, delete it later when HSA Runtime and KFD are fully functional.
|
||||
void fake_device();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// TODO: Below are all mocked up virtual functions from amd::Device, they may
|
||||
// need real implementation.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// #ifdef cl_ext_device_fission
|
||||
//! Create sub-devices according to the given partition scheme.
|
||||
virtual cl_int createSubDevices(
|
||||
device::CreateSubDevicesInfo &create_inf,
|
||||
cl_uint num_entries,
|
||||
cl_device_id *devices,
|
||||
cl_uint *num_devices)
|
||||
{ return CL_INVALID_VALUE; }
|
||||
// #endif // cl_ext_device_fission
|
||||
|
||||
// bool Device::create(CALuint ordinal);
|
||||
|
||||
//! Instantiate a new virtual device
|
||||
virtual device::VirtualDevice *createVirtualDevice(
|
||||
amd::CommandQueue* queue = NULL);
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
virtual device::Program *createProgram(bool hsail = false);
|
||||
|
||||
virtual device::Memory *createMemory(amd::Memory &owner) const;
|
||||
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(
|
||||
const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
) const
|
||||
{
|
||||
//! \todo HSA team has to implement sampler allocation
|
||||
*sampler = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory *createView(
|
||||
amd::Memory &owner, //!< Owner memory object
|
||||
const device::Memory &parent //!< Parent device memory object for the view
|
||||
) const { return NULL; }
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory &owner) const {return true; }
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
virtual bool bindExternalDevice(
|
||||
intptr_t type, void *pDevice, void *pContext, bool validateOnly);
|
||||
|
||||
/**
|
||||
* @brief Removes the external device as an available device.
|
||||
*
|
||||
* @note: The current implementation is to avoid build break
|
||||
* and does not represent actual / correct implementation. This
|
||||
* needs to be done.
|
||||
*/
|
||||
bool unbindExternalDevice(
|
||||
intptr_t type, //!< Enum val. for ext.API type: GL, D3D10, etc.
|
||||
void *gfxDevice, //!< D3D device do D3D, HDC/Display handle of X Window for GL
|
||||
void *gfxContext, //!< HGLRC/GLXContext handle
|
||||
bool validateOnly //!< Only validate if the device can inter-operate with
|
||||
//!< pDevice/pContext, do not bind.
|
||||
);
|
||||
|
||||
//! Gets free memory on a GPU device
|
||||
virtual bool globalFreeMemory(size_t *freeMemory) const;
|
||||
|
||||
virtual void* hostAlloc(size_t size, size_t alignment, bool atomics = false) const;
|
||||
|
||||
virtual void hostFree(void* ptr, size_t size = 0) const;
|
||||
|
||||
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags = CL_MEM_READ_WRITE, void* svmPtr = NULL) const;
|
||||
|
||||
virtual void svmFree(void* ptr) const;
|
||||
|
||||
//! Returns a OCLHSA memory object from AMD memory object
|
||||
oclhsa::Memory* getOclHsaMemory(
|
||||
amd::Memory* mem //!< Pointer to AMD memory object
|
||||
) const;
|
||||
|
||||
//! Returns the object behind settings_ reinterpreted as Settings
const Settings &settings() const
{
    Settings &hsaSettings = reinterpret_cast<Settings &>(*settings_);
    return hsaSettings;
}
|
||||
|
||||
//! Returns transfer engine object
|
||||
const device::BlitManager& xferMgr() const { return xferQueue()->blitMgr();}
|
||||
|
||||
private:
|
||||
bool populateOCLDeviceConstants();
|
||||
|
||||
cl_device_svm_capabilities getSvmCapabilities(const HsaDevice* device);
|
||||
|
||||
VirtualGPU* xferQueue() const;
|
||||
|
||||
static bool isHsaInitialized_;
|
||||
const HsaDevice *_bkendDevice;
|
||||
static const bool offlineDevice_;
|
||||
amd::Context *context_; //!< A dummy context for internal data transfer
|
||||
VirtualGPU *xferQueue_; //!< Transfer queue, created on demand
|
||||
}; // class oclhsa::Device
|
||||
} // namespace oclhsa
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
#endif /*WITHOUT_FSA_BACKEND*/
|
||||
#endif /*HSA_HPP_*/
|
||||
@@ -1,573 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#include "device/hsa/hsakernel.hpp"
|
||||
|
||||
#include "device/hsa/oclhsa_common.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
//! Maps the compiler-library argument kind onto the HSAIL argument kind.
//! Every kind other than pointer, value, image and sampler yields
//! HSAIL_ARGTYPE_ERROR.
inline static HSAIL_ARG_TYPE
GetHSAILArgType(const aclArgData* argInfo)
{
    HSAIL_ARG_TYPE hsailKind = HSAIL_ARGTYPE_ERROR;

    switch (argInfo->type) {
    case ARG_TYPE_POINTER:
        hsailKind = HSAIL_ARGTYPE_POINTER;
        break;
    case ARG_TYPE_VALUE:
        hsailKind = HSAIL_ARGTYPE_VALUE;
        break;
    case ARG_TYPE_IMAGE:
        hsailKind = HSAIL_ARGTYPE_IMAGE;
        break;
    case ARG_TYPE_SAMPLER:
        hsailKind = HSAIL_ARGTYPE_SAMPLER;
        break;
    case ARG_TYPE_ERROR:
    default:
        // keep the error value set above
        break;
    }
    return hsailKind;
}
|
||||
|
||||
inline static size_t
|
||||
GetHSAILArgAlignment(const aclArgData* argInfo)
|
||||
{
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER:
|
||||
return argInfo->arg.pointer.align;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
//! Derives the HSAIL address qualifier of an argument.
//! Images and samplers are reported as global; pointers are classified by
//! their memory type; anything else is an error. An unsupported pointer
//! memory type is logged before the error value is returned.
inline static HSAIL_ADDRESS_QUALIFIER
GetHSAILAddrQual(const aclArgData* argInfo)
{
    if ((argInfo->type == ARG_TYPE_IMAGE) ||
        (argInfo->type == ARG_TYPE_SAMPLER)) {
        return HSAIL_ADDRESS_GLOBAL;
    }
    if (argInfo->type != ARG_TYPE_POINTER) {
        return HSAIL_ADDRESS_ERROR;
    }

    HSAIL_ADDRESS_QUALIFIER qualifier = HSAIL_ADDRESS_ERROR;
    switch (argInfo->arg.pointer.memory) {
    case PTR_MT_CONSTANT_EMU:
    case PTR_MT_CONSTANT:
    case PTR_MT_UAV:
    case PTR_MT_GLOBAL:
        qualifier = HSAIL_ADDRESS_GLOBAL;
        break;
    case PTR_MT_LDS_EMU:
    case PTR_MT_LDS:
        qualifier = HSAIL_ADDRESS_LOCAL;
        break;
    case PTR_MT_ERROR:
    default:
        LogError("Unsupported address type");
        break;
    }
    return qualifier;
}
|
||||
|
||||
/* f16 returns f32 - workaround due to comp lib */
|
||||
inline static HSAIL_DATA_TYPE
|
||||
GetHSAILDataType(const aclArgData* argInfo)
|
||||
{
|
||||
aclArgDataType dataType;
|
||||
|
||||
if (argInfo->type == ARG_TYPE_POINTER) {
|
||||
dataType = argInfo->arg.pointer.data;
|
||||
}
|
||||
else if (argInfo->type == ARG_TYPE_VALUE) {
|
||||
dataType = argInfo->arg.value.data;
|
||||
}
|
||||
else {
|
||||
return HSAIL_DATATYPE_ERROR;
|
||||
}
|
||||
switch (dataType) {
|
||||
case DATATYPE_i1:
|
||||
return HSAIL_DATATYPE_B1;
|
||||
case DATATYPE_i8:
|
||||
return HSAIL_DATATYPE_S8;
|
||||
case DATATYPE_i16:
|
||||
return HSAIL_DATATYPE_S16;
|
||||
case DATATYPE_i32:
|
||||
return HSAIL_DATATYPE_S32;
|
||||
case DATATYPE_i64:
|
||||
return HSAIL_DATATYPE_S64;
|
||||
case DATATYPE_u8:
|
||||
return HSAIL_DATATYPE_U8;
|
||||
case DATATYPE_u16:
|
||||
return HSAIL_DATATYPE_U16;
|
||||
case DATATYPE_u32:
|
||||
return HSAIL_DATATYPE_U32;
|
||||
case DATATYPE_u64:
|
||||
return HSAIL_DATATYPE_U64;
|
||||
case DATATYPE_f16:
|
||||
return HSAIL_DATATYPE_F32;
|
||||
case DATATYPE_f32:
|
||||
return HSAIL_DATATYPE_F32;
|
||||
case DATATYPE_f64:
|
||||
return HSAIL_DATATYPE_F64;
|
||||
case DATATYPE_struct:
|
||||
return HSAIL_DATATYPE_STRUCT;
|
||||
case DATATYPE_opaque:
|
||||
return HSAIL_DATATYPE_OPAQUE;
|
||||
case DATATYPE_ERROR:
|
||||
default:
|
||||
return HSAIL_DATATYPE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// returns size in number of bytes
|
||||
inline static int
|
||||
GetHSAILArgSize(const aclArgData *argInfo)
|
||||
{
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_VALUE:
|
||||
switch (GetHSAILDataType(argInfo)) {
|
||||
case HSAIL_DATATYPE_B1:
|
||||
return 1;
|
||||
case HSAIL_DATATYPE_B8:
|
||||
case HSAIL_DATATYPE_S8:
|
||||
case HSAIL_DATATYPE_U8:
|
||||
return 1;
|
||||
case HSAIL_DATATYPE_B16:
|
||||
case HSAIL_DATATYPE_U16:
|
||||
case HSAIL_DATATYPE_S16:
|
||||
case HSAIL_DATATYPE_F16:
|
||||
return 2;
|
||||
case HSAIL_DATATYPE_B32:
|
||||
case HSAIL_DATATYPE_U32:
|
||||
case HSAIL_DATATYPE_S32:
|
||||
case HSAIL_DATATYPE_F32:
|
||||
return 4;
|
||||
case HSAIL_DATATYPE_B64:
|
||||
case HSAIL_DATATYPE_U64:
|
||||
case HSAIL_DATATYPE_S64:
|
||||
case HSAIL_DATATYPE_F64:
|
||||
return 8;
|
||||
case HSAIL_DATATYPE_STRUCT:
|
||||
return argInfo->arg.value.numElements;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
case ARG_TYPE_POINTER:
|
||||
case ARG_TYPE_IMAGE:
|
||||
case ARG_TYPE_SAMPLER:
|
||||
return sizeof(void*);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
//! Returns the clk value type describing an argument.
//! Pointers and images map to T_POINTER, samplers to T_SAMPLER. Value
//! arguments are looked up by element type and vector length (1, 2, 3, 4,
//! 8 or 16); f16 shares the float row. Everything else is T_VOID.
inline static clk_value_type_t
GetOclType(const aclArgData* argInfo)
{
    // Rows: char, short, int, long, float, double.
    // Columns: vector lengths 1, 2, 3, 4, 8, 16.
    static const clk_value_type_t kValueTypes[6][6] = {
        { T_CHAR,   T_CHAR2,   T_CHAR3,   T_CHAR4,   T_CHAR8,   T_CHAR16   },
        { T_SHORT,  T_SHORT2,  T_SHORT3,  T_SHORT4,  T_SHORT8,  T_SHORT16  },
        { T_INT,    T_INT2,    T_INT3,    T_INT4,    T_INT8,    T_INT16    },
        { T_LONG,   T_LONG2,   T_LONG3,   T_LONG4,   T_LONG8,   T_LONG16   },
        { T_FLOAT,  T_FLOAT2,  T_FLOAT3,  T_FLOAT4,  T_FLOAT8,  T_FLOAT16  },
        { T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
    };

    switch (argInfo->type) {
    case ARG_TYPE_POINTER:
    case ARG_TYPE_IMAGE:
        return T_POINTER;
    case ARG_TYPE_SAMPLER:
        return T_SAMPLER;
    case ARG_TYPE_VALUE:
        break;  // resolved through the table below
    default:
        return T_VOID;
    }

    uint row;
    switch (argInfo->arg.value.data) {
    case DATATYPE_i8:
    case DATATYPE_u8:
        row = 0;
        break;
    case DATATYPE_i16:
    case DATATYPE_u16:
        row = 1;
        break;
    case DATATYPE_i32:
    case DATATYPE_u32:
        row = 2;
        break;
    case DATATYPE_i64:
    case DATATYPE_u64:
        row = 3;
        break;
    case DATATYPE_f16:
    case DATATYPE_f32:
        row = 4;
        break;
    case DATATYPE_f64:
        row = 5;
        break;
    default:
        return T_VOID;
    }

    uint column;
    switch (argInfo->arg.value.numElements) {
    case 1:  column = 0; break;
    case 2:  column = 1; break;
    case 3:  column = 2; break;
    case 4:  column = 3; break;
    case 8:  column = 4; break;
    case 16: column = 5; break;
    default:
        return T_VOID;
    }

    return kValueTypes[row][column];
}
|
||||
|
||||
//! Returns the OpenCL address qualifier of an argument.
//! Images are global; pointers are classified by their memory type;
//! every other case (including unlisted pointer memory types) is private.
inline static cl_kernel_arg_address_qualifier
GetOclAddrQual(const aclArgData* argInfo)
{
    if (argInfo->type == ARG_TYPE_IMAGE) {
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    }
    if (argInfo->type != ARG_TYPE_POINTER) {
        // default for all other cases
        return CL_KERNEL_ARG_ADDRESS_PRIVATE;
    }

    cl_kernel_arg_address_qualifier qualifier = CL_KERNEL_ARG_ADDRESS_PRIVATE;
    switch (argInfo->arg.pointer.memory) {
    case PTR_MT_UAV:
    case PTR_MT_GLOBAL:
        qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
        break;
    case PTR_MT_CONSTANT:
    case PTR_MT_UAV_CONSTANT:
    case PTR_MT_CONSTANT_EMU:
        qualifier = CL_KERNEL_ARG_ADDRESS_CONSTANT;
        break;
    case PTR_MT_LDS_EMU:
    case PTR_MT_LDS:
        qualifier = CL_KERNEL_ARG_ADDRESS_LOCAL;
        break;
    default:
        break;
    }
    return qualifier;
}
|
||||
|
||||
//! Returns the OpenCL access qualifier of an argument. Only image
//! arguments carry one; everything else reports CL_KERNEL_ARG_ACCESS_NONE.
inline static cl_kernel_arg_access_qualifier
GetOclAccessQual(const aclArgData* argInfo)
{
    if (argInfo->type != ARG_TYPE_IMAGE) {
        return CL_KERNEL_ARG_ACCESS_NONE;
    }

    cl_kernel_arg_access_qualifier access = CL_KERNEL_ARG_ACCESS_NONE;
    switch (argInfo->arg.image.type) {
    case ACCESS_TYPE_RO:
        access = CL_KERNEL_ARG_ACCESS_READ_ONLY;
        break;
    case ACCESS_TYPE_WO:
        access = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
        break;
    case ACCESS_TYPE_RW:
        access = CL_KERNEL_ARG_ACCESS_READ_WRITE;
        break;
    default:
        break;
    }
    return access;
}
|
||||
|
||||
//! Builds the OpenCL type-qualifier bit mask of an argument. Only pointer
//! arguments get qualifiers: volatile and restrict come from the pointer
//! description, const is set when the argument is flagged const or when
//! the pointer refers to one of the constant memory types.
inline static cl_kernel_arg_type_qualifier
GetOclTypeQual(const aclArgData* argInfo)
{
    cl_kernel_arg_type_qualifier qualifiers = CL_KERNEL_ARG_TYPE_NONE;
    if (argInfo->type != ARG_TYPE_POINTER) {
        return qualifiers;
    }

    bool markConst = argInfo->isConst ? true : false;
    switch (argInfo->arg.pointer.memory) {
    case PTR_MT_CONSTANT:
    case PTR_MT_UAV_CONSTANT:
    case PTR_MT_CONSTANT_EMU:
        markConst = true;
        break;
    default:
        break;
    }

    if (argInfo->arg.pointer.isVolatile) {
        qualifiers |= CL_KERNEL_ARG_TYPE_VOLATILE;
    }
    if (argInfo->arg.pointer.isRestrict) {
        qualifiers |= CL_KERNEL_ARG_TYPE_RESTRICT;
    }
    if (markConst) {
        qualifiers |= CL_KERNEL_ARG_TYPE_CONST;
    }
    return qualifiers;
}
|
||||
|
||||
static int
|
||||
GetOclSize(const aclArgData* argInfo)
|
||||
{
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER: return sizeof(void *);
|
||||
case ARG_TYPE_VALUE:
|
||||
switch (argInfo->arg.value.data) {
|
||||
case DATATYPE_i8:
|
||||
case DATATYPE_u8:
|
||||
case DATATYPE_struct:
|
||||
return 1 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_u16:
|
||||
case DATATYPE_i16:
|
||||
case DATATYPE_f16:
|
||||
return 2 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_u32:
|
||||
case DATATYPE_i32:
|
||||
case DATATYPE_f32:
|
||||
return 4 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_i64:
|
||||
case DATATYPE_u64:
|
||||
case DATATYPE_f64:
|
||||
return 8 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_ERROR:
|
||||
default: return -1;
|
||||
}
|
||||
case ARG_TYPE_IMAGE: return sizeof(cl_mem);
|
||||
case ARG_TYPE_SAMPLER: return sizeof(cl_sampler);
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
//! Wraps a compiler-library argument description.
//! The argument name and type name are copied out of \a argInfo; the
//! pointer itself is kept for later queries (see size()). The offset is
//! now initialised to 0 instead of being left indeterminate.
KernelArg::KernelArg(aclArgData *argInfo)
    : argInfo_(argInfo)
    , offset_(0)
    , name_(argInfo->argStr)
    , typeName_(argInfo->typeStr)
{
}
|
||||
|
||||
int KernelArg::size() {
|
||||
switch (argInfo_->type) {
|
||||
case ARG_TYPE_POINTER: {
|
||||
return sizeof(void *);
|
||||
}
|
||||
case ARG_TYPE_VALUE: {
|
||||
switch (argInfo_->arg.value.data) {
|
||||
case DATATYPE_ERROR: {
|
||||
return -1;
|
||||
}
|
||||
case DATATYPE_i8:
|
||||
case DATATYPE_u8:
|
||||
case DATATYPE_struct: {
|
||||
return 1 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
case DATATYPE_u16:
|
||||
case DATATYPE_i16:
|
||||
case DATATYPE_f16: {
|
||||
return 2 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
case DATATYPE_u32:
|
||||
case DATATYPE_i32:
|
||||
case DATATYPE_f32: {
|
||||
return 4 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
case DATATYPE_i64:
|
||||
case DATATYPE_u64:
|
||||
case DATATYPE_f64: {
|
||||
return 8 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
case ARG_TYPE_IMAGE: {
|
||||
return sizeof(cl_mem);
|
||||
}
|
||||
case ARG_TYPE_SAMPLER: {
|
||||
return sizeof(cl_sampler);
|
||||
}
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
std::string& KernelArg::name() {
|
||||
return name_;
|
||||
}
|
||||
|
||||
std::string& KernelArg::typeName()
|
||||
{
|
||||
return typeName_;
|
||||
}
|
||||
|
||||
void
|
||||
Kernel::initArgList(const aclArgData* aclArg)
|
||||
{
|
||||
// Initialize the hsail argument list too
|
||||
initHsailArgs(aclArg);
|
||||
|
||||
// Iterate through the arguments and insert into parameterList
|
||||
device::Kernel::parameters_t params;
|
||||
amd::KernelParameterDescriptor desc;
|
||||
size_t offset = 0;
|
||||
|
||||
// Reserved arguments for HSAIL launch
|
||||
aclArg += ExtraArguments;
|
||||
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
|
||||
desc.name_ = hsailArgList_[i]->name_.c_str();
|
||||
desc.type_ = GetOclType(aclArg);
|
||||
desc.addressQualifier_ = GetOclAddrQual(aclArg);
|
||||
desc.accessQualifier_ = GetOclAccessQual(aclArg);
|
||||
desc.typeQualifier_ = GetOclTypeQual(aclArg);
|
||||
desc.typeName_ = hsailArgList_[i]->typeName_.c_str();
|
||||
|
||||
// Make a check if it is local or global
|
||||
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
desc.size_ = 0;
|
||||
}
|
||||
else {
|
||||
desc.size_ = GetOclSize(aclArg);
|
||||
}
|
||||
|
||||
// Make offset alignment to match CPU metadata, since
|
||||
// in multidevice config abstraction layer has a single signature
|
||||
// and CPU sends the paramaters as they are allocated in memory
|
||||
size_t size = desc.size_;
|
||||
if (size == 0) {
|
||||
// Local memory for CPU
|
||||
size = sizeof(cl_mem);
|
||||
}
|
||||
offset = amd::alignUp(offset, std::min(size, size_t(16)));
|
||||
desc.offset_ = offset;
|
||||
offset += amd::alignUp(size, sizeof(uint32_t));
|
||||
params.push_back(desc);
|
||||
}
|
||||
createSignature(params);
|
||||
}
|
||||
|
||||
void
|
||||
Kernel::initHsailArgs(const aclArgData* aclArg)
|
||||
{
|
||||
int offset = 0;
|
||||
|
||||
// Reserved arguments for HSAIL launch
|
||||
aclArg += ExtraArguments;
|
||||
|
||||
// Iterate through the each kernel argument
|
||||
for (; aclArg->struct_size != 0; aclArg++) {
|
||||
HsailKernelArg* arg = new HsailKernelArg;
|
||||
// Initialize HSAIL kernel argument
|
||||
arg->name_ = aclArg->argStr;
|
||||
arg->typeName_ = aclArg->typeStr;
|
||||
arg->size_ = GetHSAILArgSize(aclArg);
|
||||
arg->offset_ = offset;
|
||||
arg->type_ = GetHSAILArgType(aclArg);
|
||||
arg->addrQual_ = GetHSAILAddrQual(aclArg);
|
||||
arg->dataType_ = GetHSAILDataType(aclArg);
|
||||
// If vector of args we add additional arguments to flatten it out
|
||||
arg->numElem_ = ((aclArg->type == ARG_TYPE_VALUE) &&
|
||||
(aclArg->arg.value.data != DATATYPE_struct)) ?
|
||||
aclArg->arg.value.numElements : 1;
|
||||
arg->alignment_ = GetHSAILArgAlignment(aclArg);
|
||||
offset += GetHSAILArgSize(aclArg);
|
||||
hsailArgList_.push_back(arg);
|
||||
}
|
||||
}
|
||||
|
||||
//! Records the program, BRIG and finalizer options of the kernel.
//! No compilation happens here; kernel code and debug info stay NULL
//! until init() is called.
Kernel::Kernel(std::string name,
               FSAILProgram* prog,
               HsaBrig* brig,
               std::string compileOptions)
    : device::Kernel(name)
    , program_(prog)
    , compileOptions_(compileOptions)
    , brig_(brig)
    , kernelCode_(NULL)
    , debugInfo_(NULL)
{
}
|
||||
|
||||
bool Kernel::init(){
|
||||
acl_error errorCode;
|
||||
//compile kernel down to ISA
|
||||
const HsaDevice *hsaDevice = program_->hsaDevice();
|
||||
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
|
||||
HsaStatus status = hsacoreapi->HsaFinalizeBrig(
|
||||
hsaDevice, brig_,
|
||||
openClKernelName.c_str(),
|
||||
compileOptions_.c_str(),
|
||||
&kernelCode_,
|
||||
&debugInfo_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
return false;
|
||||
}
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
aclCompiler* compileHandle = program_->dev().compiler();
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(),
|
||||
NULL,
|
||||
&sizeOfArgList);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
char *argList = (char *)malloc(sizeOfArgList);
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(),
|
||||
argList,
|
||||
&sizeOfArgList);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
//Set the argList
|
||||
initArgList((const aclArgData *) argList);
|
||||
|
||||
//Pull out amdKernelInfo
|
||||
HsaKernelAmdInfo kernelAmdInfo;
|
||||
status = servicesapi->HsaGetKernelAmdInfo(kernelCode_, &kernelAmdInfo);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
return false;
|
||||
}
|
||||
HsaDeviceAmdInfo devInfo;
|
||||
status = servicesapi->HsaGetDeviceAmdInfo(hsaDevice, &devInfo);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
return false;
|
||||
}
|
||||
//Set the workgroup information for the kernel
|
||||
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
|
||||
workGroupInfo_.availableLDSSize_ = hsaDevice->group_memory_size;
|
||||
workGroupInfo_.availableSGPRs_ = devInfo.max_number_of_sgprs;
|
||||
workGroupInfo_.availableVGPRs_ = devInfo.max_number_of_vgprs;
|
||||
size_t sizeOfWorkGroupSize;
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(),
|
||||
NULL,
|
||||
&sizeOfWorkGroupSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(),
|
||||
workGroupInfo_.compileSize_,
|
||||
&sizeOfWorkGroupSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
//Setting it the same as used LDS
|
||||
workGroupInfo_.localMemSize_ = kernelCode_->workgroup_group_segment_byte_size;
|
||||
workGroupInfo_.privateMemSize_ = kernelCode_->workitem_private_segment_byte_size;
|
||||
workGroupInfo_.usedLDSSize_ = kernelCode_->workgroup_group_segment_byte_size;
|
||||
workGroupInfo_.preferredSizeMultiple_ = hsaDevice->wave_front_size;
|
||||
workGroupInfo_.usedSGPRs_ = kernelAmdInfo.wave_front_sgpr_count;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.usedVGPRs_ = kernelAmdInfo.work_item_vgpr_count;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = hsaDevice->max_waves_per_simd;
|
||||
workGroupInfo_.wavefrontSize_ = hsaDevice->wave_front_size;
|
||||
//TODO: Need to populate it from the shader object
|
||||
workGroupInfo_.size_ = 256;
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Releases the HSAIL argument descriptors owned by the kernel and hands
//! the kernel code and debug info back to the HSA core API.
Kernel::~Kernel()
{
    // Delete the descriptors from the back of the list to the front
    for (size_t remaining = hsailArgList_.size(); remaining > 0; --remaining) {
        delete hsailArgList_[remaining - 1];
    }
    hsailArgList_.clear();

    hsacoreapi->HsaFreeKernelCode(kernelCode_);
    hsacoreapi->HsaFreeKernelDebug(debugInfo_);
}
|
||||
|
||||
} // namespace oclhsa
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
@@ -1,161 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef HSAKERNEL_HPP_
|
||||
#define HSAKERNEL_HPP_
|
||||
|
||||
#include "acl.h"
|
||||
#include "device/hsa/hsaprogram.hpp"
|
||||
#include "newcore.h"
|
||||
#include "top.hpp"
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
#define MAX_INFO_STRING_LEN 0x40
|
||||
//! Address space of an HSAIL kernel argument
enum HSAIL_ADDRESS_QUALIFIER {
    HSAIL_ADDRESS_ERROR          = 0,  //!< Unknown / unsupported
    HSAIL_ADDRESS_GLOBAL         = 1,
    HSAIL_ADDRESS_LOCAL          = 2,
    HSAIL_MAX_ADDRESS_QUALIFIERS = 3   //!< Number of enumerators above
};
|
||||
|
||||
//! Kind of an HSAIL kernel argument
enum HSAIL_ARG_TYPE {
    HSAIL_ARGTYPE_ERROR    = 0,  //!< Unknown / unsupported
    HSAIL_ARGTYPE_POINTER  = 1,
    HSAIL_ARGTYPE_VALUE    = 2,
    HSAIL_ARGTYPE_IMAGE    = 3,
    HSAIL_ARGTYPE_SAMPLER  = 4,
    HSAIL_ARGMAX_ARG_TYPES = 5   //!< Number of enumerators above
};
|
||||
|
||||
//! Element data type of an HSAIL kernel argument
enum HSAIL_DATA_TYPE {
    HSAIL_DATATYPE_ERROR     = 0,   //!< Unknown / unsupported
    // bit types
    HSAIL_DATATYPE_B1        = 1,
    HSAIL_DATATYPE_B8        = 2,
    HSAIL_DATATYPE_B16       = 3,
    HSAIL_DATATYPE_B32       = 4,
    HSAIL_DATATYPE_B64       = 5,
    // signed integers
    HSAIL_DATATYPE_S8        = 6,
    HSAIL_DATATYPE_S16       = 7,
    HSAIL_DATATYPE_S32       = 8,
    HSAIL_DATATYPE_S64       = 9,
    // unsigned integers
    HSAIL_DATATYPE_U8        = 10,
    HSAIL_DATATYPE_U16       = 11,
    HSAIL_DATATYPE_U32       = 12,
    HSAIL_DATATYPE_U64       = 13,
    // floating point
    HSAIL_DATATYPE_F16       = 14,
    HSAIL_DATATYPE_F32       = 15,
    HSAIL_DATATYPE_F64       = 16,
    // aggregates
    HSAIL_DATATYPE_STRUCT    = 17,
    HSAIL_DATATYPE_OPAQUE    = 18,
    HSAIL_DATATYPE_MAX_TYPES = 19   //!< Number of enumerators above
};
|
||||
|
||||
struct HsailKernelArg
|
||||
{
|
||||
std::string name_; //!< Argument's name
|
||||
std::string typeName_; //!< Argument's type name
|
||||
uint size_; //!< Size in bytes
|
||||
uint offset_; //!< Argument's offset
|
||||
uint alignment_; //!< Argument's alignment
|
||||
HSAIL_ARG_TYPE type_; //!< Type of the argument
|
||||
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
|
||||
HSAIL_DATA_TYPE dataType_; //!< The type of data
|
||||
uint numElem_; //!< Number of elements
|
||||
};
|
||||
|
||||
class KernelArg
|
||||
{
|
||||
public:
|
||||
KernelArg(aclArgData* argInfo);
|
||||
//! Return type of the argument
|
||||
clk_value_type_t amdoclType();
|
||||
//! Global, local etc - returns amdocl types
|
||||
clk_address_space_t amdoclAddrQual();
|
||||
//! Global,localetc - returns opencl type
|
||||
cl_kernel_arg_address_qualifier oclAddrQual();
|
||||
//! read , write etc - returns amdocl type
|
||||
clk_arg_qualifier_t amdoclAccessQual();
|
||||
//! read , write etc - returns opencl type type
|
||||
cl_kernel_arg_access_qualifier oclAccessQual();
|
||||
//! const,volatile,restrict etc - returns opencl type type
|
||||
cl_kernel_arg_type_qualifier oclTypeQual();
|
||||
|
||||
//! Name of the argument
|
||||
std::string& name();
|
||||
//! Name of the argument
|
||||
std::string& typeName();
|
||||
//! reflection
|
||||
std::string reflection(){ return name(); };
|
||||
//! Returns the size of the argument
|
||||
int size();
|
||||
//! returns the offset
|
||||
int offset();
|
||||
|
||||
void setOffset();
|
||||
|
||||
private:
|
||||
aclArgData* argInfo_;
|
||||
int offset_;
|
||||
std::string name_;
|
||||
std::string typeName_;
|
||||
};
|
||||
|
||||
class Kernel : public device::Kernel
|
||||
{
|
||||
public:
|
||||
// Global offsets located in the first 3 elements
|
||||
static const uint ExtraArguments = 3;
|
||||
|
||||
Kernel(std::string name,
|
||||
FSAILProgram* prog,
|
||||
HsaBrig* brig,
|
||||
std::string compileOptions);
|
||||
|
||||
~Kernel();
|
||||
|
||||
//! Initializes the metadata required for this kernel
|
||||
bool init();
|
||||
|
||||
const FSAILProgram* program() {
|
||||
return static_cast<const FSAILProgram*>(program_);
|
||||
}
|
||||
|
||||
//! Returns the AqlKernel associated with this Kernel
|
||||
const HsaKernelCode* kernelCode() { return
|
||||
static_cast<const HsaKernelCode*>(kernelCode_);
|
||||
}
|
||||
|
||||
//! Returns the BRIG that was used to compile this kernel
|
||||
const HsaBrig* brig() {
|
||||
return static_cast<const HsaBrig*>(brig_);
|
||||
}
|
||||
|
||||
//!returns a pointer to the hsail argument at the specified index
|
||||
HsailKernelArg* hsailArgAt(size_t index) {
|
||||
return hsailArgList_[index];
|
||||
}
|
||||
|
||||
private:
|
||||
//! Populates hsailArgList_
|
||||
void initArgList(const aclArgData* aclArg);
|
||||
|
||||
//! Initializes Hsail Argument metadata and info ;
|
||||
void initHsailArgs(const aclArgData* aclArg);
|
||||
|
||||
FSAILProgram *program_; //!< The oclhsa::FSAILProgram context
|
||||
std::vector<HsailKernelArg*> hsailArgList_; //!< Vector list of HSAIL Arguments
|
||||
std::string compileOptions_; //!< compile used for finalizing this kernel
|
||||
HsaBrig* brig_; //!< The brig used to generate ISA for this kernel
|
||||
HsaKernelCode* kernelCode_; //!< AQL kernel code for this kernel
|
||||
HsaKernelDebug* debugInfo_; //!< Dwarf info for this kernel
|
||||
};
|
||||
|
||||
} // namespace oclhsa
|
||||
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
|
||||
#endif // HSAKERNEL_HPP_
|
||||
|
||||
@@ -1,938 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
#include "device/device.hpp"
|
||||
#include "device/hsa/hsamemory.hpp"
|
||||
#include "device/hsa/hsadevice.hpp"
|
||||
#include "device/hsa/hsablit.hpp"
|
||||
#include "device/hsa/oclhsa_common.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "platform/sampler.hpp"
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
/////////////////////////////////oclhsa::Memory//////////////////////////////
|
||||
//! Creates the device memory wrapper of \a owner for device \a dev.
//! No device memory is attached yet and the interop type starts out as
//! InteropNone.
Memory::Memory(const oclhsa::Device &dev, amd::Memory &owner)
    : device::Memory(owner)
    , dev_(dev)
    , deviceMemory_(NULL)
    , interopType_(InteropNone)
{
}
|
||||
|
||||
//! Nothing is released at this level
Memory::~Memory()
{
}
|
||||
|
||||
bool
|
||||
Memory::allocateMapMemory(size_t allocationSize)
|
||||
{
|
||||
assert(mapMemory_ == NULL);
|
||||
|
||||
void *mapData = NULL;
|
||||
|
||||
// Use/reuse system memory from HSA system memory pool as backing
|
||||
// storage of the map target.
|
||||
if (kHsaStatusSuccess !=
|
||||
servicesapi->HsaAllocateSystemMemory(
|
||||
owner()->getSize(), 0, kHsaSystemMemoryTypeDefault, &mapData)) {
|
||||
LogError("[OCL] Fail to allocate the backing storage for map target");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create buffer object to contain the map target.
|
||||
amd::Memory *mapMemory =
|
||||
new(owner()->getContext()) amd::Buffer(
|
||||
owner()->getContext(), CL_MEM_USE_HOST_PTR, owner()->getSize());
|
||||
|
||||
if ((mapMemory == NULL) || (!mapMemory->create(mapData))) {
|
||||
LogError("[OCL] Fail to allocate map target object");
|
||||
servicesapi->HsaFreeSystemMemory(mapData);
|
||||
if (mapMemory) {
|
||||
mapMemory->release();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
mapMemory_ = mapMemory;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Memory::freeMapMemory()
|
||||
{
|
||||
// Return the memory to HSA system memory pool.
|
||||
assert(mapMemory_ != NULL);
|
||||
servicesapi->HsaFreeSystemMemory(mapMemory_->getHostMem());
|
||||
|
||||
// Release the buffer object containing the map data.
|
||||
mapMemory_->release();
|
||||
mapMemory_ = NULL;
|
||||
}
|
||||
|
||||
void *
|
||||
Memory::allocMapTarget(const amd::Coord3D &origin,
|
||||
const amd::Coord3D ®ion,
|
||||
uint mapFlags,
|
||||
size_t *rowPitch,
|
||||
size_t *slicePitch)
|
||||
{
|
||||
// Map/Unmap must be serialized.
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
incIndMapCount();
|
||||
|
||||
// If the device backing storage is direct accessible, use it.
|
||||
if (isHostMemDirectAccess()) {
|
||||
return (static_cast<char *>(deviceMemory_) + origin[0]);
|
||||
}
|
||||
|
||||
// Otherwise, check for host memory.
|
||||
void *hostMem = owner()->getHostMem();
|
||||
if (hostMem != NULL) {
|
||||
return (static_cast<char *>(hostMem) + origin[0]);
|
||||
}
|
||||
|
||||
// Allocate one if needed.
|
||||
if (indirectMapCount_ == 1) {
|
||||
if (!allocateMapMemory(owner()->getSize())) {
|
||||
decIndMapCount();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Did the map resource allocation fail?
|
||||
if (mapMemory_ == NULL) {
|
||||
LogError("Could not map target resource");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return (static_cast<char *>(mapMemory_->getHostMem()) + origin[0]);
|
||||
}
|
||||
|
||||
void
|
||||
Memory::decIndMapCount()
|
||||
{
|
||||
// Map/Unmap must be serialized.
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
if (indirectMapCount_ == 0) {
|
||||
LogError("decIndMapCount() called when indirectMapCount_ already zero");
|
||||
return;
|
||||
}
|
||||
|
||||
// Decrement the counter and release indirect map if it's the last op
|
||||
if (--indirectMapCount_ == 0 &&
|
||||
mapMemory_ != NULL) {
|
||||
freeMapMemory();
|
||||
}
|
||||
}
|
||||
|
||||
void *
|
||||
Memory::cpuMap(
|
||||
device::VirtualDevice& vDev,
|
||||
uint flags,
|
||||
uint startLayer,
|
||||
uint numLayers,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch
|
||||
)
|
||||
{
|
||||
// Create the map target.
|
||||
void * mapTarget =
|
||||
allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), 0, rowPitch, slicePitch);
|
||||
|
||||
// Sync to map target if no direct access.
|
||||
if (!isHostMemDirectAccess()) {
|
||||
if (!vDev.blitMgr().readBuffer(
|
||||
*this, mapTarget, amd::Coord3D(0), amd::Coord3D(size()), true)) {
|
||||
decIndMapCount();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return mapTarget;
|
||||
}
|
||||
|
||||
void
|
||||
Memory::cpuUnmap(device::VirtualDevice& vDev)
|
||||
{
|
||||
// Sync to device backing storage if no direct access.
|
||||
if (!isHostMemDirectAccess()) {
|
||||
if (!vDev.blitMgr().writeBuffer(
|
||||
mapMemory_->getHostMem(), *this, amd::Coord3D(0),
|
||||
amd::Coord3D(size()), true)) {
|
||||
LogError("[OCL] Fail sync the device memory on cpuUnmap");
|
||||
}
|
||||
}
|
||||
|
||||
decIndMapCount();
|
||||
}
|
||||
|
||||
void Memory::destroyInterop()
|
||||
{
|
||||
HsaStatus status;
|
||||
#ifdef _WIN32
|
||||
if (interopType_ == InteropD3D10) {
|
||||
HsaStatus status = hsacoreapi->HsaUnmapD3D10Resource(
|
||||
dev_.getBackendDevice(), d3d10Resource_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaUnmapD3D10Resource");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
else if (interopType_ == InteropD3D11) {
|
||||
HsaStatus status = hsacoreapi->HsaUnmapD3D11Resource(
|
||||
dev_.getBackendDevice(), d3d11Resource_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaUnmapD3D11Resource");
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (interopType_ == InteropGL) {
|
||||
void * glContext =owner()->getContext().info().hCtx_;
|
||||
status = hsacoreapi->HsaReleaseGLResources( dev_.getBackendDevice(),
|
||||
glContext,
|
||||
&glResource_,
|
||||
1);
|
||||
if (kHsaStatusSuccess != status) {
|
||||
LogError("[OCL] Fail on HsaReleaseGLResources");
|
||||
}
|
||||
|
||||
status = hsacoreapi->HsaUnmapGLResource(
|
||||
dev_.getBackendDevice(), glContext, &glResource_);
|
||||
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaUnmapGLResource");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Memory::isHsaLocalMemory() const {
|
||||
if (owner()->isInterop()) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
if (amd::Is64Bits()) {
|
||||
uint64_t addr = reinterpret_cast<uint64_t>(deviceMemory_);
|
||||
|
||||
// Fast check: in 64 bits, CPU can only access the high area
|
||||
// (VA[63:47] == 0x1FFFF) and low area (VA[63:47 == 0).
|
||||
// Reference: GFXIP7_ShaderIO_Delt.doc
|
||||
addr >>= 47; // discard least significant 47 bits
|
||||
return (addr != 0x1FFFF && addr != 0);
|
||||
}
|
||||
else {
|
||||
const HsaMemoryDescriptor &memDesc =
|
||||
dev_.getBackendDevice()->memory_descriptors[0];
|
||||
|
||||
if (memDesc.heap_type == kHsaHeapTypeFrameBufferPrivate) {
|
||||
const uintptr_t addr =
|
||||
reinterpret_cast<uintptr_t>(deviceMemory_);
|
||||
const uintptr_t gpuvmBase = memDesc.virtual_base_address;
|
||||
const size_t size = memDesc.size_in_bytes;
|
||||
return (addr >= gpuvmBase && addr < (gpuvmBase + size));
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/////////////////////////////////oclhsa::Buffer//////////////////////////////
|
||||
|
||||
//! Constructs the device buffer object; all work is delegated to the
//! oclhsa::Memory base constructor, which receives the device and the owner.
Buffer::Buffer(const oclhsa::Device &dev, amd::Memory &owner)
|
||||
: oclhsa::Memory(dev, owner)
|
||||
{}
|
||||
|
||||
//! Destroys the buffer; the backing storage is released through destroy().
Buffer::~Buffer()
|
||||
{
|
||||
destroy();
|
||||
}
|
||||
|
||||
void
|
||||
Buffer::destroy()
|
||||
{
|
||||
if (owner()->parent() != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (owner()->isInterop()) {
|
||||
destroyInterop();
|
||||
return;
|
||||
}
|
||||
|
||||
if (isHostMemoryRegistered()) {
|
||||
hsacoreapi->HsaDeregisterSystemMemory(deviceMemory_);
|
||||
}
|
||||
else {
|
||||
if (!isHostMemDirectAccess()) {
|
||||
hsacoreapi->HsaFreeDeviceMemory(deviceMemory_);
|
||||
}
|
||||
else if (deviceMemory_ != owner()->getHostMem()) {
|
||||
// if they are identical, the host pointer will be
|
||||
// deallocated later on => avoid double deallocation
|
||||
hsacoreapi->HsaAmdFreeSystemMemory(deviceMemory_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Buffer::createInterop()
|
||||
{
|
||||
amd::InteropObject *interopObject = owner()->getInteropObj();
|
||||
|
||||
#ifdef _WIN32
|
||||
if (interopObject->asD3D10Object() != NULL) {
|
||||
amd::D3D10Object *d3d10Object = interopObject->asD3D10Object();
|
||||
// 1. Get the D3D11 resource
|
||||
ID3D10Resource *resource = d3d10Object->getD3D10Resource();
|
||||
ID3D10Buffer *d3d10Buffer = static_cast<ID3D10Buffer *>(resource);
|
||||
|
||||
HsaStatus status = hsacoreapi->HsaMapD3D10Buffer(
|
||||
dev_.getBackendDevice(), d3d10Buffer, &deviceMemory_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaMapD3D10Buffer");
|
||||
return false;
|
||||
}
|
||||
interopType_ = InteropD3D10;
|
||||
d3d10Resource_ = d3d10Buffer;
|
||||
}
|
||||
|
||||
if (interopObject->asD3D11Object() != NULL) {
|
||||
amd::D3D11Object *d3d11Object = interopObject->asD3D11Object();
|
||||
// 1. Get the D3D11 resource
|
||||
ID3D11Resource *resource = d3d11Object->getD3D11Resource();
|
||||
ID3D11Buffer *d3d11Buffer = static_cast<ID3D11Buffer *>(resource);
|
||||
|
||||
HsaStatus status = hsacoreapi->HsaMapD3D11Buffer(
|
||||
dev_.getBackendDevice(), d3d11Buffer, &deviceMemory_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaMapD3D10Buffer");
|
||||
return false;
|
||||
}
|
||||
interopType_ = InteropD3D11;
|
||||
d3d11Resource_ = d3d11Buffer;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (interopObject->asBufferGL()) {
|
||||
amd::BufferGL *buffer_gl = interopObject->asBufferGL();
|
||||
HsaGLResource gl_resource = {0};
|
||||
gl_resource.name = buffer_gl->getGLName();
|
||||
gl_resource.type = buffer_gl->getGLInternalFormat();
|
||||
|
||||
void * glContext =owner()->getContext().info().hCtx_;
|
||||
HsaStatus status = hsacoreapi->HsaMapGLBuffer(
|
||||
dev_.getBackendDevice(), glContext, &gl_resource, &deviceMemory_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaMapGLBuffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
status = hsacoreapi->HsaAcquireGLResources( dev_.getBackendDevice(),
|
||||
glContext,
|
||||
&gl_resource,
|
||||
1);
|
||||
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaAcquireGLResources");
|
||||
return false;
|
||||
}
|
||||
interopType_ = InteropGL;
|
||||
glResource_ = gl_resource;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Buffer::create()
|
||||
{
|
||||
if (owner()->parent()) {
|
||||
// Sub-Buffer creation.
|
||||
oclhsa::Memory *parentBuffer =
|
||||
static_cast<oclhsa::Memory *>(owner()->parent()->getDeviceMemory(dev_));
|
||||
|
||||
if (parentBuffer == NULL) {
|
||||
LogError("[OCL] Fail to allocate parent buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t offset = owner()->getOrigin();
|
||||
deviceMemory_ =
|
||||
static_cast<char *>(parentBuffer->getDeviceMemory()) + offset;
|
||||
|
||||
void* parentHostPtr = parentBuffer->owner()->getHostMem();
|
||||
if (parentHostPtr) {
|
||||
owner()->setHostMem(static_cast<char *>(parentHostPtr) + offset);
|
||||
}
|
||||
|
||||
flags_ |= owner()->parent()->getMemFlags();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate backing storage in device local memory unless UHP or AHP are set
|
||||
const cl_mem_flags memFlags = owner()->getMemFlags();
|
||||
if (!(memFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) {
|
||||
bool useDeviceMemory = dev_.settings().enableLocalMemory_;
|
||||
size_t alignment = static_cast<size_t>(dev_.info().memBaseAddrAlign_);
|
||||
if (useDeviceMemory) {
|
||||
hsacoreapi->HsaAllocateDeviceMemory(
|
||||
size(), alignment, dev_.getBackendDevice(), &deviceMemory_);
|
||||
if (deviceMemory_ && (memFlags & CL_MEM_COPY_HOST_PTR)) {
|
||||
bool ret = dev_.xferMgr().writeBuffer(owner()->getHostMem(), *this,
|
||||
amd::Coord3D(0), amd::Coord3D(size()), true);
|
||||
if (!ret) {
|
||||
hsacoreapi->HsaFreeDeviceMemory(deviceMemory_);
|
||||
deviceMemory_ = NULL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
// if device memory is depleted, do not fall back to system memory
|
||||
return deviceMemory_ != NULL;
|
||||
}
|
||||
else if (!(owner()->getHostMem())) {
|
||||
flags_ |= HostMemoryDirectAccess;
|
||||
deviceMemory_ = dev_.hostAlloc(size(), alignment);
|
||||
// no need to copy - otherwise, the host pointer will not be NULL
|
||||
return deviceMemory_ != NULL;
|
||||
}
|
||||
}
|
||||
|
||||
flags_ |= HostMemoryDirectAccess;
|
||||
void* hostMem = owner()->getHostMem();
|
||||
assert(hostMem);
|
||||
// If there is a host ptr, then register it only if it was not allocated,
|
||||
// (=> allocated by us)
|
||||
if (!(owner()->getHostMemRef()->alloced())) {
|
||||
// Reuse existing host memory for the backing storage and register it.
|
||||
//
|
||||
// SVM precludes a possible 64-bits optimization in which host buffers
|
||||
// allocated by the user (UHP) in the default, coherent space could be
|
||||
// mapped into the non-coherent space by means of CreateFileMapping/mmap
|
||||
// without copying any data (the "device memory" would be the
|
||||
// non-coherent buffer).
|
||||
// The optimization cannot be applied because regular buffers allocated
|
||||
// using UHP are expected to have same characteristics as the original
|
||||
// buffer, i.e., if the original buffer supports atomics then the
|
||||
// corresponding OpenCL buffer will support atomics too.
|
||||
flags_ |= HostMemoryRegistered;
|
||||
if (hsacoreapi->HsaRegisterSystemMemory(hostMem, size()) != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Failed to register system memory");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
deviceMemory_ = hostMem;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Buffer::recreate(size_t newSize, size_t newAlignment, bool forceSystem) {
|
||||
const size_t memFlag = static_cast<size_t>(owner()->getMemFlags());
|
||||
if ((memFlag & CL_MEM_ALLOC_HOST_PTR) ||
|
||||
(memFlag & CL_MEM_USE_HOST_PTR) ||
|
||||
!dev_.settings().enableLocalMemory_) {
|
||||
forceSystem = true;
|
||||
}
|
||||
|
||||
void *newDeviceMemory = NULL;
|
||||
uint hostDirectAccess = 0;
|
||||
|
||||
if (forceSystem) {
|
||||
newDeviceMemory = dev_.hostAlloc(newSize, newAlignment);
|
||||
if (newDeviceMemory == NULL) {
|
||||
LogError("[OCL] Fail to reallocate system memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy the old data to the new memory location.
|
||||
if (!dev_.xferMgr().readBuffer(*this, newDeviceMemory,
|
||||
amd::Coord3D(0),
|
||||
amd::Coord3D(size()),
|
||||
true)) {
|
||||
LogError("[OCL] Fail to copy the current value");
|
||||
dev_.hostFree(newDeviceMemory);
|
||||
newDeviceMemory = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
hostDirectAccess = HostMemoryDirectAccess;
|
||||
}
|
||||
else {
|
||||
hsacoreapi->HsaAllocateDeviceMemory(
|
||||
newSize, newAlignment, dev_.getBackendDevice(), &newDeviceMemory);
|
||||
|
||||
if (newDeviceMemory == NULL) {
|
||||
LogError("[OCL] Fail to reallocate device local memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(
|
||||
amd::isMultipleOf(static_cast<char *>(newDeviceMemory),
|
||||
newAlignment));
|
||||
|
||||
// Copy the old data to the new memory location.
|
||||
if (!dev_.xferMgr().readBuffer(
|
||||
*this, newDeviceMemory, amd::Coord3D(0), amd::Coord3D(size()),
|
||||
true)) {
|
||||
LogError("[OCL] Fail to copy the current value");
|
||||
hsacoreapi->HsaFreeDeviceMemory(newDeviceMemory);
|
||||
newDeviceMemory = NULL;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
destroy();
|
||||
|
||||
deviceMemory_ = newDeviceMemory;
|
||||
|
||||
if ((memFlag & CL_MEM_ALLOC_HOST_PTR) &&
|
||||
(owner()->getContext().devices().size() == 1)) {
|
||||
owner()->setHostMem(deviceMemory_);
|
||||
}
|
||||
|
||||
flags_ &= (~HostMemoryDirectAccess & ~HostMemoryRegistered);
|
||||
flags_ |= hostDirectAccess;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/////////////////////////////////oclhsa::Image//////////////////////////////
|
||||
|
||||
//! Constructs the device image object: forwards the device and owner to the
//! oclhsa::Memory base, clears the HostMemoryDirectAccess and
//! HostMemoryRegistered bits in flags_, then fills imageDescriptor_ from the
//! owner via populateImageDescriptor().
Image::Image(const oclhsa::Device& dev, amd::Memory& owner) :
|
||||
oclhsa::Memory(dev, owner)
|
||||
{
|
||||
flags_ &= (~HostMemoryDirectAccess & ~HostMemoryRegistered);
|
||||
populateImageDescriptor();
|
||||
}
|
||||
|
||||
// One row of the OpenCL -> HSA image format translation table: pairs an
// OpenCL image format with the HSA image format used for it.
struct ImageFormatLayout {
|
||||
// OpenCL side of the pair.
cl_image_format clFormat;
|
||||
// HSA format selected for clFormat.
HsaImageFormat hsaFormat;
|
||||
};
|
||||
|
||||
// Translation table from OpenCL image formats (channel order + channel data
// type) to HSA image formats. Image::populateImageDescriptor() scans it
// linearly for a row matching both fields.
static const ImageFormatLayout
ImageFormatLayoutMap[] = {
    // CL_R
    { { CL_R, CL_UNORM_INT8 },      HSA_IMAGE_FMT_R8_UNORM },
    { { CL_R, CL_UNORM_INT16 },     HSA_IMAGE_FMT_R16_UNORM },
    { { CL_R, CL_SNORM_INT8 },      HSA_IMAGE_FMT_R8_SNORM },
    { { CL_R, CL_SNORM_INT16 },     HSA_IMAGE_FMT_R16_SNORM },
    { { CL_R, CL_SIGNED_INT8 },     HSA_IMAGE_FMT_R8_SINT },
    { { CL_R, CL_SIGNED_INT16 },    HSA_IMAGE_FMT_R16_SINT },
    { { CL_R, CL_SIGNED_INT32 },    HSA_IMAGE_FMT_R32_SINT },
    { { CL_R, CL_UNSIGNED_INT8 },   HSA_IMAGE_FMT_R8_UINT },
    { { CL_R, CL_UNSIGNED_INT16 },  HSA_IMAGE_FMT_R16_UINT },
    { { CL_R, CL_UNSIGNED_INT32 },  HSA_IMAGE_FMT_R32_UINT },
    { { CL_R, CL_HALF_FLOAT },      HSA_IMAGE_FMT_R_HALFFLOAT },
    { { CL_R, CL_FLOAT },           HSA_IMAGE_FMT_R_FLOAT },

    // CL_A
    { { CL_A, CL_UNORM_INT8 },      HSA_IMAGE_FMT_A8_UNORM },
    { { CL_A, CL_UNORM_INT16 },     HSA_IMAGE_FMT_A16_UNORM },
    { { CL_A, CL_SNORM_INT8 },      HSA_IMAGE_FMT_A8_SNORM },
    { { CL_A, CL_SNORM_INT16 },     HSA_IMAGE_FMT_A16_SNORM },
    { { CL_A, CL_SIGNED_INT8 },     HSA_IMAGE_FMT_A8_SINT },
    { { CL_A, CL_SIGNED_INT16 },    HSA_IMAGE_FMT_A16_SINT },
    { { CL_A, CL_SIGNED_INT32 },    HSA_IMAGE_FMT_A32_SINT },
    { { CL_A, CL_UNSIGNED_INT8 },   HSA_IMAGE_FMT_A8_UINT },
    { { CL_A, CL_UNSIGNED_INT16 },  HSA_IMAGE_FMT_A16_UINT },
    { { CL_A, CL_UNSIGNED_INT32 },  HSA_IMAGE_FMT_A32_UINT },
    { { CL_A, CL_HALF_FLOAT },      HSA_IMAGE_FMT_A_HALFFLOAT },
    { { CL_A, CL_FLOAT },           HSA_IMAGE_FMT_A_FLOAT },

    // CL_RG
    { { CL_RG, CL_UNORM_INT8 },     HSA_IMAGE_FMT_R8G8_UNORM },
    { { CL_RG, CL_UNORM_INT16 },    HSA_IMAGE_FMT_R16G16_UNORM },
    { { CL_RG, CL_SNORM_INT8 },     HSA_IMAGE_FMT_R8G8_SNORM },
    { { CL_RG, CL_SNORM_INT16 },    HSA_IMAGE_FMT_R16G16_SNORM },
    { { CL_RG, CL_SIGNED_INT8 },    HSA_IMAGE_FMT_R8G8_SINT },
    { { CL_RG, CL_SIGNED_INT16 },   HSA_IMAGE_FMT_R16G16_SINT },
    { { CL_RG, CL_SIGNED_INT32 },   HSA_IMAGE_FMT_R32G32_SINT },
    { { CL_RG, CL_UNSIGNED_INT8 },  HSA_IMAGE_FMT_R8G8_UINT },
    { { CL_RG, CL_UNSIGNED_INT16 }, HSA_IMAGE_FMT_R16G16_UINT },
    { { CL_RG, CL_UNSIGNED_INT32 }, HSA_IMAGE_FMT_R32G32_UINT },
    { { CL_RG, CL_HALF_FLOAT },     HSA_IMAGE_FMT_RG_HALFFLOAT },
    { { CL_RG, CL_FLOAT },          HSA_IMAGE_FMT_RG_FLOAT },

    // CL_RA
    { { CL_RA, CL_UNORM_INT8 },     HSA_IMAGE_FMT_R8A8_UNORM },
    { { CL_RA, CL_UNORM_INT16 },    HSA_IMAGE_FMT_R16A16_UNORM },
    { { CL_RA, CL_SNORM_INT8 },     HSA_IMAGE_FMT_R8A8_SNORM },
    { { CL_RA, CL_SNORM_INT16 },    HSA_IMAGE_FMT_R16A16_SNORM },
    { { CL_RA, CL_SIGNED_INT8 },    HSA_IMAGE_FMT_R8A8_SINT },
    { { CL_RA, CL_SIGNED_INT16 },   HSA_IMAGE_FMT_R16A16_SINT },
    { { CL_RA, CL_SIGNED_INT32 },   HSA_IMAGE_FMT_R32A32_SINT },
    { { CL_RA, CL_UNSIGNED_INT8 },  HSA_IMAGE_FMT_R8A8_UINT },
    { { CL_RA, CL_UNSIGNED_INT16 }, HSA_IMAGE_FMT_R16A16_UINT },
    { { CL_RA, CL_UNSIGNED_INT32 }, HSA_IMAGE_FMT_R32A32_UINT },
    { { CL_RA, CL_HALF_FLOAT },     HSA_IMAGE_FMT_RA_HALFFLOAT },
    { { CL_RA, CL_FLOAT },          HSA_IMAGE_FMT_RA_FLOAT },

    // CL_RGBA
    { { CL_RGBA, CL_UNORM_INT8 },     HSA_IMAGE_FMT_R8G8B8A8_UNORM },
    { { CL_RGBA, CL_UNORM_INT16 },    HSA_IMAGE_FMT_R16G16B16A16_UNORM },
    { { CL_RGBA, CL_SNORM_INT8 },     HSA_IMAGE_FMT_R8G8B8A8_SNORM },
    { { CL_RGBA, CL_SNORM_INT16 },    HSA_IMAGE_FMT_R16G16B16A16_SNORM },
    { { CL_RGBA, CL_SIGNED_INT8 },    HSA_IMAGE_FMT_R8G8B8A8_SINT },
    { { CL_RGBA, CL_SIGNED_INT16 },   HSA_IMAGE_FMT_R16G16B16A16_SINT },
    { { CL_RGBA, CL_SIGNED_INT32 },   HSA_IMAGE_FMT_R32G32B32A32_SINT },
    { { CL_RGBA, CL_UNSIGNED_INT8 },  HSA_IMAGE_FMT_R8G8B8A8_UINT },
    { { CL_RGBA, CL_UNSIGNED_INT16 }, HSA_IMAGE_FMT_R16G16B16A16_UINT },
    { { CL_RGBA, CL_UNSIGNED_INT32 }, HSA_IMAGE_FMT_R32G32B32A32_UINT },
    { { CL_RGBA, CL_HALF_FLOAT },     HSA_IMAGE_FMT_RGBA_HALFFLOAT },
    { { CL_RGBA, CL_FLOAT },          HSA_IMAGE_FMT_RGBA_FLOAT },

    // CL_ARGB
    { { CL_ARGB, CL_UNORM_INT8 },     HSA_IMAGE_FMT_A8R8G8B8_UNORM },
    { { CL_ARGB, CL_SNORM_INT8 },     HSA_IMAGE_FMT_A8R8G8B8_SNORM },
    { { CL_ARGB, CL_SIGNED_INT8 },    HSA_IMAGE_FMT_A8R8G8B8_SINT },
    { { CL_ARGB, CL_UNSIGNED_INT8 },  HSA_IMAGE_FMT_A8R8G8B8_UINT },

    // CL_BGRA
    { { CL_BGRA, CL_UNORM_INT8 },     HSA_IMAGE_FMT_B8G8R8A8_UNORM },
    { { CL_BGRA, CL_SNORM_INT8 },     HSA_IMAGE_FMT_B8G8R8A8_SNORM },
    { { CL_BGRA, CL_SIGNED_INT8 },    HSA_IMAGE_FMT_B8G8R8A8_SINT },
    { { CL_BGRA, CL_UNSIGNED_INT8 },  HSA_IMAGE_FMT_B8G8R8A8_UINT },

    // CL_LUMINANCE
    { { CL_LUMINANCE, CL_SNORM_INT8 },  HSA_IMAGE_FMT_L8_SNORM },
    { { CL_LUMINANCE, CL_SNORM_INT16 }, HSA_IMAGE_FMT_L16_SNORM },
    { { CL_LUMINANCE, CL_UNORM_INT8 },  HSA_IMAGE_FMT_L8_UNORM },
    { { CL_LUMINANCE, CL_UNORM_INT16 }, HSA_IMAGE_FMT_L16_UNORM },
    { { CL_LUMINANCE, CL_HALF_FLOAT },  HSA_IMAGE_FMT_L_HALFFLOAT },
    { { CL_LUMINANCE, CL_FLOAT },       HSA_IMAGE_FMT_L_FLOAT },

    // CL_INTENSITY
    { { CL_INTENSITY, CL_SNORM_INT8 },  HSA_IMAGE_FMT_I8_SNORM },
    { { CL_INTENSITY, CL_SNORM_INT16 }, HSA_IMAGE_FMT_I16_SNORM },
    { { CL_INTENSITY, CL_UNORM_INT8 },  HSA_IMAGE_FMT_I8_UNORM },
    { { CL_INTENSITY, CL_UNORM_INT16 }, HSA_IMAGE_FMT_I16_UNORM },
    { { CL_INTENSITY, CL_HALF_FLOAT },  HSA_IMAGE_FMT_I_HALFFLOAT },
    { { CL_INTENSITY, CL_FLOAT },       HSA_IMAGE_FMT_I_FLOAT },

    // CL_RGB (packed formats)
    { { CL_RGB, CL_UNORM_SHORT_565 },   HSA_IMAGE_FMT_R5G6B5_UNORM },
    { { CL_RGB, CL_UNORM_SHORT_555 },   HSA_IMAGE_FMT_R5G5B5_UNORM },
    { { CL_RGB, CL_UNORM_INT_101010 },  HSA_IMAGE_FMT_R10G10B10_UNORM }
};
|
||||
|
||||
void
|
||||
Image::populateImageDescriptor()
|
||||
{
|
||||
amd::Image* image = owner()->asImage();
|
||||
|
||||
// build HSA runtime image descriptor
|
||||
imageDescriptor_.width = image->getWidth();
|
||||
imageDescriptor_.height = image->getHeight();
|
||||
imageDescriptor_.depth = image->getDepth();
|
||||
imageDescriptor_.arraySize = 0;
|
||||
|
||||
// Device specific image does not require rowpitch/slicepitch information.
|
||||
// Only image buffer is required to specify rowpitch size.
|
||||
imageDescriptor_.rowPitchInBytes = 0;
|
||||
imageDescriptor_.slicePitchInBytes = 0;
|
||||
|
||||
switch (image->getType())
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
imageDescriptor_.geometry = HSA_GEOMETRY_1D;
|
||||
imageDescriptor_.height = 1;
|
||||
imageDescriptor_.depth = 1;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
|
||||
imageDescriptor_.geometry = HSA_GEOMETRY_1DBuffer;
|
||||
imageDescriptor_.height = 1;
|
||||
imageDescriptor_.depth = 1;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
//@todo - arraySize = height ?!
|
||||
imageDescriptor_.geometry = HSA_GEOMETRY_1DArray;
|
||||
imageDescriptor_. height = 1;
|
||||
imageDescriptor_.arraySize = image->getHeight();
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
imageDescriptor_.geometry = HSA_GEOMETRY_2D;
|
||||
imageDescriptor_.depth = 1;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
//@todo - arraySize = depth ?!
|
||||
imageDescriptor_.geometry = HSA_GEOMETRY_2DArray;
|
||||
imageDescriptor_.depth = 1;
|
||||
imageDescriptor_.arraySize = image->getDepth();
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
imageDescriptor_.geometry = HSA_GEOMETRY_3D;
|
||||
break;
|
||||
}
|
||||
|
||||
for (uint i = 0; i < sizeof(ImageFormatLayoutMap) / sizeof(ImageFormatLayout); ++i) {
|
||||
if ((image->getImageFormat().image_channel_data_type ==
|
||||
ImageFormatLayoutMap[i].clFormat.image_channel_data_type) &&
|
||||
(image->getImageFormat().image_channel_order ==
|
||||
ImageFormatLayoutMap[i].clFormat.image_channel_order)) {
|
||||
imageDescriptor_.format = ImageFormatLayoutMap[i].hsaFormat;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Image::createInterop() {
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
amd::InteropObject *interopObject = owner()->getInteropObj();
|
||||
void *hsaImageObjectInterop = NULL;
|
||||
size_t hsaImageObjectInteropSize = 0;
|
||||
#ifdef _WIN32
|
||||
if (interopObject->asD3D10Object()) {
|
||||
amd::D3D10Object *d3d10Object = interopObject->asD3D10Object();
|
||||
// 1. Get the D3D11 resource
|
||||
ID3D10Resource *resource = d3d10Object->getD3D10Resource();
|
||||
HsaStatus status = hsacoreapi->HsaMapD3D10Texture(
|
||||
dev_.getBackendDevice(), resource, &hsaImageObjectInterop,
|
||||
&hsaImageObjectInteropSize, kHsaMapFlagsReadWrite);
|
||||
if (status != kHsaStatusSuccess || hsaImageObjectInteropSize == 0 ) {
|
||||
LogError("[OCL] Fail on HsaMapD3D10Texture");
|
||||
return false;
|
||||
}
|
||||
interopType_ = InteropD3D10;
|
||||
d3d10Resource_ = resource;
|
||||
}
|
||||
|
||||
if (interopObject->asD3D11Object()) {
|
||||
amd::D3D11Object *d3d11Object = interopObject->asD3D11Object();
|
||||
|
||||
// 1. Get the D3D11 resource
|
||||
ID3D11Resource *resource = d3d11Object->getD3D11Resource();
|
||||
HsaStatus status = hsacoreapi->HsaMapD3D11Texture(
|
||||
dev_.getBackendDevice(), resource, &hsaImageObjectInterop,
|
||||
&hsaImageObjectInteropSize, kHsaMapFlagsReadWrite,
|
||||
d3d11Object->getPlane());
|
||||
if (status != kHsaStatusSuccess || hsaImageObjectInteropSize == 0 ) {
|
||||
LogError("[OCL] Fail on HsaMapD3D11Texture");
|
||||
return false;
|
||||
}
|
||||
interopType_ = InteropD3D11;
|
||||
d3d11Resource_ = resource;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (interopObject->asGLObject()) {
|
||||
amd::GLObject* gl_object = interopObject->asGLObject();
|
||||
HsaGLResource gl_resource = {0};
|
||||
gl_resource.name = gl_object->getGLName();
|
||||
if (gl_object->getGLTarget() != GL_TEXTURE_CUBE_MAP) {
|
||||
gl_resource.type = gl_object->getGLTarget();
|
||||
}
|
||||
else {
|
||||
gl_resource.type = gl_object->getCubemapFace();
|
||||
}
|
||||
gl_resource.mipmap_level = gl_object->getGLMipLevel();
|
||||
|
||||
void * glContext =owner()->getContext().info().hCtx_;
|
||||
|
||||
// Get the texture SRD.
|
||||
HsaStatus status = hsacoreapi->HsaMapGLTexture(
|
||||
dev_.getBackendDevice(), glContext, &gl_resource,
|
||||
&hsaImageObjectInterop, &hsaImageObjectInteropSize);
|
||||
if (status != kHsaStatusSuccess || hsaImageObjectInteropSize == 0) {
|
||||
LogError("[OCL] Fail on HsaMapGLTexture");
|
||||
return false;
|
||||
}
|
||||
|
||||
status = hsacoreapi->HsaAcquireGLResources( dev_.getBackendDevice(),
|
||||
glContext,
|
||||
&gl_resource,
|
||||
1);
|
||||
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaAcquireGLResources");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the flat address for texture buffer.
|
||||
if (owner()->getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
|
||||
// Map the texture buffer resource as buffer.
|
||||
HsaStatus status = hsacoreapi->HsaMapGLBuffer(
|
||||
dev_.getBackendDevice(), glContext, &gl_resource,
|
||||
&deviceMemory_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail on HsaMapGLBuffer");
|
||||
return false;
|
||||
}
|
||||
// Sanity check.
|
||||
assert((deviceMemory_ != NULL) &&
|
||||
"deviceMemory_ should not be \
|
||||
NULL upon successful return from HsaMapGLBuffer");
|
||||
}
|
||||
|
||||
interopType_ = InteropGL;
|
||||
glResource_ = gl_resource;
|
||||
}
|
||||
|
||||
// Populate HSA specific information to the interop image object.
|
||||
HsaStatus status = hsacoreapi->HsaAmdCreateDeviceImageView(
|
||||
&imageDescriptor_, hsaImageObjectInterop, hsaImageObject_);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail to tranform interop image SRD");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Image::create()
|
||||
{
|
||||
if (owner()->parent()) {
|
||||
// Image view creation
|
||||
oclhsa::Image *parentImage =
|
||||
static_cast<oclhsa::Image *>(owner()->parent()->getDeviceMemory(dev_));
|
||||
|
||||
if (parentImage == NULL) {
|
||||
LogError("[OCL] Fail to allocate parent image");
|
||||
return false;
|
||||
}
|
||||
|
||||
return createView(*parentImage);
|
||||
}
|
||||
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
// Get memory size requirement for device specific image.
|
||||
HsaStatus status = hsacoreapi->HsaGetDeviceImageInfo(
|
||||
dev_.getBackendDevice(), &imageDescriptor_,
|
||||
&deviceImageInfo_);
|
||||
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail to allocate image memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dev_.settings().enableLocalMemory_) {
|
||||
status = hsacoreapi->HsaAllocateDeviceMemory(
|
||||
deviceImageInfo_.imageSizeInBytes,
|
||||
deviceImageInfo_.imageAlignmentInBytes,
|
||||
dev_.getBackendDevice(),
|
||||
&deviceMemory_);
|
||||
} else {
|
||||
status = servicesapi->HsaAllocateSystemMemory(
|
||||
deviceImageInfo_.imageSizeInBytes,
|
||||
deviceImageInfo_.imageAlignmentInBytes,
|
||||
kHsaSystemMemoryTypeDefault,
|
||||
&deviceMemory_);
|
||||
}
|
||||
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail to allocate image memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(amd::isMultipleOf(
|
||||
deviceMemory_, deviceImageInfo_.imageAlignmentInBytes));
|
||||
|
||||
status = hsacoreapi->HsaCreateDeviceImage(
|
||||
dev_.getBackendDevice(), &imageDescriptor_,
|
||||
deviceMemory_, &hsaImageObject_[0]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Image::createView(Image &parent)
|
||||
{
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
if (parent.owner()->asBuffer()) {
|
||||
// Get new texture SRD since parent is a buffer.
|
||||
deviceMemory_ = parent.getDeviceMemory();
|
||||
|
||||
// Force device specific image implementation to use rowpitch size.
|
||||
amd::Image* image = owner()->asImage();
|
||||
imageDescriptor_.rowPitchInBytes = image->getRowPitch();
|
||||
|
||||
HsaStatus status = hsacoreapi->HsaCreateDeviceImage(
|
||||
dev_.getBackendDevice(), &imageDescriptor_,
|
||||
deviceMemory_, &hsaImageObject_[0]);
|
||||
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail to create HSA image object");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Get the view of the existing parent's SRD based on the child's image
|
||||
// descriptor.
|
||||
HsaStatus status = hsacoreapi->HsaAmdCreateDeviceImageView(
|
||||
&imageDescriptor_, parent.getHsaImageObjectAddress(),
|
||||
&hsaImageObject_[0]);
|
||||
if (status != kHsaStatusSuccess) {
|
||||
LogError("[OCL] Fail to get view of parent image");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Returns the host address that corresponds to \a origin inside the memory
//! used as the target of an indirect map, and reports the image pitches.
//!
//! The owner's host memory is used when present; otherwise the map buffer
//! (mapMemory_) is used, allocated on the first outstanding map.
//!
//! \param origin     Element coordinates of the mapped region's origin.
//! \param region     Not used by this implementation.
//! \param mapFlags   Not used by this implementation.
//! \param rowPitch   Receives the image row pitch; may be NULL.
//! \param slicePitch Receives the image slice pitch; may be NULL.
//! \return Host pointer for \a origin, or NULL when no map memory is
//!         available.
void* Image::allocMapTarget(const amd::Coord3D& origin,
                            const amd::Coord3D& region,
                            uint mapFlags,
                            size_t* rowPitch,
                            size_t* slicePitch)
{
    amd::ScopedLock lock(owner()->lockMemoryOps());

    incIndMapCount();

    void* pHostMem = owner()->getHostMem();

    if (pHostMem == NULL) {
        if (indirectMapCount_ == 1) {
            if (!allocateMapMemory(owner()->getSize())) {
                decIndMapCount();
                return NULL;
            }
        }
        else {
            // Did the map resource allocation fail?
            if (mapMemory_ == NULL) {
                // NOTE(review): unlike the branch above, the map count is not
                // decremented on this failure path - confirm that is intended.
                LogError("Could not map target resource");
                return NULL;
            }
        }

        pHostMem = mapMemory_->getHostMem();
    }

    amd::Image* image = owner()->asImage();

    size_t elementSize = image->getImageFormat().getElementSize();

    size_t offset = origin[0] * elementSize;

    // Adjust offset with Y dimension
    offset += image->getRowPitch() * origin[1];

    // Adjust offset with Z dimension
    offset += image->getSlicePitch() * origin[2];

    // Both pitch outputs are optional; rowPitch used to be dereferenced
    // unconditionally while slicePitch was already guarded.
    if (rowPitch != NULL) {
        *rowPitch = image->getRowPitch();
    }
    if (slicePitch != NULL) {
        *slicePitch = image->getSlicePitch();
    }

    return (static_cast<uint8_t*>(pHostMem) + offset);
}
|
||||
|
||||
//! Destroys the image; the backing storage is released through destroy().
Image::~Image()
|
||||
{
|
||||
destroy();
|
||||
}
|
||||
|
||||
void
|
||||
Image::destroy()
|
||||
{
|
||||
if (owner()->parent() != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (owner()->isInterop()) {
|
||||
destroyInterop();
|
||||
return;
|
||||
}
|
||||
|
||||
if (dev_.settings().enableLocalMemory_) {
|
||||
hsacoreapi->HsaFreeDeviceMemory(deviceMemory_);
|
||||
}
|
||||
else {
|
||||
servicesapi->HsaFreeSystemMemory(deviceMemory_);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
@@ -1,202 +0,0 @@
|
||||
#ifndef HSAMEMORY_HPP_
|
||||
#define HSAMEMORY_HPP_
|
||||
|
||||
#include "top.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "utils/debug.hpp"
|
||||
#include "hsadevice.hpp"
|
||||
#include "services.h"
|
||||
#ifdef _WIN32
|
||||
#include "amdocl/cl_d3d11_amd.hpp"
|
||||
#endif
|
||||
#include "amdocl/cl_gl_amd.hpp"
|
||||
#include "hsainterop.h"
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
// Kind of graphics-API resource a memory object was created from. The value
// is cached in Memory::interopType_.
enum InteropType {
|
||||
InteropNone = 0,
|
||||
InteropD3D9 = 1,
|
||||
InteropD3D10 = 2,
|
||||
InteropD3D11 = 3,
|
||||
InteropGL = 4
|
||||
};
|
||||
|
||||
class Memory : public device::Memory {
|
||||
public:
|
||||
Memory(const oclhsa::Device &dev, amd::Memory &owner);
|
||||
|
||||
virtual ~Memory();
|
||||
|
||||
// Getter for deviceMemory_.
|
||||
void *getDeviceMemory() const { return deviceMemory_; }
|
||||
|
||||
// Gets a pointer to a region of host-visible memory for use as the target
|
||||
// of an indirect map for a given memory object
|
||||
virtual void *allocMapTarget(const amd::Coord3D &origin,
|
||||
const amd::Coord3D ®ion,
|
||||
uint mapFlags,
|
||||
size_t *rowPitch,
|
||||
size_t *slicePitch);
|
||||
|
||||
// Create device memory according to OpenCL memory flag.
|
||||
virtual bool create() = 0;
|
||||
virtual bool createInterop() = 0;
|
||||
|
||||
// Pins system memory associated with this memory object.
|
||||
virtual bool pinSystemMemory(void *hostPtr, // System memory address
|
||||
size_t size // Size of allocated system memory
|
||||
) {
|
||||
Unimplemented();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Immediate blocking write from device cache to owners's backing store.
|
||||
// Marks owner as "current" by resetting the last writer to NULL.
|
||||
virtual void syncHostFromCache(SyncFlags syncFlags = SyncFlags())
|
||||
{
|
||||
// Need to revisit this when multi-devices is supported.
|
||||
}
|
||||
|
||||
bool processGLResource (GLResourceOP operation) { return true;}
|
||||
|
||||
// Releases indirect map surface
|
||||
void releaseIndirectMap() { decIndMapCount(); }
|
||||
|
||||
//! Map the device memory to CPU visible
|
||||
virtual void* cpuMap(
|
||||
device::VirtualDevice& vDev, //!< Virtual device for map operaiton
|
||||
uint flags = 0, //!< flags for the map operation
|
||||
// Optimization for multilayer map/unmap
|
||||
uint startLayer = 0, //!< Start layer for multilayer map
|
||||
uint numLayers = 0, //!< End layer for multilayer map
|
||||
size_t* rowPitch = NULL,//!< Row pitch for the device memory
|
||||
size_t* slicePitch = NULL //!< Slice pitch for the device memory
|
||||
);
|
||||
|
||||
//! Unmap the device memory
|
||||
virtual void cpuUnmap(
|
||||
device::VirtualDevice& vDev //!< Virtual device for unmap operaiton
|
||||
);
|
||||
|
||||
bool isHsaLocalMemory() const;
|
||||
|
||||
// Accessors for indirect map memory object
|
||||
amd::Memory *mapMemory() const { return mapMemory_; }
|
||||
|
||||
protected:
|
||||
bool allocateMapMemory(size_t allocationSize);
|
||||
|
||||
void freeMapMemory();
|
||||
|
||||
// Decrement map count
|
||||
virtual void decIndMapCount();
|
||||
|
||||
// Free / deregister device memory.
|
||||
virtual void destroy() = 0;
|
||||
|
||||
//This function is called in the destructor ~Buffer() and ~Image(),
|
||||
//since InteropObject belonging to owner() is destroyed before
|
||||
//the destructor is called, we use the cached values of
|
||||
//interopType and Resource in this function.
|
||||
virtual void destroyInterop();
|
||||
|
||||
// Pointer to the device associated with this memory object.
|
||||
const oclhsa::Device &dev_;
|
||||
|
||||
// Pointer to the device memory. This could be in system or device local mem.
|
||||
void* deviceMemory_;
|
||||
|
||||
InteropType interopType_;
|
||||
#ifdef _WIN32
|
||||
ID3D10Resource* d3d10Resource_;
|
||||
ID3D11Resource* d3d11Resource_;
|
||||
#endif
|
||||
HsaGLResource glResource_;
|
||||
|
||||
private:
|
||||
// Disable copy constructor
|
||||
Memory(const Memory &);
|
||||
|
||||
// Disable operator=
|
||||
Memory &operator=(const Memory &);
|
||||
};
|
||||
|
||||
|
||||
|
||||
class Buffer : public oclhsa::Memory {
|
||||
public:
|
||||
Buffer(const oclhsa::Device &dev, amd::Memory &owner);
|
||||
|
||||
virtual ~Buffer();
|
||||
|
||||
// Create device memory according to OpenCL memory flag.
|
||||
virtual bool create();
|
||||
|
||||
// Recreate the device memory using new size and alignment.
|
||||
bool recreate(size_t newSize, size_t newAlignment, bool forceSystem);
|
||||
|
||||
//! Create a interop memory
|
||||
bool createInterop();
|
||||
|
||||
private:
|
||||
// Disable copy constructor
|
||||
Buffer(const Buffer &);
|
||||
|
||||
// Disable operator=
|
||||
Buffer &operator=(const Buffer &);
|
||||
|
||||
// Free / deregister device memory.
|
||||
void destroy();
|
||||
};
|
||||
|
||||
class Image : public oclhsa::Memory
|
||||
{
|
||||
public:
|
||||
Image(const oclhsa::Device& dev, amd::Memory& owner);
|
||||
|
||||
virtual ~Image();
|
||||
|
||||
//! Create device memory according to OpenCL memory flag.
|
||||
virtual bool create();
|
||||
|
||||
//! Create an image view
|
||||
bool createView(Image &image);
|
||||
|
||||
virtual bool createInterop();
|
||||
|
||||
//! Gets a pointer to a region of host-visible memory for use as the target
|
||||
//! of an indirect map for a given memory object
|
||||
virtual void* allocMapTarget(const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch);
|
||||
|
||||
size_t getDeviceRowPitchSize() { return deviceImageInfo_.rowPitchInBytes; }
|
||||
size_t getDeviceSlicePitchSize() { return deviceImageInfo_.slicePitchInBytes; }
|
||||
size_t getDeviceDataSize() { return deviceImageInfo_.imageSizeInBytes; }
|
||||
size_t getDeviceDataAlignment() { return deviceImageInfo_.imageAlignmentInBytes; }
|
||||
|
||||
void* getHsaImageObjectAddress() { return &hsaImageObject_[0];}
|
||||
size_t getHsaImageObjectSizeInBytes() {return sizeof(hsaImageObject_); }
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
Image(const Buffer&);
|
||||
|
||||
//! Disable operator=
|
||||
Image& operator=(const Buffer&);
|
||||
|
||||
// Free / deregister device memory.
|
||||
void destroy();
|
||||
|
||||
void populateImageDescriptor();
|
||||
|
||||
HsaImageDescriptor imageDescriptor_;
|
||||
HsaDeviceImageInfo deviceImageInfo_;
|
||||
uint8_t hsaImageObject_[HSA_IMAGE_OBJECT_SIZE];
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -1,726 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
#include "device/hsa/hsaprogram.hpp"
|
||||
|
||||
#include "compiler/lib/loaders/elf/elf.hpp"
|
||||
#include "compiler/lib/utils/options.hpp"
|
||||
#include "runtime/device/hsa/hsakernel.hpp"
|
||||
#include "runtime/device/hsa/hsacompilerlib.hpp"
|
||||
#include "runtime/device/hsa/oclhsa_common.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "utils/libUtils.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <istream>
|
||||
|
||||
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
|
||||
namespace oclhsa {
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
/* Temporary log callback handed to the compiler library: prints \a msg to
 * std::cout behind a fixed prefix, then ends the line and flushes.
 * The \a size argument is not consulted. */
static void logFunction(const char *msg, size_t size) {
  (void)size;
  std::ostream &sink = std::cout;
  sink << "Compiler Library log :";
  sink << msg;
  sink << std::endl;
}
|
||||
|
||||
//! Unloads the BRIG (if it was loaded) and finalizes the program's aclBinary.
FSAILProgram::~FSAILProgram() {
  unloadBrig();

  // Nothing else to release when no elf binary was ever attached.
  if (binaryElf_ == NULL) {
    return;
  }

  // Free the elf binary; a failure here is only reported, not propagated.
  const acl_error status = g_complibApi._aclBinaryFini(binaryElf_);
  if (status != ACL_SUCCESS) {
    LogWarning( "Error while destroying the acl binary \n" );
  }
}
|
||||
|
||||
//! Constructs a program bound to \a device with no aclBinary and no BRIG
//! loaded, and prepares the aclBinaryOptions used for the compiler library.
FSAILProgram::FSAILProgram(oclhsa::NullDevice& device)
    : device::Program(device),
      llvmBinary_(),
      binaryElf_(NULL),
      device_(device),
      isBrigLoaded_(false)
{
  // brig_ is reachable through the public brig() accessor before loadBrig()
  // has run, so give it a defined (all-zero) state instead of leaving it
  // indeterminate. loadBrig() zeroes it the same way.
  memset(&brig_, 0, sizeof(brig_));

  memset(&binOpts_, 0, sizeof(binOpts_));
  binOpts_.struct_size = sizeof(binOpts_);
  //binOpts_.elfclass = LP64_SWITCH( ELFCLASS32, ELFCLASS64 );
  //Setting as 32 bit because hsail64 returns an invalid aclTargetInfo
  //when aclGetTargetInfo is called - EPR# 377910
  binOpts_.elfclass = ELFCLASS32;
  binOpts_.bitness = ELFDATA2LSB;
  // The compiler library allocates and releases through the C heap.
  binOpts_.alloc = &::malloc;
  binOpts_.dealloc = &::free;
}
|
||||
|
||||
bool FSAILProgram::initClBinary(char *binaryIn, size_t size) { // Save the
|
||||
// original
|
||||
// binary that
|
||||
// isn't owned
|
||||
// by ClBinary
|
||||
clBinary()->saveOrigBinary(binaryIn, size);
|
||||
|
||||
char *bin = binaryIn;
|
||||
size_t sz = size;
|
||||
|
||||
int encryptCode;
|
||||
|
||||
char *decryptedBin;
|
||||
size_t decryptedSize;
|
||||
if (!clBinary()->decryptElf(binaryIn, size,
|
||||
&decryptedBin, &decryptedSize, &encryptCode)) {
|
||||
return false;
|
||||
}
|
||||
if (decryptedBin != NULL) {
|
||||
// It is decrypted binary.
|
||||
bin = decryptedBin;
|
||||
sz = decryptedSize;
|
||||
}
|
||||
|
||||
// Both 32-bit and 64-bit are allowed!
|
||||
if (!amd::isElfMagic(bin)) {
|
||||
// Invalid binary.
|
||||
if (decryptedBin != NULL) {
|
||||
delete[]decryptedBin;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
clBinary()->setFlags(encryptCode);
|
||||
|
||||
return clBinary()->setBinary(bin, sz, (decryptedBin != NULL));
|
||||
}
|
||||
|
||||
//! Pre-compile setup: runs the base-class initBuild, initializes the
//! ClBinary and configures its ELF output.
//!
//! \param options  build options; also consulted for the BIF dump flag
//! \return false if the base class or the ELF output setup fails
bool FSAILProgram::initBuild(amd::option::Options *options) {
  if (!device::Program::initBuild(options)) {
    return false;
  }

  // Need to get device information from CAL !?!?
  // Needs the device pointer from CAL to send to options class
  //
  // Shreyas: Commenting this might cause a bug - keeping this for now
  // options->setPerBuildInfo("hsa",
  //                          binary_.getEncryptCode()
  //                          );

  // Elf Binary setup
  // true means fsail required
  clBinary()->init(options, true);

  // A dump file name is only produced when BIF dumping was requested.
  std::string dumpFileName;
  if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
    dumpFileName = options->getDumpFileName(".bin");
  }
  const char *dumpPath = dumpFileName.empty() ? NULL : dumpFileName.c_str();

  const bool wantElf64 = getCompilerOptions()->oVariables->EnableGpuElf64;
  if (clBinary()->setElfOut(wantElf64 ? ELFCLASS64 : ELFCLASS32, dumpPath)) {
    return true;
  }

  LogError("Setup elf out for gpu failed");
  return false;
}
|
||||
|
||||
// ! post-compile setup for GPU
|
||||
bool FSAILProgram::finiBuild(bool isBuildGood) {
|
||||
clBinary()->resetElfOut();
|
||||
clBinary()->resetElfIn();
|
||||
|
||||
if (!isBuildGood) {
|
||||
// Prevent the encrypted binary form leaking out
|
||||
clBinary()->setBinary(NULL, 0);
|
||||
|
||||
}
|
||||
|
||||
return device::Program::finiBuild(isBuildGood);
|
||||
}
|
||||
|
||||
//! Reads a whole file into a freshly malloc'ed, NUL-terminated buffer.
//!
//! \param source_filename  path of the file to read (opened in binary mode)
//! \param size             receives the number of bytes read (the terminating
//!                         NUL is not counted); left untouched on failure
//! \return the buffer, which the caller must release with free(), or NULL if
//!         the file cannot be opened, sized, allocated for, or fully read.
static char *readFile(std::string source_filename, size_t &size) {
  FILE *fp = ::fopen(source_filename.c_str(), "rb");
  if (fp == NULL) {
    return NULL;
  }

  char *contents = NULL;

  // Obtain the file size. ftell() reports failure as -1, so the result is kept
  // signed and validated before it is used as an allocation size.
  if (::fseek(fp, 0, SEEK_END) == 0) {
    const long length = ::ftell(fp);
    if (length >= 0 && ::fseek(fp, 0, SEEK_SET) == 0) {
      const size_t byteCount = static_cast<size_t>(length);
      contents = static_cast<char *>(::malloc(byteCount + 1));
      if (contents != NULL) {
        if (::fread(contents, 1, byteCount, fp) == byteCount) {
          contents[byteCount] = '\0';
          size = byteCount;
        } else {
          // Short read: discard the partial buffer.
          ::free(contents);
          contents = NULL;
        }
      }
    }
  }

  // Single exit so the stream is closed on every path, including failures.
  ::fclose(fp);
  return contents;
}
|
||||
|
||||
//! Probes binaryElf_ for its sections and derives the stage compilation
//! should continue from.
//!
//! \return ACL_TYPE_HSAIL_BINARY when the LLVMIR section and all of the
//!         CG / BRIG strtab / BRIG code / BRIG operands sections are present;
//!         ACL_TYPE_LLVMIR_BINARY when LLVMIR is present, none of those four
//!         are, and there is no .text section;
//!         ACL_TYPE_DEFAULT otherwise (the reason is appended to buildLog_
//!         for the missing-section cases).
aclType FSAILProgram::getNextCompilationStageFromBinary() {
  acl_error status;
  size_t sectionSize = 0;

  // Checking llvmir in .llvmir section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclLLVMIR,
                                  &status);
  const bool hasLlvmir = (status == ACL_SUCCESS);

  // Checking compile & link options in .comment section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclCOMMENT,
                                  &status);
  const bool hasOptions = (status == ACL_SUCCESS);

  // Without LLVMIR the binary is unusable; a missing COMMENT section is only
  // reported alongside that error.
  if (!hasLlvmir) {
    buildLog_ +="Error while linking : \
Invalid binary (Missing LLVMIR section)\n" ;
    if (!hasOptions) {
      buildLog_ +="Warning while linking : \
Invalid binary (Missing COMMENT section)\n" ;
    }
    return ACL_TYPE_DEFAULT;
  }

  // Checking HSAIL in .cg section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclCODEGEN,
                                  &status);
  const bool hasHsail = (status == ACL_SUCCESS);

  // Checking BRIG STRTAB in .brig_strtab section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclBRIGstrs,
                                  &status);
  const bool hasBrigStrtab = (status == ACL_SUCCESS);

  // Checking BRIG CODE in .brig_code section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclBRIGcode,
                                  &status);
  const bool hasBrigCode = (status == ACL_SUCCESS);

  // Checking BRIG OPERANDS in .brig_operands section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclBRIGoprs,
                                  &status);
  const bool hasBrigOperands = (status == ACL_SUCCESS);

  // The four HSAIL/BRIG sections must be either all present or all absent.
  const bool hasAll = hasHsail && hasBrigStrtab && hasBrigCode && hasBrigOperands;
  const bool hasNone = !hasHsail && !hasBrigStrtab && !hasBrigCode && !hasBrigOperands;
  if (!hasAll && !hasNone) {
    if (!hasHsail) {
      buildLog_ +="Error while linking : \
Invalid binary (Missing CG section)\n" ;
    }
    if (!hasBrigStrtab) {
      buildLog_ +="Error while linking : \
Invalid binary (Missing BRIG_STRTAB section)\n" ;
    }
    if (!hasBrigCode) {
      buildLog_ +="Error while linking : \
Invalid binary (Missing BRIG_CODE section)\n" ;
    }
    if (!hasBrigOperands) {
      buildLog_ +="Error while linking : \
Invalid binary (Missing BRIG_OPERANDS section)\n" ;
    }
    return ACL_TYPE_DEFAULT;
  }

  aclType stage = hasAll ? ACL_TYPE_HSAIL_BINARY : ACL_TYPE_LLVMIR_BINARY;

  // Checking ISA in .text section
  g_complibApi._aclExtractSection(device().compiler(),
                                  binaryElf_,
                                  &sectionSize,
                                  aclTEXT,
                                  &status);
  const bool hasShaderIsa = (status == ACL_SUCCESS);

  // ISA present while the HSAIL/BRIG sections are absent is rejected.
  if (hasShaderIsa && stage == ACL_TYPE_LLVMIR_BINARY) {
    stage = ACL_TYPE_DEFAULT;
  }
  return stage;
}
|
||||
bool FSAILProgram::updateAclBinaryWithKernelIsaAndDebug(std::string kernelName){
|
||||
assert(brig_.loadmap_section != NULL);
|
||||
aclBinary * internalAclBinary = reinterpret_cast<aclBinary*>(brig_.loadmap_section);
|
||||
|
||||
std::string openClKernelName("&__OpenCL_" + kernelName + "_kernel");
|
||||
const oclBIFSymbolStruct* isaSymbolStruct = findBIF30SymStruct(symISABinary);
|
||||
assert(isaSymbolStruct && "symbol not found");
|
||||
std::string kernelIsaSymbol = isaSymbolStruct->str[bif::PRE] +
|
||||
openClKernelName + isaSymbolStruct->str[bif::POST];
|
||||
|
||||
const oclBIFSymbolStruct* debugSymbolStruct = findBIF30SymStruct(symDebugInfo);
|
||||
assert(debugSymbolStruct && "symbol not found");
|
||||
//For debug symbols, the PRE is used for BRIG debug and the POST is used for
|
||||
//ISA debug
|
||||
std::string kernelIsaDebugSymbol = debugSymbolStruct->str[bif::POST] + openClKernelName;
|
||||
|
||||
//Extract the ISA section
|
||||
size_t symbolSize;
|
||||
acl_error errorCode;
|
||||
const void* isaSymbol = g_complibApi._aclExtractSymbol(device().compiler(),
|
||||
internalAclBinary,
|
||||
&symbolSize,
|
||||
aclTEXT,
|
||||
kernelIsaSymbol.c_str(),
|
||||
&errorCode);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Failed to extract ISA for kernel";
|
||||
return false;
|
||||
}
|
||||
//Insert the ISA section
|
||||
errorCode = g_complibApi._aclInsertSymbol(device().compiler(),
|
||||
binaryElf_,
|
||||
isaSymbol,
|
||||
symbolSize,
|
||||
aclTEXT,
|
||||
kernelIsaSymbol.c_str());
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Failed to insert ISA for kernel";
|
||||
return false;
|
||||
}
|
||||
const void* debugSymbol = g_complibApi._aclExtractSymbol(device().compiler(),
|
||||
internalAclBinary,
|
||||
&symbolSize,
|
||||
aclHSADEBUG,
|
||||
kernelIsaDebugSymbol.c_str(),
|
||||
&errorCode);
|
||||
//If debug information is available
|
||||
if (errorCode == ACL_SUCCESS) {
|
||||
//Update binary with the debug section for the kernel
|
||||
errorCode = g_complibApi._aclInsertSymbol(device().compiler(),
|
||||
binaryElf_,
|
||||
debugSymbol,
|
||||
symbolSize,
|
||||
aclHSADEBUG,
|
||||
kernelIsaDebugSymbol.c_str());
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Failed to insert debug information for kernel";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
//! Extracts a symbol from binaryElf_ and copies it into a malloc'ed buffer.
//!
//! \param id                 section the symbol lives in
//! \param symbol_name        name of the symbol to extract
//! \param address_to_copy    receives the malloc'ed copy (only written when
//!                           the symbol was found)
//! \param symbol_size_bytes  receives the symbol size; reset to 0 on entry
//! \param verify             when true a missing symbol is an error; when
//!                           false a missing symbol is silently skipped
//! \return false if a mandatory symbol is missing or the allocation fails
bool FSAILProgram::ExtractSymbolAndCopy(aclSections id,
                                        const char *symbol_name,
                                        void** address_to_copy,
                                        size_t* symbol_size_bytes,
                                        bool verify) {
  acl_error status;
  *symbol_size_bytes = 0;
  const void *payload = g_complibApi._aclExtractSymbol(device().compiler(),
                                                       binaryElf_,
                                                       symbol_size_bytes,
                                                       id,
                                                       symbol_name,
                                                       &status);
  if (status != ACL_SUCCESS) {
    if (verify) {
      std::string message = "Could not find Brig Directive in BIFF: ";
      message += symbol_name;
      LogError(message.c_str());
      buildLog_ += message;
      return false;
    }
    //The section is not mandatory and does not exist: skip it
    return true;
  }

  void *copy = malloc(*symbol_size_bytes);
  *address_to_copy = copy;
  if (copy == NULL) {
    LogError(" Failed to allocate memory");
    return false;
  }
  memcpy(copy, payload, *symbol_size_bytes);

  return true;
}
|
||||
|
||||
//! Serializes binaryElf_ to memory, hands the image to the ClBinary and
//! records the program type.
//!
//! \param type  program type to set once the binary has been saved
//! \return false (with a message appended to buildLog_) if the binary could
//!         not be written to memory
bool FSAILProgram::saveBinaryAndSetType(type_t type) {
  //Write binary to memory
  void *image = NULL;
  size_t imageSize;
  if (ACL_SUCCESS != g_complibApi._aclWriteToMem(binaryElf_, &image, &imageSize)) {
    buildLog_ += "Failed to write binary to memory \n";
    return false;
  }

  clBinary()->saveBIFBinary(static_cast<char*>(image), imageSize);

  //Set the type of binary
  setType(type);

  //Free memory containing the serialized image, using the binary's own
  //deallocator
  binaryElf_->binOpts.dealloc(image);
  return true;
}
|
||||
|
||||
bool FSAILProgram::linkImpl(const std::vector<Program *> &inputPrograms,
|
||||
amd::option::Options *options,
|
||||
bool createLibrary) {
|
||||
std::vector<device::Program *>::const_iterator it
|
||||
= inputPrograms.begin();
|
||||
std::vector<device::Program *>::const_iterator itEnd
|
||||
= inputPrograms.end();
|
||||
acl_error errorCode;
|
||||
|
||||
// For each program we need to extract the LLVMIR and create
|
||||
// aclBinary for each
|
||||
std::vector<aclBinary *> binaries_to_link;
|
||||
|
||||
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
||||
FSAILProgram *program = (FSAILProgram *)*it;
|
||||
// Check if the program was created with clCreateProgramWIthBinary
|
||||
binary_t binary = program->binary();
|
||||
if ((binary.first != NULL) && (binary.second > 0)) {
|
||||
// Binary already exists -- we can also check if there is no
|
||||
// opencl source code
|
||||
// Need to check if LLVMIR exists in the binary
|
||||
// If LLVMIR does not exist then is it valid
|
||||
// We need to pull out all the compiled kernels
|
||||
// We cannot do this at present because we need at least
|
||||
// Hsail text to pull the kernels oout
|
||||
void *mem = const_cast<void *>(binary.first);
|
||||
binaryElf_ = g_complibApi._aclReadFromMem(mem,
|
||||
binary.second,
|
||||
&errorCode);
|
||||
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
LogWarning("Error while linking : Could not read from raw binary");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// At this stage each FSAILProgram contains a valid binary_elf
|
||||
// Check if LLVMIR is in the binary
|
||||
// @TODO - Memory leak , cannot free this buffer
|
||||
// need to fix this.. File EPR on compiler library
|
||||
size_t llvmirSize = 0;
|
||||
const void *llvmirText = g_complibApi._aclExtractSection(device().compiler(),
|
||||
binaryElf_,
|
||||
&llvmirSize,
|
||||
aclLLVMIR,
|
||||
&errorCode);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ +="Error while linking : \
|
||||
Invalid binary (Missing LLVMIR section)" ;
|
||||
return false;
|
||||
}
|
||||
// Create a new aclBinary for each LLVMIR and save it in a list
|
||||
aclBIFVersion ver = g_complibApi._aclBinaryVersion(binaryElf_);
|
||||
aclBinary *bin = g_complibApi._aclCreateFromBinary(binaryElf_, ver);
|
||||
binaries_to_link.push_back(bin);
|
||||
}
|
||||
|
||||
// At this stage each FSAILProgram in the list has an aclBinary initialized
|
||||
// and contains LLVMIR
|
||||
// We can now go ahead and link them.
|
||||
if (binaries_to_link.size() > 1) {
|
||||
errorCode = g_complibApi._aclLink(device().compiler(),
|
||||
binaries_to_link[0],
|
||||
binaries_to_link.size() - 1,
|
||||
&binaries_to_link[1],
|
||||
ACL_TYPE_LLVMIR_BINARY,
|
||||
"-create-library",
|
||||
NULL);
|
||||
}
|
||||
else {
|
||||
errorCode = g_complibApi._aclLink(device().compiler(),
|
||||
binaries_to_link[0],
|
||||
0,
|
||||
NULL,
|
||||
ACL_TYPE_LLVMIR_BINARY,
|
||||
"-create-library",
|
||||
NULL);
|
||||
}
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Failed to link programs";
|
||||
return false;
|
||||
}
|
||||
// Store the newly linked aclBinary for this program.
|
||||
binaryElf_ = binaries_to_link[0];
|
||||
// Free all the other aclBinaries
|
||||
for (size_t i = 1; i < binaries_to_link.size(); i++) {
|
||||
g_complibApi._aclBinaryFini(binaries_to_link[i]);
|
||||
}
|
||||
if (createLibrary) {
|
||||
saveBinaryAndSetType(TYPE_LIBRARY);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Now call linkImpl with the new options
|
||||
return linkImpl(options);
|
||||
}
|
||||
|
||||
bool FSAILProgram::loadBrig() {
|
||||
//Copy all the sections into BRIG
|
||||
memset(&brig_, 0 ,sizeof(HsaBrig));
|
||||
bool codeStatus = ExtractSymbolAndCopy(aclBRIGcode,
|
||||
"__BRIG__code",
|
||||
&brig_.code_section,
|
||||
&brig_.code_section_byte_size,
|
||||
true
|
||||
);
|
||||
bool oprStatus = ExtractSymbolAndCopy(aclBRIGoprs,
|
||||
"__BRIG__operands",
|
||||
&brig_.operand_section,
|
||||
&brig_.operand_section_byte_size,
|
||||
true
|
||||
);
|
||||
bool strStatus = ExtractSymbolAndCopy(aclBRIGstrs,
|
||||
"__BRIG__strtab",
|
||||
&brig_.string_section,
|
||||
&brig_.string_section_byte_size,
|
||||
true
|
||||
);
|
||||
bool dbgStatus = ExtractSymbolAndCopy(aclHSADEBUG ,
|
||||
"__debug_brig__",
|
||||
&brig_.debug_section,
|
||||
&brig_.debug_section_byte_size,
|
||||
false
|
||||
);
|
||||
if (!codeStatus || !oprStatus || !strStatus || !dbgStatus) {
|
||||
LogError("Failed to Extract one or more BRIG sections");
|
||||
buildLog_ += "Error: Failed to Extract one or more BRIG sections";
|
||||
return false;
|
||||
}
|
||||
if(hsacoreapi->HsaLoadBrig(device_.getBackendDevice(), &brig_)
|
||||
!= kHsaStatusSuccess){
|
||||
return false;
|
||||
}
|
||||
isBrigLoaded_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FSAILProgram::unloadBrig() {
|
||||
if (isBrigLoaded_ == true) {
|
||||
HsaStatus status = hsacoreapi->HsaUnloadBrig(&brig_);
|
||||
if (status != kHsaStatusSuccess){
|
||||
return false;
|
||||
}
|
||||
//Destroy the BRIG
|
||||
free(brig_.code_section);
|
||||
free(brig_.operand_section);
|
||||
free(brig_.string_section);
|
||||
free(brig_.debug_section);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Compiles the program's aclBinary down to BRIG, loads the BRIG and creates
//! one Kernel object per kernel found in the binary.
//!
//! \param options  build options; origOptionStr plus hsailOptions() is the
//!                 option string handed to the compiler library and kernels
//! \return false (with a message in buildLog_ for most paths) if the binary
//!         is invalid, compilation fails, the BRIG cannot be loaded, kernel
//!         metadata cannot be queried, a kernel fails to initialize, or the
//!         final binary cannot be saved. Returns true right after
//!         compilation for offline devices.
bool FSAILProgram::linkImpl(amd::option::Options *options) {
  acl_error errorCode = ACL_SUCCESS;
  aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;

  // The option string is the same for the recompile check, the compile step
  // and every kernel, so build it once.
  const std::string curOptions = options->origOptionStr + hsailOptions();

  //If the binaryElf_ is not set then program must have been created
  // using clCreateProgramWithBinary
  if (!binaryElf_) {
    binary_t binary = this->binary();
    if ((binary.first != NULL) && (binary.second > 0)) {
      // Binary already exists -- we can also check if there is no
      // opencl source code
      // Need to check if LLVMIR exists in the binary
      // If LLVMIR does not exist then is it valid
      // We need to pull out all the compiled kernels
      // We cannot do this at present because we need at least
      // Hsail text to pull the kernels out
      void *mem = const_cast<void *>(binary.first);
      binaryElf_ = g_complibApi._aclReadFromMem(mem,
                                                binary.second,
                                                &errorCode);
      if (errorCode != ACL_SUCCESS) {
        buildLog_ += "Error while converting to BRIG: aclBinary init failure \n" ;
        LogWarning("aclBinaryInit failed");
        return false;
      }
      // Check that all needed section also exist in binaryElf_
      // No any validity checks here
      continueCompileFrom = getNextCompilationStageFromBinary();
      if (ACL_TYPE_DEFAULT == continueCompileFrom) {
        return false;
      }
      if (ACL_TYPE_HSAIL_BINARY == continueCompileFrom) {
        // Save binary in the interface class
        // Also load compile & link options from binary into Program class members:
        // compileOptions_ & linkOptions_
        setBinary(static_cast<char*>(mem), binary.second);
        // Compare options loaded from binary with current ones
        // If they differ then recompile from ACL_TYPE_LLVMIR_BINARY
        // @TODO It is needed to compare options taking into account that:
        // 1. options are order independent;
        // 2. (may be not trivial) compare only options that affect binary
        if (compileOptions_ + linkOptions_ != curOptions) {
          continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
        }
      }
    }
  }

  // Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
  // 1. if the program is not created with binary;
  // 2. if the program is created with binary and contains only .llvmir & .comment
  // 3. if the program is created with binary, contains all brig sections,
  //    but the binary's compile & link options differ from current ones (recompilation);
  if (ACL_TYPE_LLVMIR_BINARY == continueCompileFrom) {
    errorCode = g_complibApi._aclCompile(device().compiler(),
                                         binaryElf_,
                                         curOptions.c_str(),
                                         ACL_TYPE_LLVMIR_BINARY,
                                         ACL_TYPE_CG,
                                         logFunction);
  }
  if (errorCode != ACL_SUCCESS) {
    buildLog_ += "Error while converting to BRIG: Compiling LLVMIR to BRIG \n" ;
    return false;
  }

  //Stop compilation if it is an offline device - HSA runtime does not
  //support ISA compiled offline
  if (!dev().isOnline()) {
    return true;
  }

  if (!loadBrig()) {
    buildLog_ += "Error while loading BRIG" ;
    return false;
  }

  // First query only asks for the size of the kernel-name list.
  size_t kernelNamesSize = 0;
  errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
  if (errorCode != ACL_SUCCESS) {
    buildLog_ += "Error while Finalization phase: kernel names query from the ELF failed\n";
    return false;
  }

  if (kernelNamesSize > 0) {
    // A vector owns the buffer, so it is released correctly on every path
    // (the previous code freed a new[] array with scalar delete).
    std::vector<char> kernelNames(kernelNamesSize);
    errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, NULL, &kernelNames[0], &kernelNamesSize);
    if (errorCode != ACL_SUCCESS) {
      buildLog_ += "Error while Finalization phase: kernel's Metadata is corrupted in the ELF\n";
      return false;
    }
    std::vector<std::string> vKernels = splitSpaceSeparatedString(&kernelNames[0]);

    for (std::vector<std::string>::iterator it = vKernels.begin();
         it != vKernels.end(); ++it) {
      std::string kernelName = *it;
      Kernel *aKernel = new oclhsa::Kernel(kernelName,
                                           this,
                                           &brig_,
                                           curOptions);
      if (!aKernel->init() ) {
        // Not yet registered in kernels(), so nobody else will free it.
        delete aKernel;
        return false;
      }
      aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
      // Update the binary in the FSAILProgram to save the ISA and debug information.
      // This is so the debugger and the profiler can use the a single aclBinary for all their needs.
      if (!updateAclBinaryWithKernelIsaAndDebug(kernelName)) {
        delete aKernel;
        return false;
      }
      kernels()[kernelName] = aKernel;
    }
  }

  // Report a failure to serialize the executable instead of ignoring it; the
  // compiler log is appended either way.
  const bool saved = saveBinaryAndSetType(TYPE_EXECUTABLE);
  buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
  return saved;
}
|
||||
|
||||
//! Always returns false; \a options is not consulted.
bool FSAILProgram::createBinary(amd::option::Options *options) {
  (void)options;
  return false;
}
|
||||
|
||||
bool FSAILProgram::initClBinary() {
|
||||
if (clBinary_ == NULL) {
|
||||
clBinary_ = new ClBinary(static_cast<const Device &>(device()));
|
||||
if (clBinary_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void FSAILProgram::releaseClBinary() {
|
||||
if (clBinary_ != NULL) {
|
||||
delete clBinary_;
|
||||
clBinary_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
//! Builds the option string appended to the application's options before
//! they are passed to the compiler library.
//!
//! \return the fixed FMA defines, " -m64" on LP64 builds, and one
//!         " -D<extension>=1" per space-separated token in the device's
//!         extension string
std::string FSAILProgram::hsailOptions() {
  //Standard device specific options; this is just for legacy compiler code.
  // All our devices support these options now
  std::string flags;
  flags += " -DFP_FAST_FMAF=1";
  flags += " -DFP_FAST_FMA=1";

  //TODO(sramalin) : Query the device for opencl version
  //                 and only set if -cl-std wasn't specified in
  //                 original build options (app)
  //flags += " -cl-std=CL1.2";

  //check if the host is 64 bit or 32 bit
  LP64_ONLY(flags += " -m64");

  //Now append each extension supported by the device, one by one.
  //Empty tokens (from consecutive spaces) are skipped.
  std::istringstream extensionStream(device().info().extensions_);
  std::string extension;
  while (std::getline(extensionStream, extension, ' ')) {
    if (extension.empty()) {
      continue;
    }
    flags += " -D";
    flags += extension;
    flags += "=1";
  }
  return flags;
}
|
||||
|
||||
#endif // WITHOUT_FSA_BACKEND
|
||||
} // namespace oclhsa
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef HSAPROGRAM_HPP_
|
||||
#define HSAPROGRAM_HPP_
|
||||
|
||||
#ifndef WITHOUT_FSA_BACKEND
|
||||
|
||||
#include "hsabinary.hpp"
|
||||
#include "hsacompilerlib.hpp"
|
||||
#include "services.h"
|
||||
#include "acl.h"
|
||||
#include "oclhsa_common.hpp"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include "hsadevice.hpp"
|
||||
|
||||
//! \namespace oclhsa HSA Device Implementation
|
||||
namespace oclhsa {
|
||||
|
||||
//! \class empty program
|
||||
class FSAILProgram : public device::Program
|
||||
{
|
||||
friend class ClBinary;
|
||||
public:
|
||||
//! Default constructor
|
||||
FSAILProgram(oclhsa::NullDevice& device);
|
||||
//! Default destructor
|
||||
~FSAILProgram();
|
||||
|
||||
// Initialize Binary for GPU (used only for clCreateProgramWithBinary()).
|
||||
virtual bool initClBinary(char *binaryIn, size_t size);
|
||||
|
||||
//! Returns the aclBinary associated with the progrm
|
||||
const aclBinary* binaryElf() const {
|
||||
return static_cast<const aclBinary*>(binaryElf_); }
|
||||
|
||||
//! Returns the brig associated with the progrm
|
||||
const HsaBrig* brig() {
|
||||
return static_cast<const HsaBrig*>(&brig_); }
|
||||
|
||||
const NullDevice& dev() const { return device_; }
|
||||
//! Returns the hsaBinary associated with the progrm
|
||||
const HsaDevice* hsaDevice() const {
|
||||
return dev().getBackendDevice();
|
||||
}
|
||||
|
||||
protected:
|
||||
//! pre-compile setup for GPU
|
||||
virtual bool initBuild(amd::option::Options* options);
|
||||
|
||||
//! post-compile setup for GPU
|
||||
virtual bool finiBuild(bool isBuildGood);
|
||||
|
||||
/*! \brief Compiles GPU CL program to LLVM binary (compiler frontend)
|
||||
*
|
||||
* \return True if we successefully compiled a GPU program
|
||||
*/
|
||||
virtual bool compileImpl(
|
||||
const std::string& sourceCode, //!< the program's source code
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options //!< compile options's object
|
||||
);
|
||||
|
||||
/*! \brief Compiles LLVM binary to FSAIL code (compiler backend: link+opt+codegen)
|
||||
*
|
||||
* \return The build error code
|
||||
*/
|
||||
int compileBinaryToFSAIL(
|
||||
amd::option::Options* options //!< options for compilation
|
||||
);
|
||||
|
||||
|
||||
virtual bool linkImpl(amd::option::Options* options);
|
||||
|
||||
//! Link the device programs.
|
||||
virtual bool linkImpl (const std::vector<Program*>& inputPrograms,
|
||||
amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options);
|
||||
|
||||
//! Initialize Binary
|
||||
virtual bool initClBinary();
|
||||
|
||||
//! Release the Binary
|
||||
virtual void releaseClBinary();
|
||||
|
||||
virtual const aclTargetInfo & info(const char * str = ""){
|
||||
return info_;
|
||||
}
|
||||
|
||||
virtual bool isElf(const char* bin) const {
|
||||
return amd::isElfMagic(bin);
|
||||
//return false;
|
||||
}
|
||||
|
||||
//! Returns the binary
|
||||
// This should ensure that the binary is updated with all the kernels
|
||||
// ClBinary& clBinary() { return binary_; }
|
||||
ClBinary* clBinary() {
|
||||
return static_cast<ClBinary*>(device::Program::clBinary());
|
||||
}
|
||||
const ClBinary* clBinary() const {
|
||||
return static_cast<const ClBinary*>(device::Program::clBinary());
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
//! Extracts a symbol from the binaryElf_
|
||||
// and copies it to a buffer allocated
|
||||
// by the function
|
||||
bool ExtractSymbolAndCopy(aclSections id,
|
||||
const char *symbol_name,
|
||||
void** address_to_copy,
|
||||
size_t* symbol_size_bytes,
|
||||
bool verify);
|
||||
//! Extracts the aclBinary used internally within the brig
|
||||
// and pulls the debug and ISA section for a particular kernel
|
||||
// and inserts it into aclBinary contained in the program
|
||||
bool updateAclBinaryWithKernelIsaAndDebug(std::string kernelName);
|
||||
//! Checks the existence of sections in binaryElf_
|
||||
// and calculates the next stage of compilation;
|
||||
// if set of the section is impossible, then
|
||||
// binary is invalid and function returns ACL_TYPE_DEFAULT
|
||||
aclType getNextCompilationStageFromBinary();
|
||||
//! Loads the global variables for the BRIG
|
||||
bool loadBrig();
|
||||
//! Unloads the global variables for the BRIG
|
||||
bool unloadBrig();
|
||||
bool saveBinaryAndSetType(type_t type);
|
||||
//! Disable default copy constructor
|
||||
FSAILProgram(const FSAILProgram&);
|
||||
|
||||
//! Disable operator=
|
||||
FSAILProgram& operator=(const FSAILProgram&);
|
||||
|
||||
//! Returns all the options to be appended while passing to the
|
||||
//compiler library
|
||||
std::string hsailOptions();
|
||||
|
||||
std::string openCLSource_; //!< Original OpenCL source
|
||||
std::string fsailProgram_; //!< FSAIL program after compilation.
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
//!< aclBinary and aclCompiler - for the compiler libray
|
||||
aclBinary* binaryElf_; //!<Binary for the new compiler library - shreyas edit
|
||||
aclBinaryOptions binOpts_; //!<Binary options to create aclBinary
|
||||
oclhsa::NullDevice& device_; //!< Device related to the program
|
||||
HsaBrig brig_; //!< Brig for the program
|
||||
bool isBrigLoaded_; //!< Boolean to verify is the Brig has been loaded
|
||||
};
|
||||
|
||||
/*@}*/} // namespace oclhsa
|
||||
|
||||
#endif /*WITHOUT_FSA_BACKEND*/
|
||||
#endif /* HSAPROGRAM_HPP_*/
|
||||
@@ -1,81 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef WITHOUT_GPU_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "os/os.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "hsasettings.hpp"
|
||||
|
||||
namespace oclhsa {
|
||||
|
||||
//! Initializes the HSA device default settings.
Settings::Settings()
{
    // Double precision follows the CL_KHR_FP64 flag.
    // Set this to true when we drop the flag
    doublePrecision_ = ::CL_KHR_FP64;
    pollCompletion_ = ENVVAR_HSA_POLL_KERNEL_COMPLETION;

    // Enable "local" memory in HSA
    enableLocalMemory_ = HSA_LOCAL_MEMORY_ENABLE;
    enableSvm32BitsAtomics_ = HSA_ENABLE_ATOMICS_32B;

    // Default work-group limits: total, then per dimension for 2D and 3D.
    maxWorkGroupSize_ = 256;

    maxWorkGroupSize2DX_ = 16;
    maxWorkGroupSize2DY_ = 16;

    maxWorkGroupSize3DX_ = 4;
    maxWorkGroupSize3DY_ = 4;
    maxWorkGroupSize3DZ_ = 4;
}
|
||||
|
||||
bool
|
||||
Settings::create(bool doublePrecision)
|
||||
{
|
||||
customHostAllocator_ = true;
|
||||
|
||||
// Enable extensions
|
||||
enableExtension(ClKhrByteAddressableStore);
|
||||
enableExtension(ClKhrGlobalInt32BaseAtomics);
|
||||
enableExtension(ClKhrGlobalInt32ExtendedAtomics);
|
||||
enableExtension(ClKhrLocalInt32BaseAtomics);
|
||||
enableExtension(ClKhrLocalInt32ExtendedAtomics);
|
||||
enableExtension(ClExtAtomicCounters32);
|
||||
//enableExtension(ClKhr3DImageWrites);
|
||||
enableExtension(ClKhrGlSharing);
|
||||
enableExtension(ClAmdMediaOps);
|
||||
enableExtension(ClAmdMediaOps2);
|
||||
#if defined(_WIN32)
|
||||
enableExtension(ClKhrD3d10Sharing);
|
||||
enableExtension(ClKhrD3d11Sharing);
|
||||
#endif // _WIN32
|
||||
//enableExtension(ClKhrImage2dFromBuffer);
|
||||
//enableExtension(ClAmdImage2dFromBufferReadOnly);
|
||||
// Make sure device supports doubles
|
||||
doublePrecision_ &= doublePrecision;
|
||||
|
||||
if (doublePrecision_) {
|
||||
// Enable KHR double precision extension
|
||||
enableExtension(ClKhrFp64);
|
||||
// Also enable AMD double precision extension?
|
||||
enableExtension(ClAmdFp64);
|
||||
}
|
||||
// ToDo: enable this after conformance test is updated to accept it
|
||||
// enableExtension(ClKhrIlProgram);
|
||||
|
||||
// Override current device settings
|
||||
override();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Settings::override()
|
||||
{
|
||||
}
|
||||
|
||||
} // namespace oclhsa
|
||||
|
||||
#endif // WITHOUT_GPU_BACKEND
|
||||
@@ -1,65 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef HSASETTINGS_HPP_
|
||||
#define HSASETTINGS_HPP_
|
||||
|
||||
#ifndef WITHOUT_GPU_BACKEND
|
||||
|
||||
#include "library.hpp"
|
||||
|
||||
/*! \addtogroup HSA OCL Stub Implementation
|
||||
* @{
|
||||
*/
|
||||
|
||||
//! HSA OCL STUB Implementation
|
||||
namespace oclhsa {
|
||||
|
||||
//! Device settings
|
||||
class Settings : public device::Settings
|
||||
{
|
||||
public:
|
||||
union {
|
||||
struct {
|
||||
uint doublePrecision_: 1; //!< Enables double precision support
|
||||
uint pollCompletion_: 1; //!< Enables polling in HSA
|
||||
uint enableLocalMemory_: 1; //!< Enable HSA device local memory usage
|
||||
uint enableSvm32BitsAtomics_: 1; //!< Enable platform atomics in 32 bits
|
||||
uint reserved_: 27;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
//! Default max workgroup size for 1D
|
||||
int maxWorkGroupSize_;
|
||||
|
||||
//! Default max workgroup sizes for 2D
|
||||
int maxWorkGroupSize2DX_;
|
||||
int maxWorkGroupSize2DY_;
|
||||
|
||||
//! Default max workgroup sizes for 3D
|
||||
int maxWorkGroupSize3DX_;
|
||||
int maxWorkGroupSize3DY_;
|
||||
int maxWorkGroupSize3DZ_;
|
||||
|
||||
//! Default constructor
|
||||
Settings();
|
||||
|
||||
//! Creates settings
|
||||
bool create(bool doublePrecision);
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
Settings(const Settings&);
|
||||
|
||||
//! Disable assignment
|
||||
Settings& operator=(const Settings&);
|
||||
|
||||
//! Overrides current settings based on registry/environment
|
||||
void override();
|
||||
};
|
||||
|
||||
/*@}*/} // namespace oclhsa
|
||||
|
||||
#endif /*WITHOUT_GPU_BACKEND*/
|
||||
#endif /*HSASETTINGS_HPP_*/
|
||||
Το diff αρχείου καταστέλλεται επειδή είναι πολύ μεγάλο
Φόρτωση Διαφορών
@@ -1,181 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef HSAVIRTUAL_HPP_
|
||||
#define HSAVIRTUAL_HPP_
|
||||
#include "hsadevice.hpp"
|
||||
#include "services.h"
|
||||
#include "utils/util.hpp"
|
||||
|
||||
namespace oclhsa {
|
||||
class Device;
|
||||
|
||||
// Timestamp for keeping track of some profiling information for various events
|
||||
// including EnqueueNDRangeKernel and clEnqueueCopyBuffer.
|
||||
class Timestamp {
|
||||
private:
|
||||
HsaSignal signal_;
|
||||
uint64_t start_;
|
||||
uint64_t end_;
|
||||
|
||||
public:
|
||||
// get-ers
|
||||
uint64_t getStart() const { return start_; }
|
||||
uint64_t getEnd() const { return end_; }
|
||||
HsaSignal getSignal() const { return signal_; }
|
||||
|
||||
// Default constructor
|
||||
Timestamp()
|
||||
: signal_(0),
|
||||
start_(0),
|
||||
end_(0) {}
|
||||
|
||||
// Deconstructor, which will delete the signal if we created one
|
||||
~Timestamp();
|
||||
|
||||
// Creates a signal for the timestamp, saves it, and returns it
|
||||
HsaSignal createSignal();
|
||||
|
||||
// Start a timestamp (get timestamp from OS)
|
||||
void start();
|
||||
|
||||
// End a timestamp (get timestamp from OS)
|
||||
void end();
|
||||
};
|
||||
|
||||
class VirtualGPU : public device::VirtualDevice {
|
||||
public:
|
||||
VirtualGPU(Device &device);
|
||||
~VirtualGPU();
|
||||
|
||||
bool create(HsaQueueType queueType);
|
||||
bool terminate();
|
||||
|
||||
void profilingBegin(amd::Command &command, bool drmProfiling = false);
|
||||
const Device& dev() const { return oclhsa_device_; }
|
||||
//! End the command profiling
|
||||
void profilingEnd(amd::Command &command);
|
||||
|
||||
//! Collect the profiling results
|
||||
bool profilingCollectResults(
|
||||
amd::Command* list //!< List of all commands in the batch.
|
||||
);
|
||||
void submitReadMemory(amd::ReadMemoryCommand& cmd);
|
||||
void submitWriteMemory(amd::WriteMemoryCommand& cmd);
|
||||
void submitCopyMemory(amd::CopyMemoryCommand& cmd);
|
||||
void submitMapMemory(amd::MapMemoryCommand& cmd);
|
||||
void submitUnmapMemory(amd::UnmapMemoryCommand& cmd);
|
||||
void submitKernel(amd::NDRangeKernelCommand& cmd);
|
||||
bool submitKernelInternal(
|
||||
const amd::NDRangeContainer& sizes, //!< Workload sizes
|
||||
const amd::Kernel& kernel, //!< Kernel for execution
|
||||
const_address parameters, //!< Parameters for the kernel
|
||||
void *event_handle //!< Handle to OCL event for debugging
|
||||
);
|
||||
void submitNativeFn(amd::NativeFnCommand& cmd);
|
||||
void submitMarker(amd::Marker& cmd);
|
||||
void submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd);
|
||||
void submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& cmd);
|
||||
void submitPerfCounter(amd::PerfCounterCommand& cmd);
|
||||
void flush(amd::Command* list = NULL, bool wait = false);
|
||||
void submitFillMemory(amd::FillMemoryCommand& cmd);
|
||||
void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
|
||||
|
||||
// { oclhsa OpenCL integration
|
||||
// Added these stub (no-ops) implementation of pure virtual methods,
|
||||
// when integrating HSA and OpenCL branches.
|
||||
// TODO: After inegration, whoever is working on VirtualGPU should write
|
||||
// actual implemention.
|
||||
virtual void submitSignal(amd::SignalCommand &cmd) {}
|
||||
virtual void submitMakeBuffersResident(amd::MakeBuffersResidentCommand &cmd) {}
|
||||
virtual void submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd);
|
||||
virtual void submitSvmCopyMemory(amd::SvmCopyMemoryCommand& cmd);
|
||||
virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd);
|
||||
virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd);
|
||||
virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
|
||||
void submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand &cmd) {}
|
||||
void submitThreadTrace(amd::ThreadTraceCommand &vcmd) {}
|
||||
|
||||
/**
|
||||
* @brief Waits on an outstanding kernel without regard to how
|
||||
* it was dispatched - with or without a signal
|
||||
*
|
||||
* @return bool true if Wait returned successfully, false
|
||||
* otherwise
|
||||
*/
|
||||
bool releaseGpuMemoryFence();
|
||||
// } oclhsa OpenCL integration
|
||||
private:
|
||||
/**
|
||||
* @brief Retrieves the various configuration parameters that could
|
||||
* be used to execute a kernel - Enable Profiling, Sizes of Global,
|
||||
* Local work spaces, offsets for global Id, etc.
|
||||
*
|
||||
* @note: The implementation currently does not verify if the input
|
||||
* parameters for global, local and offset arrays are valid. For
|
||||
* example, it assumes that the values that are passed in conform to
|
||||
* openCL properties such as: CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
||||
* CL_DEVICE_MAX_WORK_GROUP_SIZE, etc
|
||||
*
|
||||
* @param lds_size The amount of LDS memory used in the kernel.
|
||||
*
|
||||
* @param profile_enable Flag to enable kernel profiling.
|
||||
*
|
||||
* @param config Output parameter updated with various execution
|
||||
* policy paramters.
|
||||
*
|
||||
* @param sizes The work item and work group size.
|
||||
*
|
||||
* @return HsaStatus ::kHsaStatusSuccess or ::kHsaStatusError
|
||||
*/
|
||||
HsaStatus getDispatchConfig(
|
||||
uint32_t lds_size,
|
||||
bool profile_enable,
|
||||
HsaDispatchConfig* config,
|
||||
const amd::NDRangeContainer& sizes,
|
||||
const amd::Kernel& kernel);
|
||||
|
||||
/**
|
||||
* @brief Synchronize kernel submits across different queue types
|
||||
* i.e. a submit to compute kernel should determine that there is no
|
||||
* outstanding kernel to another queue type, e.g. interop queue.
|
||||
* The same applies for submits to interop queues or queues of
|
||||
* another type.
|
||||
*
|
||||
* @param dispatch_queue Queue object into which the current kernel
|
||||
* would be submitted.
|
||||
*
|
||||
* @return HsaStatus ::kHsaStatusSuccess or ::kHsaStatusError
|
||||
*/
|
||||
HsaStatus synchronizeInterQueueKernels(HsaQueue *dispatchQueue);
|
||||
|
||||
/**
|
||||
* @brief Maintains the list of memory blocks allocated
|
||||
* for one or more kernel submissions
|
||||
*/
|
||||
std::vector<void *> kernelArgList_;
|
||||
|
||||
/**
|
||||
* @brief Indicates if a kernel dispatch is outstanding. This flag is
|
||||
* used to synchronized on kernel outputs.
|
||||
*/
|
||||
bool hasPendingDispatch_;
|
||||
|
||||
/**
|
||||
* @brief Maintains the queue type of the last kernel submit.
|
||||
* Submission of kernels across queue types must be coordinated
|
||||
* i.e. all outstanding kernels on one queue type must be finished
|
||||
* before kernels can be submitted onto a different queue type.
|
||||
*/
|
||||
HsaQueueType lastSubmitQueue_;
|
||||
|
||||
Timestamp* timestamp_;
|
||||
HsaDevice* gpu_device_; //!< Physical device
|
||||
HsaQueue* gpu_queue_; //!< Queue associated with a gpu
|
||||
HsaQueue* interopQueue_; //!< Interop queue associated with a gpu
|
||||
uint32_t dispatch_id_; //!< This variable must be updated atomically.
|
||||
Device& oclhsa_device_; //!< oclhsa device object
|
||||
};
|
||||
}
|
||||
#endif
|
||||
@@ -1,3 +0,0 @@
|
||||
LIBRARY OCLHSA
|
||||
EXPORTS
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef _OPENCL_RUNTIME_DEVICE_HSA_OCLHSA_COMMON_HPP_
|
||||
#define _OPENCL_RUNTIME_DEVICE_HSA_OCLHSA_COMMON_HPP_
|
||||
|
||||
#include "hsacore_symbol_loader.hpp"
|
||||
#include "services_symbol_loader.hpp"
|
||||
|
||||
#include "hsacoreagent.h"
|
||||
#include "hsaagent.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern const HsaCoreApiTable *hsacoreapi;
|
||||
extern const HsaServicesApiTable *servicesapi;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // header guard
|
||||
@@ -1,52 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
// Implementation of the loading of the DLL and of all the exported
|
||||
// function symbols.
|
||||
|
||||
#include "device/hsa/services_symbol_loader.hpp"
|
||||
|
||||
#include "runtime/thread/thread.hpp"
|
||||
#include "runtime/utils/debug.hpp"
|
||||
#include "runtime/os/os.hpp"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
ServicesApiSymbols* ServicesApiSymbols::instance_ = NULL;
|
||||
// services_dll_handle_ is defined in ServicesApiSymbols class.
|
||||
// This macro must be used only in member functions of ServicesApiSymbols
|
||||
// class.
|
||||
#define LOADSYMBOL(api) \
|
||||
api = (pfn_ ## api) amd::Os::getSymbol(services_dll_handle_, # api); \
|
||||
if (api == NULL) { \
|
||||
amd::log_printf(amd::LOG_ERROR, __FILE__, __LINE__, \
|
||||
"amd::Os::getSymbol() for exported func " # api " failed."); \
|
||||
amd::Os::unloadLibrary(services_dll_handle_); \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
ServicesApiSymbols::ServicesApiSymbols()
|
||||
: services_dll_name_(SERVICES_DLL_NAME) {
|
||||
services_dll_handle_ = amd::Os::loadLibrary(services_dll_name_.c_str());
|
||||
if (services_dll_handle_ == NULL) {
|
||||
// Do not print, otherwise tests fail when HSA core and services DLLs are
|
||||
// not installed, in which case only ORCA stack is initialized and it is
|
||||
// not an error
|
||||
// amd::log_printf(amd::LOG_INFO, __FILE__, __LINE__,
|
||||
//"Cannot load hsa servicese dll. HSA DLLs may not be installed on the machine."
|
||||
//" OpenCL requirement, returning without error.");
|
||||
return;
|
||||
}
|
||||
|
||||
LOADSYMBOL(HsaGetServicesApiTable)
|
||||
}
|
||||
|
||||
// Unloads the services library if it was loaded and clears the handle.
ServicesApiSymbols::~ServicesApiSymbols() {
    if (!services_dll_handle_) {
        return;     // nothing was loaded
    }

    amd::Os::unloadLibrary(services_dll_handle_);
    services_dll_handle_ = NULL;
}
|
||||
@@ -1,78 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef _OPENCL_RUNTIME_DEVICE_HSA_SERVICES_SYMBOL_LOADER_HPP_
|
||||
#define _OPENCL_RUNTIME_DEVICE_HSA_SERVICES_SYMBOL_LOADER_HPP_
|
||||
|
||||
// File: services_symbol_loader.hpp
|
||||
// The main purpose of this file (class ServicesApiSymbols), is to load the HSA
|
||||
// API function symbol HsaGetServicesApiTable() from hsaservices DLL/so module.
|
||||
// This function outputs HsaServicesApiTable which has pointers to the rest of the
|
||||
// hsaservices API functions, which should be used to invoke the API functions.
|
||||
|
||||
#include "services.h"
|
||||
#include "hsainterop.h"
|
||||
#include "hsaagent.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
// In case of change in the name of hsaservices dll name, change the
|
||||
// #define SERVICES_DLL_NAME value. this is the only place the DLL name should
|
||||
// be changed or referred to.
|
||||
#define SERVICES_DLL_NAME "hsaservices" LP64_ONLY("64")
|
||||
|
||||
// Convention: The typedefed function name must be prefixed with pfn_ indicating
|
||||
// it as pointer-to-function.
|
||||
typedef HsaStatus (*pfn_HsaGetServicesApiTable)(const HsaServicesApiTable **api_table);
|
||||
|
||||
// Singleton ServicesApiSymbols class contains the module handle and loaded
|
||||
// symbol of the single API accessor function.
|
||||
// To call an hsaservices API function, the instance of this class must be used.
|
||||
// Example:
|
||||
// // In initialization code
|
||||
// const HsaServicesApiTable *servicesapi = NULL;
|
||||
// ServicesApiSymbols::Instance().HsaGetServicesApiTable(&servicesapi);
|
||||
// ...
|
||||
// ...
|
||||
// // Calling the services api.
|
||||
// servicesapi->HsaGetDevices(...);
|
||||
// servicesapi->HsaRegisterMemory(...);
|
||||
class ServicesApiSymbols {
|
||||
public:
|
||||
// Only the access function symbol is loaded, which in turn has pointers to
|
||||
// rest of the hsaservices api.
|
||||
pfn_HsaGetServicesApiTable HsaGetServicesApiTable;
|
||||
static ServicesApiSymbols& Instance() {
|
||||
if (instance_ == NULL) {
|
||||
instance_ = new ServicesApiSymbols();
|
||||
}
|
||||
|
||||
return *instance_;
|
||||
}
|
||||
static void teardown(){
|
||||
if (instance_ != NULL){
|
||||
delete instance_;
|
||||
}
|
||||
}
|
||||
static bool IsDllLoaded(){
|
||||
return Instance().services_dll_handle_ ? true : false;
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
|
||||
static ServicesApiSymbols* instance_;
|
||||
// Force singleton pattern.
|
||||
explicit ServicesApiSymbols();
|
||||
~ServicesApiSymbols();
|
||||
ServicesApiSymbols(const ServicesApiSymbols &) {}
|
||||
const ServicesApiSymbols &operator=(const ServicesApiSymbols &) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Data.
|
||||
void *services_dll_handle_;
|
||||
const std::string services_dll_name_;
|
||||
};
|
||||
#endif // _OPENCL_RUNTIME_DEVICE_HSA_SERVICES_SYMBOL_LOADER_HPP_
|
||||
@@ -1,97 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
/** @file */
|
||||
|
||||
#ifndef _OPENCL_RUNTIME_DEVICE_HSA_SYSTEM_MEMORY_H_
|
||||
#define _OPENCL_RUNTIME_DEVICE_HSA_SYSTEM_MEMORY_H_
|
||||
|
||||
#include "newcore.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/**
|
||||
*******************************************************************************
|
||||
* @brief System memory types.
|
||||
* @details The memory option enumerations are used for specifying the various
|
||||
* configurable global system memory allocation options.
|
||||
*******************************************************************************
|
||||
*/
|
||||
typedef enum {
  /** Requests cacheable system memory. */
  kHsaAmdSystemMemoryTypeDefault = 0,

  /** Requests system memory with caching disabled. */
  kHsaAmdSystemMemoryTypeUncached = 1,

  /** Requests write-combined system memory. */
  kHsaAmdSystemMemoryTypeWriteCombined = 2,

  /** Shortcut to get the number of supported memory types. */
  kHsaAmdSystemMemoryTypeCount = 3
} HsaAmdSystemMemoryType;
|
||||
|
||||
/**
|
||||
****************************************************************************
|
||||
* @brief Allocate system memory accessible by all AMD devices in the platform.
|
||||
* @details The HsaAmdAllocateSystemMemory() interface is used for allocating
|
||||
* global system memory accessible (read and write) by the host and all AMD
|
||||
* devices in the platform.
|
||||
*
|
||||
* @param size The allocation size in bytes.
|
||||
* @param alignment The alignment size in bytes for the address of resulting
|
||||
* allocation. If the value is zero, no particular alignment will be applied.
|
||||
* If the value is not zero, it needs to be a power of two and minimum of
|
||||
* sizeof(void*).
|
||||
* @param type Type of system memory.
|
||||
* @param address A pointer to the location of where to return the pointer to
|
||||
* the base of the allocated region of memory.
|
||||
*
|
||||
* @return HsaStatus
|
||||
* @retval kHsaStatusSuccess The requested amount of memory was successfully
|
||||
* allocated.
|
||||
* @retval kHsaStatusOutOfMemory The implementation was unable to allocate the
|
||||
* requested amount of system memory due to memory constraints.
|
||||
* @retval kHsaStatusInvalidArgument An address of NULL was specified, the size
|
||||
* is 0 or the alignment is invalid.
|
||||
*
|
||||
* @see HsaAmdFreeSystemMemory, HsaAmdSystemMemoryType
|
||||
**************************************************************************/
|
||||
COREAPI HsaStatus HsaAmdAllocateSystemMemory(size_t size,
|
||||
size_t alignment,
|
||||
HsaAmdSystemMemoryType type,
|
||||
void **address);
|
||||
|
||||
/**
|
||||
****************************************************************************
|
||||
* @brief Deallocate system memory.
|
||||
* @details The HsaAmdFreeSystemMemory() interface is used for
|
||||
* deallocating global system memory that was previously allocated with
|
||||
* HsaAmdAllocateSystemMemory().
|
||||
*
|
||||
* @param address A pointer to the address to be deallocated.
|
||||
*
|
||||
* @return HsaStatus
|
||||
* @retval kHsaStatusSuccess The requested memory was successfully deallocated.
|
||||
* @retval kHsaStatusInvalidArgument An address of NULL was specified.
|
||||
*
|
||||
* @see HsaAmdAllocateSystemMemory
|
||||
***************************************************************************
|
||||
*/
|
||||
COREAPI HsaStatus HsaAmdFreeSystemMemory(void *address);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
#endif // header guard
|
||||
Αναφορά σε νέο ζήτημα
Block a user