diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index 4a5ed53157..6ba0fabdd7 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -1225,7 +1225,7 @@ class Device : public RuntimeObject { virtual device::VirtualDevice* createVirtualDevice(CommandQueue* queue = NULL) = 0; //! Create a program for device. - virtual device::Program* createProgram(option::Options* options = NULL) = 0; + virtual device::Program* createProgram(amd::Program& owner, option::Options* options = NULL) = 0; //! Allocate a chunk of device memory as a cache for a CL memory object virtual device::Memory* createMemory(Memory& owner) const = 0; diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp index 1bc93b23b6..2b400778d4 100644 --- a/projects/clr/rocclr/runtime/device/devprogram.cpp +++ b/projects/clr/rocclr/runtime/device/devprogram.cpp @@ -65,8 +65,9 @@ inline static std::vector splitSpaceSeparatedString(const char *str } // ================================================================================================ -Program::Program(amd::Device& device) +Program::Program(amd::Device& device, amd::Program& owner) : device_(device), + owner_(owner), type_(TYPE_NONE), flags_(0), clBinary_(nullptr), @@ -94,6 +95,12 @@ Program::Program(amd::Device& device) // ================================================================================================ Program::~Program() { clear(); + + /* Delete the undefined memory object */ + for (auto it = undef_mem_obj_.begin(); it != undef_mem_obj_.end(); ++it) { + (*it)->release(); + } + #if defined(USE_COMGR_LIBRARY) for (auto const& kernelMeta : kernelMetadataMap_) { amd::Comgr::destroy_metadata(kernelMeta.second); @@ -3076,10 +3083,18 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { #if defined(USE_COMGR_LIBRARY) amd_comgr_status_t getSymbolFromModule(amd_comgr_symbol_t symbol, void* userData) { - size_t nlen; + size_t nlen = 0; + size_t* userDataInfo = nullptr; amd_comgr_status_t status; amd_comgr_symbol_type_t type; - std::vector* var_names = reinterpret_cast*>(userData); + std::vector* var_names = nullptr; + + /* Unpack the user data */ + SymbolInfo* sym_info = reinterpret_cast(userData); + + if (!sym_info) { + return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT; + } /* Retrieve the symbol info */ status = amd::Comgr::symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME_LENGTH, &nlen); @@ -3101,43 +3116,127 @@ amd_comgr_status_t getSymbolFromModule(amd_comgr_symbol_t symbol, void* userData } /* If symbol type is object(Variable) add it to vector */ - if (type == AMD_COMGR_SYMBOL_TYPE_OBJECT) { - var_names->push_back(std::string(name)); + if ((std::strcmp(name, "") != 0) && (type == sym_info->sym_type)) { + sym_info->var_names->push_back(std::string(name)); } delete[] name; return status; } -#endif /* USE_COMGR_LIBRARY */ -bool Program::getGlobalSymbolsFromCodeObj(std::vector* var_names) const { -#if defined(USE_COMGR_LIBRARY) +bool Program::getSymbolsFromCodeObj(std::vector* var_names, amd_comgr_symbol_type_t sym_type) const { amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS; amd_comgr_data_t dataObject; + SymbolInfo sym_info; + bool ret_val = true; - /* Create comgr data */ - status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &dataObject); - if (status != AMD_COMGR_STATUS_SUCCESS) { - buildLog_ += "COMGR: Cannot create comgr data \n"; - return false; - } + do { + /* Create comgr data */ + status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &dataObject); + if (status != AMD_COMGR_STATUS_SUCCESS) { + buildLog_ += "COMGR: Cannot create comgr data \n"; + ret_val = false; + break; + } - /* Set the binary as a dataObject */ - status = amd::Comgr::set_data(dataObject,static_cast(clBinary_->data().second), - reinterpret_cast(clBinary_->data().first)); - if (status != AMD_COMGR_STATUS_SUCCESS) { - buildLog_ += "COMGR: Cannot set comgr data \n"; - return false; - } + /* Set the binary as a dataObject */ + status = amd::Comgr::set_data(dataObject,static_cast(clBinary_->data().second), + reinterpret_cast(clBinary_->data().first)); + if (status != AMD_COMGR_STATUS_SUCCESS) { + buildLog_ += "COMGR: Cannot set comgr data \n"; + ret_val = false; + break; + } + + /* Pack the user data */ + sym_info.sym_type = sym_type; + sym_info.var_names = var_names; /* Iterate through list of symbols */ - status = amd::Comgr::iterate_symbols(dataObject, getSymbolFromModule, var_names); - if (status != AMD_COMGR_STATUS_SUCCESS) { - buildLog_ += "COMGR: Cannot iterate comgr symbols \n"; + status = amd::Comgr::iterate_symbols(dataObject, getSymbolFromModule, &sym_info); + if (status != AMD_COMGR_STATUS_SUCCESS) { + buildLog_ += "COMGR: Cannot iterate comgr symbols \n"; + ret_val = false; + break; + } + } while (0); + + return ret_val; +} +#endif /* USE_COMGR_LIBRARY */ + +bool Program::getGlobalVarFromCodeObj(std::vector* var_names) const { +#if defined(USE_COMGR_LIBRARY) + return getSymbolsFromCodeObj(var_names, AMD_COMGR_SYMBOL_TYPE_OBJECT); +#else + return true; +#endif +} + +bool Program::getUndefinedVarFromCodeObj(std::vector* var_names) const { +#if defined(USE_COMGR_LIBRARY) + return getSymbolsFromCodeObj(var_names, AMD_COMGR_SYMBOL_TYPE_NOTYPE); +#else + return true; +#endif +} + +bool Program::getUndefinedVarInfo(std::string var_name, void** var_addr, size_t* var_size) { + return owner()->varcallback(as_cl(owner()), var_name.c_str(), var_addr, var_size); +} + +bool Program::defineUndefinedVars() { + size_t address = 0; + size_t hsize = 0; + void* dptr = nullptr; + void* hptr = nullptr; + device::Memory* dev_mem = nullptr; + amd::Memory* amd_mem_obj = nullptr; + std::vector var_names; + + if (!getUndefinedVarFromCodeObj(&var_names)) { return false; } -#endif /* USE_COMGR_LIBRARY */ + + for (auto it = var_names.begin(); it != var_names.end(); ++it) { + if (!getUndefinedVarInfo(*it, &hptr, &hsize)) { + continue; + } + + amd_mem_obj = new (device().GlbCtx()) amd::Buffer(device().GlbCtx(), + CL_MEM_USE_HOST_PTR, hsize); + if (amd_mem_obj == nullptr) { + LogError("[OCL] failed to create a mem object!"); + return false; + } + + if (!amd_mem_obj->create(hptr)) { + LogError("[OCL] failed to create a svm hidden buffer!"); + amd_mem_obj->release(); + return false; + } + + undef_mem_obj_.push_back(amd_mem_obj); + + dev_mem = amd_mem_obj->getDeviceMemory(device()); + if (dev_mem == nullptr) { + LogError("[OCL] failed to create a mem object!"); + return false; + } + + dptr = reinterpret_cast(dev_mem->virtualAddress()); + if (dev_mem == nullptr) { + LogError("[OCL] failed to create a mem object!"); + return false; + } + + if(!defineGlobalVar(it->c_str(), dptr)) { + LogError("[OCL] failed to define global var"); + return false; + } + } return true; } -} + +} /* namespace device*/ diff --git a/projects/clr/rocclr/runtime/device/devprogram.hpp b/projects/clr/rocclr/runtime/device/devprogram.hpp index 31b6086621..701fd22fd2 100644 --- a/projects/clr/rocclr/runtime/device/devprogram.hpp +++ b/projects/clr/rocclr/runtime/device/devprogram.hpp @@ -62,6 +62,11 @@ namespace device { class ClBinary; class Kernel; +struct SymbolInfo { + size_t sym_type; + std::vector* var_names; +}; + //! A program object for a specific device. class Program : public amd::HeapObject { public: @@ -79,6 +84,7 @@ class Program : public amd::HeapObject { private: //! The device target for this binary. amd::SharedReference device_; + amd::Program& owner_; //!< owner of this program kernels_t kernels_; //!< The kernel entry points this binary. type_t type_; //!< type of this program @@ -125,9 +131,11 @@ class Program : public amd::HeapObject { CodeObjectMD* metadata_; //!< Runtime metadata #endif + std::vector undef_mem_obj_; + public: //! Construct a section. - Program(amd::Device& device); + Program(amd::Device& device, amd::Program& owner); //! Destroy this binary image. virtual ~Program(); @@ -135,6 +143,8 @@ class Program : public amd::HeapObject { //! Destroy all the kernels void clear(); + amd::Program* owner() const { return &owner_; } + //! Return the compiler options passed to build this program amd::option::Options* getCompilerOptions() const { return programOptions_; } @@ -228,7 +238,8 @@ class Program : public amd::HeapObject { //! Check if SRAM ECC is enable const bool sramEccEnable() const { return (sramEccEnabled_ == 1); } - virtual bool getGlobalSymbolsFromCodeObj(std::vector* var_names) const; + bool getGlobalVarFromCodeObj(std::vector* var_names) const; + bool getUndefinedVarFromCodeObj(std::vector* var_names) const; virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** dptr, size_t* bytes, const char* globalName) const { @@ -316,6 +327,17 @@ class Program : public amd::HeapObject { bool isElf(const char* bin) const { return amd::isElfMagic(bin); } + virtual bool defineGlobalVar(const char* name, void* dptr) { + ShouldNotReachHere(); + return false; + } + +#if defined(USE_COMGR_LIBRARY) + bool getSymbolsFromCodeObj(std::vector* var_names, amd_comgr_symbol_type_t sym_type) const; +#endif + bool getUndefinedVarInfo(std::string var_name, void** var_addr, size_t* var_size); + bool defineUndefinedVars(); + private: //! Compile the device program with LC path bool compileImplLC(const std::string& sourceCode, diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index 1671e92e12..2ed35fef6c 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -296,11 +296,11 @@ bool NullDevice::isHsailProgram(amd::option::Options* options) { return true; } -device::Program* NullDevice::createProgram(amd::option::Options* options) { +device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) { if (isHsailProgram(options)) { - return new HSAILProgram(*this); + return new HSAILProgram(*this, owner); } - return new NullProgram(*this); + return new NullProgram(*this, owner); } void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInfo& memInfo, @@ -1148,11 +1148,11 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) { } } -device::Program* Device::createProgram(amd::option::Options* options) { +device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) { if (isHsailProgram(options)) { - return new HSAILProgram(*this); + return new HSAILProgram(*this, owner); } - return new Program(*this); + return new Program(*this, owner); } //! Requested devices list as configured by the GPU_DEVICE_ORDINAL diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp index e4134f674f..d0c473f496 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp @@ -59,7 +59,7 @@ class NullDevice : public amd::Device { } //! Create the device program. - virtual device::Program* createProgram(amd::option::Options* options = NULL); + virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL); //! Just returns NULL for the dummy device virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; } @@ -398,7 +398,7 @@ class Device : public NullDevice, public CALGSLDevice { ) const; //! Create the device program. - virtual device::Program* createProgram(amd::option::Options* options = NULL); + virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL); //! Attempt to bind with external graphics API's device/context virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext, diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp index 2e4f86e6e1..5e977eb96c 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp @@ -1483,8 +1483,8 @@ bool Program::loadBinary(bool* hasRecompile) { return false; } -HSAILProgram::HSAILProgram(Device& device) - : Program(device), +HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) + : Program(device, owner), rawBinary_(NULL), kernels_(NULL), maxScratchRegs_(0), @@ -1494,8 +1494,8 @@ HSAILProgram::HSAILProgram(Device& device) loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } -HSAILProgram::HSAILProgram(NullDevice& device) - : Program(device), +HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) + : Program(device, owner), rawBinary_(NULL), kernels_(NULL), maxScratchRegs_(0), diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp index ead578304a..79517a9f55 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp @@ -107,7 +107,8 @@ class NullProgram : public device::Program { public: //! Default constructor - NullProgram(NullDevice& nullDev) : device::Program(nullDev), patch_(0) {} + NullProgram(NullDevice& nullDev, amd::Program& owner) + : device::Program(nullDev, owner), patch_(0) {} //! Default destructor ~NullProgram(); @@ -286,7 +287,7 @@ class NullProgram : public device::Program { class Program : public NullProgram { public: //! GPU program constructor - Program(Device& gpuDev) : NullProgram(gpuDev), glbData_(NULL) {} + Program(Device& gpuDev, amd::Program& owner) : NullProgram(gpuDev, owner), glbData_(NULL) {} //! GPU program destructor ~Program(); @@ -441,8 +442,8 @@ class HSAILProgram : public device::Program { public: //! Default constructor - HSAILProgram(Device& device); - HSAILProgram(NullDevice& device); + HSAILProgram(Device& device, amd::Program& owner); + HSAILProgram(NullDevice& device, amd::Program& owner); //! Default destructor ~HSAILProgram(); diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index 7d6c2c94ff..914e588b2f 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -344,12 +344,12 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, return true; } -device::Program* NullDevice::createProgram(amd::option::Options* options) { +device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) { device::Program* program; if (settings().useLightning_) { - program = new LightningProgram(*this); + program = new LightningProgram(*this, owner); } else { - program = new HSAILProgram(*this); + program = new HSAILProgram(*this, owner); } if (program == nullptr) { @@ -1236,12 +1236,12 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) { } } -device::Program* Device::createProgram(amd::option::Options* options) { +device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) { device::Program* program; if (settings().useLightning_) { - program = new LightningProgram(*this); + program = new LightningProgram(*this, owner); } else { - program = new HSAILProgram(*this); + program = new HSAILProgram(*this, owner); } if (program == nullptr) { LogError("We failed memory allocation for program!"); diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp index e04abfba94..1e662fea8b 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp @@ -57,7 +57,7 @@ class NullDevice : public amd::Device { } //! Compile the given source code. - virtual device::Program* createProgram(amd::option::Options* options = NULL); + virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL); //! Just returns NULL for the dummy device virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; } @@ -331,7 +331,7 @@ class Device : public NullDevice { ) const; //! Create the device program. - virtual device::Program* createProgram(amd::option::Options* options = NULL); + virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL); //! Attempt to bind with external graphics API's device/context virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext, diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp index 0c9553f67d..ad5f9373bf 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp @@ -162,8 +162,8 @@ bool Segment::freeze(bool destroySysmem) { } const static char* Carrizo = "Carrizo"; -HSAILProgram::HSAILProgram(Device& device) - : Program(device), +HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) + : Program(device, owner), rawBinary_(nullptr), kernels_(nullptr), codeSegGpu_(nullptr), @@ -181,8 +181,8 @@ HSAILProgram::HSAILProgram(Device& device) loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } -HSAILProgram::HSAILProgram(NullDevice& device) - : Program(device), +HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) + : Program(device, owner), rawBinary_(nullptr), kernels_(nullptr), codeSegGpu_(nullptr), @@ -250,6 +250,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_ hsa_code_object_t code_object; code_object.handle = reinterpret_cast(binary); + defineUndefinedVars(); + hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr); if (status != HSA_STATUS_SUCCESS) { buildLog_ += "Error: AMD HSA Code Object loading failed.\n"; @@ -376,6 +378,20 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) { return true; } +bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) { + hsa_status_t hsa_status = HSA_STATUS_SUCCESS; + hsa_agent_t agent; + + agent.handle = 1; + hsa_status = executable_->DefineAgentExternalVariable(name, agent, HSA_VARIABLE_SEGMENT_GLOBAL, dptr); + if(HSA_STATUS_SUCCESS != hsa_status) { + buildLog_ += "Could not define Program External Variable"; + buildLog_ += "\n"; + } + + return (hsa_status == HSA_STATUS_SUCCESS); +} + bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes, const char* global_name) const { uint32_t length = 0; @@ -729,6 +745,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s hsa_code_object_t code_object; code_object.handle = reinterpret_cast(binary); + defineUndefinedVars(); + hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr); if (status != HSA_STATUS_SUCCESS) { buildLog_ += "Error: AMD HSA Code Object loading failed.\n"; diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.hpp b/projects/clr/rocclr/runtime/device/pal/palprogram.hpp index ddc41c0c1d..e21b929462 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprogram.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palprogram.hpp @@ -126,8 +126,8 @@ class HSAILProgram : public device::Program { public: //! Default constructor - HSAILProgram(Device& device); - HSAILProgram(NullDevice& device); + HSAILProgram(Device& device, amd::Program& owner); + HSAILProgram(NullDevice& device, amd::Program& owner); //! Default destructor virtual ~HSAILProgram(); @@ -187,6 +187,7 @@ class HSAILProgram : public device::Program { } } + virtual bool defineGlobalVar(const char* name, void* dptr); virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** dptr, size_t* bytes, const char* globalName) const; @@ -218,13 +219,13 @@ class HSAILProgram : public device::Program { //! \class Lightning Compiler Program class LightningProgram : public HSAILProgram { public: - LightningProgram(NullDevice& device) : HSAILProgram(device) { + LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) { isLC_ = true; xnackEnabled_ = dev().hwInfo()->xnackEnabled_; machineTarget_ = dev().hwInfo()->machineTargetLC_; } - LightningProgram(Device& device) : HSAILProgram(device) { + LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) { isLC_ = true; xnackEnabled_ = dev().hwInfo()->xnackEnabled_; machineTarget_ = dev().hwInfo()->machineTargetLC_; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp index 8c4f2e2bf4..8e6d2be1c1 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp @@ -772,12 +772,12 @@ bool Device::create(bool sramEccEnabled) { return true; } -device::Program* NullDevice::createProgram(amd::option::Options* options) { +device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) { device::Program* program; if (settings().useLightning_) { - program = new LightningProgram(*this); + program = new LightningProgram(*this, owner); } else { - program = new HSAILProgram(*this); + program = new HSAILProgram(*this, owner); } if (program == nullptr) { @@ -819,12 +819,12 @@ void Device::ReleaseExclusiveGpuAccess(VirtualGPU& vgpu) const { vgpusAccess().unlock(); } -device::Program* Device::createProgram(amd::option::Options* options) { +device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) { device::Program* program; if (settings().useLightning_) { - program = new LightningProgram(*this); + program = new LightningProgram(*this, owner); } else { - program = new HSAILProgram(*this); + program = new HSAILProgram(*this, owner); } if (program == nullptr) { diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp index 2ec5b65273..e3e0ff5c36 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.hpp @@ -107,7 +107,7 @@ class NullDevice : public amd::Device { const Settings& settings() const { return reinterpret_cast(*settings_); } //! Construct an HSAIL program object from the ELF assuming it is valid - virtual device::Program* createProgram(amd::option::Options* options = nullptr); + virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr); const AMDDeviceInfo& deviceInfo() const { return deviceInfo_; } //! Gets the backend device for the Null device type virtual hsa_agent_t getBackendDevice() const { @@ -300,7 +300,7 @@ class Device : public NullDevice { virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = nullptr); //! Construct an HSAIL program object from the ELF assuming it is valid - virtual device::Program* createProgram(amd::option::Options* options = nullptr); + virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr); virtual device::Memory* createMemory(amd::Memory& owner) const; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp index 0155dc2d18..e02c7f97c1 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp @@ -74,7 +74,7 @@ Program::~Program() { releaseClBinary(); } -Program::Program(roc::NullDevice& device) : device::Program(device) { +Program::Program(roc::NullDevice& device, amd::Program& owner) : device::Program(device, owner) { hsaExecutable_.handle = 0; hsaCodeObjectReader_.handle = 0; } @@ -113,6 +113,21 @@ bool Program::initClBinary(char* binaryIn, size_t size) { return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr)); } + +bool Program::defineGlobalVar(const char* name, void* dptr) { + hsa_status_t status = HSA_STATUS_SUCCESS; + hsa_agent_t hsa_device = dev().getBackendDevice(); + + status = hsa_executable_agent_global_variable_define(hsaExecutable_, hsa_device, name, dptr); + if (status != HSA_STATUS_SUCCESS) { + buildLog_ += "Error: Could not define global variable : "; + buildLog_ += hsa_strerror(status); + buildLog_ += "\n"; + } + + return (status == HSA_STATUS_SUCCESS); +} + bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes, const char* global_name) const { hsa_status_t status = HSA_STATUS_SUCCESS; @@ -197,7 +212,7 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, return true; } -HSAILProgram::HSAILProgram(roc::NullDevice& device) : roc::Program(device) { +HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) { xnackEnabled_ = dev().settings().enableXNACK_; sramEccEnabled_ = dev().info().sramEccEnabled_; machineTarget_ = dev().deviceInfo().complibTarget_; @@ -258,6 +273,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_ return false; } + defineUndefinedVars(); + // Load the code object. hsa_code_object_reader_t codeObjectReader; status = hsa_code_object_reader_create_from_memory(data, secSize, &codeObjectReader); @@ -402,8 +419,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_ } -LightningProgram::LightningProgram(roc::NullDevice& device) - : roc::Program(device) { +LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner) + : roc::Program(device, owner) { isLC_ = true; xnackEnabled_ = dev().settings().enableXNACK_; sramEccEnabled_ = dev().info().sramEccEnabled_; @@ -461,6 +478,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s return false; } + defineUndefinedVars(); + // Load the code object. status = hsa_code_object_reader_create_from_memory(binary, binSize, &hsaCodeObjectReader_); if (status != HSA_STATUS_SUCCESS) { diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp b/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp index 2831460472..40cdda596e 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp @@ -24,7 +24,7 @@ class Program : public device::Program { public: //! Default constructor - Program(roc::NullDevice& device); + Program(roc::NullDevice& device, amd::Program& owner); //! Default destructor ~Program(); @@ -59,6 +59,7 @@ class Program : public device::Program { //! Disable operator= Program& operator=(const Program&) = delete; + virtual bool defineGlobalVar(const char* name, void* dptr); protected: /* HSA executable */ hsa_executable_t hsaExecutable_; //!< Handle to HSA executable @@ -67,7 +68,7 @@ protected: class HSAILProgram : public roc::Program { public: - HSAILProgram(roc::NullDevice& device); + HSAILProgram(roc::NullDevice& device, amd::Program& owner); virtual ~HSAILProgram(); protected: @@ -83,7 +84,7 @@ private: class LightningProgram : public roc::Program { public: - LightningProgram(roc::NullDevice& device); + LightningProgram(roc::NullDevice& device, amd::Program& owner); virtual ~LightningProgram() {} protected: diff --git a/projects/clr/rocclr/runtime/platform/program.cpp b/projects/clr/rocclr/runtime/platform/program.cpp index 4b328000da..74a9f6f2cb 100644 --- a/projects/clr/rocclr/runtime/platform/program.cpp +++ b/projects/clr/rocclr/runtime/platform/program.cpp @@ -125,7 +125,7 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt } #endif // defined(WITH_COMPILER_LIB) options->oVariables->BinaryIsSpirv = language_ == SPIRV; - device::Program* program = rootDev.createProgram(options); + device::Program* program = rootDev.createProgram(*this, options); if (program == NULL) { return CL_OUT_OF_HOST_MEMORY; } diff --git a/projects/clr/rocclr/runtime/platform/program.hpp b/projects/clr/rocclr/runtime/platform/program.hpp index dda488c90c..2a73f8a94b 100644 --- a/projects/clr/rocclr/runtime/platform/program.hpp +++ b/projects/clr/rocclr/runtime/platform/program.hpp @@ -75,6 +75,10 @@ class Program : public RuntimeObject { SPIRV, Assembly }; + + typedef bool(CL_CALLBACK* VarInfoCallback)(cl_program, std::string, void**, size_t*); + VarInfoCallback varcallback; + private: //! Replaces the compiled program with the new version from HD void StubProgramSource(const std::string& app_name); @@ -179,6 +183,10 @@ class Program : public RuntimeObject { static bool ParseAllOptions(const std::string& options, option::Options& parsedOptions, bool optionChangable = true, bool linkOptsOnly = false); + + void setVarInfoCallBack(VarInfoCallback callback) { + varcallback = callback; + } }; /*! @}