From 20f05c4228a73684ee3b3fb938c2da2684fe1bae Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Mon, 18 May 2020 22:40:33 -0400 Subject: [PATCH] SWDEV-236178 - Reorganizing Platform/Modules code for easy access. Change-Id: Ie8920260ffc4ff01e44b48af8cec9ea5aed1aa9b --- rocclr/CMakeLists.txt | 2 + rocclr/hip_code_object.cpp | 458 ++++++++++++ rocclr/hip_code_object.hpp | 132 ++++ rocclr/hip_context.cpp | 1 + rocclr/hip_fatbin.hpp | 29 + rocclr/hip_global.cpp | 202 ++++++ rocclr/hip_global.hpp | 116 +++ rocclr/hip_intercept.cpp | 6 +- rocclr/hip_internal.hpp | 165 +---- rocclr/hip_memory.cpp | 53 +- rocclr/hip_module.cpp | 237 +----- rocclr/hip_platform.cpp | 835 ++++++---------------- rocclr/hip_platform.hpp | 66 +- rocclr/hip_texture.cpp | 54 +- tests/src/runtimeApi/module/hipModule.cpp | 3 +- 15 files changed, 1312 insertions(+), 1047 deletions(-) mode change 100644 => 100755 rocclr/CMakeLists.txt create mode 100755 rocclr/hip_code_object.cpp create mode 100755 rocclr/hip_code_object.hpp create mode 100755 rocclr/hip_fatbin.hpp create mode 100755 rocclr/hip_global.cpp create mode 100755 rocclr/hip_global.hpp mode change 100644 => 100755 rocclr/hip_intercept.cpp mode change 100644 => 100755 rocclr/hip_platform.hpp mode change 100644 => 100755 tests/src/runtimeApi/module/hipModule.cpp diff --git a/rocclr/CMakeLists.txt b/rocclr/CMakeLists.txt old mode 100644 new mode 100755 index 37135af81d..252ac5abfa --- a/rocclr/CMakeLists.txt +++ b/rocclr/CMakeLists.txt @@ -75,10 +75,12 @@ add_definitions(-DBSD_LIBELF) add_library(hip64 OBJECT hip_context.cpp + hip_code_object.cpp hip_device.cpp hip_device_runtime.cpp hip_error.cpp hip_event.cpp + hip_global.cpp hip_memory.cpp hip_module.cpp hip_peer.cpp diff --git a/rocclr/hip_code_object.cpp b/rocclr/hip_code_object.cpp new file mode 100755 index 0000000000..bb957b8cf3 --- /dev/null +++ b/rocclr/hip_code_object.cpp @@ -0,0 +1,458 @@ +#include "hip_code_object.hpp" + +#include + +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "platform/program.hpp" + +namespace hip { + +uint64_t CodeObject::ElfSize(const void *emi) { + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; + const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); + + uint64_t max_offset = ehdr->e_shoff; + uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; + + for (uint16_t i=0; i < ehdr->e_shnum; ++i){ + uint64_t cur_offset = static_cast(shdr[i].sh_offset); + if (max_offset < cur_offset) { + max_offset = cur_offset; + total_size = max_offset; + if(SHT_NOBITS != shdr[i].sh_type) { + total_size += static_cast(shdr[i].sh_size); + } + } + } + return total_size; +} + +bool CodeObject::isCompatibleCodeObject(const std::string& codeobj_target_id, + const char* device_name) { + // Workaround for device name mismatch. + // Device name may contain feature strings delimited by '+', e.g. + // gfx900+xnack. Currently HIP-Clang does not include feature strings + // in code object target id in fat binary. Therefore drop the feature + // strings from device name before comparing it with code object target id. + std::string short_name(device_name); + auto feature_loc = short_name.find('+'); + if (feature_loc != std::string::npos) { + short_name.erase(feature_loc); + } + return codeobj_target_id == short_name; +} + +hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data, + const std::vector& devices, + std::vector>& code_objs) { + std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return hipErrorInvalidKernelFile; + } + + code_objs.resize(devices.size()); + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + unsigned num_code_objs = 0; + for (uint64_t i = 0; i < obheader->numBundles; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { + + std::size_t offset = 0; + if (!std::strncmp(desc->triple, HIP_AMDGCN_AMDHSA_TRIPLE, + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1)) { + offset = sizeof(HIP_AMDGCN_AMDHSA_TRIPLE); //For code objects created by CLang + } else if (!std::strncmp(desc->triple, HCC_AMDGCN_AMDHSA_TRIPLE, + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1)) { + offset = sizeof(HCC_AMDGCN_AMDHSA_TRIPLE); //For code objects created by Hcc + } else { + continue; + } + std::string target(desc->triple + offset, desc->tripleSize - offset); + + const void *image = reinterpret_cast( + reinterpret_cast(obheader) + desc->offset); + size_t size = desc->size; + + for (size_t dev = 0; dev < devices.size(); ++dev) { + const char* name = devices[dev]; + + if (!isCompatibleCodeObject(target, name)) { + continue; + } + code_objs[dev] = std::make_pair(image, size); + num_code_objs++; + } + } + if (num_code_objs == devices.size()) { + return hipSuccess; + } else { + DevLogError("hipErrorNoBinaryForGpu: Coudn't find binary for current devices!"); + guarantee(false); + return hipErrorNoBinaryForGpu; + } +} + +hipError_t CodeObject::add_program(int deviceId, hipModule_t hmod, const void* binary_ptr, + size_t binary_size) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + amd::Context* ctx = g_devices[deviceId]->asContext(); + if (CL_SUCCESS != program->addDeviceProgram(*ctx->devices()[0], binary_ptr, + binary_size, false)) { + return hipErrorNotFound; + } + return hipSuccess; +} + +hipError_t CodeObject::build_module(hipModule_t hmod, const std::vector& devices) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + program->setVarInfoCallBack(&getSvarInfo); + if (CL_SUCCESS != program->build(devices, nullptr, nullptr, nullptr, kOptionChangeable, kNewDevProg)) { + DevLogPrintfError("Build error for module: 0x%x \n", hmod); + return hipErrorSharedObjectInitFailed; + } + return hipSuccess; +} + +DynCO::DynCO(): program_(nullptr) {} + +hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { + + amd::ScopedLock lock(dclock_); + + const void *mmap_ptr = nullptr; + size_t mmap_size = 0; + + guarantee(fname || image); + if (fname != nullptr) { + /* We are given file name */ + + if (!amd::Os::MemoryMapFile(fname, &mmap_ptr, &mmap_size)) { + return hipErrorFileNotFound; + } + } else if (image != nullptr) { + /*We are directly given image pointer directly */ + mmap_ptr = image; + } else { + return hipErrorMissingConfiguration; + } + + return loadCodeObjectData(mmap_ptr, mmap_size); +} + +//Dynamic Code Object +DynCO::~DynCO() { + amd::ScopedLock lock(dclock_); + + if (program_ != nullptr) { + program_->release(); + program_ = nullptr; + } + + for (auto& elem : vars_) { + delete elem.second; + } + vars_.clear(); + + for (auto& elem : functions_) { + delete elem.second; + } + functions_.clear(); +} + +hipError_t DynCO::getDeviceVar(DeviceVar** dvar, std::string var_name, int device_id) { + amd::ScopedLock lock(dclock_); + + auto it = vars_.find(var_name); + if (it == vars_.end()) { + DevLogPrintfError("Cannot find the Var: %s ", var_name.c_str()); + return hipErrorNotFound; + } + + it->second->getDeviceVar(dvar, device_id, module()); + return hipSuccess; +} + +hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) { + amd::ScopedLock lock(dclock_); + + auto it = functions_.find(func_name); + if (it == functions_.end()) { + DevLogPrintfError("Cannot find the function: %s ", func_name.c_str()); + return hipErrorNotFound; + } + + /* See if this could be solved */ + return it->second->getDynFunc(hfunc, reinterpret_cast(as_cl(program_))); +} + +hipError_t DynCO::loadCodeObjectData(const void* mmap_ptr, size_t mmap_size) { + + amd::ScopedLock lock(dclock_); + + /* initialize image it to the mmap_ptr, if this is of no_clang_offload + bundle then they directly pass the image */ + const void* image = mmap_ptr; + std::vector> code_objs; + hipError_t hip_error = extractCodeObjectFromFatBinary(mmap_ptr, + {hip::getCurrentDevice()->devices()[0]->info().name_}, + code_objs); + if (hip_error == hipSuccess) { + image = code_objs[0].first; + } else if(hip_error == hipErrorNoBinaryForGpu) { + return hip_error; + } + + program_ = new amd::Program(*hip::getCurrentDevice()->asContext(), + amd::Program::Language::Binary, mmap_ptr, mmap_size); + if (program_ == NULL) { + return hipErrorOutOfMemory; + } + + program_->setVarInfoCallBack(&getSvarInfo); + + if (CL_SUCCESS != program_->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, + ElfSize(image), false)) { + return hipErrorInvalidKernelFile; + } + + //This has to happen before Program has been built, other wise undef vars fail. + IHIP_RETURN_ONFAIL(populateDynGlobalVars()); + + //program->setVarInfoCallBack(&getSvarInfo); + if(CL_SUCCESS != program_->build(hip::getCurrentDevice()->devices(), nullptr, nullptr, nullptr, + kOptionChangeable, kNewDevProg)) { + return hipErrorSharedObjectInitFailed; + } + + //This has to happen after Program has been built, other wise symbolTable_ not populated. + IHIP_RETURN_ONFAIL(populateDynGlobalFuncs()); + + return hipSuccess; +} + +hipError_t DynCO::populateDynGlobalVars() { + amd::ScopedLock lock(dclock_); + + std::vector var_names; + std::vector undef_var_names; + + device::Program* dev_program + = program_->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + DevLogPrintfError("Could not get Global vars from Code Obj for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + if (!dev_program->getUndefinedVarFromCodeObj(&undef_var_names)) { + DevLogPrintfError("Could not get undefined Variables for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + for (auto& elem : var_names) { + vars_.insert(std::make_pair(elem, new Var(elem, Var::DeviceVarKind::DVK_Variable, 0, 0, 0, nullptr))); + } + + for (auto& elem : undef_var_names) { + vars_.insert(std::make_pair(elem, new Var(elem, Var::DeviceVarKind::DVK_Texture, 0, 0, 0, nullptr))); + } + + return hipSuccess; +} + +hipError_t DynCO::populateDynGlobalFuncs() { + amd::ScopedLock lock(dclock_); + + std::vector func_names; + device::Program* dev_program + = program_->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + // Get all the global func names from COMGR + if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + DevLogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + for (auto& elem : func_names) { + functions_.insert(std::make_pair(elem, new Function(elem))); + } + + return hipSuccess; +} + +//Static Code Object +StatCO::StatCO() { +} + +StatCO::~StatCO() { + amd::ScopedLock lock(sclock_); + + for (auto& elem : functions_) { + delete elem.second; + } + functions_.clear(); + + for (auto& elem : vars_) { + delete elem.second; + } + vars_.clear(); +} + +hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfoType& programs) { + amd::ScopedLock lock(sclock_); + + if (programs.size() > 0) { + return hipSuccess; + } + + std::vector> code_objs; + std::vector devices; + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + devices.push_back(g_devices[dev]->devices()[0]->info().name_); + } + + IHIP_RETURN_ONFAIL(extractCodeObjectFromFatBinary((char*)data, devices, code_objs)); + programs.resize(g_devices.size()); + + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + amd::Context* ctx = g_devices[dev]->asContext(); + amd::Program* program = new amd::Program(*ctx); + if (program == nullptr) { + return hipErrorOutOfMemory; + } + programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)), + new FatBinaryMetaInfo(false, code_objs[dev].first, code_objs[dev].second)); + } + + return hipSuccess; +} + +FatBinaryInfoType* StatCO::addFatBinary(const void* data, bool initialized) { + amd::ScopedLock lock(sclock_); + + if (initialized) { + digestFatBinary(data, modules_[data]); + } + + return &modules_[data]; +} + +hipError_t StatCO::removeFatBinary(FatBinaryInfoType* module) { + amd::ScopedLock lock(sclock_); + + auto vit = vars_.begin(); + while (vit != vars_.end()) { + if (vit->second->moduleInfo() == module) { + delete vit->second; + vit = vars_.erase(vit); + } else { + ++vit; + } + } + + auto fit = functions_.begin(); + while (fit != functions_.end()) { + if (fit->second->moduleInfo() == module) { + delete fit->second; + fit = functions_.erase(fit); + } else { + ++fit; + } + } + + auto mit = modules_.begin(); + while (mit != modules_.end()) { + if (&mit->second == module) { + for (size_t dev=0; dev < g_devices.size(); ++dev) { + delete (*module)[dev].second; + } + mit = modules_.erase(mit); + } else { + ++mit; + } + } + + return hipSuccess; +} + +hipError_t StatCO::registerStatFunction(const void* hostFunction, Function* func) { + amd::ScopedLock lock(sclock_); + + if (functions_.find(hostFunction) != functions_.end()) { + DevLogPrintfError("hostFunctionPtr: 0x%x already exists", hostFunction); + guarantee(false); + } + functions_.insert(std::make_pair(hostFunction, func)); + + return hipSuccess; +} + +hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return hipErrorInvalidSymbol; + } + + return it->second->getStatFunc(hfunc, deviceId); +} + +hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return hipErrorInvalidSymbol; + } + + return it->second->getStatFuncAttr(func_attr, deviceId); +} + +hipError_t StatCO::registerStatGlobalVar(const void* hostVar, Var* var) { + amd::ScopedLock lock(sclock_); + + if (vars_.find(hostVar) != vars_.end()) { + return hipErrorInvalidSymbol; + } + + vars_.insert(std::make_pair(hostVar, var)); + return hipSuccess; +} + +hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + amd::ScopedLock lock(sclock_); + + const auto it = vars_.find(hostVar); + if (it == vars_.end()) { + return hipErrorInvalidSymbol; + } + + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getStatDeviceVar(&dvar, deviceId)); + + *dev_ptr = dvar->device_ptr(); + *size_ptr = dvar->size(); + return hipSuccess; +} + +hipError_t StatCO::getStatGlobalVarByName(std::string hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(sclock_); + + for (auto& elem : vars_) { + if ((elem.second->name() == hostVar) + && (elem.second->module(deviceId) == hmod)) { + *dev_ptr = elem.second->device_ptr(deviceId); + *size_ptr = elem.second->device_size(deviceId); + return hipSuccess; + } + } + + return hipErrorNotFound; +} +}; //namespace: hip diff --git a/rocclr/hip_code_object.hpp b/rocclr/hip_code_object.hpp new file mode 100755 index 0000000000..38d95c0e27 --- /dev/null +++ b/rocclr/hip_code_object.hpp @@ -0,0 +1,132 @@ +#ifndef HIP_CODE_OBJECT_HPP +#define HIP_CODE_OBJECT_HPP + +#include "hip_global.hpp" + +#include + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "hip_internal.hpp" +#include "device/device.hpp" +#include "platform/program.hpp" + +//Forward Declaration for friend usage +class PlatformState; + +namespace hip { + +//Code Object base class +class CodeObject { +public: + virtual ~CodeObject() {} + + //Functions to add_dev_prog and build + static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr, + size_t binary_size); + static hipError_t build_module(hipModule_t hmod, const std::vector& devices); + + //ClangOFFLOADBundle info + #define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" + #define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa" + #define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-" + + //Clang Offload bundler description & Header + struct __ClangOffloadBundleDesc { + uint64_t offset; + uint64_t size; + uint64_t tripleSize; + const char triple[1]; + }; + + struct __ClangOffloadBundleHeader { + const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1]; + uint64_t numBundles; + __ClangOffloadBundleDesc desc[1]; + }; + +protected: + CodeObject() {} + //Given an ptr to image or file, extracts to code object + //for corresponding devices + hipError_t extractCodeObjectFromFatBinary(const void*, + const std::vector&, + std::vector>&); + + uint64_t ElfSize(const void* emi); + +private: + bool isCompatibleCodeObject(const std::string& codeobj_target_id, + const char* device_name); + + friend const std::vector& modules(); +}; + +//Dynamic Code Object +class DynCO : public CodeObject { + amd::Monitor dclock_{"Guards Static Code object", true}; + +public: + DynCO(); + virtual ~DynCO(); + + //LoadsCodeObject and its data + hipError_t loadCodeObject(const char* fname, const void* image=nullptr); + hipModule_t module() { return reinterpret_cast(as_cl(program_)); }; + + //Gets GlobalVar/Functions from a dynamically loaded code object + hipError_t getDynFunc(hipFunction_t* hfunc, std::string func_name); + hipError_t getDeviceVar(DeviceVar** dvar, std::string var_name, int deviceId); + +private: + amd::Program* program_; + + //Maps for vars/funcs, could be keyed in with std::string name + std::unordered_map functions_; + std::unordered_map vars_; + + //Load Code Object Data(Vars/UndefinedVars/Funcs) + hipError_t loadCodeObjectData(const void* mmap_ptr, size_t mmap_size); + + //Populate Global Vars/Funcs from an code object(@ module_load) + hipError_t populateDynGlobalFuncs(); + hipError_t populateDynGlobalVars(); +}; + +//Static Code Object +class StatCO: public CodeObject { + amd::Monitor sclock_{"Guards Static Code object", true}; +public: + StatCO(); + virtual ~StatCO(); + + //Add/Remove/Digest Fat Binaries passed to us from "__hipRegisterFatBinary" + FatBinaryInfoType* addFatBinary(const void* data, bool initialized); + hipError_t removeFatBinary(FatBinaryInfoType* module); + hipError_t digestFatBinary(const void* data, FatBinaryInfoType& programs); + + //Register vars/funcs given to use from __hipRegister[Var/Func] + hipError_t registerStatFunction(const void* hostFunction, Function* func); + hipError_t registerStatGlobalVar(const void* hostVar, Var* var); + + //Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise. + hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); + hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + hipError_t getStatGlobalVarByName(std::string hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + +private: + friend class ::PlatformState; + //Populated during __hipRegisterFatBinary + std::unordered_map modules_; + //Populated during __hipRegisterFuncs + std::unordered_map functions_; + //Populated during __hipRegisterVars + std::unordered_map vars_; +}; + +}; //namespace: hip + +#endif /* HIP_CODE_OBJECT_HPP */ diff --git a/rocclr/hip_context.cpp b/rocclr/hip_context.cpp index 2d0abc6add..7c42c65159 100755 --- a/rocclr/hip_context.cpp +++ b/rocclr/hip_context.cpp @@ -20,6 +20,7 @@ #include #include "hip_internal.hpp" +#include "hip_platform.hpp" #include "platform/runtime.hpp" #include "utils/flags.hpp" #include "utils/versions.hpp" diff --git a/rocclr/hip_fatbin.hpp b/rocclr/hip_fatbin.hpp new file mode 100755 index 0000000000..8f936283fb --- /dev/null +++ b/rocclr/hip_fatbin.hpp @@ -0,0 +1,29 @@ +#ifndef HIP_FAT_BINARY_HPP +#define HIP_FAT_BINARY_HPP + +namespace hip { + +class FatBinaryMetaInfo { +public: + FatBinaryMetaInfo(bool built, const void* binary_ptr, size_t binary_size): + built_(built), binary_ptr_(binary_ptr), binary_size_(binary_size) {} + ~FatBinaryMetaInfo() {} + + //Set once the mod has been built + void set_built() { built_ = true; } + + //Accessor for private vars + bool built() const { return built_; } + const void* binary_ptr() const { return binary_ptr_; } + size_t binary_size() const { return binary_size_; } +private: + bool built_; //Set when mod is built. Used in Lazy Binary + const void* binary_ptr_; //Binary image ptr + size_t binary_size_; //Binary Size +}; + +typedef std::vector> FatBinaryInfoType; + +}; /* namespace hip */ + +#endif /* HIP_FAT_BINARY_HPP */ diff --git a/rocclr/hip_global.cpp b/rocclr/hip_global.cpp new file mode 100755 index 0000000000..1bbc157d58 --- /dev/null +++ b/rocclr/hip_global.cpp @@ -0,0 +1,202 @@ +#include "hip_global.hpp" + +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_code_object.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Device Vars +DeviceVar::DeviceVar(std::string name, hipModule_t hmod) : shadowVptr(nullptr), name_(name), + amd_mem_obj_(nullptr), device_ptr_(nullptr), + size_(0) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + if (dev_program == nullptr) { + DevLogPrintfError("Cannot get Device Function for module: 0x%x \n", hmod); + guarantee(false); + } + + if(!dev_program->createGlobalVarObj(&amd_mem_obj_, &device_ptr_, &size_, name.c_str())) { + DevLogPrintfError("Cannot create Global Var obj for symbol: %s \n", name); + guarantee(false); + } + + if (amd_mem_obj_ == nullptr || device_ptr_ == nullptr) { + DevLogPrintfError("Cannot get memory for creating device Var: %s", name.c_str()); + guarantee(false); + } + + amd::MemObjMap::AddMemObj(device_ptr_, amd_mem_obj_); +} + +DeviceVar::~DeviceVar() { + if (device_ptr_ != nullptr) { + amd::MemObjMap::RemoveMemObj(device_ptr_); + amd_mem_obj_->release(); + } + + if (shadowVptr != nullptr) { + textureReference* texRef = reinterpret_cast(shadowVptr); + delete texRef; + shadowVptr = nullptr; + } + + device_ptr_ = nullptr; + size_ = 0; +} + +//Device Functions +DeviceFunc::DeviceFunc(std::string name, hipModule_t hmod) : dflock_("function lock"), + name_(name), kernel_(nullptr) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol *symbol = program->findSymbol(name.c_str()); + if (symbol == nullptr) { + DevLogPrintfError("Cannot find Symbol with name: %s \n", name); + guarantee(false); + } + + kernel_ = new amd::Kernel(*program, *symbol, name); + if (kernel_ == nullptr) { + DevLogPrintfError("Cannot create kernel with name: %s \n", name); + guarantee(false); + } +} + +DeviceFunc::~DeviceFunc() { + if (kernel_ != nullptr) { + kernel_->release(); + } +} + +//Abstract functions +Function::Function(std::string name, FatBinaryInfoType* modules) + : name_(name), modules_(modules) { + dFunc_.resize(g_devices.size()); +} + +Function::~Function() { + for (auto& elem : dFunc_) { + delete elem; + } + name_ = ""; + modules_ = nullptr; +} + +hipError_t Function::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod) { + guarantee(dFunc_.size() == g_devices.size()); + if (dFunc_[ihipGetDevice()] == nullptr) { + dFunc_[ihipGetDevice()] = new DeviceFunc(name_, hmod); + } + *hfunc = dFunc_[ihipGetDevice()]->asHipFunction(); + + return hipSuccess; +} + +hipError_t Function::getStatFunc(hipFunction_t* hfunc, int deviceId) { + guarantee(modules_ != nullptr); + guarantee(deviceId >= 0); + guarantee(deviceId < modules_->size()); + + hipModule_t module = (*modules_)[deviceId].first; + FatBinaryMetaInfo* fb_meta = (*modules_)[deviceId].second; + + if (!fb_meta->built()) { + IHIP_RETURN_ONFAIL(CodeObject::add_program(deviceId, module, fb_meta->binary_ptr(), + fb_meta->binary_size())); + IHIP_RETURN_ONFAIL(CodeObject::build_module(module, g_devices[deviceId]->devices())); + fb_meta->set_built(); + } + + if (dFunc_[deviceId] == nullptr) { + dFunc_[deviceId] = new DeviceFunc(name_, (*modules_)[deviceId].first); + } + *hfunc = dFunc_[deviceId]->asHipFunction(); + + return hipSuccess; +} + +hipError_t Function::getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId) { + guarantee(modules_ != nullptr); + guarantee(deviceId >= 0); + guarantee(deviceId < modules_->size()); + + hipModule_t module = (*modules_)[deviceId].first; + FatBinaryMetaInfo* fb_meta = (*modules_)[deviceId].second; + + if (!fb_meta->built()) { + IHIP_RETURN_ONFAIL(CodeObject::add_program(deviceId, module, fb_meta->binary_ptr(), + fb_meta->binary_size())); + IHIP_RETURN_ONFAIL(CodeObject::build_module(module, g_devices[deviceId]->devices())); + fb_meta->set_built(); + } + + if (dFunc_[deviceId] == nullptr) { + dFunc_[deviceId] = new DeviceFunc(name_, (*modules_)[deviceId].first); + } + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + amd::Kernel* kernel = dFunc_[deviceId]->kernel(); + const device::Kernel::WorkGroupInfo* wginfo = kernel->getDeviceKernel(*devices[deviceId])->workGroupInfo(); + func_attr->localSizeBytes = wginfo->privateMemSize_; + func_attr->sharedSizeBytes = wginfo->localMemSize_; + func_attr->maxDynamicSharedSizeBytes = wginfo->availableLDSSize_ - wginfo->localMemSize_; + func_attr->maxThreadsPerBlock = wginfo->size_; + func_attr->numRegs = wginfo->usedVGPRs_; + + return hipSuccess; +} + +//Abstract Vars +Var::Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, + FatBinaryInfoType* modules) : name_(name), dVarKind_(dVarKind), size_(size), + type_(type), norm_(norm), modules_(modules) { + dVar_.resize(g_devices.size()); +} + +Var::~Var() { + for (auto& elem : dVar_) { + delete elem; + } + modules_ = nullptr; +} + +hipError_t Var::getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod) { + guarantee(deviceId >= 0); + guarantee(deviceId < g_devices.size()); + guarantee(dVar_.size() == g_devices.size()); + + if (dVar_[deviceId] == nullptr) { + dVar_[deviceId] = new DeviceVar(name_, hmod); + } + + *dvar = dVar_[deviceId]; + return hipSuccess; +} + +hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) { + guarantee(deviceId >= 0); + guarantee(deviceId < g_devices.size()); + + hipModule_t module = (*modules_)[deviceId].first; + FatBinaryMetaInfo* fb_meta = (*modules_)[deviceId].second; + + if (!fb_meta->built()) { + IHIP_RETURN_ONFAIL(CodeObject::add_program(deviceId, module, fb_meta->binary_ptr(), + fb_meta->binary_size())); + IHIP_RETURN_ONFAIL(CodeObject::build_module(module, g_devices[deviceId]->devices())); + fb_meta->set_built(); + } + + if (dVar_[deviceId] == nullptr) { + dVar_[deviceId] = new DeviceVar(name_, (*modules_)[deviceId].first); + } + + *dvar = dVar_[deviceId]; + return hipSuccess; +} + +}; //namespace: hip diff --git a/rocclr/hip_global.hpp b/rocclr/hip_global.hpp new file mode 100755 index 0000000000..52274be51a --- /dev/null +++ b/rocclr/hip_global.hpp @@ -0,0 +1,116 @@ +#ifndef HIP_GLOBAL_HPP +#define HIP_GLOBAL_HPP + +#include +#include + +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_fatbin.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Forward Declaration +class CodeObject; + +//Device Structures +class DeviceVar { +public: + DeviceVar(std::string name, hipModule_t hmod); + ~DeviceVar(); + + //Accessors for device ptr and size, populated during constructor. + hipDeviceptr_t device_ptr() const { return device_ptr_; } + size_t size() const { return size_; } + std::string name() const { return name_; } + void* shadowVptr; + +private: + std::string name_; //Name of the var + amd::Memory* amd_mem_obj_; //amd_mem_obj abstraction + hipDeviceptr_t device_ptr_; //Device Pointer + size_t size_; //Size of the var +}; + +class DeviceFunc { +public: + DeviceFunc(std::string name, hipModule_t hmod); + ~DeviceFunc(); + + amd::Monitor dflock_; + + //Converts DeviceFunc to hipFunction_t(used by app) and vice versa. + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + static DeviceFunc* asFunction(hipFunction_t f) { return reinterpret_cast(f); } + + //Accessor for kernel_ and name_ populated during constructor. + std::string name() const { return name_; } + amd::Kernel* kernel() const { return kernel_; } + +private: + std::string name_; //name of the func(not unique identifier) + amd::Kernel* kernel_; //Kernel ptr referencing to ROCclr Symbol +}; + +//Abstract Structures +class Function { +public: + Function(std::string name, FatBinaryInfoType* modules=nullptr); + ~Function(); + + //Return DeviceFunc for this this dynamically loaded module + hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod); + + //Return Device Func & attr . Generate/build if not already done so. + hipError_t getStatFunc(hipFunction_t *hfunc, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId); + void resize_dFunc(size_t size) { dFunc_.resize(size); } + FatBinaryInfoType* moduleInfo() { return modules_; }; + +private: + std::vector dFunc_; //DeviceFuncObj per Device + std::string name_; //name of the func(not unique identifier) + FatBinaryInfoType* modules_; // static module where it is referenced +}; + +class Var { +public: + //Types of variable + enum DeviceVarKind { + DVK_Variable = 0, + DVK_Surface, + DVK_Texture + }; + + Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, + FatBinaryInfoType* modules = nullptr); + ~Var(); + + //Return DeviceVar for this dynamically loaded module + hipError_t getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod); + + //Return DeviceVar for module Generate/build if not already done so. + hipError_t getStatDeviceVar(DeviceVar** dvar, int deviceId); + void resize_dVar(size_t size) { dVar_.resize(size); } + + //Accessor for device_ptrs. + std::string name() const { return name_; } + hipModule_t module(int deviceId) const { return (*modules_)[deviceId].first; } + hipDeviceptr_t device_ptr(int deviceId) const { return dVar_[deviceId]->device_ptr(); } + size_t device_size(int deviceId) const { return dVar_[deviceId]->size(); } + FatBinaryInfoType* moduleInfo() { return modules_; }; + +private: + std::vector dVar_; // DeviceVarObj per Device + std::string name_; // Variable name (not unique identifier) + DeviceVarKind dVarKind_; // Variable kind + size_t size_; // Size of the variable + int type_; // Type(Textures/Surfaces only) + int norm_; // Type(Textures/Surfaces only) + FatBinaryInfoType* modules_; // static module where it is referenced +}; + +}; //namespace: hip +#endif /* HIP_GLOBAL_HPP */ diff --git a/rocclr/hip_intercept.cpp b/rocclr/hip_intercept.cpp old mode 100644 new mode 100755 index c1b7f53534..10eeb43927 --- a/rocclr/hip_intercept.cpp +++ b/rocclr/hip_intercept.cpp @@ -20,6 +20,7 @@ #include "hip/hip_runtime.h" #include "hip_internal.hpp" +#include "hip_platform.hpp" #include "hip_prof_api.h" // HIP API callback/activity @@ -44,8 +45,9 @@ const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream) DevLogPrintfError("Wrong Device Id: %d \n", deviceId); return NULL; } - hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); - if (func == nullptr) { + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId); + if (hip_error != hipSuccess) { return NULL; } return hipKernelNameRef(func); diff --git a/rocclr/hip_internal.hpp b/rocclr/hip_internal.hpp index c42bda9fec..a53db4be74 100755 --- a/rocclr/hip_internal.hpp +++ b/rocclr/hip_internal.hpp @@ -80,6 +80,25 @@ typedef struct ihipIpcMemHandle_st { HIP_ERROR_PRINT(hip::g_lastError, __VA_ARGS__) \ return hip::g_lastError; +#define HIP_RETURN_ONFAIL(func) \ + do { \ + hipError_t herror = (func); \ + if (herror != hipSuccess) { \ + HIP_RETURN(herror); \ + } \ + } while (0); + +// Cannot be use in place of HIP_RETURN. +// Refrain from using for external HIP APIs +#define IHIP_RETURN_ONFAIL(func) \ + do { \ + hipError_t herror = (func); \ + if (herror != hipSuccess) { \ + return herror; \ + } \ + } while (0); + + namespace hc { class accelerator; class accelerator_view; @@ -198,17 +217,6 @@ namespace hip { extern amd::HostQueue* getNullStream(amd::Context&); /// Get default stream of the thread extern amd::HostQueue* getNullStream(); - - struct Function { - amd::Kernel* function_; - amd::Monitor lock_; - - Function(amd::Kernel* f) : function_(f), lock_("function lock") {} - ~Function() { function_->release(); } - hipFunction_t asHipFunction() { return reinterpret_cast(this); } - - static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } - }; }; struct ihipExec_t { @@ -219,138 +227,6 @@ struct ihipExec_t { std::vector arguments_; }; -class PlatformState { - amd::Monitor lock_{"Guards global function map", true}; - - std::unordered_map>> modules_; - bool initialized_{false}; - - void digestFatBinary(const void* data, std::vector>& programs); -public: - void init(); - std::vector>* addFatBinary(const void*data) - { - amd::ScopedLock lock(lock_); - if (initialized_) { - digestFatBinary(data, modules_[data]); - } - return &modules_[data]; - } - void removeFatBinary(std::vector>* module) - { - amd::ScopedLock lock(lock_); - for (auto& mod : modules_) { - if (&mod.second == module) { - modules_.erase(&mod); - return; - } - } - } - - struct RegisteredVar { - public: - RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {} - ~RegisteredVar() {} - - hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; - amd::Memory* amd_mem_obj() const { return amd_mem_obj_; }; - size_t getvarsize() const { return size_; }; - - size_t size_; // Size of the variable - hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. - amd::Memory* amd_mem_obj_; - }; - - struct DeviceFunction { - std::string deviceName; - std::vector< std::pair< hipModule_t, bool > >* modules; - std::vector functions; - }; - enum DeviceVarKind { - DVK_Variable, - DVK_Surface, - DVK_Texture - }; - struct DeviceVar { - DeviceVarKind kind; - void* shadowVptr; - std::string hostVar; - size_t size; - std::vector< std::pair< hipModule_t, bool > >* modules; - std::vector rvars; - bool dyn_undef; - int type; // surface/texture type - int norm; // texture has normalized output - bool shadowAllocated = false; // shadow ptr is allocated on-demand and needs freeing. - }; -private: - class Module { - public: - Module(hipModule_t hip_module_) : hip_module(hip_module_) {} - std::unordered_map functions_; - private: - hipModule_t hip_module; - }; - std::unordered_map module_map_; - - std::unordered_map functions_; - std::unordered_multimap vars_; - // Map from the host shadow symbol to its device name. As different modules - // may have the same name, each symbol is uniquely identified by a pair of - // module handle and its name. - std::unordered_map> symbols_; - - typedef std::pair CodeObjPairType; - std::unordered_map code_obj_; - - static PlatformState* platform_; - - PlatformState() {} - ~PlatformState() {} -public: - static PlatformState& instance() { - if (platform_ == nullptr) { - // __hipRegisterFatBinary() will call this when app starts, thus - // there is no multiple entry issue here. - platform_ = new PlatformState(); - } - return *platform_; - } - - bool unregisterFunc(hipModule_t hmod); - std::vector< std::pair >* unregisterVar(hipModule_t hmod); - - - bool findSymbol(const void *hostVar, hipModule_t &hmod, std::string &devName); - PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); - void registerVarSym(const void *hostVar, hipModule_t hmod, const char *symbolName); - void registerVar(const char* symbolName, const DeviceVar& var); - void registerFunction(const void* hostFunction, const DeviceFunction& func); - - bool registerModFuncs(std::vector& func_names, hipModule_t* module); - bool findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name); - bool createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name); - hipFunction_t getFunc(const void* hostFunction, int deviceId); - bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr); - bool getGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod, - hipDeviceptr_t* dev_ptr, size_t* size_ptr); - bool getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); - - bool getGlobalVarFromSymbol(const void* hostVar, int deviceId, - hipDeviceptr_t* dev_ptr, size_t* size_ptr); - - bool getShadowVarInfo(std::string var_name, hipModule_t hmod, - void** var_addr, size_t* var_size); - void setupArgument(const void *arg, size_t size, size_t offset); - void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); - - void popExec(ihipExec_t& exec); -}; - -constexpr bool kOptionChangeable = true; -constexpr bool kNewDevProg = false; - /// Wait all active streams on the blocking queue. The method enqueues a wait command and /// doesn't stall the current thread extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream = false); @@ -363,5 +239,6 @@ extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset); extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, size_t* var_size); - +constexpr bool kOptionChangeable = true; +constexpr bool kNewDevProg = false; #endif // HIP_SRC_HIP_INTERNAL_H diff --git a/rocclr/hip_memory.cpp b/rocclr/hip_memory.cpp index 84c8b77f92..2854b0d7eb 100755 --- a/rocclr/hip_memory.cpp +++ b/rocclr/hip_memory.cpp @@ -20,6 +20,7 @@ #include #include "hip_internal.hpp" +#include "hip_platform.hpp" #include "hip_conversions.hpp" #include "platform/context.hpp" #include "platform/command.hpp" @@ -744,18 +745,7 @@ hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeByt size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { - DevLogPrintfError("cannot find symbol 0x%x \n", symbolName.c_str()); - HIP_RETURN(hipErrorInvalidSymbol); - } - /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, - &device_ptr, &sym_size)) { - DevLogPrintfError("Cannot get global var: %s at device: %d \n", symbolName.c_str(), ihipGetDevice()); - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, &sym_size)); /* Size Check to make sure offset is correct */ if ((offset + sizeBytes) > sym_size) { @@ -777,18 +767,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { - DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); - HIP_RETURN(hipErrorInvalidSymbol); - } - /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, - &device_ptr, &sym_size)) { - DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, &sym_size)); /* Size Check to make sure offset is correct */ if ((offset + sizeBytes) > sym_size) { @@ -810,18 +789,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t si size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { - DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); - HIP_RETURN(hipErrorInvalidSymbol); - } - /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, - &device_ptr, &sym_size)) { - DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, &sym_size)); /* Size Check to make sure offset is correct */ if ((offset + sizeBytes) > sym_size) { @@ -843,18 +811,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBy size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { - DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); - HIP_RETURN(hipErrorInvalidSymbol); - } - /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, - &device_ptr, &sym_size)) { - DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, &sym_size)); /* Size Check to make sure offset is correct */ if ((offset + sizeBytes) > sym_size) { diff --git a/rocclr/hip_module.cpp b/rocclr/hip_module.cpp index f618254599..383f7a77a9 100755 --- a/rocclr/hip_module.cpp +++ b/rocclr/hip_module.cpp @@ -39,9 +39,8 @@ extern hipError_t ihipLaunchKernel(const void* hostFunction, hipEvent_t stopEvent, int flags); -const std::string& FunctionName(const hipFunction_t f) -{ - return hip::Function::asFunction(f)->function_->name(); +const std::string& FunctionName(const hipFunction_t f) { + return hip::DeviceFunc::asFunction(f)->kernel()->name(); } static uint64_t ElfSize(const void *emi) @@ -65,223 +64,48 @@ static uint64_t ElfSize(const void *emi) return total_size; } -hipError_t hipModuleLoad(hipModule_t* module, const char* fname) -{ - HIP_INIT_API(hipModuleLoad, module, fname); - - const void* mmap_ptr = nullptr; - size_t mmap_size = 0; - - if (!fname) { - HIP_RETURN(hipErrorInvalidValue); - } - - if (!amd::Os::MemoryMapFile(fname, &mmap_ptr, &mmap_size)) { - HIP_RETURN(hipErrorFileNotFound); - } - - HIP_RETURN(ihipModuleLoadData(module, mmap_ptr, mmap_size)); -} - -bool ihipModuleUnregisterGlobal(hipModule_t hmod) { - std::vector< std::pair >* modules = - PlatformState::instance().unregisterVar(hmod); - if (modules != nullptr) { - delete modules; - } - return true; -} - -hipError_t hipModuleUnload(hipModule_t hmod) -{ +hipError_t hipModuleUnload(hipModule_t hmod) { HIP_INIT_API(hipModuleUnload, hmod); - if (hmod == nullptr) { - HIP_RETURN(hipErrorInvalidValue); - } + HIP_RETURN(PlatformState::instance().unloadModule(hmod)); +} - amd::Program* program = as_amd(reinterpret_cast(hmod)); +hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { + HIP_INIT_API(hipModuleLoad, module, fname); - if(!PlatformState::instance().unregisterFunc(hmod)) { - DevLogPrintfError("Cannot unregister module: 0x%x \n", hmod); - HIP_RETURN(hipErrorInvalidSymbol); - } - - if(!ihipModuleUnregisterGlobal(hmod)) { - DevLogPrintfError("Cannot unregister Global vars for module: 0x%x \n", hmod); - HIP_RETURN(hipErrorInvalidSymbol); - } - - program->release(); - - HIP_RETURN(hipSuccess); + HIP_RETURN(PlatformState::instance().loadModule(module, fname)); } hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(hipModuleLoadData, module, image); - HIP_RETURN(ihipModuleLoadData(module, image, 0)); + HIP_RETURN(PlatformState::instance().loadModule(module, 0, image)); } hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, - unsigned int numOptions, hipJitOption* options, - void** optionsValues) + unsigned int numOptions, hipJitOption* options, + void** optionsValues) { /* TODO: Pass options to Program */ HIP_INIT_API(hipModuleLoadDataEx, module, image); - HIP_RETURN(ihipModuleLoadData(module, image, 0)); + HIP_RETURN(PlatformState::instance().loadModule(module, 0, image)); } extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, const std::vector& devices, std::vector>& code_objs); -inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) { - - std::vector undef_vars; - device::Program* dev_program - = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); - - if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { - DevLogPrintfError("Could not get undefined Variables for Module: 0x%x \n", *module); - return false; - } - - for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) { - auto modules = new std::vector >(g_devices.size()); - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - modules->at(dev) = std::make_pair(*module, true); - } - - texture* tex_hptr - = new texture(); - memset(tex_hptr, 0x00, sizeof(texture)); - - PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, - reinterpret_cast(tex_hptr), - it->c_str(), - sizeof(*tex_hptr), - modules, - std::vector{g_devices.size()}, - true, - /*type*/ 0, - /*norm*/ 0}; - PlatformState::instance().registerVar(it->c_str(), dvar); - } - - return true; -} - -inline bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { - - std::vector func_names; - device::Program* dev_program - = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); - - // Get all the global func names from COMGR - if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { - DevLogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", *module); - return false; - } - - return PlatformState::instance().registerModFuncs(func_names, module); -} - - -inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) { - - size_t var_size = 0; - hipDeviceptr_t device_ptr = nullptr; - std::vector var_names; - - device::Program* dev_program - = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); - - if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { - DevLogPrintfError("Could not get Global vars from Code Obj for Module: 0x%x \n", *module); - return false; - } - - for (auto it = var_names.begin(); it != var_names.end(); ++it) { - auto modules = new std::vector >(g_devices.size()); - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - modules->at(dev) = std::make_pair(*module, true); - } - - PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, - nullptr, - it->c_str(), - 0, - modules, - std::vector{g_devices.size()}, - false, - /*type*/ 0, - /*norm*/ 0}; - PlatformState::instance().registerVar(it->c_str(), dvar); - } - - return true; -} - -hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size) -{ - /* initialize image it to the mmap_ptr, if this is of no_clang_offload bundle then they directly pass the image */ - const void* image = mmap_ptr; - std::vector> code_objs; - hipError_t code_obj_err = __hipExtractCodeObjectFromFatBinary(mmap_ptr, - {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs); - if (code_obj_err == hipSuccess) { - image = code_objs[0].first; - } else if(code_obj_err == hipErrorNoBinaryForGpu) { - return code_obj_err; - } - - amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext(), - amd::Program::Language::Binary, mmap_ptr, mmap_size); - if (program == NULL) { - return hipErrorOutOfMemory; - } - - program->setVarInfoCallBack(&getSvarInfo); - - if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, - ElfSize(image), false)) { - return hipErrorInvalidKernelFile; - } - - *module = reinterpret_cast(as_cl(program)); - - if (!ihipModuleRegisterGlobal(program, module)) { - return hipErrorSharedObjectSymbolNotFound; - } - - if (!ihipModuleRegisterUndefined(program, module)) { - return hipErrorSharedObjectSymbolNotFound; - } - - if (CL_SUCCESS != program->build(hip::getCurrentDevice()->devices(), nullptr, nullptr, nullptr, - kOptionChangeable, kNewDevProg)) { - return hipErrorSharedObjectInitFailed; - } - - if (!ihipModuleRegisterFunc(program, module)) { - return hipErrorSharedObjectSymbolNotFound; - } - - return hipSuccess; -} - -hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) -{ +hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) { HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); - if (!PlatformState::instance().findModFunc(hfunc, hmod, name)) { + if (hipSuccess != PlatformState::instance().getDynFunc(hfunc, hmod, name)) { DevLogPrintfError("Cannot find the function: %s for module: 0x%x \n", name, hmod); HIP_RETURN(hipErrorNotFound); } + HIP_RETURN(hipSuccess); } @@ -290,8 +114,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, - dptr, bytes)) { + if (hipSuccess != PlatformState::instance().getDynGlobalVar(name, ihipGetDevice(), hmod, dptr, bytes)) { DevLogPrintfError("Cannot find global Var: %s for module: 0x%x at device: %d \n", name, hmod, ihipGetDevice()); HIP_RETURN(hipErrorNotFound); @@ -307,12 +130,12 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc HIP_RETURN(hipErrorInvalidValue); } - hip::Function* function = hip::Function::asFunction(hfunc); + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(hfunc); if (function == nullptr) { HIP_RETURN(hipErrorInvalidHandle); } - amd::Kernel* kernel = function->function_; + amd::Kernel* kernel = function->kernel(); if (kernel == nullptr) { HIP_RETURN(hipErrorInvalidDeviceFunction); } @@ -365,9 +188,7 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(hipFuncGetAttributes, attr, func); - if (!PlatformState::instance().getFuncAttr(func, attr)) { - HIP_RETURN(hipErrorInvalidDeviceFunction); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatFuncAttr(attr, func, ihipGetDevice())); HIP_RETURN(hipSuccess); } @@ -383,10 +204,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, HIP_INIT_API(ihipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); - hip::Function* function = hip::Function::asFunction(f); - amd::Kernel* kernel = function->function_; + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(f); + amd::Kernel* kernel = function->kernel(); - amd::ScopedLock lock(function->lock_); + amd::ScopedLock lock(function->dflock_); hip::Event* eStart = reinterpret_cast(startEvent); hip::Event* eStop = reinterpret_cast(stopEvent); @@ -557,7 +378,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, size_t sharedMemBytes, hipStream_t stream) { - HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr, nullptr, 0)); } @@ -571,7 +392,7 @@ extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, hipEvent_t stopEvent, int flags) { - HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags)); } @@ -583,10 +404,8 @@ hipError_t hipLaunchCooperativeKernel(const void* f, sharedMemBytes, hStream); int deviceId = ihipGetDevice(); - hipFunction_t func = PlatformState::instance().getFunc(f, deviceId); - if (func == nullptr) { - HIP_RETURN(hipErrorInvalidDeviceFunction); - } + hipFunction_t func = nullptr; + HIP_RETURN_ONFAIL(PlatformState::instance().getStatFunc(&func, f, deviceId)); HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, blockDim.x, blockDim.y, blockDim.z, @@ -650,7 +469,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL for (size_t dev = 0; dev < g_devices.size(); ++dev) { // Find the matching device and request the kernel function if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { - func = PlatformState::instance().getFunc(launch.func, dev); + IHIP_RETURN_ONFAIL(PlatformState::instance().getStatFunc(&func, launch.func, dev)); // Save ROCclr index of the first device in the launch if (i == 0) { firstDevice = queue->vdev()->device().index(); @@ -714,7 +533,7 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getTexRef(name, hmod, texRef)) { + if (!PlatformState::instance().getDynTexRef(name, hmod, texRef)) { DevLogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", name, hmod); HIP_RETURN(hipErrorNotFound); diff --git a/rocclr/hip_platform.cpp b/rocclr/hip_platform.cpp index 5a96bcbc68..23a1b3afc6 100755 --- a/rocclr/hip_platform.cpp +++ b/rocclr/hip_platform.cpp @@ -20,6 +20,7 @@ #include #include +#include "hip_platform.hpp" #include "hip_internal.hpp" #include "platform/program.hpp" #include "platform/runtime.hpp" @@ -39,23 +40,6 @@ struct __CudaFatBinaryWrapper { void* dummy1; }; -#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" -#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa" -#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-" - -struct __ClangOffloadBundleDesc { - uint64_t offset; - uint64_t size; - uint64_t tripleSize; - const char triple[1]; -}; - -struct __ClangOffloadBundleHeader { - const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1]; - uint64_t numBundles; - __ClangOffloadBundleDesc desc[1]; -}; - hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name); @@ -85,61 +69,7 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id, return codeobj_target_id == short_name; } -// Extracts code objects from fat binary in data for device names given in devices. -// Returns true if code objects are extracted successfully. -hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, - const std::vector& devices, - std::vector>& code_objs) -{ - std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1); - if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { - return hipErrorInvalidKernelFile; - } - - code_objs.resize(devices.size()); - const auto obheader = reinterpret_cast(data); - const auto* desc = &obheader->desc[0]; - unsigned num_code_objs = 0; - for (uint64_t i = 0; i < obheader->numBundles; ++i, - desc = reinterpret_cast( - reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { - - std::size_t offset = 0; - if (!std::strncmp(desc->triple, HIP_AMDGCN_AMDHSA_TRIPLE, - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1)) { - offset = sizeof(HIP_AMDGCN_AMDHSA_TRIPLE); //For code objects created by CLang - } else if (!std::strncmp(desc->triple, HCC_AMDGCN_AMDHSA_TRIPLE, - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1)) { - offset = sizeof(HCC_AMDGCN_AMDHSA_TRIPLE); //For code objects created by Hcc - } else { - continue; - } - std::string target(desc->triple + offset, desc->tripleSize - offset); - - const void *image = reinterpret_cast( - reinterpret_cast(obheader) + desc->offset); - size_t size = desc->size; - - for (size_t dev = 0; dev < devices.size(); ++dev) { - const char* name = devices[dev]; - - if (!isCompatibleCodeObject(target, name)) { - continue; - } - code_objs[dev] = std::make_pair(image, size); - num_code_objs++; - } - } - if (num_code_objs == devices.size()) { - return hipSuccess; - } else { - DevLogError("hipErrorNoBinaryForGpu: Coudn't find binary for current devices!"); - guarantee(false); //Aborting the program - return hipErrorNoBinaryForGpu; - } -} - -extern "C" std::vector>* __hipRegisterFatBinary(const void* data) +extern "C" hip::FatBinaryInfoType* __hipRegisterFatBinary(const void* data) { const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { @@ -151,169 +81,6 @@ extern "C" std::vector>* __hipRegisterFatBinary(con return PlatformState::instance().addFatBinary(fbwrapper->binary); } -void PlatformState::digestFatBinary(const void* data, std::vector>& programs) -{ - if (programs.size() > 0) { - return; - } - - std::vector> code_objs; - std::vector devices; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - devices.push_back(g_devices[dev]->devices()[0]->info().name_); - } - - if (hipSuccess != __hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) { - return; - } - - programs.resize(g_devices.size()); - - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - amd::Context* ctx = g_devices[dev]->asContext(); - amd::Program* program = new amd::Program(*ctx); - if (program == nullptr) { - return; - } - programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); - code_obj_.insert(std::make_pair(program, std::make_pair(code_objs[dev].first, code_objs[dev].second))); - } -} - -void PlatformState::init() -{ - amd::ScopedLock lock(lock_); - - if(initialized_ || g_devices.empty()) { - return; - } - initialized_ = true; - - for (auto& it : modules_) { - digestFatBinary(it.first, it.second); - } - for (auto& it : functions_) { - it.second.functions.resize(g_devices.size()); - } - for (auto& it : vars_) { - it.second.rvars.resize(g_devices.size()); - } -} - -bool PlatformState::unregisterFunc(hipModule_t hmod) { - amd::ScopedLock lock(lock_); - auto mod_it = module_map_.find(hmod); - if (mod_it != module_map_.cend()) { - PlatformState::Module* mod_ptr = mod_it->second; - if(mod_ptr != nullptr) { - for (auto func_it = mod_ptr->functions_.begin(); func_it != mod_ptr->functions_.end(); ++func_it) { - PlatformState::DeviceFunction &devFunc = func_it->second; - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - if (devFunc.functions[dev] != 0) { - hip::Function* f = reinterpret_cast(devFunc.functions[dev]); - delete f; - } - } - delete devFunc.modules; - } - delete mod_ptr; - } - module_map_.erase(mod_it); - } - return true; -} - -std::vector< std::pair >* PlatformState::unregisterVar(hipModule_t hmod) { - amd::ScopedLock lock(lock_); - std::vector< std::pair >* rmodules = nullptr; - auto it = vars_.begin(); - while (it != vars_.end()) { - DeviceVar& dvar = it->second; - if ((*dvar.modules)[0].first == hmod) { - rmodules = dvar.modules; - if (dvar.shadowAllocated) { - texture* tex_hptr - = reinterpret_cast *>(dvar.shadowVptr); - delete tex_hptr; - } - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - if (dvar.rvars[dev].getdeviceptr()) { - amd::MemObjMap::RemoveMemObj(dvar.rvars[dev].getdeviceptr()); - dvar.rvars[dev].amd_mem_obj()->release(); - } - } - vars_.erase(it++); - } else { - ++it; - } - } - return rmodules; -} - -PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) { - DeviceVar* dvar = nullptr; - if (hmod != nullptr) { - // If module is provided, then get the var only from that module - auto var_range = vars_.equal_range(hostVar); - for (auto it = var_range.first; it != var_range.second; ++it) { - if ((*it->second.modules)[deviceId].first == hmod) { - dvar = &(it->second); - break; - } - } - } else { - // If var count is < 2, return the var - if (vars_.count(hostVar) < 2) { - auto it = vars_.find(hostVar); - dvar = ((it == vars_.end()) ? nullptr : &(it->second)); - } else { - // If var count is > 2, return the original var, - // if original var count != 1, return vars_.end()/Invalid - size_t orig_global_count = 0; - auto var_range = vars_.equal_range(hostVar); - for (auto it = var_range.first; it != var_range.second; ++it) { - // when dyn_undef is set, it is a shadow var - if (it->second.dyn_undef == false) { - ++orig_global_count; - dvar = &(it->second); - } - } - dvar = ((orig_global_count == 1) ? dvar : nullptr); - } - } - - return dvar; -} - -bool PlatformState::findSymbol(const void *hostVar, - hipModule_t &hmod, std::string &symbolName) { - auto it = symbols_.find(hostVar); - if (it != symbols_.end()) { - hmod = it->second.first; - symbolName = it->second.second; - return true; - } - DevLogPrintfError("Could not find the Symbol: %s \n", symbolName.c_str()); - return false; -} - -void PlatformState::registerVarSym(const void* hostVar, hipModule_t hmod, const char* symbolName) { - amd::ScopedLock lock(lock_); - symbols_.insert(std::make_pair(hostVar, std::make_pair(hmod, std::string(symbolName)))); -} - -void PlatformState::registerVar(const char* hostvar, - const DeviceVar& rvar) { - amd::ScopedLock lock(lock_); - vars_.insert(std::make_pair(std::string(hostvar), rvar)); -} - -void PlatformState::registerFunction(const void* hostFunction, - const DeviceFunction& func) { - amd::ScopedLock lock(lock_); - functions_.insert(std::make_pair(hostFunction, func)); -} - bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) { device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); @@ -344,15 +111,17 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod, void** var_addr, size_t* var_size) { - DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod); - if (dvar != nullptr) { - *var_addr = dvar->shadowVptr; - *var_size = dvar->size; + + amd::ScopedLock lock(lock_); + if (hipSuccess == getDynGlobalVar(var_name.c_str(), ihipGetDevice(), hmod, var_addr, var_size)) { return true; - } else { - DevLogPrintfError("Cannot find Var name: %s in module: 0x%x \n", var_name.c_str(), hmod); - return false; } + + if (hipSuccess == getStatGlobalVarByName(var_name, ihipGetDevice(), hmod, var_addr, var_size)) { + return true; + } + + return false; } bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr, @@ -361,275 +130,6 @@ bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** va var_addr, var_size); } -bool PlatformState::registerModFuncs(std::vector& func_names, hipModule_t* module) { - amd::ScopedLock lock(lock_); - PlatformState::Module* mod_ptr = new PlatformState::Module(*module); - - for (auto it = func_names.begin(); it != func_names.end(); ++it) { - auto modules = new std::vector >(g_devices.size()); - for (size_t dev = 0; dev < g_devices.size(); ++dev) { - modules->at(dev) = std::make_pair(*module, true); - } - - PlatformState::DeviceFunction dfunc{*it, modules, - std::vector(g_devices.size(), 0)}; - mod_ptr->functions_.insert(std::make_pair(*it, dfunc)); - } - - module_map_.insert(std::make_pair(*module, mod_ptr)); - return true; -} - -bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { - amd::ScopedLock lock(lock_); - - auto mod_it = module_map_.find(hmod); - if (mod_it != module_map_.cend()) { - assert(mod_it->second != nullptr); - auto func_it = mod_it->second->functions_.find(name); - if (func_it != mod_it->second->functions_.cend()) { - PlatformState::DeviceFunction& devFunc = func_it->second; - if (devFunc.functions[ihipGetDevice()] == 0) { - if(!createFunc(&devFunc.functions[ihipGetDevice()], hmod, name)) { - DevLogPrintfError("Could not create a function: %s at module: 0x%x \n", name, hmod); - return false; - } - } - *hfunc = devFunc.functions[ihipGetDevice()]; - return true; - } - } - DevLogPrintfError("Cannot find module: 0x%x in PlatformState Module Map \n", hmod); - return false; -} - -bool PlatformState::createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { - amd::Program* program = as_amd(reinterpret_cast(hmod)); - - const amd::Symbol* symbol = program->findSymbol(name); - if (!symbol) { - DevLogPrintfError("Cannot find Symbol with name: %s \n", name); - return false; - } - - amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); - if (!kernel) { - DevLogPrintfError("Could not create a new kernel with name: %s \n", name); - return false; - } - - hip::Function* f = new hip::Function(kernel); - if (!f) { - DevLogPrintfError("Could not create a new function with name: %s \n", name); - return false; - } - - *hfunc = f->asHipFunction(); - - return true; -} - - -hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { - amd::ScopedLock lock(lock_); - const auto it = functions_.find(hostFunction); - if (it != functions_.cend()) { - PlatformState::DeviceFunction& devFunc = it->second; - if (devFunc.functions[deviceId] == 0) { - hipModule_t module = (*devFunc.modules)[deviceId].first; - if (!(*devFunc.modules)[deviceId].second) { - amd::Program* program = as_amd(reinterpret_cast(module)); - amd::Context* ctx = g_devices[deviceId]->asContext(); - auto code_obj_it = code_obj_.find(program); - if (code_obj_.end() == code_obj_it) { - DevLogError("Cannot find image & size for static symbols"); - guarantee(false); //Aborting the program - return nullptr; - } - if (CL_SUCCESS != program->addDeviceProgram(*ctx->devices()[0], code_obj_it->second.first, - code_obj_it->second.second, false)) { - DevLogError("Cannot add Device Program"); - guarantee(false); //Aborting the program - return nullptr; - } - program->setVarInfoCallBack(&getSvarInfo); - if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr, - kOptionChangeable, kNewDevProg)) { - DevLogPrintfError("Build error for module: 0x%x at device: %u \n", module, deviceId); - return nullptr; - } - (*devFunc.modules)[deviceId].second = true; - } - hipFunction_t function = nullptr; - if (createFunc(&function, module, devFunc.deviceName.c_str()) && - function != nullptr) { - devFunc.functions[deviceId] = function; - } else { - DevLogPrintfError("__hipRegisterFunction cannot find kernel %s for device %d\n", - devFunc.deviceName.c_str(), deviceId); - return nullptr; - } - } - return devFunc.functions[deviceId]; - } - DevLogPrintfError("Cannot find function: 0x%x in PlatformState \n", hostFunction); - return nullptr; -} - -bool PlatformState::getFuncAttr(const void* hostFunction, - hipFuncAttributes* func_attr) { - if (func_attr == nullptr) { - return false; - } - - const auto it = functions_.find(hostFunction); - if (it == functions_.cend()) { - DevLogPrintfError("Cannot find hostFunction 0x%x \n", hostFunction); - return false; - } - - PlatformState::DeviceFunction& devFunc = it->second; - int deviceId = ihipGetDevice(); - - /* If module has not been initialized yet, build the kernel now*/ - if (!(*devFunc.modules)[deviceId].second) { - if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { - DevLogPrintfError("Cannot get hostFunction: 0x%x for deviceId:%d \n", hostFunction, deviceId); - return false; - } - } - - amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); - if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { - DevLogPrintfError("Cannot get Func attributes for function: %s \n", - devFunc.deviceName.c_str()); - return false; - } - return true; -} - -bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) { - amd::ScopedLock lock(lock_); - DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), hmod); - if (dvar == nullptr) { - DevLogPrintfError("Cannot find var:%s for creating texture reference at module: 0x%x \n", - hostVar, hmod); - return false; - } - - switch (dvar->kind) { - case PlatformState::DVK_Variable: - // TODO: Need to define a target-specific symbol info to indicate the device - // variable kind, i.e. regular variable, texture or surface. - // Before that, have to assume the specified variable is a texture or - // surface reference variable. - dvar->kind = DVK_Texture; - // FALL THROUGH - case PlatformState::DVK_Texture: - break; - default: - // If it's already used as non-texture variable, bail out. - return false; - } - - if (!dvar->shadowVptr) { - dvar->shadowVptr = new texture{}; - dvar->shadowAllocated = true; - } - *texRef = reinterpret_cast(dvar->shadowVptr); - registerVarSym(dvar->shadowVptr, hmod, hostVar); - - return true; -} - -bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod, - hipDeviceptr_t* dev_ptr, size_t* size_ptr) { - amd::ScopedLock lock(lock_); - DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod); - if (dvar != nullptr) { - if (dvar->rvars[deviceId].getdeviceptr() == nullptr) { - size_t sym_size = 0; - hipDeviceptr_t device_ptr = nullptr; - amd::Memory* amd_mem_obj = nullptr; - - if (!(*dvar->modules)[deviceId].second) { - amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); - amd::Context* ctx = g_devices[deviceId]->asContext(); - auto code_obj_it = code_obj_.find(program); - if (code_obj_.end() == code_obj_it) { - DevLogError("Cannot find image & size for static symbols"); - guarantee(false); //Aborting the program - return false; - } - if (CL_SUCCESS != program->addDeviceProgram(*ctx->devices()[0], code_obj_it->second.first, - code_obj_it->second.second, false)) { - DevLogError("Cannot add Device Program"); - guarantee(false) //Aborting the program - return false; - } - program->setVarInfoCallBack(&getSvarInfo); - if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr, - kOptionChangeable, kNewDevProg)) { - DevLogPrintfError("Build Failure for module: 0x%x \n", hmod); - return false; - } - (*dvar->modules)[deviceId].second = true; - } - if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first, - &amd_mem_obj, &device_ptr, &sym_size)) - && (device_ptr != nullptr)) { - dvar->rvars[deviceId].size_ = sym_size; - dvar->rvars[deviceId].devicePtr_ = device_ptr; - dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; - amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); - } else { - DevLogPrintfError("__hipRegisterVar cannot find Var: %s for deviceId: 0x%x \n", - dvar->hostVar.c_str(), deviceId); - return false; - } - } - *size_ptr = dvar->rvars[deviceId].getvarsize(); - *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); - return true; - } else { - DevLogPrintfError("Could not find global var: %s at module:0x%x \n", hostVar, hmod); - return false; - } -} - -bool PlatformState::getGlobalVarFromSymbol(const void* hostVar, int deviceId, - hipDeviceptr_t* dev_ptr, - size_t* size_ptr) { - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(hostVar, hmod, symbolName)) { - return false; - } - return PlatformState::instance().getGlobalVar(symbolName.c_str(), - ihipGetDevice(), hmod, - dev_ptr, size_ptr); -} - -void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { - auto& arguments = execStack_.top().arguments_; - - if (arguments.size() < offset + size) { - arguments.resize(offset + size); - } - - ::memcpy(&arguments[offset], arg, size); -} - -void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, - hipStream_t stream) { - execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); -} - -void PlatformState::popExec(ihipExec_t& exec) { - exec = std::move(execStack_.top()); - execStack_.pop(); -} - namespace { const int HIP_ENABLE_DEFERRED_LOADING{[] () { char *var = getenv("HIP_ENABLE_DEFERRED_LOADING"); @@ -638,7 +138,7 @@ const int HIP_ENABLE_DEFERRED_LOADING{[] () { } /* namespace */ extern "C" void __hipRegisterFunction( - std::vector >* modules, + hip::FatBinaryInfoType* modules, const void* hostFunction, char* deviceFunction, const char* deviceName, @@ -647,14 +147,16 @@ extern "C" void __hipRegisterFunction( uint3* bid, dim3* blockDim, dim3* gridDim, - int* wSize) -{ - PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; - PlatformState::instance().registerFunction(hostFunction, func); + int* wSize) { + hip::Function* func = new hip::Function(std::string(deviceName), modules); + PlatformState::instance().registerStatFunction(hostFunction, func); if (!HIP_ENABLE_DEFERRED_LOADING) { HIP_INIT(); - for (size_t i = 0; i < g_devices.size(); ++i) { - PlatformState::instance().getFunc(hostFunction, i); + hipFunction_t hfunc = nullptr; + hipError_t hip_error = hipSuccess; + for (size_t dev_idx = 0; dev_idx < g_devices.size(); ++dev_idx) { + hip_error = PlatformState::instance().getStatFunc(&hfunc, hostFunction, dev_idx); + guarantee(hip_error == hipSuccess); } } } @@ -665,7 +167,7 @@ extern "C" void __hipRegisterFunction( // track of the value of the device side global variable between kernel // executions. extern "C" void __hipRegisterVar( - std::vector >* modules, // The device modules containing code object + hip::FatBinaryInfoType* modules, // The device modules containing code object void* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code @@ -674,70 +176,32 @@ extern "C" void __hipRegisterVar( int constant, // Whether this variable is constant int global) // Unknown, always 0 { - PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, - var, - std::string{hostVar}, - size, - modules, - std::vector{g_devices.size()}, - false, - /*type*/ 0, - /*norm*/ 0}; - - PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, nullptr, deviceVar); + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable, size, 0, 0, modules); + PlatformState::instance().registerStatGlobalVar(var, var_ptr); } -extern "C" void __hipRegisterSurface(std::vector>* - modules, // The device modules containing code object +extern "C" void __hipRegisterSurface(hip::FatBinaryInfoType* modules, // The device modules containing code object void* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code int type, int ext) { - PlatformState::DeviceVar dvar{PlatformState::DVK_Surface, - var, - std::string{hostVar}, - sizeof(surfaceReference), // Copy whole surfaceReference - modules, - std::vector{g_devices.size()}, - false, - type, - /*norm*/ 0}; - PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, nullptr, deviceVar); + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface, sizeof(surfaceReference), 0, 0, modules); + PlatformState::instance().registerStatGlobalVar(var, var_ptr); } -extern "C" void __hipRegisterTexture(std::vector>* - modules, // The device modules containing code object +extern "C" void __hipRegisterTexture(hip::FatBinaryInfoType* modules, // The device modules containing code object void* var, // The shadow variable in host code char* hostVar, // Variable name in host code char* deviceVar, // Variable name in device code int type, int norm, int ext) { - PlatformState::DeviceVar dvar{PlatformState::DVK_Texture, - var, - std::string{hostVar}, - sizeof(textureReference), // Copy whole textureReference so far. - modules, - std::vector{g_devices.size()}, - false, - type, - norm}; - PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, nullptr, deviceVar); + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture, sizeof(textureReference), 0, 0, modules); + PlatformState::instance().registerStatGlobalVar(var, var_ptr); } -extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) +extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfoType* modules) { HIP_INIT(); - std::for_each(modules->begin(), modules->end(), [](std::pair module){ - if (module.first != nullptr) { - as_amd(reinterpret_cast(module.first))->release(); - } - }); - if (modules->size() > 0) { - PlatformState::instance().unregisterVar((*modules)[0].first); - } PlatformState::instance().removeFatBinary(modules); } @@ -808,8 +272,9 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) DevLogPrintfError("Wrong DeviceId: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); } - hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); - if (func == nullptr) { + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId); + if ((hip_error != hipSuccess) || (func == nullptr)) { DevLogPrintfError("Could not retrieve hostFunction: 0x%x \n", hostFunction); HIP_RETURN(hipErrorInvalidDeviceFunction); } @@ -830,38 +295,20 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol); - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { - DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); - HIP_RETURN(hipErrorInvalidSymbol); - } - size_t size = 0; - if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, - devPtr, &size)) { - DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", - symbolName.c_str(), ihipGetDevice()); - HIP_RETURN(hipErrorInvalidSymbol); - } + hipError_t hip_error = hipSuccess; + size_t sym_size = 0; + + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size)); + HIP_RETURN(hipSuccess); } hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol); - hipModule_t hmod; - std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { - DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); - HIP_RETURN(hipErrorInvalidSymbol); - } - hipDeviceptr_t devPtr = nullptr; - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, - &devPtr, sizePtr)) { - DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", - symbolName.c_str(), ihipGetDevice()); - HIP_RETURN(hipErrorInvalidSymbol); - } + hipDeviceptr_t device_ptr = nullptr; + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr)); + HIP_RETURN(hipSuccess); } @@ -897,8 +344,8 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( const amd::Device& device, hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) { - hip::Function* function = hip::Function::asFunction(func); - const amd::Kernel& kernel = *function->function_; + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func); + const amd::Kernel& kernel = *function->kernel(); const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel.getDeviceKernel(device)->workGroupInfo(); if (bCalcPotentialBlkSz == false) { @@ -989,9 +436,10 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, if ((gridSize == nullptr) || (blockSize == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } - hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); - if (func == nullptr) { - HIP_RETURN(hipErrorInvalidValue); + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice()); + if ((hip_error != hipSuccess) || (func == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); } const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; int max_blocks_per_grid = 0; @@ -1093,9 +541,10 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, HIP_RETURN(hipErrorInvalidValue); } - hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); - if (func == nullptr) { - HIP_RETURN(hipErrorInvalidValue); + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice()); + if ((hip_error != hipSuccess) || (func == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); } const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; @@ -1118,9 +567,10 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, HIP_RETURN(hipErrorInvalidValue); } - hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); - if (func == nullptr) { - HIP_RETURN(hipErrorInvalidValue); + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice()); + if ((hip_error != hipSuccess) || (func == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); } const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; @@ -1290,10 +740,10 @@ const std::vector& modules() { if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) continue; - const auto obheader = reinterpret_cast(&bundle[0]); + const auto obheader = reinterpret_cast(&bundle[0]); const auto* desc = &obheader->desc[0]; for (uint64_t i = 0; i < obheader->numBundles; ++i, - desc = reinterpret_cast( + desc = reinterpret_cast( reinterpret_cast(&desc->triple[0]) + desc->tripleSize)) { std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1); @@ -1336,7 +786,6 @@ const std::unordered_map& functions() return r; } - void hipLaunchKernelGGLImpl( uintptr_t function_address, const dim3& numBlocks, @@ -1391,8 +840,10 @@ hipError_t ihipLaunchKernel(const void* hostFunction, DevLogPrintfError("Wrong Device Id: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); } - hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); - if (func == nullptr) { + + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId); + if ((hip_error != hipSuccess) || (func == nullptr)) { #ifdef ATI_OS_LINUX const auto it = hip_impl::functions().find(reinterpret_cast(hostFunction)); if (it == hip_impl::functions().cend()) { @@ -1449,3 +900,167 @@ extern "C" float __gnu_h2f_ieee(unsigned short h){ extern "C" unsigned short __gnu_f2h_ieee(float f){ return (unsigned short)__convert_float_to_half(f); } + +void PlatformState::init() +{ + amd::ScopedLock lock(lock_); + + if(initialized_ || g_devices.empty()) { + return; + } + initialized_ = true; + + for (auto& it : statCO_.modules_) { + digestFatBinary(it.first, it.second); + } + + for (auto &it : statCO_.vars_) { + it.second->resize_dVar(g_devices.size()); + } + + for (auto &it : statCO_.functions_) { + it.second->resize_dFunc(g_devices.size()); + } +} + +hipError_t PlatformState::loadModule(hipModule_t *module, const char* fname, const void* image) { + amd::ScopedLock lock(lock_); + + hip::DynCO* dynCo = new hip::DynCO(); + hipError_t hip_error = dynCo->loadCodeObject(fname, image); + if (hip_error != hipSuccess) { + delete dynCo; + return hip_error; + } + + *module = dynCo->module(); + assert(*module != nullptr); + + if (dynCO_map_.find(*module) != dynCO_map_.end()) { + return hipErrorAlreadyMapped; + } + dynCO_map_.insert(std::make_pair(*module, dynCo)); + + return hipSuccess; +} + +hipError_t PlatformState::unloadModule(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + return hipErrorNotFound; + } + + delete it->second; + dynCO_map_.erase(hmod); + + return hipSuccess; +} + +hipError_t PlatformState::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod, + const char* func_name) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + DevLogPrintfError("Cannot find the module: 0x%x", hmod); + return hipErrorNotFound; + } + + return it->second->getDynFunc(hfunc, func_name); +} + +hipError_t PlatformState::getDynGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + DevLogPrintfError("Cannot find the module: 0x%x", hmod); + return hipErrorNotFound; + } + + hip::DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, hostVar, deviceId)); + *dev_ptr = dvar->device_ptr(); + *size_ptr = dvar->size(); + + return hipSuccess; +} + +hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + DevLogPrintfError("Cannot find the module: 0x%x", hmod); + return hipErrorNotFound; + } + + hip::DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, hostVar, ihipGetDevice())); + + dvar->shadowVptr = new texture(); + *texRef = reinterpret_cast(dvar->shadowVptr); + return hipSuccess; +} + +hipError_t PlatformState::digestFatBinary(const void* data, hip::FatBinaryInfoType& programs) { + return statCO_.digestFatBinary(data, programs); +} + +hip::FatBinaryInfoType* PlatformState::addFatBinary(const void* data) { + return statCO_.addFatBinary(data, initialized_); +} + +hipError_t PlatformState::removeFatBinary(hip::FatBinaryInfoType* module) { + return statCO_.removeFatBinary(module); +} + +hipError_t PlatformState::registerStatFunction(const void* hostFunction, hip::Function* func) { + return statCO_.registerStatFunction(hostFunction, func); +} + +hipError_t PlatformState::registerStatGlobalVar(const void* hostVar, hip::Var* var) { + return statCO_.registerStatGlobalVar(hostVar, var); +} + +hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { + return statCO_.getStatFunc(hfunc, hostFunction, deviceId); +} + +hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) { + return statCO_.getStatFuncAttr(func_attr, hostFunction, deviceId); +} + +hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr); +} + +hipError_t PlatformState::getStatGlobalVarByName(std::string hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + return statCO_.getStatGlobalVarByName(hostVar, deviceId, hmod, dev_ptr, size_ptr); +} + +void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { + auto& arguments = execStack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); + } + + ::memcpy(&arguments[offset], arg, size); +} + +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} + +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(execStack_.top()); + execStack_.pop(); +} + diff --git a/rocclr/hip_platform.hpp b/rocclr/hip_platform.hpp old mode 100644 new mode 100755 index fcbfb53bbb..b53a1a750d --- a/rocclr/hip_platform.hpp +++ b/rocclr/hip_platform.hpp @@ -19,11 +19,75 @@ THE SOFTWARE. */ #pragma once +#include "hip_internal.hpp" +#include "hip_fatbin.hpp" #include "device/device.hpp" +#include "hip_code_object.hpp" namespace hip_impl { + hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device, hipFunction_t func, int blockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz); -} +} /* namespace hip_impl*/ + +class PlatformState { + amd::Monitor lock_{"Guards PlatformState globals", true}; + + /* Singleton object */ + static PlatformState* platform_; + PlatformState() {} + ~PlatformState() {} + +public: + void init(); + + //Dynamic Code Objects functions + hipError_t loadModule(hipModule_t* module, const char* fname, const void* image = nullptr); + hipError_t unloadModule(hipModule_t hmod); + + hipError_t getDynFunc(hipFunction_t *hfunc, hipModule_t hmod, const char* func_name); + hipError_t getDynGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + hipError_t getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); + + /* Singleton instance */ + static PlatformState& instance() { + if (platform_ == nullptr) { + // __hipRegisterFatBinary() will call this when app starts, thus + // there is no multiple entry issue here. + platform_ = new PlatformState(); + } + return *platform_; + } + + //Static Code Objects functions + hip::FatBinaryInfoType* addFatBinary(const void* data); + hipError_t removeFatBinary(hip::FatBinaryInfoType* module); + hipError_t digestFatBinary(const void* data, hip::FatBinaryInfoType& programs); + + hipError_t registerStatFunction(const void* hostFunction, hip::Function* func); + hipError_t registerStatGlobalVar(const void* hostVar, hip::Var* var); + + hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); + hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + hipError_t getStatGlobalVarByName(std::string hostVar, int deviceId, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, + void** var_addr, size_t* var_size); + + //Exec Functions + void setupArgument(const void *arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + void popExec(ihipExec_t& exec); + +private: + //Dynamic Code Object map, keyin module to get the corresponding object + std::unordered_map dynCO_map_; + hip::StatCO statCO_; //Static Code object var + bool initialized_{false}; +}; diff --git a/rocclr/hip_texture.cpp b/rocclr/hip_texture.cpp index 9d16e3da01..fced181c5b 100755 --- a/rocclr/hip_texture.cpp +++ b/rocclr/hip_texture.cpp @@ -21,6 +21,7 @@ #include #include #include "hip_internal.hpp" +#include "hip_platform.hpp" #include "hip_conversions.hpp" #include "platform/sampler.hpp" @@ -478,10 +479,10 @@ hipError_t hipBindTexture2D(size_t* offset, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + assert(refDevSize == sizeof(textureReference)); hipError_t err = ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch); if (err != hipSuccess) { @@ -525,10 +526,9 @@ hipError_t hipBindTextureToArray(const textureReference* texref, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + assert(refDevSize == sizeof(textureReference)); hipError_t err = ihipBindTextureToArray(texref, array, desc); if (err != hipSuccess) { @@ -572,10 +572,10 @@ hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + assert(refDevSize == sizeof(textureReference)); hipError_t err = ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc); if (err != hipSuccess) { @@ -608,10 +608,8 @@ hipError_t hipBindTexture(size_t* offset, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); assert(refDevSize == sizeof(textureReference)); hipError_t err = ihipBindTexture(offset, texref, devPtr, desc, size); if (err != hipSuccess) { @@ -804,10 +802,8 @@ hipError_t hipTexRefSetArray(textureReference* texRef, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)); assert(refDevSize == sizeof(textureReference)); // Any previous address or HIP array state associated with the texture reference is superseded by this function. @@ -882,10 +878,8 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)); assert(refDevSize == sizeof(textureReference)); // Any previous address or HIP array state associated with the texture reference is superseded by this function. @@ -929,10 +923,8 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)); assert(refDevSize == sizeof(textureReference)); // Any previous address or HIP array state associated with the texture reference is superseded by this function. @@ -1209,10 +1201,8 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, hipDeviceptr_t refDevPtr = nullptr; size_t refDevSize = 0; - if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, - &refDevSize)) { - HIP_RETURN(hipErrorInvalidSymbol); - } + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)); assert(refDevSize == sizeof(textureReference)); // Any previous address or HIP array state associated with the texture reference is superseded by this function. diff --git a/tests/src/runtimeApi/module/hipModule.cpp b/tests/src/runtimeApi/module/hipModule.cpp old mode 100644 new mode 100755 index 30dec3ddd8..9ed5a72415 --- a/tests/src/runtimeApi/module/hipModule.cpp +++ b/tests/src/runtimeApi/module/hipModule.cpp @@ -97,6 +97,7 @@ int main() { assert(A[i] == B[i]); } - HIPCHECK(hipCtxDestroy(context)); + HIPCHECK(hipModuleUnload(Module)); + HIPCHECK(hipCtxDestroy(context)); passed(); }