diff --git a/rocclr/hip_code_object.cpp b/rocclr/hip_code_object.cpp index 249ea6fe10..9881b79c3e 100755 --- a/rocclr/hip_code_object.cpp +++ b/rocclr/hip_code_object.cpp @@ -1,782 +1,782 @@ -/* -Copyright (c) 2015-2020 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include "hip_code_object.hpp" -#include "amd_hsa_elf.hpp" - -#include - -#include -#include "hip/hip_runtime_api.h" -#include "hip/hip_runtime.h" -#include "hip_internal.hpp" -#include "platform/program.hpp" -#include - -hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, - amd::HostQueue& queue, bool isAsync = false); -namespace { -size_t constexpr strLiteralLength(char const* str) { - return *str ? 1 + strLiteralLength(str + 1) : 0; -} -constexpr char const* CLANG_OFFLOAD_BUNDLER_MAGIC_STR = "__CLANG_OFFLOAD_BUNDLE__"; -constexpr char const* OFFLOAD_KIND_HIP = "hip"; -constexpr char const* OFFLOAD_KIND_HIPV4 = "hipv4"; -constexpr char const* OFFLOAD_KIND_HCC = "hcc"; -constexpr char const* AMDGCN_TARGET_TRIPLE = "amdgcn-amd-amdhsa-"; - -// ClangOFFLOADBundle info. -static constexpr size_t bundle_magic_string_size = - strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC_STR); - -// Clang Offload bundler description & Header. -struct __ClangOffloadBundleInfo { - uint64_t offset; - uint64_t size; - uint64_t bundleEntryIdSize; - const char bundleEntryId[1]; -}; - -struct __ClangOffloadBundleHeader { - const char magic[bundle_magic_string_size - 1]; - uint64_t numOfCodeObjects; - __ClangOffloadBundleInfo desc[1]; -}; -} // namespace - -namespace hip { - -uint64_t CodeObject::ElfSize(const void *emi) { - return amd::Elf::getElfSize(emi); -} - -static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, - bool& sramEccSupported) { - switch (EFlags & EF_AMDGPU_MACH) { - case EF_AMDGPU_MACH_AMDGCN_GFX700: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx700"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX701: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx701"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX702: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx702"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX703: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx703"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX704: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx704"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX705: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx705"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX801: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx801"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX802: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx802"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX803: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx803"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX805: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx805"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX810: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx810"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX900: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx900"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX902: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx902"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX904: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx904"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX906: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx906"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX908: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx908"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX909: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx909"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX90A: - xnackSupported = true; - sramEccSupported = true; - proc_name = "gfx90a"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX90C: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx90c"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1010: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1010"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1011: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1011"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1012: - xnackSupported = true; - sramEccSupported = false; - proc_name = "gfx1012"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1030: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1030"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1031: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1031"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1032: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1032"; - break; - case EF_AMDGPU_MACH_AMDGCN_GFX1033: - xnackSupported = false; - sramEccSupported = false; - proc_name = "gfx1033"; - break; - default: - return false; - } - return true; -} - -static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string& target_id, - unsigned& co_version) { - if (!code_object) return false; - const Elf64_Ehdr* ehdr = reinterpret_cast(code_object); - if (ehdr->e_machine != EM_AMDGPU) return false; - if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false; - - bool isXnackSupported{false}, isSramEccSupported{false}; - - std::string proc_name; - if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false; - target_id = std::string(AMDGCN_TARGET_TRIPLE) + '-' + proc_name; - - switch (ehdr->e_ident[EI_ABIVERSION]) { - case ELFABIVERSION_AMDGPU_HSA_V2: { - co_version = 2; - return false; - } - - case ELFABIVERSION_AMDGPU_HSA_V3: { - co_version = 3; - if (isSramEccSupported) { - if (ehdr->e_flags & EF_AMDGPU_FEATURE_SRAMECC_V3) - target_id += ":sramecc+"; - else - target_id += ":sramecc-"; - } - if (isXnackSupported) { - if (ehdr->e_flags & EF_AMDGPU_FEATURE_XNACK_V3) - target_id += ":xnack+"; - else - target_id += ":xnack-"; - } - break; - } - - case ELFABIVERSION_AMDGPU_HSA_V4: { - co_version = 4; - unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4; - if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4) - target_id += ":sramecc-"; - else if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_ON_V4) - target_id += ":sramecc+"; - - unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4; - if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4) - target_id += ":xnack-"; - else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4) - target_id += ":xnack+"; - break; - } - - default: { - return false; - } - } - return true; -} - -// Consumes the string 'consume_' from the starting of the given input -// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa-- -// input will become gfx908. -static bool consume(std::string& input, std::string consume_) { - if (input.substr(0, consume_.size()) != consume_) { - return false; - } - input = input.substr(consume_.size()); - return true; -} - -// Trim String till character, will be used to get gpuname -// example: input is gfx908:sram-ecc+ and trim char is : -// input will become sram-ecc+. -static std::string trimName(std::string& input, char trim) { - auto pos_ = input.find(trim); - auto res = input; - if (pos_ == std::string::npos) { - input = ""; - } else { - res = input.substr(0, pos_); - input = input.substr(pos_); - } - return res; -} - -static char getFeatureValue(std::string& input, std::string feature) { - char res = ' '; - if (consume(input, std::move(feature))) { - res = input[0]; - input = input.substr(1); - } - return res; -} - -static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value, - char& xnack_value) { - processor = trimName(input, ':'); - sramecc_value = getFeatureValue(input, std::string(":sramecc")); - if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false; - xnack_value = getFeatureValue(input, std::string(":xnack")); - if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false; - return true; -} - -static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object, - std::string& co_triple_target_id, unsigned& co_version) { - std::string offload_kind = trimName(bundled_co_entry_id, '-'); - if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP && - offload_kind != OFFLOAD_KIND_HCC) - return false; - - if (offload_kind != OFFLOAD_KIND_HIPV4) - return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id, co_version); - - // For code object V4 onwards the bundled code object entry ID correctly - // specifies the target tripple. - co_version = 4; - co_triple_target_id = bundled_co_entry_id.substr(1); - return true; -} - -static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, - std::string agent_triple_target_id) { - // Primitive Check - if (co_triple_target_id == agent_triple_target_id) return true; - - // Parse code object triple target id - if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { - return false; - } - - std::string co_processor; - char co_sram_ecc, co_xnack; - if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { - return false; - } - - if (!co_triple_target_id.empty()) return false; - - // Parse agent isa triple target id - if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { - return false; - } - - std::string agent_isa_processor; - char isa_sram_ecc, isa_xnack; - if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { - return false; - } - - if (!agent_triple_target_id.empty()) return false; - - // Check for compatibility - if (agent_isa_processor != co_processor) return false; - if (co_sram_ecc != ' ') { - if (co_sram_ecc != isa_sram_ecc) return false; - } - if (co_xnack != ' ') { - if (co_xnack != isa_xnack) return false; - } - - return true; -} - -// This will be moved to COMGR eventually -hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, - const void ** image, const std::vector& device_names, - std::vector>& code_objs) { - - hipError_t hip_error = hipSuccess; - - if (fdesc < 0) { - return hipErrorFileNotFound; - } - - // Map the file to memory, with offset 0. - //file will be unmapped in ModuleUnload - //const void* image = nullptr; - if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) { - return hipErrorInvalidValue; - } - - // retrieve code_objs{binary_image, binary_size} for devices - hip_error = extractCodeObjectFromFatBinary(*image, device_names, code_objs); - - return hip_error; -} - -// This will be moved to COMGR eventually -hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data, - const std::vector& device_names, - std::vector>& code_objs, - std::string& uri) { - - // Get the URI from memory - if (!amd::Os::GetURIFromMemory(data, 0, uri)) { - return hipErrorInvalidValue; - } - - return extractCodeObjectFromFatBinary(data, device_names, code_objs); -} - -// This will be moved to COMGR eventually -hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data, - const std::vector& agent_triple_target_ids, - std::vector>& code_objs) { - std::string magic((const char*)data, bundle_magic_string_size); - if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { - return hipErrorInvalidKernelFile; - } - - // Initialize Code objects - code_objs.reserve(agent_triple_target_ids.size()); - for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { - code_objs.push_back(std::make_pair(nullptr, 0)); - } - - const auto obheader = reinterpret_cast(data); - const auto* desc = &obheader->desc[0]; - size_t num_code_objs = code_objs.size(); - for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, - desc = reinterpret_cast( - reinterpret_cast(&desc->bundleEntryId[0]) + - desc->bundleEntryIdSize)) { - const void* image = - reinterpret_cast(reinterpret_cast(obheader) + desc->offset); - const size_t image_size = desc->size; - - if (num_code_objs == 0) break; - std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; - - unsigned co_version = 0; - std::string co_triple_target_id; - if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id, co_version)) continue; - - for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { - if (code_objs[dev].first) continue; - if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) { - code_objs[dev] = std::make_pair(image, image_size); - --num_code_objs; - } - } - } - if (num_code_objs == 0) { - return hipSuccess; - } else { - LogPrintfError("%s", - "hipErrorNoBinaryForGpu: Unable to find code object for all current devices!"); - LogPrintfError("%s", " Devices:"); - for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { - LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(), - ((code_objs[i].first) ? "Found" : "Not Found")); - } - const auto obheader = reinterpret_cast(data); - const auto* desc = &obheader->desc[0]; - LogPrintfError("%s", " Bundled Code Objects:"); - for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, - desc = reinterpret_cast( - reinterpret_cast(&desc->bundleEntryId[0]) + - desc->bundleEntryIdSize)) { - std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; - const void* image = - reinterpret_cast(reinterpret_cast(obheader) + desc->offset); - - unsigned co_version = 0; - std::string co_triple_target_id; - bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id, co_version); - - if (valid_co) { - LogPrintfError(" %s - [code object v%u is %s]", bundleEntryId.c_str(), co_version, - co_triple_target_id.c_str()); - } else { - LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str()); - } - } - - guarantee(false, "hipErrorNoBinaryForGpu: Unable to find code object for all current devices!"); - return hipErrorNoBinaryForGpu; - } -} - -hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { - - amd::ScopedLock lock(dclock_); - - // Number of devices = 1 in dynamic code object - fb_info_ = new FatBinaryInfo(fname, image); - std::vector devices = { g_devices[ihipGetDevice()] }; - IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices)); - - // No Lazy loading for DynCO - IHIP_RETURN_ONFAIL(fb_info_->BuildProgram(ihipGetDevice())); - - // Define Global variables - IHIP_RETURN_ONFAIL(populateDynGlobalVars()); - - // Define Global functions - IHIP_RETURN_ONFAIL(populateDynGlobalFuncs()); - - return hipSuccess; -} - -//Dynamic Code Object -DynCO::~DynCO() { - amd::ScopedLock lock(dclock_); - - for (auto& elem : vars_) { - delete elem.second; - } - vars_.clear(); - - for (auto& elem : functions_) { - delete elem.second; - } - functions_.clear(); - - delete fb_info_; -} - -hipError_t DynCO::getDeviceVar(DeviceVar** dvar, std::string var_name) { - amd::ScopedLock lock(dclock_); - - CheckDeviceIdMatch(); - - auto it = vars_.find(var_name); - if (it == vars_.end()) { - LogPrintfError("Cannot find the Var: %s ", var_name.c_str()); - return hipErrorNotFound; - } - - it->second->getDeviceVar(dvar, device_id_, module()); - return hipSuccess; -} - -hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) { - amd::ScopedLock lock(dclock_); - - CheckDeviceIdMatch(); - - if(hfunc == nullptr) { - return hipErrorInvalidValue; - } - - auto it = functions_.find(func_name); - if (it == functions_.end()) { - LogPrintfError("Cannot find the function: %s ", func_name.c_str()); - return hipErrorNotFound; - } - - /* See if this could be solved */ - return it->second->getDynFunc(hfunc, module()); -} - -hipError_t DynCO::populateDynGlobalVars() { - amd::ScopedLock lock(dclock_); - - std::vector var_names; - std::vector undef_var_names; - - //For Dynamic Modules there is only one hipFatBinaryDevInfo_ - device::Program* dev_program - = fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram - (*hip::getCurrentDevice()->devices()[0]); - - if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { - LogPrintfError("Could not get Global vars from Code Obj for Module: 0x%x \n", module()); - return hipErrorSharedObjectSymbolNotFound; - } - - for (auto& elem : var_names) { - vars_.insert(std::make_pair(elem, new Var(elem, Var::DeviceVarKind::DVK_Variable, 0, 0, 0, nullptr))); - } - - return hipSuccess; -} - -hipError_t DynCO::populateDynGlobalFuncs() { - amd::ScopedLock lock(dclock_); - - std::vector func_names; - device::Program* dev_program - = fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram( - *hip::getCurrentDevice()->devices()[0]); - - // Get all the global func names from COMGR - if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { - LogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", module()); - return hipErrorSharedObjectSymbolNotFound; - } - - for (auto& elem : func_names) { - functions_.insert(std::make_pair(elem, new Function(elem))); - } - - return hipSuccess; -} - -//Static Code Object -StatCO::StatCO() { -} - -StatCO::~StatCO() { - amd::ScopedLock lock(sclock_); - - for (auto& elem : functions_) { - delete elem.second; - } - functions_.clear(); - - for (auto& elem : vars_) { - delete elem.second; - } - vars_.clear(); -} - -hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) { - amd::ScopedLock lock(sclock_); - - if (programs != nullptr) { - return hipSuccess; - } - - // Create a new fat binary object and extract the fat binary for all devices. - programs = new FatBinaryInfo(nullptr, data); - IHIP_RETURN_ONFAIL(programs->ExtractFatBinary(g_devices)); - - return hipSuccess; -} - -FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized) { - amd::ScopedLock lock(sclock_); - - if (initialized) { - digestFatBinary(data, modules_[data]); - } - return &modules_[data]; -} - -hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) { - amd::ScopedLock lock(sclock_); - - auto vit = vars_.begin(); - while (vit != vars_.end()) { - if (vit->second->moduleInfo() == module) { - delete vit->second; - vit = vars_.erase(vit); - } else { - ++vit; - } - } - - auto it = managedVars_.begin(); - while (it != managedVars_.end()) { - if ((*it)->moduleInfo() == module) { - delete *it; - managedVars_.erase(it); - } else { - ++it; - } - } - - auto fit = functions_.begin(); - while (fit != functions_.end()) { - if (fit->second->moduleInfo() == module) { - delete fit->second; - fit = functions_.erase(fit); - } else { - ++fit; - } - } - - auto mit = modules_.begin(); - while (mit != modules_.end()) { - if (&mit->second == module) { - delete mit->second; - mit = modules_.erase(mit); - } else { - ++mit; - } - } - - return hipSuccess; -} - -hipError_t StatCO::registerStatFunction(const void* hostFunction, Function* func) { - amd::ScopedLock lock(sclock_); - - if (functions_.find(hostFunction) != functions_.end()) { - DevLogPrintfError("hostFunctionPtr: 0x%x already exists", hostFunction); - } - functions_.insert(std::make_pair(hostFunction, func)); - - return hipSuccess; -} - -hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { - amd::ScopedLock lock(sclock_); - - const auto it = functions_.find(hostFunction); - if (it == functions_.end()) { - return hipErrorInvalidSymbol; - } - - return it->second->getStatFunc(hfunc, deviceId); -} - -hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) { - amd::ScopedLock lock(sclock_); - - const auto it = functions_.find(hostFunction); - if (it == functions_.end()) { - return hipErrorInvalidSymbol; - } - - return it->second->getStatFuncAttr(func_attr, deviceId); -} - -hipError_t StatCO::registerStatGlobalVar(const void* hostVar, Var* var) { - amd::ScopedLock lock(sclock_); - - if (vars_.find(hostVar) != vars_.end()) { - return hipErrorInvalidSymbol; - } - - vars_.insert(std::make_pair(hostVar, var)); - return hipSuccess; -} - -hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, - size_t* size_ptr) { - amd::ScopedLock lock(sclock_); - - const auto it = vars_.find(hostVar); - if (it == vars_.end()) { - return hipErrorInvalidSymbol; - } - - DeviceVar* dvar = nullptr; - IHIP_RETURN_ONFAIL(it->second->getStatDeviceVar(&dvar, deviceId)); - - *dev_ptr = dvar->device_ptr(); - *size_ptr = dvar->size(); - return hipSuccess; -} - -hipError_t StatCO::registerStatManagedVar(Var* var) { - managedVars_.emplace_back(var); - return hipSuccess; -} - -hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) { - amd::ScopedLock lock(sclock_); - - if (managedVarsDevicePtrInitalized_.find(deviceId) == managedVarsDevicePtrInitalized_.end() || - !managedVarsDevicePtrInitalized_[deviceId]) { - for (auto var : managedVars_) { - DeviceVar* dvar = nullptr; - IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId)); - - amd::HostQueue* queue = hip::getNullStream(); - if(queue != nullptr) { - ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), var->getManagedVarPtr(), - dvar->size(), hipMemcpyHostToDevice, *queue); - } else { - ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); - return hipErrorInvalidResourceHandle; - } - } - managedVarsDevicePtrInitalized_[deviceId] = true; - } - return hipSuccess; -} -}; //namespace: hip +/* +Copyright (c) 2015-2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip_code_object.hpp" +#include "amd_hsa_elf.hpp" + +#include + +#include +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include + +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false); +namespace { +size_t constexpr strLiteralLength(char const* str) { + return *str ? 1 + strLiteralLength(str + 1) : 0; +} +constexpr char const* CLANG_OFFLOAD_BUNDLER_MAGIC_STR = "__CLANG_OFFLOAD_BUNDLE__"; +constexpr char const* OFFLOAD_KIND_HIP = "hip"; +constexpr char const* OFFLOAD_KIND_HIPV4 = "hipv4"; +constexpr char const* OFFLOAD_KIND_HCC = "hcc"; +constexpr char const* AMDGCN_TARGET_TRIPLE = "amdgcn-amd-amdhsa-"; + +// ClangOFFLOADBundle info. +static constexpr size_t bundle_magic_string_size = + strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC_STR); + +// Clang Offload bundler description & Header. +struct __ClangOffloadBundleInfo { + uint64_t offset; + uint64_t size; + uint64_t bundleEntryIdSize; + const char bundleEntryId[1]; +}; + +struct __ClangOffloadBundleHeader { + const char magic[bundle_magic_string_size - 1]; + uint64_t numOfCodeObjects; + __ClangOffloadBundleInfo desc[1]; +}; +} // namespace + +namespace hip { + +uint64_t CodeObject::ElfSize(const void *emi) { + return amd::Elf::getElfSize(emi); +} + +static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, + bool& sramEccSupported) { + switch (EFlags & EF_AMDGPU_MACH) { + case EF_AMDGPU_MACH_AMDGCN_GFX700: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx700"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX701: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx701"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX702: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx702"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX703: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx703"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX704: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx704"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX705: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx705"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX801: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx801"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX802: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx802"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX803: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx803"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX805: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx805"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX810: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx810"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX900: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx900"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX902: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx902"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX904: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx904"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX906: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx906"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX908: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx908"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX909: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx909"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX90A: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx90a"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX90C: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx90c"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1010: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1010"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1011: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1011"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1012: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1012"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1030: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1030"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1031: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1031"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1032: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1032"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1033: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1033"; + break; + default: + return false; + } + return true; +} + +static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string& target_id, + unsigned& co_version) { + if (!code_object) return false; + const Elf64_Ehdr* ehdr = reinterpret_cast(code_object); + if (ehdr->e_machine != EM_AMDGPU) return false; + if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false; + + bool isXnackSupported{false}, isSramEccSupported{false}; + + std::string proc_name; + if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false; + target_id = std::string(AMDGCN_TARGET_TRIPLE) + '-' + proc_name; + + switch (ehdr->e_ident[EI_ABIVERSION]) { + case ELFABIVERSION_AMDGPU_HSA_V2: { + co_version = 2; + return false; + } + + case ELFABIVERSION_AMDGPU_HSA_V3: { + co_version = 3; + if (isSramEccSupported) { + if (ehdr->e_flags & EF_AMDGPU_FEATURE_SRAMECC_V3) + target_id += ":sramecc+"; + else + target_id += ":sramecc-"; + } + if (isXnackSupported) { + if (ehdr->e_flags & EF_AMDGPU_FEATURE_XNACK_V3) + target_id += ":xnack+"; + else + target_id += ":xnack-"; + } + break; + } + + case ELFABIVERSION_AMDGPU_HSA_V4: { + co_version = 4; + unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4; + if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4) + target_id += ":sramecc-"; + else if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_ON_V4) + target_id += ":sramecc+"; + + unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4; + if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4) + target_id += ":xnack-"; + else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4) + target_id += ":xnack+"; + break; + } + + default: { + return false; + } + } + return true; +} + +// Consumes the string 'consume_' from the starting of the given input +// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa-- +// input will become gfx908. +static bool consume(std::string& input, std::string consume_) { + if (input.substr(0, consume_.size()) != consume_) { + return false; + } + input = input.substr(consume_.size()); + return true; +} + +// Trim String till character, will be used to get gpuname +// example: input is gfx908:sram-ecc+ and trim char is : +// input will become sram-ecc+. +static std::string trimName(std::string& input, char trim) { + auto pos_ = input.find(trim); + auto res = input; + if (pos_ == std::string::npos) { + input = ""; + } else { + res = input.substr(0, pos_); + input = input.substr(pos_); + } + return res; +} + +static char getFeatureValue(std::string& input, std::string feature) { + char res = ' '; + if (consume(input, std::move(feature))) { + res = input[0]; + input = input.substr(1); + } + return res; +} + +static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value, + char& xnack_value) { + processor = trimName(input, ':'); + sramecc_value = getFeatureValue(input, std::string(":sramecc")); + if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false; + xnack_value = getFeatureValue(input, std::string(":xnack")); + if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false; + return true; +} + +static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object, + std::string& co_triple_target_id, unsigned& co_version) { + std::string offload_kind = trimName(bundled_co_entry_id, '-'); + if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP && + offload_kind != OFFLOAD_KIND_HCC) + return false; + + if (offload_kind != OFFLOAD_KIND_HIPV4) + return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id, co_version); + + // For code object V4 onwards the bundled code object entry ID correctly + // specifies the target tripple. + co_version = 4; + co_triple_target_id = bundled_co_entry_id.substr(1); + return true; +} + +static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, + std::string agent_triple_target_id) { + // Primitive Check + if (co_triple_target_id == agent_triple_target_id) return true; + + // Parse code object triple target id + if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { + return false; + } + + std::string co_processor; + char co_sram_ecc, co_xnack; + if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { + return false; + } + + if (!co_triple_target_id.empty()) return false; + + // Parse agent isa triple target id + if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { + return false; + } + + std::string agent_isa_processor; + char isa_sram_ecc, isa_xnack; + if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { + return false; + } + + if (!agent_triple_target_id.empty()) return false; + + // Check for compatibility + if (agent_isa_processor != co_processor) return false; + if (co_sram_ecc != ' ') { + if (co_sram_ecc != isa_sram_ecc) return false; + } + if (co_xnack != ' ') { + if (co_xnack != isa_xnack) return false; + } + + return true; +} + +// This will be moved to COMGR eventually +hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, + const void ** image, const std::vector& device_names, + std::vector>& code_objs) { + + hipError_t hip_error = hipSuccess; + + if (fdesc < 0) { + return hipErrorFileNotFound; + } + + // Map the file to memory, with offset 0. + //file will be unmapped in ModuleUnload + //const void* image = nullptr; + if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) { + return hipErrorInvalidValue; + } + + // retrieve code_objs{binary_image, binary_size} for devices + hip_error = extractCodeObjectFromFatBinary(*image, device_names, code_objs); + + return hip_error; +} + +// This will be moved to COMGR eventually +hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data, + const std::vector& device_names, + std::vector>& code_objs, + std::string& uri) { + + // Get the URI from memory + if (!amd::Os::GetURIFromMemory(data, 0, uri)) { + return hipErrorInvalidValue; + } + + return extractCodeObjectFromFatBinary(data, device_names, code_objs); +} + +// This will be moved to COMGR eventually +hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data, + const std::vector& agent_triple_target_ids, + std::vector>& code_objs) { + std::string magic((const char*)data, bundle_magic_string_size); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return hipErrorInvalidKernelFile; + } + + // Initialize Code objects + code_objs.reserve(agent_triple_target_ids.size()); + for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { + code_objs.push_back(std::make_pair(nullptr, 0)); + } + + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + size_t num_code_objs = code_objs.size(); + for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + + desc->bundleEntryIdSize)) { + const void* image = + reinterpret_cast(reinterpret_cast(obheader) + desc->offset); + const size_t image_size = desc->size; + + if (num_code_objs == 0) break; + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + + unsigned co_version = 0; + std::string co_triple_target_id; + if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id, co_version)) continue; + + for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { + if (code_objs[dev].first) continue; + if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) { + code_objs[dev] = std::make_pair(image, image_size); + --num_code_objs; + } + } + } + if (num_code_objs == 0) { + return hipSuccess; + } else { + LogPrintfError("%s", + "hipErrorNoBinaryForGpu: Unable to find code object for all current devices!"); + LogPrintfError("%s", " Devices:"); + for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { + LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(), + ((code_objs[i].first) ? "Found" : "Not Found")); + } + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + LogPrintfError("%s", " Bundled Code Objects:"); + for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + + desc->bundleEntryIdSize)) { + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + const void* image = + reinterpret_cast(reinterpret_cast(obheader) + desc->offset); + + unsigned co_version = 0; + std::string co_triple_target_id; + bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id, co_version); + + if (valid_co) { + LogPrintfError(" %s - [code object v%u is %s]", bundleEntryId.c_str(), co_version, + co_triple_target_id.c_str()); + } else { + LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str()); + } + } + + guarantee(false, "hipErrorNoBinaryForGpu: Unable to find code object for all current devices!"); + return hipErrorNoBinaryForGpu; + } +} + +hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { + + amd::ScopedLock lock(dclock_); + + // Number of devices = 1 in dynamic code object + fb_info_ = new FatBinaryInfo(fname, image); + std::vector devices = { g_devices[ihipGetDevice()] }; + IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices)); + + // No Lazy loading for DynCO + IHIP_RETURN_ONFAIL(fb_info_->BuildProgram(ihipGetDevice())); + + // Define Global variables + IHIP_RETURN_ONFAIL(populateDynGlobalVars()); + + // Define Global functions + IHIP_RETURN_ONFAIL(populateDynGlobalFuncs()); + + return hipSuccess; +} + +//Dynamic Code Object +DynCO::~DynCO() { + amd::ScopedLock lock(dclock_); + + for (auto& elem : vars_) { + delete elem.second; + } + vars_.clear(); + + for (auto& elem : functions_) { + delete elem.second; + } + functions_.clear(); + + delete fb_info_; +} + +hipError_t DynCO::getDeviceVar(DeviceVar** dvar, std::string var_name) { + amd::ScopedLock lock(dclock_); + + CheckDeviceIdMatch(); + + auto it = vars_.find(var_name); + if (it == vars_.end()) { + LogPrintfError("Cannot find the Var: %s ", var_name.c_str()); + return hipErrorNotFound; + } + + it->second->getDeviceVar(dvar, device_id_, module()); + return hipSuccess; +} + +hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) { + amd::ScopedLock lock(dclock_); + + CheckDeviceIdMatch(); + + if(hfunc == nullptr) { + return hipErrorInvalidValue; + } + + auto it = functions_.find(func_name); + if (it == functions_.end()) { + LogPrintfError("Cannot find the function: %s ", func_name.c_str()); + return hipErrorNotFound; + } + + /* See if this could be solved */ + return it->second->getDynFunc(hfunc, module()); +} + +hipError_t DynCO::populateDynGlobalVars() { + amd::ScopedLock lock(dclock_); + + std::vector var_names; + std::vector undef_var_names; + + //For Dynamic Modules there is only one hipFatBinaryDevInfo_ + device::Program* dev_program + = fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram + (*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + LogPrintfError("Could not get Global vars from Code Obj for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + for (auto& elem : var_names) { + vars_.insert(std::make_pair(elem, new Var(elem, Var::DeviceVarKind::DVK_Variable, 0, 0, 0, nullptr))); + } + + return hipSuccess; +} + +hipError_t DynCO::populateDynGlobalFuncs() { + amd::ScopedLock lock(dclock_); + + std::vector func_names; + device::Program* dev_program + = fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram( + *hip::getCurrentDevice()->devices()[0]); + + // Get all the global func names from COMGR + if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + LogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + for (auto& elem : func_names) { + functions_.insert(std::make_pair(elem, new Function(elem))); + } + + return hipSuccess; +} + +//Static Code Object +StatCO::StatCO() { +} + +StatCO::~StatCO() { + amd::ScopedLock lock(sclock_); + + for (auto& elem : functions_) { + delete elem.second; + } + functions_.clear(); + + for (auto& elem : vars_) { + delete elem.second; + } + vars_.clear(); +} + +hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) { + amd::ScopedLock lock(sclock_); + + if (programs != nullptr) { + return hipSuccess; + } + + // Create a new fat binary object and extract the fat binary for all devices. + programs = new FatBinaryInfo(nullptr, data); + IHIP_RETURN_ONFAIL(programs->ExtractFatBinary(g_devices)); + + return hipSuccess; +} + +FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized) { + amd::ScopedLock lock(sclock_); + + if (initialized) { + digestFatBinary(data, modules_[data]); + } + return &modules_[data]; +} + +hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) { + amd::ScopedLock lock(sclock_); + + auto vit = vars_.begin(); + while (vit != vars_.end()) { + if (vit->second->moduleInfo() == module) { + delete vit->second; + vit = vars_.erase(vit); + } else { + ++vit; + } + } + + auto it = managedVars_.begin(); + while (it != managedVars_.end()) { + if ((*it)->moduleInfo() == module) { + delete *it; + managedVars_.erase(it); + } else { + ++it; + } + } + + auto fit = functions_.begin(); + while (fit != functions_.end()) { + if (fit->second->moduleInfo() == module) { + delete fit->second; + fit = functions_.erase(fit); + } else { + ++fit; + } + } + + auto mit = modules_.begin(); + while (mit != modules_.end()) { + if (&mit->second == module) { + delete mit->second; + mit = modules_.erase(mit); + } else { + ++mit; + } + } + + return hipSuccess; +} + +hipError_t StatCO::registerStatFunction(const void* hostFunction, Function* func) { + amd::ScopedLock lock(sclock_); + + if (functions_.find(hostFunction) != functions_.end()) { + DevLogPrintfError("hostFunctionPtr: 0x%x already exists", hostFunction); + } + functions_.insert(std::make_pair(hostFunction, func)); + + return hipSuccess; +} + +hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return hipErrorInvalidSymbol; + } + + return it->second->getStatFunc(hfunc, deviceId); +} + +hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return hipErrorInvalidSymbol; + } + + return it->second->getStatFuncAttr(func_attr, deviceId); +} + +hipError_t StatCO::registerStatGlobalVar(const void* hostVar, Var* var) { + amd::ScopedLock lock(sclock_); + + if (vars_.find(hostVar) != vars_.end()) { + return hipErrorInvalidSymbol; + } + + vars_.insert(std::make_pair(hostVar, var)); + return hipSuccess; +} + +hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + amd::ScopedLock lock(sclock_); + + const auto it = vars_.find(hostVar); + if (it == vars_.end()) { + return hipErrorInvalidSymbol; + } + + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getStatDeviceVar(&dvar, deviceId)); + + *dev_ptr = dvar->device_ptr(); + *size_ptr = dvar->size(); + return hipSuccess; +} + +hipError_t StatCO::registerStatManagedVar(Var* var) { + managedVars_.emplace_back(var); + return hipSuccess; +} + +hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) { + amd::ScopedLock lock(sclock_); + + if (managedVarsDevicePtrInitalized_.find(deviceId) == managedVarsDevicePtrInitalized_.end() || + !managedVarsDevicePtrInitalized_[deviceId]) { + for (auto var : managedVars_) { + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId)); + + amd::HostQueue* queue = hip::getNullStream(); + if(queue != nullptr) { + ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), var->getManagedVarPtr(), + dvar->size(), hipMemcpyHostToDevice, *queue); + } else { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); + return hipErrorInvalidResourceHandle; + } + } + managedVarsDevicePtrInitalized_[deviceId] = true; + } + return hipSuccess; +} +}; //namespace: hip diff --git a/rocclr/hip_code_object.hpp b/rocclr/hip_code_object.hpp index c580a8ac6a..6e406ad8da 100755 --- a/rocclr/hip_code_object.hpp +++ b/rocclr/hip_code_object.hpp @@ -1,156 +1,156 @@ -/* -Copyright (c) 2015-2020 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_CODE_OBJECT_HPP -#define HIP_CODE_OBJECT_HPP - -#include "hip_global.hpp" - -#include -#include - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hip_internal.hpp" -#include "device/device.hpp" -#include "platform/program.hpp" - -//Forward Declaration for friend usage -class PlatformState; - -namespace hip { - -//Code Object base class -class CodeObject { - public: - virtual ~CodeObject() {} - - // Functions to add_dev_prog and build - static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr, - size_t binary_size); - static hipError_t build_module(hipModule_t hmod, const std::vector& devices); - - // Given an file desc and file size, extracts to code object for corresponding devices, - // return code_objs{binary_ptr, binary_size}, which could be used to determine foffset - static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, - const void ** image, const std::vector& device_names, - std::vector>& code_objs); - - // Given an ptr to memory, extracts to code object for corresponding devices, - // returns code_objs{binary_ptr, binary_size} and uniform resource indicator - static hipError_t ExtractCodeObjectFromMemory(const void* data, - const std::vector& device_names, - std::vector>& code_objs, - std::string& uri); - - static uint64_t ElfSize(const void* emi); - -protected: - //Given an ptr to image or file, extracts to code object - //for corresponding devices - static hipError_t extractCodeObjectFromFatBinary(const void*, - const std::vector&, - std::vector>&); - - CodeObject() {} -private: - friend const std::vector& modules(); -}; - -//Dynamic Code Object -class DynCO : public CodeObject { - amd::Monitor dclock_{"Guards Dynamic Code object", true}; - -public: - DynCO() : device_id_(ihipGetDevice()) {} - virtual ~DynCO(); - - //LoadsCodeObject and its data - hipError_t loadCodeObject(const char* fname, const void* image=nullptr); - hipModule_t module() { return fb_info_->Module(ihipGetDevice()); }; - - //Gets GlobalVar/Functions from a dynamically loaded code object - hipError_t getDynFunc(hipFunction_t* hfunc, std::string func_name); - hipError_t getDeviceVar(DeviceVar** dvar, std::string var_name); - - // Device ID Check to check if module is launched in the same device it was loaded. - inline void CheckDeviceIdMatch() { - if (device_id_ != ihipGetDevice()) { - guarantee(false, "Device mismatch from where this module is loaded"); - } - } - -private: - int device_id_; - FatBinaryInfo* fb_info_; - - //Maps for vars/funcs, could be keyed in with std::string name - std::unordered_map functions_; - std::unordered_map vars_; - - //Populate Global Vars/Funcs from an code object(@ module_load) - hipError_t populateDynGlobalFuncs(); - hipError_t populateDynGlobalVars(); -}; - -//Static Code Object -class StatCO: public CodeObject { - amd::Monitor sclock_{"Guards Static Code object", true}; -public: - StatCO(); - virtual ~StatCO(); - - //Add/Remove/Digest Fat Binaries passed to us from "__hipRegisterFatBinary" - FatBinaryInfo** addFatBinary(const void* data, bool initialized); - hipError_t removeFatBinary(FatBinaryInfo** module); - hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs); - - //Register vars/funcs given to use from __hipRegister[Var/Func/ManagedVar] - hipError_t registerStatFunction(const void* hostFunction, Function* func); - hipError_t registerStatGlobalVar(const void* hostVar, Var* var); - hipError_t registerStatManagedVar(Var *var); - - //Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise. - hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); - hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); - hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, - size_t* size_ptr); - - //Managed variable is a defined symbol in code object - //pointer to the alocated managed memory has to be copied to the address of symbol - hipError_t initStatManagedVarDevicePtr(int deviceId); -private: - friend class ::PlatformState; - //Populated during __hipRegisterFatBinary - std::unordered_map modules_; - //Populated during __hipRegisterFuncs - std::unordered_map functions_; - //Populated during __hipRegisterVars - std::unordered_map vars_; - //Populated during __hipRegisterManagedVar - std::vector managedVars_; - std::unordered_map managedVarsDevicePtrInitalized_; -}; - -}; // namespace hip - -#endif /* HIP_CODE_OBJECT_HPP */ +/* +Copyright (c) 2015-2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_CODE_OBJECT_HPP +#define HIP_CODE_OBJECT_HPP + +#include "hip_global.hpp" + +#include +#include + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "hip_internal.hpp" +#include "device/device.hpp" +#include "platform/program.hpp" + +//Forward Declaration for friend usage +class PlatformState; + +namespace hip { + +//Code Object base class +class CodeObject { + public: + virtual ~CodeObject() {} + + // Functions to add_dev_prog and build + static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr, + size_t binary_size); + static hipError_t build_module(hipModule_t hmod, const std::vector& devices); + + // Given an file desc and file size, extracts to code object for corresponding devices, + // return code_objs{binary_ptr, binary_size}, which could be used to determine foffset + static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, + const void ** image, const std::vector& device_names, + std::vector>& code_objs); + + // Given an ptr to memory, extracts to code object for corresponding devices, + // returns code_objs{binary_ptr, binary_size} and uniform resource indicator + static hipError_t ExtractCodeObjectFromMemory(const void* data, + const std::vector& device_names, + std::vector>& code_objs, + std::string& uri); + + static uint64_t ElfSize(const void* emi); + +protected: + //Given an ptr to image or file, extracts to code object + //for corresponding devices + static hipError_t extractCodeObjectFromFatBinary(const void*, + const std::vector&, + std::vector>&); + + CodeObject() {} +private: + friend const std::vector& modules(); +}; + +//Dynamic Code Object +class DynCO : public CodeObject { + amd::Monitor dclock_{"Guards Dynamic Code object", true}; + +public: + DynCO() : device_id_(ihipGetDevice()) {} + virtual ~DynCO(); + + //LoadsCodeObject and its data + hipError_t loadCodeObject(const char* fname, const void* image=nullptr); + hipModule_t module() { return fb_info_->Module(ihipGetDevice()); }; + + //Gets GlobalVar/Functions from a dynamically loaded code object + hipError_t getDynFunc(hipFunction_t* hfunc, std::string func_name); + hipError_t getDeviceVar(DeviceVar** dvar, std::string var_name); + + // Device ID Check to check if module is launched in the same device it was loaded. + inline void CheckDeviceIdMatch() { + if (device_id_ != ihipGetDevice()) { + guarantee(false, "Device mismatch from where this module is loaded"); + } + } + +private: + int device_id_; + FatBinaryInfo* fb_info_; + + //Maps for vars/funcs, could be keyed in with std::string name + std::unordered_map functions_; + std::unordered_map vars_; + + //Populate Global Vars/Funcs from an code object(@ module_load) + hipError_t populateDynGlobalFuncs(); + hipError_t populateDynGlobalVars(); +}; + +//Static Code Object +class StatCO: public CodeObject { + amd::Monitor sclock_{"Guards Static Code object", true}; +public: + StatCO(); + virtual ~StatCO(); + + //Add/Remove/Digest Fat Binaries passed to us from "__hipRegisterFatBinary" + FatBinaryInfo** addFatBinary(const void* data, bool initialized); + hipError_t removeFatBinary(FatBinaryInfo** module); + hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs); + + //Register vars/funcs given to use from __hipRegister[Var/Func/ManagedVar] + hipError_t registerStatFunction(const void* hostFunction, Function* func); + hipError_t registerStatGlobalVar(const void* hostVar, Var* var); + hipError_t registerStatManagedVar(Var *var); + + //Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise. + hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); + hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + + //Managed variable is a defined symbol in code object + //pointer to the alocated managed memory has to be copied to the address of symbol + hipError_t initStatManagedVarDevicePtr(int deviceId); +private: + friend class ::PlatformState; + //Populated during __hipRegisterFatBinary + std::unordered_map modules_; + //Populated during __hipRegisterFuncs + std::unordered_map functions_; + //Populated during __hipRegisterVars + std::unordered_map vars_; + //Populated during __hipRegisterManagedVar + std::vector managedVars_; + std::unordered_map managedVarsDevicePtrInitalized_; +}; + +}; // namespace hip + +#endif /* HIP_CODE_OBJECT_HPP */ diff --git a/rocclr/hip_fatbin.cpp b/rocclr/hip_fatbin.cpp index c36e0237e7..04ae382bdf 100755 --- a/rocclr/hip_fatbin.cpp +++ b/rocclr/hip_fatbin.cpp @@ -1,158 +1,158 @@ -#include "hip_fatbin.hpp" - -#include "hip_code_object.hpp" - -namespace hip { - -FatBinaryDeviceInfo::~FatBinaryDeviceInfo() { - if (program_ != nullptr) { - program_->release(); - program_ = nullptr; - } -} - -FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) - : fdesc_(amd::Os::FDescInit()), fsize_(0), image_(image), uri_(std::string()) { - - if (fname != nullptr) { - fname_ = std::string(fname); - } else { - fname_ = std::string(); - } - - fatbin_dev_info_.resize(g_devices.size()); -} - -FatBinaryInfo::~FatBinaryInfo() { - - for (auto& fbd: fatbin_dev_info_) { - delete fbd; - } - - if (fdesc_ > 0) { - if (fsize_ && !amd::Os::MemoryUnmapFile(image_, fsize_)) { - guarantee(false, "Cannot unmap file"); - } - if (!amd::Os::CloseFileHandle(fdesc_)) { - guarantee(false, "Cannot close file"); - } - } - - fname_ = std::string(); - fdesc_ = amd::Os::FDescInit(); - fsize_ = 0; - image_ = nullptr; - uri_ = std::string(); -} - -hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devices) { - hipError_t hip_error = hipSuccess; - std::vector> code_objs; - - // Copy device names for Extract Code object File - std::vector device_names; - device_names.reserve(devices.size()); - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); - } - - // We are given file name, get the file desc and file size - if (fname_.size() > 0) { - // Get File Handle & size of the file. - if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) { - return hipErrorFileNotFound; - } - if (fsize_ == 0) { - return hipErrorInvalidKernelFile; - } - - // Extract the code object from file - hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_, - device_names, code_objs); - - } else if (image_ != nullptr) { - // We are directly given image pointer directly, try to extract file desc & file Size - hip_error = CodeObject::ExtractCodeObjectFromMemory(image_, - device_names, code_objs, uri_); - } else { - return hipErrorInvalidValue; - } - - if (hip_error == hipErrorNoBinaryForGpu) { - guarantee(false, "hipErrorNoBinaryForGpu: Couldn't find binary for current devices!"); - return hip_error; - } - - if (hip_error == hipErrorInvalidKernelFile) { - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - // the image type is no CLANG_OFFLOAD_BUNDLER, image for current device directly passed - fatbin_dev_info_[devices[dev_idx]->deviceId()] - = new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0); - } - } else if(hip_error == hipSuccess) { - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - // Calculate the offset wrt binary_image and the original image - size_t offset_l - = (reinterpret_cast
(const_cast(code_objs[dev_idx].first)) - - reinterpret_cast
(const_cast(image_))); - - fatbin_dev_info_[devices[dev_idx]->deviceId()] - = new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l); - } - } - - for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { - fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ - = new amd::Program(*devices[dev_idx]->asContext()); - if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { - return hipErrorOutOfMemory; - } - } - - return hipSuccess; -} - -hipError_t FatBinaryInfo::AddDevProgram(const int device_id) { - // Device Id bounds Check - DeviceIdCheck(device_id); - - FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; - // If fat binary was already added, skip this step and return success - if (fbd_info->add_dev_prog_ == false) { - amd::Context* ctx = g_devices[device_id]->asContext(); - if (CL_SUCCESS != fbd_info->program_->addDeviceProgram(*ctx->devices()[0], - fbd_info->binary_image_, - fbd_info->binary_size_, false, - nullptr, nullptr, fdesc_, - fbd_info->binary_offset_, uri_)) { - return hipErrorInvalidKernelFile; - } - fbd_info->add_dev_prog_ = true; - } - return hipSuccess; -} - -hipError_t FatBinaryInfo::BuildProgram(const int device_id) { - - // Device Id Check and Add DeviceProgram if not added so far - DeviceIdCheck(device_id); - IHIP_RETURN_ONFAIL(AddDevProgram(device_id)); - - // If Program was already built skip this step and return success - FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; - if (fbd_info->prog_built_ == false) { - if(CL_SUCCESS != fbd_info->program_->build(g_devices[device_id]->devices(), - nullptr, nullptr, nullptr, - kOptionChangeable, kNewDevProg)) { - return hipErrorSharedObjectInitFailed; - } - fbd_info->prog_built_ = true; - } - - if (!fbd_info->program_->load()) { - return hipErrorSharedObjectInitFailed; - } - return hipSuccess; -} - -} //namespace : hip +#include "hip_fatbin.hpp" + +#include "hip_code_object.hpp" + +namespace hip { + +FatBinaryDeviceInfo::~FatBinaryDeviceInfo() { + if (program_ != nullptr) { + program_->release(); + program_ = nullptr; + } +} + +FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) + : fdesc_(amd::Os::FDescInit()), fsize_(0), image_(image), uri_(std::string()) { + + if (fname != nullptr) { + fname_ = std::string(fname); + } else { + fname_ = std::string(); + } + + fatbin_dev_info_.resize(g_devices.size()); +} + +FatBinaryInfo::~FatBinaryInfo() { + + for (auto& fbd: fatbin_dev_info_) { + delete fbd; + } + + if (fdesc_ > 0) { + if (fsize_ && !amd::Os::MemoryUnmapFile(image_, fsize_)) { + guarantee(false, "Cannot unmap file"); + } + if (!amd::Os::CloseFileHandle(fdesc_)) { + guarantee(false, "Cannot close file"); + } + } + + fname_ = std::string(); + fdesc_ = amd::Os::FDescInit(); + fsize_ = 0; + image_ = nullptr; + uri_ = std::string(); +} + +hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devices) { + hipError_t hip_error = hipSuccess; + std::vector> code_objs; + + // Copy device names for Extract Code object File + std::vector device_names; + device_names.reserve(devices.size()); + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); + } + + // We are given file name, get the file desc and file size + if (fname_.size() > 0) { + // Get File Handle & size of the file. + if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) { + return hipErrorFileNotFound; + } + if (fsize_ == 0) { + return hipErrorInvalidKernelFile; + } + + // Extract the code object from file + hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_, + device_names, code_objs); + + } else if (image_ != nullptr) { + // We are directly given image pointer directly, try to extract file desc & file Size + hip_error = CodeObject::ExtractCodeObjectFromMemory(image_, + device_names, code_objs, uri_); + } else { + return hipErrorInvalidValue; + } + + if (hip_error == hipErrorNoBinaryForGpu) { + guarantee(false, "hipErrorNoBinaryForGpu: Couldn't find binary for current devices!"); + return hip_error; + } + + if (hip_error == hipErrorInvalidKernelFile) { + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + // the image type is no CLANG_OFFLOAD_BUNDLER, image for current device directly passed + fatbin_dev_info_[devices[dev_idx]->deviceId()] + = new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0); + } + } else if(hip_error == hipSuccess) { + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + // Calculate the offset wrt binary_image and the original image + size_t offset_l + = (reinterpret_cast
(const_cast(code_objs[dev_idx].first)) + - reinterpret_cast
(const_cast(image_))); + + fatbin_dev_info_[devices[dev_idx]->deviceId()] + = new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l); + } + } + + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ + = new amd::Program(*devices[dev_idx]->asContext()); + if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { + return hipErrorOutOfMemory; + } + } + + return hipSuccess; +} + +hipError_t FatBinaryInfo::AddDevProgram(const int device_id) { + // Device Id bounds Check + DeviceIdCheck(device_id); + + FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; + // If fat binary was already added, skip this step and return success + if (fbd_info->add_dev_prog_ == false) { + amd::Context* ctx = g_devices[device_id]->asContext(); + if (CL_SUCCESS != fbd_info->program_->addDeviceProgram(*ctx->devices()[0], + fbd_info->binary_image_, + fbd_info->binary_size_, false, + nullptr, nullptr, fdesc_, + fbd_info->binary_offset_, uri_)) { + return hipErrorInvalidKernelFile; + } + fbd_info->add_dev_prog_ = true; + } + return hipSuccess; +} + +hipError_t FatBinaryInfo::BuildProgram(const int device_id) { + + // Device Id Check and Add DeviceProgram if not added so far + DeviceIdCheck(device_id); + IHIP_RETURN_ONFAIL(AddDevProgram(device_id)); + + // If Program was already built skip this step and return success + FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; + if (fbd_info->prog_built_ == false) { + if(CL_SUCCESS != fbd_info->program_->build(g_devices[device_id]->devices(), + nullptr, nullptr, nullptr, + kOptionChangeable, kNewDevProg)) { + return hipErrorSharedObjectInitFailed; + } + fbd_info->prog_built_ = true; + } + + if (!fbd_info->program_->load()) { + return hipErrorSharedObjectInitFailed; + } + return hipSuccess; +} + +} //namespace : hip diff --git a/rocclr/hip_fatbin.hpp b/rocclr/hip_fatbin.hpp index 421fe78b94..219a96e802 100755 --- a/rocclr/hip_fatbin.hpp +++ b/rocclr/hip_fatbin.hpp @@ -1,87 +1,87 @@ -#ifndef HIP_FAT_BINARY_HPP -#define HIP_FAT_BINARY_HPP - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hip_internal.hpp" -#include "platform/program.hpp" - -namespace hip { - -//Fat Binary Per Device info -class FatBinaryDeviceInfo { -public: - FatBinaryDeviceInfo (const void* binary_image, size_t binary_size, size_t binary_offset) - : binary_image_(binary_image), binary_size_(binary_size), - binary_offset_(binary_offset), program_(nullptr), - add_dev_prog_(false), prog_built_(false) {} - - ~FatBinaryDeviceInfo(); - -private: - const void* binary_image_; // binary image ptr - size_t binary_size_; // binary image size - size_t binary_offset_; // image offset from original - - amd::Program* program_; // reinterpreted as hipModule_t - friend class FatBinaryInfo; - - //Control Variables - bool add_dev_prog_; - bool prog_built_; -}; - - -// Fat Binary Info -class FatBinaryInfo { -public: - FatBinaryInfo(const char* fname, const void* image); - ~FatBinaryInfo(); - - // Loads Fat binary from file or image, unbundles COs for devices. - hipError_t ExtractFatBinary(const std::vector& devices); - hipError_t AddDevProgram(const int device_id); - hipError_t BuildProgram(const int device_id); - - - // Device Id bounds check - inline void DeviceIdCheck(const int device_id) const { - guarantee(device_id >= 0, "Invalid DeviceId less than 0"); - guarantee(static_cast(device_id) < fatbin_dev_info_.size(), "Invalid DeviceId, greater than no of fatbin device info!"); - } - - // Getter Methods - amd::Program* GetProgram(int device_id) { - DeviceIdCheck(device_id); - return fatbin_dev_info_[device_id]->program_; - } - - hipModule_t Module(int device_id) const { - DeviceIdCheck(device_id); - return reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); - } - - hipError_t GetModule(int device_id, hipModule_t* hmod) const { - DeviceIdCheck(device_id); - *hmod = reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); - return hipSuccess; - } - -private: - std::string fname_; // File name - amd::Os::FileDesc fdesc_; // File descriptor - size_t fsize_; // Total file size - - // Even when file is passed image will be mmapped till ~desctructor. - const void* image_; // Image - - // Only used for FBs where image is directly passed - std::string uri_; // Uniform resource indicator - - // Per Device Info, like corresponding binary ptr, size. - std::vector fatbin_dev_info_; -}; - -}; /* namespace hip */ - -#endif /* HIP_FAT_BINARY_HPP */ +#ifndef HIP_FAT_BINARY_HPP +#define HIP_FAT_BINARY_HPP + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "hip_internal.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Fat Binary Per Device info +class FatBinaryDeviceInfo { +public: + FatBinaryDeviceInfo (const void* binary_image, size_t binary_size, size_t binary_offset) + : binary_image_(binary_image), binary_size_(binary_size), + binary_offset_(binary_offset), program_(nullptr), + add_dev_prog_(false), prog_built_(false) {} + + ~FatBinaryDeviceInfo(); + +private: + const void* binary_image_; // binary image ptr + size_t binary_size_; // binary image size + size_t binary_offset_; // image offset from original + + amd::Program* program_; // reinterpreted as hipModule_t + friend class FatBinaryInfo; + + //Control Variables + bool add_dev_prog_; + bool prog_built_; +}; + + +// Fat Binary Info +class FatBinaryInfo { +public: + FatBinaryInfo(const char* fname, const void* image); + ~FatBinaryInfo(); + + // Loads Fat binary from file or image, unbundles COs for devices. + hipError_t ExtractFatBinary(const std::vector& devices); + hipError_t AddDevProgram(const int device_id); + hipError_t BuildProgram(const int device_id); + + + // Device Id bounds check + inline void DeviceIdCheck(const int device_id) const { + guarantee(device_id >= 0, "Invalid DeviceId less than 0"); + guarantee(static_cast(device_id) < fatbin_dev_info_.size(), "Invalid DeviceId, greater than no of fatbin device info!"); + } + + // Getter Methods + amd::Program* GetProgram(int device_id) { + DeviceIdCheck(device_id); + return fatbin_dev_info_[device_id]->program_; + } + + hipModule_t Module(int device_id) const { + DeviceIdCheck(device_id); + return reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); + } + + hipError_t GetModule(int device_id, hipModule_t* hmod) const { + DeviceIdCheck(device_id); + *hmod = reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); + return hipSuccess; + } + +private: + std::string fname_; // File name + amd::Os::FileDesc fdesc_; // File descriptor + size_t fsize_; // Total file size + + // Even when file is passed image will be mmapped till ~desctructor. + const void* image_; // Image + + // Only used for FBs where image is directly passed + std::string uri_; // Uniform resource indicator + + // Per Device Info, like corresponding binary ptr, size. + std::vector fatbin_dev_info_; +}; + +}; /* namespace hip */ + +#endif /* HIP_FAT_BINARY_HPP */ diff --git a/rocclr/hip_global.cpp b/rocclr/hip_global.cpp index 00c29013a2..c5025d978f 100755 --- a/rocclr/hip_global.cpp +++ b/rocclr/hip_global.cpp @@ -1,196 +1,196 @@ -#include "hip_global.hpp" - -#include "hip/hip_runtime.h" -#include "hip_internal.hpp" -#include "hip_code_object.hpp" -#include "platform/program.hpp" - -namespace hip { - -//Device Vars -DeviceVar::DeviceVar(std::string name, hipModule_t hmod) : shadowVptr(nullptr), name_(name), - amd_mem_obj_(nullptr), device_ptr_(nullptr), - size_(0) { - amd::Program* program = as_amd(reinterpret_cast(hmod)); - device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); - if (dev_program == nullptr) { - LogPrintfError("Cannot get Device Program for module: 0x%x \n", hmod); - guarantee(false, "Cannot get Device Program"); - } - - if(!dev_program->createGlobalVarObj(&amd_mem_obj_, &device_ptr_, &size_, name.c_str())) { - LogPrintfError("Cannot create Global Var obj for symbol: %s \n", name.c_str()); - guarantee(false, "Cannot create GlobalVar Obj"); - } - - // Handle size 0 symbols - if (size_ != 0) { - if (amd_mem_obj_ == nullptr || device_ptr_ == nullptr) { - LogPrintfError("Cannot get memory for creating device Var: %s", name.c_str()); - guarantee(false, "Cannot get memory for creating device var"); - } - amd::MemObjMap::AddMemObj(device_ptr_, amd_mem_obj_); - } -} - -DeviceVar::~DeviceVar() { - if (amd_mem_obj_ != nullptr) { - amd::MemObjMap::RemoveMemObj(device_ptr_); - amd_mem_obj_->release(); - } - - if (shadowVptr != nullptr) { - textureReference* texRef = reinterpret_cast(shadowVptr); - delete texRef; - shadowVptr = nullptr; - } - - device_ptr_ = nullptr; - size_ = 0; -} - -//Device Functions -DeviceFunc::DeviceFunc(std::string name, hipModule_t hmod) : dflock_("function lock"), - name_(name), kernel_(nullptr) { - amd::Program* program = as_amd(reinterpret_cast(hmod)); - - const amd::Symbol *symbol = program->findSymbol(name.c_str()); - if (symbol == nullptr) { - LogPrintfError("Cannot find Symbol with name: %s \n", name.c_str()); - guarantee(false, "Cannot find Symbol"); - } - - kernel_ = new amd::Kernel(*program, *symbol, name); - if (kernel_ == nullptr) { - LogPrintfError("Cannot create kernel with name: %s \n", name.c_str()); - guarantee(false, "Cannot Create kernel"); - } -} - -DeviceFunc::~DeviceFunc() { - if (kernel_ != nullptr) { - kernel_->release(); - } -} - -//Abstract functions -Function::Function(std::string name, FatBinaryInfo** modules) - : name_(name), modules_(modules) { - dFunc_.resize(g_devices.size()); -} - -Function::~Function() { - for (auto& elem : dFunc_) { - delete elem; - } - name_ = ""; - modules_ = nullptr; -} - -hipError_t Function::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod) { - guarantee((dFunc_.size() == g_devices.size()), "dFunc Size mismatch"); - if (dFunc_[ihipGetDevice()] == nullptr) { - dFunc_[ihipGetDevice()] = new DeviceFunc(name_, hmod); - } - *hfunc = dFunc_[ihipGetDevice()]->asHipFunction(); - - return hipSuccess; -} - -hipError_t Function::getStatFunc(hipFunction_t* hfunc, int deviceId) { - guarantee(modules_ != nullptr, "Module not initialized"); - - hipModule_t hmod = nullptr; - IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); - IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); - - if (dFunc_[deviceId] == nullptr) { - dFunc_[deviceId] = new DeviceFunc(name_, hmod); - } - *hfunc = dFunc_[deviceId]->asHipFunction(); - - return hipSuccess; -} - -hipError_t Function::getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId) { - guarantee((modules_ != nullptr), "Module not initialized"); - - hipModule_t hmod = nullptr; - IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); - IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); - - if (dFunc_[deviceId] == nullptr) { - dFunc_[deviceId] = new DeviceFunc(name_, hmod); - } - - const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); - - amd::Kernel* kernel = dFunc_[deviceId]->kernel(); - const device::Kernel::WorkGroupInfo* wginfo = kernel->getDeviceKernel(*devices[deviceId])->workGroupInfo(); - func_attr->sharedSizeBytes = static_cast(wginfo->localMemSize_); - func_attr->binaryVersion = static_cast(kernel->signature().version()); - func_attr->cacheModeCA = 0; - func_attr->constSizeBytes = 0; - func_attr->localSizeBytes = wginfo->privateMemSize_; - func_attr->maxDynamicSharedSizeBytes = static_cast(wginfo->availableLDSSize_ - - wginfo->localMemSize_); - - func_attr->maxThreadsPerBlock = static_cast(wginfo->size_); - func_attr->numRegs = static_cast(wginfo->usedVGPRs_); - func_attr->preferredShmemCarveout = 0; - func_attr->ptxVersion = 30; - - - return hipSuccess; -} - -//Abstract Vars -Var::Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, - FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), size_(size), - type_(type), norm_(norm), modules_(modules) { - dVar_.resize(g_devices.size()); -} - -Var::Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, - unsigned align, FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), - size_(size), modules_(modules), managedVarPtr_(pointer), align_(align) { - dVar_.resize(g_devices.size()); -} - -Var::~Var() { - for (auto& elem : dVar_) { - delete elem; - } - modules_ = nullptr; -} - -hipError_t Var::getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod) { - guarantee((deviceId >= 0), "Invalid DeviceId, less than zero"); - guarantee((static_cast(deviceId) < g_devices.size()), - "Invalid DeviceId, greater than no of code objects"); - guarantee((dVar_.size() == g_devices.size()), - "Device Var not initialized to size"); - - if (dVar_[deviceId] == nullptr) { - dVar_[deviceId] = new DeviceVar(name_, hmod); - } - - *dvar = dVar_[deviceId]; - return hipSuccess; -} - -hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) { - guarantee((deviceId >= 0) , "Invalid DeviceId, less than zero"); - guarantee((static_cast(deviceId) < g_devices.size()), - "Invalid DeviceId, greater than no of code objects"); - if (dVar_[deviceId] == nullptr) { - hipModule_t hmod = nullptr; - IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); - IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); - dVar_[deviceId] = new DeviceVar(name_, hmod); - } - *dvar = dVar_[deviceId]; - return hipSuccess; -} - -}; //namespace: hip +#include "hip_global.hpp" + +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_code_object.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Device Vars +DeviceVar::DeviceVar(std::string name, hipModule_t hmod) : shadowVptr(nullptr), name_(name), + amd_mem_obj_(nullptr), device_ptr_(nullptr), + size_(0) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + if (dev_program == nullptr) { + LogPrintfError("Cannot get Device Program for module: 0x%x \n", hmod); + guarantee(false, "Cannot get Device Program"); + } + + if(!dev_program->createGlobalVarObj(&amd_mem_obj_, &device_ptr_, &size_, name.c_str())) { + LogPrintfError("Cannot create Global Var obj for symbol: %s \n", name.c_str()); + guarantee(false, "Cannot create GlobalVar Obj"); + } + + // Handle size 0 symbols + if (size_ != 0) { + if (amd_mem_obj_ == nullptr || device_ptr_ == nullptr) { + LogPrintfError("Cannot get memory for creating device Var: %s", name.c_str()); + guarantee(false, "Cannot get memory for creating device var"); + } + amd::MemObjMap::AddMemObj(device_ptr_, amd_mem_obj_); + } +} + +DeviceVar::~DeviceVar() { + if (amd_mem_obj_ != nullptr) { + amd::MemObjMap::RemoveMemObj(device_ptr_); + amd_mem_obj_->release(); + } + + if (shadowVptr != nullptr) { + textureReference* texRef = reinterpret_cast(shadowVptr); + delete texRef; + shadowVptr = nullptr; + } + + device_ptr_ = nullptr; + size_ = 0; +} + +//Device Functions +DeviceFunc::DeviceFunc(std::string name, hipModule_t hmod) : dflock_("function lock"), + name_(name), kernel_(nullptr) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol *symbol = program->findSymbol(name.c_str()); + if (symbol == nullptr) { + LogPrintfError("Cannot find Symbol with name: %s \n", name.c_str()); + guarantee(false, "Cannot find Symbol"); + } + + kernel_ = new amd::Kernel(*program, *symbol, name); + if (kernel_ == nullptr) { + LogPrintfError("Cannot create kernel with name: %s \n", name.c_str()); + guarantee(false, "Cannot Create kernel"); + } +} + +DeviceFunc::~DeviceFunc() { + if (kernel_ != nullptr) { + kernel_->release(); + } +} + +//Abstract functions +Function::Function(std::string name, FatBinaryInfo** modules) + : name_(name), modules_(modules) { + dFunc_.resize(g_devices.size()); +} + +Function::~Function() { + for (auto& elem : dFunc_) { + delete elem; + } + name_ = ""; + modules_ = nullptr; +} + +hipError_t Function::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod) { + guarantee((dFunc_.size() == g_devices.size()), "dFunc Size mismatch"); + if (dFunc_[ihipGetDevice()] == nullptr) { + dFunc_[ihipGetDevice()] = new DeviceFunc(name_, hmod); + } + *hfunc = dFunc_[ihipGetDevice()]->asHipFunction(); + + return hipSuccess; +} + +hipError_t Function::getStatFunc(hipFunction_t* hfunc, int deviceId) { + guarantee(modules_ != nullptr, "Module not initialized"); + + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); + + if (dFunc_[deviceId] == nullptr) { + dFunc_[deviceId] = new DeviceFunc(name_, hmod); + } + *hfunc = dFunc_[deviceId]->asHipFunction(); + + return hipSuccess; +} + +hipError_t Function::getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId) { + guarantee((modules_ != nullptr), "Module not initialized"); + + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); + + if (dFunc_[deviceId] == nullptr) { + dFunc_[deviceId] = new DeviceFunc(name_, hmod); + } + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + amd::Kernel* kernel = dFunc_[deviceId]->kernel(); + const device::Kernel::WorkGroupInfo* wginfo = kernel->getDeviceKernel(*devices[deviceId])->workGroupInfo(); + func_attr->sharedSizeBytes = static_cast(wginfo->localMemSize_); + func_attr->binaryVersion = static_cast(kernel->signature().version()); + func_attr->cacheModeCA = 0; + func_attr->constSizeBytes = 0; + func_attr->localSizeBytes = wginfo->privateMemSize_; + func_attr->maxDynamicSharedSizeBytes = static_cast(wginfo->availableLDSSize_ + - wginfo->localMemSize_); + + func_attr->maxThreadsPerBlock = static_cast(wginfo->size_); + func_attr->numRegs = static_cast(wginfo->usedVGPRs_); + func_attr->preferredShmemCarveout = 0; + func_attr->ptxVersion = 30; + + + return hipSuccess; +} + +//Abstract Vars +Var::Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, + FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), size_(size), + type_(type), norm_(norm), modules_(modules) { + dVar_.resize(g_devices.size()); +} + +Var::Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, + unsigned align, FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), + size_(size), modules_(modules), managedVarPtr_(pointer), align_(align) { + dVar_.resize(g_devices.size()); +} + +Var::~Var() { + for (auto& elem : dVar_) { + delete elem; + } + modules_ = nullptr; +} + +hipError_t Var::getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod) { + guarantee((deviceId >= 0), "Invalid DeviceId, less than zero"); + guarantee((static_cast(deviceId) < g_devices.size()), + "Invalid DeviceId, greater than no of code objects"); + guarantee((dVar_.size() == g_devices.size()), + "Device Var not initialized to size"); + + if (dVar_[deviceId] == nullptr) { + dVar_[deviceId] = new DeviceVar(name_, hmod); + } + + *dvar = dVar_[deviceId]; + return hipSuccess; +} + +hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) { + guarantee((deviceId >= 0) , "Invalid DeviceId, less than zero"); + guarantee((static_cast(deviceId) < g_devices.size()), + "Invalid DeviceId, greater than no of code objects"); + if (dVar_[deviceId] == nullptr) { + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); + dVar_[deviceId] = new DeviceVar(name_, hmod); + } + *dvar = dVar_[deviceId]; + return hipSuccess; +} + +}; //namespace: hip diff --git a/rocclr/hip_global.hpp b/rocclr/hip_global.hpp index 83fcfab817..55f0027d58 100755 --- a/rocclr/hip_global.hpp +++ b/rocclr/hip_global.hpp @@ -1,119 +1,119 @@ -#ifndef HIP_GLOBAL_HPP -#define HIP_GLOBAL_HPP - -#include -#include - -#include "hip/hip_runtime_api.h" -#include "hip/hip_runtime.h" -#include "hip_internal.hpp" -#include "hip_fatbin.hpp" -#include "platform/program.hpp" - -namespace hip { - -//Forward Declaration -class CodeObject; - -//Device Structures -class DeviceVar { -public: - DeviceVar(std::string name, hipModule_t hmod); - ~DeviceVar(); - - //Accessors for device ptr and size, populated during constructor. - hipDeviceptr_t device_ptr() const { return device_ptr_; } - size_t size() const { return size_; } - std::string name() const { return name_; } - void* shadowVptr; - -private: - std::string name_; //Name of the var - amd::Memory* amd_mem_obj_; //amd_mem_obj abstraction - hipDeviceptr_t device_ptr_; //Device Pointer - size_t size_; //Size of the var -}; - -class DeviceFunc { -public: - DeviceFunc(std::string name, hipModule_t hmod); - ~DeviceFunc(); - - amd::Monitor dflock_; - - //Converts DeviceFunc to hipFunction_t(used by app) and vice versa. - hipFunction_t asHipFunction() { return reinterpret_cast(this); } - static DeviceFunc* asFunction(hipFunction_t f) { return reinterpret_cast(f); } - - //Accessor for kernel_ and name_ populated during constructor. - std::string name() const { return name_; } - amd::Kernel* kernel() const { return kernel_; } - -private: - std::string name_; //name of the func(not unique identifier) - amd::Kernel* kernel_; //Kernel ptr referencing to ROCclr Symbol -}; - -//Abstract Structures -class Function { -public: - Function(std::string name, FatBinaryInfo** modules=nullptr); - ~Function(); - - //Return DeviceFunc for this this dynamically loaded module - hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod); - - //Return Device Func & attr . Generate/build if not already done so. - hipError_t getStatFunc(hipFunction_t *hfunc, int deviceId); - hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId); - void resize_dFunc(size_t size) { dFunc_.resize(size); } - FatBinaryInfo** moduleInfo() { return modules_; }; - -private: - std::vector dFunc_; //DeviceFuncObj per Device - std::string name_; //name of the func(not unique identifier) - FatBinaryInfo** modules_; // static module where it is referenced -}; - -class Var { -public: - //Types of variable - enum DeviceVarKind { - DVK_Variable = 0, - DVK_Surface, - DVK_Texture, - DVK_Managed - }; - - Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, - FatBinaryInfo** modules = nullptr); - - Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, unsigned align, - FatBinaryInfo** modules = nullptr); - - ~Var(); - - //Return DeviceVar for this dynamically loaded module - hipError_t getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod); - - //Return DeviceVar for module Generate/build if not already done so. - hipError_t getStatDeviceVar(DeviceVar** dvar, int deviceId); - void resize_dVar(size_t size) { dVar_.resize(size); } - - FatBinaryInfo** moduleInfo() { return modules_; }; - void* getManagedVarPtr() { return managedVarPtr_; }; -private: - std::vector dVar_; // DeviceVarObj per Device - std::string name_; // Variable name (not unique identifier) - DeviceVarKind dVarKind_; // Variable kind - size_t size_; // Size of the variable - int type_; // Type(Textures/Surfaces only) - int norm_; // Type(Textures/Surfaces only) - FatBinaryInfo** modules_; // static module where it is referenced - - void *managedVarPtr_; // Managed memory pointer with size_ & align_ - unsigned int align_; // Managed memory alignment -}; - -}; //namespace: hip -#endif /* HIP_GLOBAL_HPP */ +#ifndef HIP_GLOBAL_HPP +#define HIP_GLOBAL_HPP + +#include +#include + +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_fatbin.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Forward Declaration +class CodeObject; + +//Device Structures +class DeviceVar { +public: + DeviceVar(std::string name, hipModule_t hmod); + ~DeviceVar(); + + //Accessors for device ptr and size, populated during constructor. + hipDeviceptr_t device_ptr() const { return device_ptr_; } + size_t size() const { return size_; } + std::string name() const { return name_; } + void* shadowVptr; + +private: + std::string name_; //Name of the var + amd::Memory* amd_mem_obj_; //amd_mem_obj abstraction + hipDeviceptr_t device_ptr_; //Device Pointer + size_t size_; //Size of the var +}; + +class DeviceFunc { +public: + DeviceFunc(std::string name, hipModule_t hmod); + ~DeviceFunc(); + + amd::Monitor dflock_; + + //Converts DeviceFunc to hipFunction_t(used by app) and vice versa. + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + static DeviceFunc* asFunction(hipFunction_t f) { return reinterpret_cast(f); } + + //Accessor for kernel_ and name_ populated during constructor. + std::string name() const { return name_; } + amd::Kernel* kernel() const { return kernel_; } + +private: + std::string name_; //name of the func(not unique identifier) + amd::Kernel* kernel_; //Kernel ptr referencing to ROCclr Symbol +}; + +//Abstract Structures +class Function { +public: + Function(std::string name, FatBinaryInfo** modules=nullptr); + ~Function(); + + //Return DeviceFunc for this this dynamically loaded module + hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod); + + //Return Device Func & attr . Generate/build if not already done so. + hipError_t getStatFunc(hipFunction_t *hfunc, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId); + void resize_dFunc(size_t size) { dFunc_.resize(size); } + FatBinaryInfo** moduleInfo() { return modules_; }; + +private: + std::vector dFunc_; //DeviceFuncObj per Device + std::string name_; //name of the func(not unique identifier) + FatBinaryInfo** modules_; // static module where it is referenced +}; + +class Var { +public: + //Types of variable + enum DeviceVarKind { + DVK_Variable = 0, + DVK_Surface, + DVK_Texture, + DVK_Managed + }; + + Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, + FatBinaryInfo** modules = nullptr); + + Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, unsigned align, + FatBinaryInfo** modules = nullptr); + + ~Var(); + + //Return DeviceVar for this dynamically loaded module + hipError_t getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod); + + //Return DeviceVar for module Generate/build if not already done so. + hipError_t getStatDeviceVar(DeviceVar** dvar, int deviceId); + void resize_dVar(size_t size) { dVar_.resize(size); } + + FatBinaryInfo** moduleInfo() { return modules_; }; + void* getManagedVarPtr() { return managedVarPtr_; }; +private: + std::vector dVar_; // DeviceVarObj per Device + std::string name_; // Variable name (not unique identifier) + DeviceVarKind dVarKind_; // Variable kind + size_t size_; // Size of the variable + int type_; // Type(Textures/Surfaces only) + int norm_; // Type(Textures/Surfaces only) + FatBinaryInfo** modules_; // static module where it is referenced + + void *managedVarPtr_; // Managed memory pointer with size_ & align_ + unsigned int align_; // Managed memory alignment +}; + +}; //namespace: hip +#endif /* HIP_GLOBAL_HPP */