diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index 65896e97a7..02e2f1e524 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -80,5 +80,7 @@ namespace hip_impl std::unordered_map& globals(); hsa_executable_t load_executable( - hsa_executable_t executable, hsa_agent_t agent, std::istream& file); + const std::string& file, + hsa_executable_t executable, + hsa_agent_t agent); } // Namespace hip_impl. \ No newline at end of file diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 3fd09630d9..d3f4c8d584 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -372,16 +372,16 @@ public: }; -class ihipModule_t { -public: - hsa_executable_t executable; - hsa_code_object_t object; +struct ihipModule_t { std::string fileName; - void *ptr; - size_t size; - std::list funcTrack; - std::unordered_map coGlobals; - ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {} + hsa_executable_t executable = {}; + hsa_code_object_reader_t coReader = {}; + + ~ihipModule_t() + { + if (executable.handle) hsa_executable_destroy(executable); + if (coReader.handle) hsa_code_object_reader_destroy(coReader); + } }; @@ -669,11 +669,11 @@ template class ihipEventCriticalBase_t : LockedBase { public: - ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) : + ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) : _parent(parentEvent) {} ~ihipEventCriticalBase_t() {}; - + // Keep data in structure so it can be easily copied into snapshots // (used to reduce lock contention and preserve correct lock order) ihipEventData_t _eventData; @@ -698,7 +698,7 @@ public: // Return a copy of the critical state. The critical data is locked during the copy. ihipEventData_t locked_copyCrit() { LockedAccessor_EventCrit_t crit(_criticalData); - return _criticalData._eventData; + return _criticalData._eventData; }; ihipEventCritical_t &criticalData() { return _criticalData; }; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 1477247ae2..45a44b3666 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -20,63 +20,65 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "elfio/elfio.hpp" +#include "hip/hip_runtime.h" +#include "hip/hcc_detail/program_state.hpp" +#include "hip_hcc_internal.h" +#include "hsa_helpers.hpp" +#include "trace_helper.h" #include #include #include -#include "elfio/elfio.hpp" -#include "hip/hip_runtime.h" -#include "hip/hcc_detail/program_state.hpp" -#include "hip_hcc_internal.h" -#include "trace_helper.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include //TODO Use Pool APIs from HCC to get memory regions. -#include +using namespace ELFIO; +using namespace hip_impl; +using namespace std; + inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { assert(Align != 0u && "Align can't be 0."); Skew %= Align; return (Value + Align - 1 - Skew) / Align * Align + Skew; } + struct ihipKernArgInfo{ - std::vector Size; - std::vector Align; - std::vector ArgType; - std::vector ArgName; + vector Size; + vector Align; + vector ArgType; + vector ArgName; uint32_t totalSize; }; -std::map kernelArguments; - -struct MyElfNote { - uint32_t n_namesz = 0; - uint32_t n_descsz = 0; - uint32_t n_type = 0; - - MyElfNote() = default; -}; +map kernelArguments; struct ihipModuleSymbol_t{ - uint64_t _object; // The kernel object. - uint32_t _groupSegmentSize; - uint32_t _privateSegmentSize; - std::string _name; // TODO - review for performance cost. Name is just used for debug. + uint64_t _object; // The kernel object. + uint32_t _groupSegmentSize; + uint32_t _privateSegmentSize; + string _name; // TODO - review for performance cost. Name is just used for debug. }; template <> -std::string ToString(hipFunction_t v) +string ToString(hipFunction_t v) { std::ostringstream ss; ss << "0x" << std::hex << v->_object; @@ -94,113 +96,20 @@ if (hsaStatus != HSA_STATUS_SUCCESS) {\ return ihipLogStatus(hipStatus);\ } -namespace hipdrv { - - hsa_status_t findSystemRegions(hsa_region_t region, void *data){ - hsa_region_segment_t segment_id; - hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); - - if(segment_id != HSA_REGION_SEGMENT_GLOBAL){ - return HSA_STATUS_SUCCESS; - } - - hsa_region_global_flag_t flags; - hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); - - hsa_region_t *reg = (hsa_region_t*)data; - - if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){ - *reg = region; - } - - return HSA_STATUS_SUCCESS; - } - -} // End namespace hipdrv - -uint64_t PrintSymbolSizes(const void *emi, const char *name){ - using namespace ELFIO; - - const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi; - if(NULL == ehdr || EV_CURRENT != ehdr->e_version){} - const Elf64_Shdr * shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff); - for(uint16_t i=0;ie_shnum;++i){ - if(shdr[i].sh_type == SHT_SYMTAB){ - const Elf64_Sym *syms = (const Elf64_Sym*)((char*)emi + shdr[i].sh_offset); - assert(syms); - uint64_t numSyms = shdr[i].sh_size/shdr[i].sh_entsize; - const char* strtab = (const char*)((char*)emi + shdr[shdr[i].sh_link].sh_offset); - assert(strtab); - for(uint64_t i=0;ie_shoff); - - uint64_t max_offset = ehdr->e_shoff; - uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum; - - for(uint16_t i=0;i < ehdr->e_shnum;++i){ - uint64_t cur_offset = static_cast(shdr[i].sh_offset); - if(max_offset < cur_offset){ - max_offset = cur_offset; - total_size = max_offset; - if(SHT_NOBITS != shdr[i].sh_type){ - total_size += static_cast(shdr[i].sh_size); - } - } - } - return total_size; -} - hipError_t hipModuleLoad(hipModule_t *module, const char *fname) { HIP_INIT_API(module, fname); - hipError_t ret = hipSuccess; - *module = new ihipModule_t; - if(module == NULL){ - return ihipLogStatus(hipErrorInvalidValue); - } + if (!fname) return ihipLogStatus(hipErrorInvalidValue); - auto ctx = ihipGetTlsDefaultCtx(); - if(ctx == nullptr){ - ret = hipErrorInvalidContext; + ifstream file{fname}; - }else{ - int deviceId = ctx->getDevice()->_deviceId; - ihipDevice_t *currentDevice = ihipGetDevice(deviceId); + if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound); - hsa_executable_create_alt( - HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - nullptr, - &(*module)->executable); + vector tmp{ + istreambuf_iterator{file}, istreambuf_iterator{}}; - std::ifstream file{fname}; - - if (!file.is_open()) { - return ihipLogStatus(hipErrorFileNotFound); - } - (*module)->executable = hip_impl::load_executable( - (*module)->executable, currentDevice->_hsaAgent, file); - ret = (*module)->executable.handle ? hipSuccess : hipErrorUnknown; - } - - return ihipLogStatus(ret); + return hipModuleLoadData(module, tmp.data()); } @@ -212,92 +121,13 @@ hipError_t hipModuleUnload(hipModule_t hmod) // Currently we want for all inflight activity to complete, but don't prevent another // thread from launching new kernels before we finish this operation. ihipSynchronize(); - hipError_t ret = hipSuccess; - hsa_status_t status = hsa_executable_destroy(hmod->executable); - if(status != HSA_STATUS_SUCCESS) - { - ret = hipErrorInvalidValue; - } - // status = hsa_code_object_destroy(hmod->object); - // if(status != HSA_STATUS_SUCCESS) - // { - // ret = hipErrorInvalidValue; - // } - // status = hsa_memory_free(hmod->ptr); - // if(status != HSA_STATUS_SUCCESS) - // { - // ret = hipErrorInvalidValue; - // } - for(auto f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) { - delete *f; - } - delete hmod; - return ihipLogStatus(ret); + + delete hmod; // The ihipModule_t dtor will clean everything up. + hmod = nullptr; + + return ihipLogStatus(hipSuccess); } - -hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char *name) -{ - auto ctx = ihipGetTlsDefaultCtx(); - hipError_t ret = hipSuccess; - - if (name == nullptr){ - return (hipErrorInvalidValue); - } - - if (ctx == nullptr){ - ret = hipErrorInvalidContext; - - } else { - std::string str(name); - for(auto f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) { - if((*f)->_name == str) { - *func = *f; - return ret; - } - } - ihipModuleSymbol_t *sym = new ihipModuleSymbol_t; - int deviceId = ctx->getDevice()->_deviceId; - ihipDevice_t *currentDevice = ihipGetDevice(deviceId); - hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; - - hsa_status_t status; - hsa_executable_symbol_t symbol; - status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &symbol); - if(status != HSA_STATUS_SUCCESS){ - return hipErrorNotFound; - } - - status = hsa_executable_symbol_get_info(symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &sym->_object); - CHECK_HSA(status, hipErrorNotFound); - - status = hsa_executable_symbol_get_info(symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &sym->_groupSegmentSize); - CHECK_HSA(status, hipErrorNotFound); - - status = hsa_executable_symbol_get_info(symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &sym->_privateSegmentSize); - CHECK_HSA(status, hipErrorNotFound); - - sym->_name = name; - *func = sym; - hmod->funcTrack.push_back(*func); - } - return ret; -} - - -hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, - const char *name){ - HIP_INIT_API(hfunc, hmod, name); - return ihipLogStatus(ihipModuleGetSymbol(hfunc, hmod, name)); -} - - hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, @@ -448,45 +278,11 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, namespace { struct Agent_global { - std::string name; + string name; hipDeviceptr_t address; - std::uint32_t byte_cnt; + uint32_t byte_cnt; }; - inline - void* address(hsa_executable_symbol_t x) - { - void* r = nullptr; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r); - - return r; - } - - inline - std::string name(hsa_executable_symbol_t x) - { - uint32_t sz = 0u; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz); - - std::string r(sz, '\0'); - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front()); - - return r; - } - - inline - std::uint32_t size(hsa_executable_symbol_t x) - { - std::uint32_t r = 0; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r); - - return r; - } - inline void track(const Agent_global& x) { @@ -511,7 +307,7 @@ namespace hc::am_memtracker_update(x.address, device->_deviceId, 0u); } - template> + template> inline hsa_status_t copy_agent_global_variables( hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* out) @@ -536,26 +332,24 @@ namespace { auto ctx = ihipGetTlsDefaultCtx(); - if (!ctx) throw std::runtime_error{"No active HIP context."}; + if (!ctx) throw runtime_error{"No active HIP context."}; auto device = ctx->getDevice(); - if (!device) throw std::runtime_error{"No device available for HIP."}; + if (!device) throw runtime_error{"No device available for HIP."}; ihipDevice_t *currentDevice = ihipGetDevice(device->_deviceId); - if (!currentDevice) { - throw std::runtime_error{"No active device for HIP"}; - } + if (!currentDevice) throw runtime_error{"No active device for HIP."}; return currentDevice->_hsaAgent; } inline - std::vector read_agent_globals( + vector read_agent_globals( hsa_agent_t agent, hsa_executable_t executable) { - std::vector r; + vector r; hsa_executable_iterate_agent_symbols( executable, agent, copy_agent_global_variables, &r); @@ -564,15 +358,14 @@ namespace } template - std::pair read_global_description( + pair read_global_description( ForwardIterator f, ForwardIterator l, const char* name) { const auto it = std::find_if( f, l, [=](const Agent_global& x) { return x.name == name; }); return it == l ? - std::make_pair(nullptr, 0u) : - std::make_pair(it->address, it->byte_cnt); + make_pair(nullptr, 0u) : make_pair(it->address, it->byte_cnt); } hipError_t read_agent_global_from_module( @@ -581,13 +374,12 @@ namespace hipModule_t hmod, const char* name) { - static std::unordered_map< - hipModule_t, std::vector> agent_globals; + static unordered_map> agent_globals; // TODO: this is not particularly robust. if (agent_globals.count(hmod) == 0) { - static std::mutex mtx; - std::lock_guard lck{mtx}; + static mutex mtx; + lock_guard lck{mtx}; if (agent_globals.count(hmod) == 0) { agent_globals.emplace( @@ -599,10 +391,10 @@ namespace // It will have to be properly fleshed out in the future. const auto it0 = agent_globals.find(hmod); if (it0 == agent_globals.cend()) { - throw std::runtime_error{"agent_globals data structure corrupted."}; + throw runtime_error{"agent_globals data structure corrupted."}; } - std::tie(*dptr, *bytes) = read_global_description( + tie(*dptr, *bytes) = read_global_description( it0->second.cbegin(), it0->second.cend(), name); return dptr ? hipSuccess : hipErrorNotFound; @@ -611,22 +403,21 @@ namespace hipError_t read_agent_global_from_process( hipDeviceptr_t *dptr, size_t* bytes, const char* name) { - static std::unordered_map< - hsa_agent_t, std::vector> agent_globals; + static unordered_map> agent_globals; static std::once_flag f; - std::call_once(f, []() { + call_once(f, []() { for (auto&& agent_executables : hip_impl::executables()) { - std::vector tmp0; + vector tmp0; for (auto&& executable : agent_executables.second) { auto tmp1 = read_agent_globals( agent_executables.first, executable); tmp0.insert( tmp0.end(), - std::make_move_iterator(tmp1.begin()), - std::make_move_iterator(tmp1.end())); + make_move_iterator(tmp1.begin()), + make_move_iterator(tmp1.end())); } - agent_globals.emplace(agent_executables.first, std::move(tmp0)); + agent_globals.emplace(agent_executables.first, move(tmp0)); } }); @@ -634,81 +425,129 @@ namespace if (it == agent_globals.cend()) return hipErrorNotInitialized; - std::tie(*dptr, *bytes) = read_global_description( + tie(*dptr, *bytes) = read_global_description( it->second.cbegin(), it->second.cend(), name); return dptr ? hipSuccess : hipErrorNotFound; } + + hsa_executable_symbol_t find_kernel_by_name( + hsa_executable_t executable, const char* kname) + { + pair r{kname, {}}; + + hsa_executable_iterate_agent_symbols( + executable, + this_agent(), + [](hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* s) { + auto p = + static_cast*>(s); + + if (type(x) != HSA_SYMBOL_KIND_KERNEL) { + return HSA_STATUS_SUCCESS; + } + if (name(x) != p->first) return HSA_STATUS_SUCCESS; + + p->second = x; + + return HSA_STATUS_INFO_BREAK; + }, &r); + + return r.second; + } + + string read_elf_file_as_string(const void* file) + { // Precondition: file points to an ELF image that was BITWISE loaded + // into process accessible memory, and not one loaded by + // the loader. This is because in the latter case + // alignment may differ, which will break the size + // computation. + // the image is Elf64, and matches endianness i.e. it is + // Little Endian. + if (!file) return {}; + + auto h = static_cast(file); + auto s = static_cast(file); + // This assumes the common case of SHT being the last part of the ELF. + auto sz = sizeof(Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum; + + return string{s, s + sz}; + } +} // Anonymous namespace, internal linkage. + +hipError_t ihipModuleGetFunction( + hipFunction_t *func, hipModule_t hmod, const char *name) +{ + HIP_INIT_API(func, hmod, name); + + if (!func || !name) return ihipLogStatus(hipErrorInvalidValue); + + auto ctx = ihipGetTlsDefaultCtx(); + + if (!ctx) return ihipLogStatus(hipErrorInvalidContext); + + hipError_t ret = hipSuccess; + + *func = new ihipModuleSymbol_t; + + if (!*func) return ihipLogStatus(hipErrorInvalidValue); + + auto kernel = find_kernel_by_name(hmod->executable, name); + + if (kernel.handle == 0u) return ihipLogStatus(hipErrorNotFound); + + (*func)->_object = kernel_object(kernel); + (*func)->_groupSegmentSize = group_size(kernel); + (*func)->_privateSegmentSize = private_size(kernel); + (*func)->_name = name; + + return ihipLogStatus(hipSuccess); +} + +hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, + const char *name){ + HIP_INIT_API(hfunc, hmod, name); + return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name)); } hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char* name) { HIP_INIT_API(dptr, bytes, hmod, name); - hipError_t ret = hipSuccess; - if(dptr == NULL || bytes == NULL){ - return ihipLogStatus(hipErrorInvalidValue); - } - if(name == NULL){ - return ihipLogStatus(hipErrorNotInitialized); - } - else{ - ret = hmod ? - read_agent_global_from_module(dptr, bytes, hmod, name) : - read_agent_global_from_process(dptr, bytes, name); - return ihipLogStatus(ret); - } + if(!dptr || !bytes) return ihipLogStatus(hipErrorInvalidValue); + + if(!name) return ihipLogStatus(hipErrorNotInitialized); + + const auto r = hmod ? + read_agent_global_from_module(dptr, bytes, hmod, name) : + read_agent_global_from_process(dptr, bytes, name); + + return ihipLogStatus(r); } hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(module, image); - hipError_t ret = hipSuccess; - if(image == NULL || module == NULL){ - return ihipLogStatus(hipErrorNotInitialized); - } else { - auto ctx = ihipGetTlsDefaultCtx(); - *module = new ihipModule_t; - int deviceId = ctx->getDevice()->_deviceId; - ihipDevice_t *currentDevice = ihipGetDevice(deviceId); - void *p; - uint64_t size = ElfSize(image); - hsa_agent_t agent = currentDevice->_hsaAgent; - hsa_region_t sysRegion; - hsa_status_t status = hsa_agent_iterate_regions(agent, hipdrv::findSystemRegions, &sysRegion); - status = hsa_memory_allocate(sysRegion, size, (void**)&p); + if (!module) return ihipLogStatus(hipErrorInvalidValue); - if(status != HSA_STATUS_SUCCESS){ - return ihipLogStatus(hipErrorOutOfMemory); - } + *module = new ihipModule_t; - char *ptr = (char*)p; - if(!ptr){ - return ihipLogStatus(hipErrorOutOfMemory); - } - (*module)->ptr = p; - (*module)->size = size; + auto ctx = ihipGetTlsDefaultCtx(); + if (!ctx) return ihipLogStatus(hipErrorInvalidContext); - memcpy(ptr, image, size); + hsa_executable_create_alt( + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + nullptr, + &(*module)->executable); - status = hsa_code_object_deserialize(ptr, size, NULL, &(*module)->object); + (*module)->executable = hip_impl::load_executable( + read_elf_file_as_string(image), (*module)->executable, this_agent()); - if(status != HSA_STATUS_SUCCESS){ - return ihipLogStatus(hipErrorSharedObjectInitFailed); - } - - status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &(*module)->executable); - CHECKLOG_HSA(status, hipErrorNotInitialized); - - status = hsa_executable_load_code_object((*module)->executable, agent, (*module)->object, NULL); - CHECKLOG_HSA(status, hipErrorNotInitialized); - - status = hsa_executable_freeze((*module)->executable, NULL); - CHECKLOG_HSA(status, hipErrorNotInitialized); - } - return ihipLogStatus(ret); + return ihipLogStatus( + (*module)->executable.handle ? hipSuccess : hipErrorUnknown); } hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues) @@ -716,21 +555,20 @@ hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned return hipModuleLoadData(module, image); } -hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) +hipError_t hipModuleGetTexRef( + textureReference** texRef, hipModule_t hmod, const char* name) { HIP_INIT_API(texRef, hmod, name); + hipError_t ret = hipErrorNotFound; - if(texRef == NULL){ - ret = hipErrorInvalidValue; - } else { - if(name == NULL || hmod == NULL){ - ret = hipErrorNotInitialized; - } else{ - const auto it = hip_impl::globals().find(name); - if (it == hip_impl::globals().end()) return ihipLogStatus(hipErrorInvalidValue); - *texRef = reinterpret_cast(it->second.get()); - ret = hipSuccess; - } - } - return ihipLogStatus(ret); + if(!texRef) return ihipLogStatus(hipErrorInvalidValue); + + if(!hmod || !name) return ihipLogStatus(hipErrorNotInitialized); + + const auto it = globals().find(name); + if (it == globals().end()) return ihipLogStatus(hipErrorInvalidValue); + + *texRef = static_cast(it->second.get()); + + return ihipLogStatus(hipSuccess); } diff --git a/src/hsa_helpers.hpp b/src/hsa_helpers.hpp new file mode 100644 index 0000000000..d8e09b7aa9 --- /dev/null +++ b/src/hsa_helpers.hpp @@ -0,0 +1,112 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#include + +#include +#include + +namespace hip_impl +{ + inline + void* address(hsa_executable_symbol_t x) + { + void* r = nullptr; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r); + + return r; + } + + inline + hsa_agent_t agent(hsa_executable_symbol_t x) + { + hsa_agent_t r = {}; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r); + + return r; + } + + inline + std::uint32_t group_size(hsa_executable_symbol_t x) + { + std::uint32_t r = 0u; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r); + + return r; + } + + inline + std::uint64_t kernel_object(hsa_executable_symbol_t x) + { + std::uint64_t r = 0u; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r); + + return r; + } + + inline + std::string name(hsa_executable_symbol_t x) + { + std::uint32_t sz = 0u; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz); + + std::string r(sz, '\0'); + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front()); + + return r; + } + + inline + std::uint32_t private_size(hsa_executable_symbol_t x) + { + std::uint32_t r = 0u; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r); + + return r; + } + + inline + std::uint32_t size(hsa_executable_symbol_t x) + { + std::uint32_t r = 0; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r); + + return r; + } + + inline + hsa_symbol_kind_t type(hsa_executable_symbol_t x) + { + hsa_symbol_kind_t r = {}; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r); + + return r; + } +} \ No newline at end of file diff --git a/src/program_state.cpp b/src/program_state.cpp index 47071d0236..e867887da2 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -3,6 +3,7 @@ #include "../include/hip/hcc_detail/code_object_bundle.hpp" #include "hip_hcc_internal.h" +#include "hsa_helpers.hpp" #include "trace_helper.h" #include "elfio/elfio.hpp" @@ -146,13 +147,11 @@ namespace void associate_code_object_symbols_with_host_allocation( const elfio& reader, - const elfio& self_reader, section* code_object_dynsym, - section* process_symtab, hsa_agent_t agent, hsa_executable_t executable) { - if (!code_object_dynsym || !process_symtab) return; + if (!code_object_dynsym) return; const auto undefined_symbols = copy_names_of_undefined_symbols( symbol_section_accessor{reader, code_object_dynsym}); @@ -294,68 +293,6 @@ namespace return r; } - inline - hsa_agent_t agent(hsa_executable_symbol_t x) - { - hsa_agent_t r = {}; - hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r); - - return r; - } - - inline - uint32_t group_size(hsa_executable_symbol_t x) - { - uint32_t r = 0u; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r); - - return r; - } - - inline - uint64_t kernel_object(hsa_executable_symbol_t x) - { - uint64_t r = 0u; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r); - - return r; - } - - inline - string name(hsa_executable_symbol_t x) - { - uint32_t sz = 0u; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz); - - string r(sz, '\0'); - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front()); - - return r; - } - - inline - uint32_t private_size(hsa_executable_symbol_t x) - { - uint32_t r = 0u; - hsa_executable_symbol_get_info( - x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r); - - return r; - } - - inline - hsa_symbol_kind_t type(hsa_executable_symbol_t x) - { - hsa_symbol_kind_t r = {}; - hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r); - - return r; - } - const unordered_map>& kernels() { static unordered_map> r; @@ -384,42 +321,43 @@ namespace } void load_code_object_and_freeze_executable( - istream& file, hsa_agent_t agent, hsa_executable_t executable) + const string& file, hsa_agent_t agent, hsa_executable_t executable) { // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. static const auto cor_deleter = [](hsa_code_object_reader_t* p) { - hsa_code_object_reader_destroy(*p); + if (p) { + hsa_code_object_reader_destroy(*p); + delete p; + } }; using RAII_code_reader = unique_ptr< hsa_code_object_reader_t, decltype(cor_deleter)>; - file.seekg(0); + if (!file.empty()) { + RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter}; + hsa_code_object_reader_create_from_memory( + file.data(), file.size(), tmp.get()); - vector blob{ - istreambuf_iterator{file}, istreambuf_iterator{}}; - RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter}; - hsa_code_object_reader_create_from_memory( - blob.data(), blob.size(), tmp.get()); + hsa_executable_load_agent_code_object( + executable, agent, *tmp, nullptr, nullptr); - hsa_executable_load_agent_code_object( - executable, agent, *tmp, nullptr, nullptr); + hsa_executable_freeze(executable, nullptr); - hsa_executable_freeze(executable, nullptr); + static vector code_readers; + static mutex mtx; - static vector code_readers; - static mutex mtx; - - lock_guard lck{mtx}; - code_readers.push_back(move(tmp)); + lock_guard lck{mtx}; + code_readers.push_back(move(tmp)); + } } } namespace hip_impl { const unordered_map>& executables() - { + { // TODO: This leaks the hsa_executable_ts, it should use RAII. static unordered_map> r; static once_flag f; @@ -449,8 +387,7 @@ namespace hip_impl // TODO: this is massively inefficient and only // meant for illustration. string blob_to_str{blob.cbegin(), blob.cend()}; - stringstream istr{blob_to_str}; - tmp = load_executable(tmp, a, istr); + tmp = load_executable(blob_to_str, tmp, a); if (tmp.handle) r[a].push_back(tmp); } @@ -535,33 +472,23 @@ namespace hip_impl } hsa_executable_t load_executable( - hsa_executable_t executable, hsa_agent_t agent, istream& file) + const string& file, hsa_executable_t executable, hsa_agent_t agent) { elfio reader; - if (!reader.load(file)) { - return hsa_executable_t{}; - } - else { - // TODO: this may benefit from caching as well. - elfio self_reader; - self_reader.load("/proc/self/exe"); + stringstream tmp{file}; - const auto symtab = - find_section_if(self_reader, [](const ELFIO::section* x) { - return x->get_type() == SHT_SYMTAB; - }); + if (!reader.load(tmp)) return hsa_executable_t{}; - const auto code_object_dynsym = - find_section_if(reader, [](const ELFIO::section* x) { + const auto code_object_dynsym = + find_section_if(reader, [](const ELFIO::section* x) { return x->get_type() == SHT_DYNSYM; - }); + }); - associate_code_object_symbols_with_host_allocation( - reader, self_reader, code_object_dynsym, symtab, agent, executable); + associate_code_object_symbols_with_host_allocation( + reader, code_object_dynsym, agent, executable); - load_code_object_and_freeze_executable(file, agent, executable); + load_code_object_and_freeze_executable(file, agent, executable); - return executable; - } + return executable; } } // Namespace hip_impl. \ No newline at end of file