diff --git a/CMakeLists.txt b/CMakeLists.txt index 10a5393755..b9b27ecc5e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -240,7 +240,6 @@ if(HIP_PLATFORM STREQUAL "hcc") src/hip_surface.cpp src/hip_intercept.cpp src/env.cpp - src/program_state.cpp src/h2f.cpp) execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) diff --git a/src/elfio/elf_types.hpp b/include/hip/hcc_detail/elfio/elf_types.hpp similarity index 100% rename from src/elfio/elf_types.hpp rename to include/hip/hcc_detail/elfio/elf_types.hpp diff --git a/src/elfio/elfio.hpp b/include/hip/hcc_detail/elfio/elfio.hpp similarity index 100% rename from src/elfio/elfio.hpp rename to include/hip/hcc_detail/elfio/elfio.hpp diff --git a/src/elfio/elfio_dump.hpp b/include/hip/hcc_detail/elfio/elfio_dump.hpp similarity index 100% rename from src/elfio/elfio_dump.hpp rename to include/hip/hcc_detail/elfio/elfio_dump.hpp diff --git a/src/elfio/elfio_dynamic.hpp b/include/hip/hcc_detail/elfio/elfio_dynamic.hpp similarity index 100% rename from src/elfio/elfio_dynamic.hpp rename to include/hip/hcc_detail/elfio/elfio_dynamic.hpp diff --git a/src/elfio/elfio_header.hpp b/include/hip/hcc_detail/elfio/elfio_header.hpp similarity index 100% rename from src/elfio/elfio_header.hpp rename to include/hip/hcc_detail/elfio/elfio_header.hpp diff --git a/src/elfio/elfio_note.hpp b/include/hip/hcc_detail/elfio/elfio_note.hpp similarity index 100% rename from src/elfio/elfio_note.hpp rename to include/hip/hcc_detail/elfio/elfio_note.hpp diff --git a/src/elfio/elfio_relocation.hpp b/include/hip/hcc_detail/elfio/elfio_relocation.hpp similarity index 100% rename from src/elfio/elfio_relocation.hpp rename to include/hip/hcc_detail/elfio/elfio_relocation.hpp diff --git a/src/elfio/elfio_section.hpp b/include/hip/hcc_detail/elfio/elfio_section.hpp similarity index 100% rename from src/elfio/elfio_section.hpp rename to include/hip/hcc_detail/elfio/elfio_section.hpp diff --git a/src/elfio/elfio_segment.hpp b/include/hip/hcc_detail/elfio/elfio_segment.hpp similarity index 100% rename from src/elfio/elfio_segment.hpp rename to include/hip/hcc_detail/elfio/elfio_segment.hpp diff --git a/src/elfio/elfio_strings.hpp b/include/hip/hcc_detail/elfio/elfio_strings.hpp similarity index 100% rename from src/elfio/elfio_strings.hpp rename to include/hip/hcc_detail/elfio/elfio_strings.hpp diff --git a/src/elfio/elfio_symbols.hpp b/include/hip/hcc_detail/elfio/elfio_symbols.hpp similarity index 100% rename from src/elfio/elfio_symbols.hpp rename to include/hip/hcc_detail/elfio/elfio_symbols.hpp diff --git a/src/elfio/elfio_utils.hpp b/include/hip/hcc_detail/elfio/elfio_utils.hpp similarity index 100% rename from src/elfio/elfio_utils.hpp rename to include/hip/hcc_detail/elfio/elfio_utils.hpp diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index 2fbda48629..29a389f642 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -31,6 +31,7 @@ THE SOFTWARE. #include "hip/hip_hcc.h" #include "hip_runtime.h" +#include #include #include #include @@ -105,21 +106,13 @@ inline std::vector make_kernarg( auto it = function_names().find(reinterpret_cast(kernel)); if (it == function_names().cend()) { - it = - function_names(true).find(reinterpret_cast(kernel)); - if (it == function_names().cend()) { - throw std::runtime_error{"Undefined __global__ function."}; - } + hip_throw(std::runtime_error{"Undefined __global__ function."}); } auto it1 = kernargs().find(it->second); if (it1 == kernargs().end()) { - it1 = kernargs(true).find(it->second); - - if (it1 == kernargs().end()) { - throw std::runtime_error{ - "Missing metadata for __global__ function: " + it->second}; - } + hip_throw(std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}); } std::tuple to_formals{std::move(actuals)}; @@ -129,23 +122,87 @@ inline std::vector make_kernarg( return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); } -void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, - const dim3& dimBlocks, std::uint32_t sharedMemBytes, hipStream_t stream, - void** kernarg); -} // Namespace hip_impl. +inline +std::string name(std::uintptr_t function_address) +{ + const auto it = function_names().find(function_address); + + if (it == function_names().cend()) { + hip_throw(std::runtime_error{ + "Invalid function passed to hipLaunchKernelGGL."}); + } + + return it->second; +} + +inline +std::string name(hsa_agent_t agent) +{ + char n[64]{}; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, n); + + return std::string{n}; +} + +hsa_agent_t target_agent(hipStream_t stream); + +inline +__attribute__((visibility("hidden"))) +void hipLaunchKernelGGLImpl( + std::uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + std::uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) { + auto it0 = functions().find(function_address); + + if (it0 == functions().cend()) { + hip_throw(std::runtime_error{ + "No device code available for function: " + + name(function_address)}); + } + + auto agent = target_agent(stream); + + const auto it1 = std::find_if( + it0->second.cbegin(), + it0->second.cend(), + [=](const std::pair& x) { + return x.first == agent; + }); + + if (it1 == it0->second.cend()) { + hip_throw(std::runtime_error{ + "No code available for function: " + name(function_address) + + ", for agent: " + name(agent)}); + } + + hipModuleLaunchKernel(it1->second, numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, + stream, nullptr, kernarg); +} +} // Namespace hip_impl. template -inline void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { +inline +void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, + Args... args) { auto kernarg = hip_impl::make_kernarg( kernel, std::tuple{std::move(args)...}); std::size_t kernarg_size = kernarg.size(); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, kernarg.data(), HIP_LAUNCH_PARAM_BUFFER_SIZE, - &kernarg_size, HIP_LAUNCH_PARAM_END}; + void* config[]{ + HIP_LAUNCH_PARAM_BUFFER_POINTER, + kernarg.data(), + HIP_LAUNCH_PARAM_BUFFER_SIZE, + &kernarg_size, + HIP_LAUNCH_PARAM_END}; - hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast(kernel), numBlocks, dimBlocks, - sharedMemBytes, stream, &config[0]); + hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast(kernel), + numBlocks, dimBlocks, sharedMemBytes, + stream, &config[0]); } template diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 73996982d1..cb4e073e4e 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -35,11 +35,14 @@ THE SOFTWARE. #define GENERIC_GRID_LAUNCH 1 #endif +#include + #include #include #include #include #include +#include #if defined(_MSC_VER) #define DEPRECATED(msg) __declspec(deprecated(msg)) @@ -58,6 +61,11 @@ THE SOFTWARE. #define HIP_LAUNCH_PARAM_END ((void*)0x03) #ifdef __cplusplus + #include + #include + #include + #include + #define __dparm(x) \ = x #else @@ -1363,6 +1371,61 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); +__attribute__((visibility("hidden"))) +hipError_t hipModuleGetGlobal(void**, size_t*, hipModule_t, const char*); + + +/** + * @brief Copies the memory address of symbol @p symbolName to @p devPtr + * + * @param[in] symbolName - Symbol on device + * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol + * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound + * + * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +inline +__attribute__((visibility("hidden"))) +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { + //HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName); + + size_t size = 0; + return hipModuleGetGlobal(devPtr, &size, 0, (const char*)symbolName); +} + + +/** + * @brief Copies the size of symbol @p symbolName to @p size + * + * @param[in] symbolName - Symbol on device + * @param[out] size - Pointer to the size of the symbol + * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound + * + * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +inline +__attribute__((visibility("hidden"))) +hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { + // HIP_INIT_API(hipGetSymbolSize, size, symbolName); + + void* devPtr = nullptr; + return hipModuleGetGlobal(&devPtr, size, 0, (const char*)symbolName); +} + +#if defined(__cplusplus) +} // extern "C" +#endif + +namespace hip_impl { +hipError_t hipMemcpyToSymbol(void*, const void*, size_t, size_t, hipMemcpyKind, + const char*); +} // Namespace hip_impl. + +#if defined(__cplusplus) +extern "C" { +#endif /** * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area @@ -1387,35 +1450,36 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, * hipMemcpyFromSymbolAsync */ -hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t sizeBytes, - size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); +inline +__attribute__((visibility("hidden"))) +hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + if (!symbolName) return hipErrorInvalidSymbol; + hipDeviceptr_t dst = NULL; + hipGetSymbolAddress(&dst, (const char*)symbolName); -/** - * @brief Copies the memory address of symbol @p symbolName to @p devPtr - * - * @param[in] symbolName - Symbol on device - * @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol - * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound - * - * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName); + return hip_impl::hipMemcpyToSymbol(dst, src, sizeBytes, offset, kind, + (const char*)symbolName); +} +#if defined(__cplusplus) +} // extern "C" +#endif -/** - * @brief Copies the size of symbol @p symbolName to @p size - * - * @param[in] symbolName - Symbol on device - * @param[out] size - Pointer to the size of the symbol - * @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound - * - * @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, - * hipMemcpyFromSymbolAsync - */ -hipError_t hipGetSymbolSize(size_t* size, const void* symbolName); +namespace hip_impl { +hipError_t hipMemcpyToSymbolAsync(void*, const void*, size_t, size_t, + hipMemcpyKind, hipStream_t, const char*); +hipError_t hipMemcpyFromSymbol(void*, const void*, size_t, size_t, + hipMemcpyKind, const char*); +hipError_t hipMemcpyFromSymbolAsync(void*, const void*, size_t, size_t, + hipMemcpyKind, hipStream_t, const char*); +} // Namespace hip_impl. +#if defined(__cplusplus) +extern "C" { +#endif /** * @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area @@ -1442,14 +1506,50 @@ hipError_t hipGetSymbolSize(size_t* size, const void* symbolName); * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, * hipMemcpyFromSymbolAsync */ -hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t sizeBytes, - size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); +inline +__attribute__((visibility("hidden"))) +hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + if (!symbolName) return hipErrorInvalidSymbol; -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t sizeBytes, - size_t offset __dparm(0), hipMemcpyKind kind __dparm( hipMemcpyDeviceToHost )); + hipDeviceptr_t dst = NULL; + hipGetSymbolAddress(&dst, symbolName); -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t sizeBytes, - size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + return hip_impl::hipMemcpyToSymbolAsync(dst, src, sizeBytes, offset, kind, + stream, + (const char*)symbolName); +} + +inline +__attribute__((visibility("hidden"))) +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + if (!symbolName) return hipErrorInvalidSymbol; + + hipDeviceptr_t src = NULL; + hipGetSymbolAddress(&src, symbolName); + + return hip_impl::hipMemcpyFromSymbol(dst, src, sizeBytes, offset, kind, + (const char*)symbolName); +} + +inline +__attribute__((visibility("hidden"))) +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, + hipStream_t stream __dparm(0)) { + if (!symbolName) return hipErrorInvalidSymbol; + + hipDeviceptr_t src = NULL; + hipGetSymbolAddress(&src, symbolName); + + return hip_impl::hipMemcpyFromSymbolAsync(dst, src, sizeBytes, offset, kind, + stream, + (const char*)symbolName); +} /** * @brief Copy data from src to dst asynchronously. @@ -2397,6 +2497,103 @@ hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, con hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func); +struct Agent_global { + std::string name; + hipDeviceptr_t address; + uint32_t byte_cnt; +}; +#if defined(__cplusplus) +} // extern "C" +#endif + +namespace hip_impl { +hsa_executable_t executable_for(hipModule_t); +const std::string& hash_for(hipModule_t); + +template +std::pair read_global_description( + ForwardIterator f, ForwardIterator l, const char* name) { + const auto it = std::find_if(f, l, [=](const Agent_global& x) { + return x.name == name; + }); + + return it == l ? + std::make_pair(nullptr, 0u) : std::make_pair(it->address, it->byte_cnt); +} + +std::vector read_agent_globals(hsa_agent_t agent, + hsa_executable_t executable); +hsa_agent_t this_agent(); + +inline +__attribute__((visibility("hidden"))) +hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes, + hipModule_t hmod, const char* name) { + // the key of the map would the hash of code object associated with the + // hipModule_t instance + static std::unordered_map< + std::string, std::vector> agent_globals; + auto key = hash_for(hmod); + + if (agent_globals.count(key) == 0) { + static std::mutex mtx; + std::lock_guard lck{mtx}; + + if (agent_globals.count(key) == 0) { + agent_globals.emplace( + key, read_agent_globals(this_agent(), executable_for(hmod))); + } + } + + const auto it0 = agent_globals.find(key); + if (it0 == agent_globals.cend()) { + hip_throw( + std::runtime_error{"agent_globals data structure corrupted."}); + } + + std::tie(*dptr, *bytes) = read_global_description(it0->second.cbegin(), + it0->second.cend(), name); + + return *dptr ? hipSuccess : hipErrorNotFound; +} + +inline +__attribute__((visibility("hidden"))) +hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, + const char* name) { + static std::unordered_map< + hsa_agent_t, std::vector> agent_globals; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& agent_executables : executables()) { + std::vector tmp0; + for (auto&& executable : agent_executables.second) { + auto tmp1 = read_agent_globals(agent_executables.first, + executable); + + tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()), + make_move_iterator(tmp1.end())); + } + agent_globals.emplace(agent_executables.first, move(tmp0)); + } + }); + + const auto it = agent_globals.find(this_agent()); + + if (it == agent_globals.cend()) return hipErrorNotInitialized; + + std::tie(*dptr, *bytes) = read_global_description(it->second.cbegin(), + it->second.cend(), name); + + return *dptr ? hipSuccess : hipErrorNotFound; +} +} // Namespace hip_impl. + +#if defined(__cplusplus) +extern "C" { +#endif + /** * @brief returns device memory pointer and size of the kernel present in the module with symbol @p * name @@ -2408,11 +2605,20 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func); * * @returns hipSuccess, hipErrorInvalidValue, hipErrorNotInitialized */ -hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name); +inline +__attribute__((visibility("hidden"))) +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, + hipModule_t hmod, const char* name) { + if (!dptr || !bytes) return hipErrorInvalidValue; -hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name); + if (!name) return hipErrorNotInitialized; + + const auto r = hmod ? + hip_impl::read_agent_global_from_module(dptr, bytes, hmod, name) : + hip_impl::read_agent_global_from_process(dptr, bytes, name); + + return r; +} hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); /** diff --git a/src/hsa_helpers.hpp b/include/hip/hcc_detail/hsa_helpers.hpp similarity index 91% rename from src/hsa_helpers.hpp rename to include/hip/hcc_detail/hsa_helpers.hpp index 74d455edb9..af4f0c93ab 100644 --- a/src/hsa_helpers.hpp +++ b/include/hip/hcc_detail/hsa_helpers.hpp @@ -27,15 +27,6 @@ THE SOFTWARE. #include #include -inline constexpr bool operator==(hsa_isa_t x, hsa_isa_t y) { return x.handle == y.handle; } - -namespace std { -template <> -struct hash { - size_t operator()(hsa_isa_t x) const { return hash{}(x.handle); } -}; -} // namespace std - namespace hip_impl { inline void* address(hsa_executable_symbol_t x) { void* r = nullptr; diff --git a/include/hip/hcc_detail/macro_based_grid_launch.hpp b/include/hip/hcc_detail/macro_based_grid_launch.hpp index 8726b60dc2..ca52e5b614 100644 --- a/include/hip/hcc_detail/macro_based_grid_launch.hpp +++ b/include/hip/hcc_detail/macro_based_grid_launch.hpp @@ -85,7 +85,7 @@ requires(Domain == hc::parallel_for_each(acc_v, d, k); } catch (std::exception& ex) { std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; - throw; + hip_throw(ex); } } @@ -113,7 +113,7 @@ requires(Domain == {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch group_mem_bytes, acc_v, std::move(k)); } catch (std::exception& ex) { std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; - throw; + hip_throw(ex); } } diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index da13c7c3db..f49ed44930 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -22,14 +22,35 @@ THE SOFTWARE. #pragma once +#include "code_object_bundle.hpp" +#include "hsa_helpers.hpp" + +#if !defined(__cpp_exceptions) + #define try if (true) + #define catch(...) if (false) +#endif +#include "elfio/elfio.hpp" +#if !defined(__cpp_exceptions) + #undef try + #undef catch +#endif + #include #include #include #include +#include + +#include #include +#include +#include #include #include +#include +#include +#include #include #include #include @@ -39,13 +60,27 @@ struct ihipModuleSymbol_t; using hipFunction_t = ihipModuleSymbol_t*; namespace std { -template <> +template<> struct hash { - size_t operator()(hsa_agent_t x) const { return hash{}(x.handle); } + size_t operator()(hsa_agent_t x) const { + return hash{}(x.handle); + } +}; + +template<> +struct hash { + size_t operator()(hsa_isa_t x) const { + return hash{}(x.handle); + } }; } // namespace std -inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; } +inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { + return x.handle == y.handle; +} +inline constexpr bool operator==(hsa_isa_t x, hsa_isa_t y) { + return x.handle == y.handle; +} namespace hip_impl { class Kernel_descriptor { @@ -93,16 +128,517 @@ public: } }; -const std::unordered_map>& executables( - bool rebuild = false); -const std::unordered_map>>& -functions(bool rebuild = false); -const std::unordered_map& function_names(bool rebuild = false); -std::unordered_map& globals(bool rebuild = false); -const std::unordered_map< - std::string, std::vector>>& - kernargs(bool rebuild = false); +template +inline +ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) { + const auto it = std::find_if( + reader.sections.begin(), reader.sections.end(), std::move(p)); -hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, - hsa_agent_t agent); + return it != reader.sections.end() ? *it : nullptr; +} + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map< + hsa_isa_t, std::vector>>& code_object_blobs() { + static std::unordered_map>> r; + static std::once_flag f; + + std::call_once(f, []() { + static std::vector> blobs{}; + + dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + + const auto elf = + info->dlpi_addr ? info->dlpi_name : "/proc/self/exe"; + + if (!tmp.load(elf)) return 0; + + const auto it = find_section_if(tmp, [](const ELFIO::section* x) { + return x->get_name() == ".kernel"; + }); + + if (!it) return 0; + + blobs.emplace_back(it->get_data(), it->get_data() + it->get_size()); + + return 0; + }, nullptr); + + for (auto&& multi_arch_blob : blobs) { + auto it = multi_arch_blob.begin(); + while (it != multi_arch_blob.end()) { + Bundled_code_header tmp{it, multi_arch_blob.end()}; + + if (!valid(tmp)) break; + + for (auto&& bundle : bundles(tmp)) { + r[triple_to_hsa_isa(bundle.triple)].push_back(bundle.blob); + } + + it += tmp.bundled_code_size; + }; + } + }); + + return r; +} + +struct Symbol { + std::string name; + ELFIO::Elf64_Addr value = 0; + ELFIO::Elf_Xword size = 0; + ELFIO::Elf_Half sect_idx = 0; + std::uint8_t bind = 0; + std::uint8_t type = 0; + std::uint8_t other = 0; +}; + +inline +Symbol read_symbol(const ELFIO::symbol_section_accessor& section, + unsigned int idx) { + assert(idx < section.get_symbols_num()); + + Symbol r; + section.get_symbol( + idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other); + + return r; +} + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map< + std::string, + std::pair>& symbol_addresses() { + static std::unordered_map< + std::string, std::pair> r; + static std::once_flag f; + + std::call_once(f, []() { + dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + const auto elf = + info->dlpi_addr ? info->dlpi_name : "/proc/self/exe"; + + if (!tmp.load(elf)) return 0; + + auto it = find_section_if(tmp, [](const ELFIO::section* x) { + return x->get_type() == SHT_SYMTAB; + }); + + if (!it) return 0; + + const ELFIO::symbol_section_accessor symtab{tmp, it}; + + for (auto i = 0u; i != symtab.get_symbols_num(); ++i) { + auto s = read_symbol(symtab, i); + + if (s.type != STT_OBJECT || s.sect_idx == SHN_UNDEF) continue; + + const auto addr = s.value + info->dlpi_addr; + r.emplace(std::move(s.name), std::make_pair(addr, s.size)); + } + + return 0; + }, nullptr); + }); + + return r; +} + +inline +__attribute__((visibility("hidden"))) +std::unordered_map& globals() { + static std::unordered_map r; + static std::once_flag f; + + std::call_once(f, []() { r.reserve(symbol_addresses().size()); }); + + return r; +} + +inline +std::vector copy_names_of_undefined_symbols( + const ELFIO::symbol_section_accessor& section) { + std::vector r; + + for (auto i = 0u; i != section.get_symbols_num(); ++i) { + // TODO: this is boyscout code, caching the temporaries + // may be of worth. + auto tmp = read_symbol(section, i); + if (tmp.sect_idx != SHN_UNDEF || tmp.name.empty()) continue; + + r.push_back(std::move(tmp.name)); + } + + return r; +} + +[[noreturn]] +void hip_throw(const std::exception&); + +inline +void associate_code_object_symbols_with_host_allocation( + const ELFIO::elfio& reader, + ELFIO::section* code_object_dynsym, + hsa_agent_t agent, + hsa_executable_t executable) { + if (!code_object_dynsym) return; + + const auto undefined_symbols = copy_names_of_undefined_symbols( + ELFIO::symbol_section_accessor{reader, code_object_dynsym}); + + for (auto&& x : undefined_symbols) { + if (globals().find(x) != globals().cend()) return; + + const auto it1 = symbol_addresses().find(x); + + if (it1 == symbol_addresses().cend()) { + hip_throw(std::runtime_error{ + "Global symbol: " + x + " is undefined."}); + } + + static std::mutex mtx; + std::lock_guard lck{mtx}; + + if (globals().find(x) != globals().cend()) return; + + globals().emplace(x, (void*)(it1->second.first)); + void* p = nullptr; + hsa_amd_memory_lock( + reinterpret_cast(it1->second.first), + it1->second.second, + nullptr, // All agents. + 0, + &p); + + hsa_executable_agent_global_variable_define( + executable, agent, x.c_str(), p); + } +} + +inline +void load_code_object_and_freeze_executable( + const std::string& file, hsa_agent_t agent, hsa_executable_t executable) { + // TODO: the following sequence is inefficient, should be refactored + // into a single load of the file and subsequent ELFIO + // processing. + static const auto cor_deleter = [](hsa_code_object_reader_t* p) { + if (!p) return; + + hsa_code_object_reader_destroy(*p); + delete p; + }; + + using RAII_code_reader = + std::unique_ptr; + + if (file.empty()) return; + + RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter}; + hsa_code_object_reader_create_from_memory( + file.data(), file.size(), tmp.get()); + + hsa_executable_load_agent_code_object( + executable, agent, *tmp, nullptr, nullptr); + + hsa_executable_freeze(executable, nullptr); + + static std::vector code_readers; + static std::mutex mtx; + + std::lock_guard lck{mtx}; + code_readers.push_back(move(tmp)); +} + +inline +hsa_executable_t load_executable(const std::string& file, + hsa_executable_t executable, + hsa_agent_t agent) { + ELFIO::elfio reader; + std::stringstream tmp{file}; + + if (!reader.load(tmp)) return hsa_executable_t{}; + + const auto code_object_dynsym = find_section_if( + reader, [](const ELFIO::section* x) { + return x->get_type() == SHT_DYNSYM; + }); + + associate_code_object_symbols_with_host_allocation(reader, + code_object_dynsym, + agent, executable); + + load_code_object_and_freeze_executable(file, agent, executable); + + return executable; +} + +std::vector all_hsa_agents(); + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map< + hsa_agent_t, std::vector>& executables() { + static std::unordered_map> r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& agent : hip_impl::all_hsa_agents()) { + hsa_agent_iterate_isas(agent, [](hsa_isa_t x, void* pa) { + const auto it = code_object_blobs().find(x); + + if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS; + + hsa_agent_t a = *static_cast(pa); + + for (auto&& blob : it->second) { + hsa_executable_t tmp = {}; + + hsa_executable_create_alt( + HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + nullptr, + &tmp); + + // TODO: this is massively inefficient and only meant for + // illustration. + std::string blob_to_str{blob.cbegin(), blob.cend()}; + tmp = load_executable(blob_to_str, tmp, a); + + if (tmp.handle) r[a].push_back(tmp); + } + + return HSA_STATUS_SUCCESS; + }, &agent); + } + }); + + return r; +} + +inline +std::vector> function_names_for( + const ELFIO::elfio& reader, ELFIO::section* symtab) { + std::vector> r; + ELFIO::symbol_section_accessor symbols{reader, symtab}; + + for (auto i = 0u; i != symbols.get_symbols_num(); ++i) { + // TODO: this is boyscout code, caching the temporaries + // may be of worth. + auto tmp = read_symbol(symbols, i); + + if (tmp.type != STT_FUNC) continue; + if (tmp.type == SHN_UNDEF) continue; + if (tmp.name.empty()) continue; + + r.emplace_back(tmp.value, tmp.name); + } + + return r; +} + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map& function_names() { + static std::unordered_map r; + static std::once_flag f; + + std::call_once(f, []() { + dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) { + ELFIO::elfio tmp; + const auto elf = + info->dlpi_addr ? info->dlpi_name : "/proc/self/exe"; + + if (!tmp.load(elf)) return 0; + + const auto it = find_section_if(tmp, [](const ELFIO::section* x) { + return x->get_type() == SHT_SYMTAB; + }); + + if (!it) return 0; + + auto names = function_names_for(tmp, it); + for (auto&& x : names) x.first += info->dlpi_addr; + + r.insert( + std::make_move_iterator(names.begin()), + std::make_move_iterator(names.end())); + + return 0; + }, nullptr); + }); + + return r; +} + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map< + std::string, std::vector>& kernels() { + static std::unordered_map< + std::string, std::vector> r; + static std::once_flag f; + + std::call_once(f, []() { + static const auto copy_kernels = []( + hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void*) { + if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[name(x)].push_back(x); + + return HSA_STATUS_SUCCESS; + }; + + for (auto&& agent_executables : executables()) { + for (auto&& executable : agent_executables.second) { + hsa_executable_iterate_agent_symbols( + executable, agent_executables.first, copy_kernels, nullptr); + } + } + }); + + return r; +} + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map< + std::uintptr_t, + std::vector>>& functions() { + static std::unordered_map< + std::uintptr_t, + std::vector>> r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& function : function_names()) { + const auto it = kernels().find(function.second); + + if (it == kernels().cend()) continue; + + for (auto&& kernel_symbol : it->second) { + r[function.first].emplace_back( + agent(kernel_symbol), + Kernel_descriptor{kernel_object(kernel_symbol), it->first}); + } + } + }); + + return r; +} + +inline +std::size_t parse_args( + const std::string& metadata, + std::size_t f, + std::size_t l, + std::vector>& size_align) { + if (f == l) return f; + if (!size_align.empty()) return l; + + do { + static constexpr size_t size_sz{5}; + f = metadata.find("Size:", f) + size_sz; + + if (l <= f) return f; + + auto size = std::strtoul(&metadata[f], nullptr, 10); + + static constexpr size_t align_sz{6}; + f = metadata.find("Align:", f) + align_sz; + + char* l{}; + auto align = std::strtoul(&metadata[f], &l, 10); + + f += (l - &metadata[f]) + 1; + + size_align.emplace_back(size, align); + } while (true); +} + +inline +void read_kernarg_metadata( + ELFIO::elfio& reader, + std::unordered_map< + std::string, + std::vector>>& kernargs) { + // TODO: this is inefficient. + auto it = find_section_if(reader, [](const ELFIO::section* x) { + return x->get_type() == SHT_NOTE; + }); + + if (!it) return; + + const ELFIO::note_section_accessor acc{reader, it}; + for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { + ELFIO::Elf_Word type{}; + std::string name{}; + void* desc{}; + ELFIO::Elf_Word desc_size{}; + + acc.get_note(i, type, name, desc, desc_size); + + if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. + + std::string tmp{ + static_cast(desc), static_cast(desc) + desc_size}; + + auto dx = tmp.find("Kernels:"); + + if (dx == std::string::npos) continue; + + static constexpr decltype(tmp.size()) kernels_sz{8}; + dx += kernels_sz; + + do { + dx = tmp.find("Name:", dx); + + if (dx == std::string::npos) break; + + static constexpr decltype(tmp.size()) name_sz{5}; + dx = tmp.find_first_not_of(" '", dx + name_sz); + + auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); + dx += fn.size(); + + auto dx1 = tmp.find("CodeProps", dx); + dx = tmp.find("Args:", dx); + + if (dx1 < dx) { + dx = dx1; + continue; + } + if (dx == std::string::npos) break; + + static constexpr decltype(tmp.size()) args_sz{5}; + dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]); + } while (true); + } +} + +inline +__attribute__((visibility("hidden"))) +const std::unordered_map< + std::string, std::vector>>& kernargs() { + static std::unordered_map< + std::string, std::vector>> r; + static std::once_flag f; + + std::call_once(f, []() { + for (auto&& isa_blobs : code_object_blobs()) { + for (auto&& blob : isa_blobs.second) { + std::stringstream tmp{std::string{blob.cbegin(), blob.cend()}}; + + ELFIO::elfio reader; + + if (!reader.load(tmp)) continue; + + read_kernarg_metadata(reader, r); + } + } + }); + + return r; +} } // Namespace hip_impl. diff --git a/lpl_ca/lpl.hpp b/lpl_ca/lpl.hpp index c3992e43c0..3eeb88bd22 100644 --- a/lpl_ca/lpl.hpp +++ b/lpl_ca/lpl.hpp @@ -4,7 +4,7 @@ #include "clara/clara.hpp" #include "pstreams/pstream.h" -#include "../src/elfio/elfio.hpp" +#include "../include/hip/hcc_detail/elfio/elfio.hpp" #include diff --git a/src/functional_grid_launch.inl b/src/functional_grid_launch.inl index 6283d1aaba..13679dcab3 100644 --- a/src/functional_grid_launch.inl +++ b/src/functional_grid_launch.inl @@ -41,99 +41,20 @@ using namespace std; namespace hip_impl { - namespace + hsa_agent_t target_agent(hipStream_t stream) { - inline - string name(uintptr_t function_address) - { - const auto it = function_names().find(function_address); - - if (it == function_names().cend()) { - throw runtime_error{ - "Invalid function passed to hipLaunchKernelGGL."}; - } - - return it->second; + if (stream) { + return *static_cast( + stream->locked_getAv()->get_hsa_agent()); } - - inline - string name(hsa_agent_t agent) - { - char n[64] = {}; - hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, n); - - return string{n}; + else if ( + ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) { + return ihipGetDevice( + ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent; } - - inline - hsa_agent_t target_agent(hipStream_t stream) - { - if (stream) { - return *static_cast( - stream->locked_getAv()->get_hsa_agent()); - } - else if ( - ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) { - return ihipGetDevice( - ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent; - } - else { - return *static_cast( - accelerator{}.get_default_view().get_hsa_agent()); - } + else { + return *static_cast( + accelerator{}.get_default_view().get_hsa_agent()); } } - - void hipLaunchKernelGGLImpl( - uintptr_t function_address, - const dim3& numBlocks, - const dim3& dimBlocks, - uint32_t sharedMemBytes, - hipStream_t stream, - void** kernarg) - { - auto it0 = functions().find(function_address); - - if (it0 == functions().cend()) { - // Re-init device code maps once again to help locate kernels - // loaded after HIP runtime initialization via means such as - // dlopen(). - it0 = functions(true).find(function_address); - if (it0 == functions().cend()) { - throw runtime_error{ - "No device code available for function: " + - name(function_address) - }; - } - } - - auto agent = target_agent(stream); - - const auto it1 = find_if( - it0->second.cbegin(), - it0->second.cend(), - [=](const pair& x) { - return x.first == agent; - }); - - if (it1 == it0->second.cend()) { - throw runtime_error{ - "No code available for function: " + name(function_address) + - ", for agent: " + name(agent) - }; - } - - hipModuleLaunchKernel( - it1->second, - numBlocks.x, - numBlocks.y, - numBlocks.z, - dimBlocks.x, - dimBlocks.y, - dimBlocks.z, - sharedMemBytes, - stream, - nullptr, - kernarg); - } } diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 3fcdda4181..3f0c4ba6b8 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -27,6 +27,7 @@ THE SOFTWARE. * everywhere. This file is compiled and linked into apps running HIP / HCC path. */ #include +#include #include #include #include @@ -237,7 +238,7 @@ hipError_t ihipSynchronize(void) { //================================================================================================= TidInfo::TidInfo() : _apiSeqNum(0) { _shortTid = g_lastShortTid.fetch_add(1); - _pid = getpid(); + _pid = getpid(); if (COMPILE_HIP_DB && HIP_TRACE_API) { std::stringstream tid_ss; @@ -2397,7 +2398,7 @@ void ihipStream_t::locked_copy2DAsync(void* dst, const void* src, size_t width, crit->_av.copy2d_ext(src, dst, width, height, srcPitch, dstPitch, hcCopyDir, srcPtrInfo, dstPtrInfo, copyDevice ? ©Device->getDevice()->_acc : nullptr, forceUnpinnedCopy); - } + } } //------------------------------------------------------------------------------------------------- @@ -2460,3 +2461,28 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** a //// TODO - add identifier numbers for streams and devices to help with debugging. // TODO - add a contect sequence number for debug. Print operator<< ctx:0.1 (device.ctx) + +namespace hip_impl { + std::vector all_hsa_agents() { + std::vector r{}; + for (auto&& acc : hc::accelerator::get_all()) { + const auto agent = acc.get_hsa_agent(); + + if (!agent || !acc.is_hsa_accelerator()) continue; + + r.emplace_back(*static_cast(agent)); + } + + return r; + } + + [[noreturn]] + void hip_throw(const std::exception& ex) { + #if defined(__cpp_exceptions) + throw ex; + #else + std::cerr << ex.what() << std::endl; + std::terminate(); + #endif + } +} // Namespace hip_impl. \ No newline at end of file diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index d8a9bd5708..9eebcfb28c 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -955,37 +955,14 @@ hipError_t hipHostUnregister(void* hostPtr) { return ihipLogStatus(hip_status); } -namespace { -inline hipDeviceptr_t agent_address_for_symbol(const char* symbolName) { - hipDeviceptr_t r = nullptr; +namespace hip_impl { +hipError_t hipMemcpyToSymbol(void* dst, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, + const char* symbol_name) { + HIP_INIT_SPECIAL_API(hipMemcpyToSymbol, (TRACE_MCMD), symbol_name, src, + count, offset, kind); -#if __hcc_workweek__ >= 17481 - size_t byte_cnt = 0u; - ihipModuleGetGlobal(&r, &byte_cnt, 0, symbolName); -#else - auto ctx = ihipGetTlsDefaultCtx(); - auto acc = ctx->getDevice()->_acc; - r = acc.get_symbol_address(symbolName); -#endif - - return r; -} -} // namespace - -hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t count, size_t offset, - hipMemcpyKind kind) { - HIP_INIT_SPECIAL_API(hipMemcpyToSymbol, (TRACE_MCMD), symbolName, src, count, offset, kind); - - if (symbolName == nullptr) { - return ihipLogStatus(hipErrorInvalidSymbol); - } - - auto ctx = ihipGetTlsDefaultCtx(); - - hc::accelerator acc = ctx->getDevice()->_acc; - - hipDeviceptr_t dst = agent_address_for_symbol(static_cast(symbolName)); - tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbolName, dst); + tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); @@ -1003,21 +980,13 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou return ihipLogStatus(hipSuccess); } +hipError_t hipMemcpyFromSymbol(void* dst, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, + const char* symbol_name) { + HIP_INIT_SPECIAL_API(hipMemcpyFromSymbol, (TRACE_MCMD), symbol_name, dst, + count, offset, kind); -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, size_t offset, - hipMemcpyKind kind) { - HIP_INIT_SPECIAL_API(hipMemcpyFromSymbol, (TRACE_MCMD), symbolName, dst, count, offset, kind); - - if (symbolName == nullptr) { - return ihipLogStatus(hipErrorInvalidSymbol); - } - - auto ctx = ihipGetTlsDefaultCtx(); - - hc::accelerator acc = ctx->getDevice()->_acc; - - hipDeviceptr_t src = agent_address_for_symbol(static_cast(symbolName)); - tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbolName, dst); + tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); @@ -1036,27 +1005,19 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, } -hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, - size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_SPECIAL_API(hipMemcpyToSymbolAsync, (TRACE_MCMD), symbolName, src, count, offset, kind, stream); +hipError_t hipMemcpyToSymbolAsync(void* dst, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, + hipStream_t stream, const char* symbol_name) { + HIP_INIT_SPECIAL_API(hipMemcpyToSymbolAsync, (TRACE_MCMD), symbol_name, src, + count, offset, kind, stream); - if (symbolName == nullptr) { - return ihipLogStatus(hipErrorInvalidSymbol); - } - - hipError_t e = hipSuccess; - - auto ctx = ihipGetTlsDefaultCtx(); - - hc::accelerator acc = ctx->getDevice()->_acc; - - hipDeviceptr_t dst = agent_address_for_symbol(static_cast(symbolName)); - tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbolName, dst); + tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } + hipError_t e = hipSuccess; if (stream) { try { hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream); @@ -1070,28 +1031,19 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ return ihipLogStatus(e); } +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* src, size_t count, + size_t offset, hipMemcpyKind kind, + hipStream_t stream, const char* symbol_name) { + HIP_INIT_SPECIAL_API(hipMemcpyFromSymbolAsync, (TRACE_MCMD), symbol_name, + dst, count, offset, kind, stream); -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, size_t offset, - hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_SPECIAL_API(hipMemcpyFromSymbolAsync, (TRACE_MCMD), symbolName, dst, count, offset, kind, stream); - - if (symbolName == nullptr) { - return ihipLogStatus(hipErrorInvalidSymbol); - } - - hipError_t e = hipSuccess; - - auto ctx = ihipGetTlsDefaultCtx(); - - hc::accelerator acc = ctx->getDevice()->_acc; - - hipDeviceptr_t src = agent_address_for_symbol(static_cast(symbolName)); - tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbolName, src); + tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, src); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } + hipError_t e = hipSuccess; stream = ihipSyncAndResolveStream(stream); if (stream) { try { @@ -1105,23 +1057,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co return ihipLogStatus(e); } - - -hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { - HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName); - - size_t size = 0; - return ihipModuleGetGlobal(devPtr, &size, 0, static_cast(symbolName)); -} - - -hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { - HIP_INIT_API(hipGetSymbolSize, size, symbolName); - - void* devPtr = nullptr; - return ihipModuleGetGlobal(&devPtr, size, 0, static_cast(symbolName)); -} - +} // Namespace hip_impl. //--- hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 780240c067..1d4e9f0685 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -20,11 +20,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "elfio/elfio.hpp" #include "hip/hip_runtime.h" +#include "hip/hcc_detail/elfio/elfio.hpp" +#include "hip/hcc_detail/hsa_helpers.hpp" #include "hip/hcc_detail/program_state.hpp" #include "hip_hcc_internal.h" -#include "hsa_helpers.hpp" #include "trace_helper.h" #include @@ -52,7 +52,6 @@ THE SOFTWARE. // TODO Use Pool APIs from HCC to get memory regions. using namespace ELFIO; -using namespace hip_impl; using namespace std; // calculate MD5 checksum @@ -268,13 +267,33 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } -namespace { -struct Agent_global { - string name; - hipDeviceptr_t address; - uint32_t byte_cnt; -}; +namespace hip_impl { + hsa_executable_t executable_for(hipModule_t hmod) { + return hmod->executable; + } + const std::string& hash_for(hipModule_t hmod) { + return hmod->hash; + } + + hsa_agent_t this_agent() { + auto ctx = ihipGetTlsDefaultCtx(); + + if (!ctx) throw runtime_error{"No active HIP context."}; + + auto device = ctx->getDevice(); + + if (!device) throw runtime_error{"No device available for HIP."}; + + ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId); + + if (!currentDevice) throw runtime_error{"No active device for HIP."}; + + return currentDevice->_hsaAgent; + } +} // Namespace hip_impl. + +namespace { inline void track(const Agent_global& x, hsa_agent_t agent) { tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(), x.address, x.byte_cnt); @@ -299,6 +318,8 @@ inline void track(const Agent_global& x, hsa_agent_t agent) { template > inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent, hsa_executable_symbol_t x, void* out) { + using namespace hip_impl; + assert(out); hsa_symbol_kind_t t = {}; @@ -313,90 +334,9 @@ inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t ag return HSA_STATUS_SUCCESS; } -inline hsa_agent_t this_agent() { - auto ctx = ihipGetTlsDefaultCtx(); - - if (!ctx) throw runtime_error{"No active HIP context."}; - - auto device = ctx->getDevice(); - - if (!device) throw runtime_error{"No device available for HIP."}; - - ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId); - - if (!currentDevice) throw runtime_error{"No active device for HIP."}; - - return currentDevice->_hsaAgent; -} - -inline vector read_agent_globals(hsa_agent_t agent, hsa_executable_t executable) { - vector r; - - hsa_executable_iterate_agent_symbols(executable, agent, copy_agent_global_variables, &r); - - return r; -} - -template -pair read_global_description(ForwardIterator f, ForwardIterator l, - const char* name) { - const auto it = std::find_if(f, l, [=](const Agent_global& x) { return x.name == name; }); - - return it == l ? make_pair(nullptr, 0u) : make_pair(it->address, it->byte_cnt); -} - -hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name) { - // the key of the map would the hash of code object associated with the - // hipModule_t instance - static unordered_map> agent_globals; - auto key = hmod->hash; - - if (agent_globals.count(key) == 0) { - static mutex mtx; - lock_guard lck{mtx}; - - if (agent_globals.count(key) == 0) { - agent_globals.emplace(key, read_agent_globals(this_agent(), hmod->executable)); - } - } - - const auto it0 = agent_globals.find(key); - if (it0 == agent_globals.cend()) { - throw runtime_error{"agent_globals data structure corrupted."}; - } - - tie(*dptr, *bytes) = read_global_description(it0->second.cbegin(), it0->second.cend(), name); - - return *dptr ? hipSuccess : hipErrorNotFound; -} - -hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const char* name) { - static unordered_map> agent_globals; - static std::once_flag f; - - call_once(f, []() { - for (auto&& agent_executables : hip_impl::executables()) { - vector tmp0; - for (auto&& executable : agent_executables.second) { - auto tmp1 = read_agent_globals(agent_executables.first, executable); - tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()), - make_move_iterator(tmp1.end())); - } - agent_globals.emplace(agent_executables.first, move(tmp0)); - } - }); - - const auto it = agent_globals.find(this_agent()); - - if (it == agent_globals.cend()) return hipErrorNotInitialized; - - tie(*dptr, *bytes) = read_global_description(it->second.cbegin(), it->second.cend(), name); - - return *dptr ? hipSuccess : hipErrorNotFound; -} - hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname) { + using namespace hip_impl; + pair r{kname, {}}; hsa_executable_iterate_agent_symbols( @@ -418,8 +358,8 @@ hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const c return r.second; } -string read_elf_file_as_string( - const void* file) { // Precondition: file points to an ELF image that was BITWISE loaded +string read_elf_file_as_string(const void* file) { + // Precondition: file points to an ELF image that was BITWISE loaded // into process accessible memory, and not one loaded by // the loader. This is because in the latter case // alignment may differ, which will break the size @@ -428,15 +368,18 @@ string read_elf_file_as_string( // Little Endian. if (!file) return {}; - auto h = static_cast(file); + auto h = static_cast(file); auto s = static_cast(file); // This assumes the common case of SHT being the last part of the ELF. - auto sz = sizeof(Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum; + auto sz = + sizeof(ELFIO::Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum; return string{s, s + sz}; } string code_object_blob_for_agent(const void* maybe_bundled_code, hsa_agent_t agent) { + using namespace hip_impl; + if (!maybe_bundled_code) return {}; Bundled_code_header tmp{maybe_bundled_code}; @@ -454,9 +397,22 @@ string code_object_blob_for_agent(const void* maybe_bundled_code, hsa_agent_t ag return string{it->blob.cbegin(), it->blob.cend()}; } -} // namespace +} // Unnamed namespace. + +namespace hip_impl { + vector read_agent_globals(hsa_agent_t agent, + hsa_executable_t executable) { + vector r; + + hsa_executable_iterate_agent_symbols( + executable, agent, copy_agent_global_variables, &r); + + return r; + } +} // Namespace hip_impl. hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const char* name) { + using namespace hip_impl; if (!func || !name) return hipErrorInvalidValue; @@ -485,58 +441,36 @@ hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const ch return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name)); } -hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name) { - HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); +namespace { +hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) { + hipFuncAttributes r{}; - return ihipLogStatus(ihipModuleGetGlobal(dptr, bytes, hmod, name)); -} + hipDeviceProp_t prop{}; + hipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + // TODO: at the moment there is no way to query the count of registers + // available per CU, therefore we hardcode it to 64 KiRegisters. + prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; -hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name) { - if (!dptr || !bytes) return hipErrorInvalidValue; - - if (!name) return hipErrorNotInitialized; - - const auto r = hmod ? read_agent_global_from_module(dptr, bytes, hmod, name) - : read_agent_global_from_process(dptr, bytes, name); + r.localSizeBytes = header.workitem_private_segment_byte_size; + r.sharedSizeBytes = header.workgroup_group_segment_byte_size; + r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes; + r.numRegs = header.workitem_vgpr_count; + r.maxThreadsPerBlock = r.numRegs ? + std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : + prop.maxThreadsPerBlock; + r.binaryVersion = + header.amd_machine_version_major * 10 + + header.amd_machine_version_minor; + r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. return r; } - -namespace -{ - inline - hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) - { - hipFuncAttributes r{}; - - hipDeviceProp_t prop{}; - hipGetDeviceProperties( - &prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); - // TODO: at the moment there is no way to query the count of registers - // available per CU, therefore we hardcode it to 64 KiRegisters. - prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; - - r.localSizeBytes = header.workitem_private_segment_byte_size; - r.sharedSizeBytes = header.workgroup_group_segment_byte_size; - r.maxDynamicSharedSizeBytes = - prop.sharedMemPerBlock - r.sharedSizeBytes; - r.numRegs = header.workitem_vgpr_count; - r.maxThreadsPerBlock = r.numRegs ? - std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : - prop.maxThreadsPerBlock; - r.binaryVersion = - header.amd_machine_version_major * 10 + - header.amd_machine_version_minor; - r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. - - return r; - } -} +} // Unnamed namespace. hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { + using namespace hip_impl; + if (!attr) return hipErrorInvalidValue; if (!func) return hipErrorInvalidDeviceFunction; @@ -564,6 +498,7 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) } hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { + using namespace hip_impl; if (!module) return hipErrorInvalidValue; @@ -585,9 +520,8 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { auto content = tmp.empty() ? read_elf_file_as_string(image) : tmp; - (*module)->executable = hip_impl::load_executable(content, - (*module)->executable, - this_agent()); + (*module)->executable = load_executable(content, (*module)->executable, + this_agent()); // compute the hash of the code object (*module)->hash = checksum(content.length(), content.data()); @@ -621,6 +555,8 @@ hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned } hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { + using namespace hip_impl; + HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name); hipError_t ret = hipErrorNotFound; diff --git a/src/program_state.cpp b/src/program_state.cpp deleted file mode 100644 index 601096b48f..0000000000 --- a/src/program_state.cpp +++ /dev/null @@ -1,659 +0,0 @@ -#include "../include/hip/hcc_detail/program_state.hpp" - -#include "../include/hip/hcc_detail/code_object_bundle.hpp" - -#include "hip_hcc_internal.h" -#include "hsa_helpers.hpp" -#include "trace_helper.h" - -#include "elfio/elfio.hpp" - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace ELFIO; -using namespace hip_impl; -using namespace std; - -namespace { -struct Symbol { - string name; - ELFIO::Elf64_Addr value = 0; - Elf_Xword size = 0; - Elf_Half sect_idx = 0; - uint8_t bind = 0; - uint8_t type = 0; - uint8_t other = 0; -}; - -inline Symbol read_symbol(const symbol_section_accessor& section, unsigned int idx) { - assert(idx < section.get_symbols_num()); - - Symbol r; - section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other); - - return r; -} - -template -inline section* find_section_if(elfio& reader, P p) { - const auto it = find_if(reader.sections.begin(), reader.sections.end(), move(p)); - - return it != reader.sections.end() ? *it : nullptr; -} - -vector copy_names_of_undefined_symbols(const symbol_section_accessor& section) { - vector r; - - for (auto i = 0u; i != section.get_symbols_num(); ++i) { - // TODO: this is boyscout code, caching the temporaries - // may be of worth. - - auto tmp = read_symbol(section, i); - if (tmp.sect_idx == SHN_UNDEF && !tmp.name.empty()) { - r.push_back(std::move(tmp.name)); - } - } - - return r; -} - -const std::unordered_map>& -symbol_addresses(bool rebuild = false) { - static unordered_map> r; - static once_flag f; - - auto cons = [rebuild]() { - if (rebuild) { - r.clear(); - } - - dl_iterate_phdr( - [](dl_phdr_info* info, size_t, void*) { - static constexpr const char self[] = "/proc/self/exe"; - elfio reader; - - static unsigned int iter = 0u; - if (reader.load(!iter ? self : info->dlpi_name)) { - auto it = find_section_if( - reader, [](const class section* x) { return x->get_type() == SHT_SYMTAB; }); - - if (it) { - const symbol_section_accessor symtab{reader, it}; - - for (auto i = 0u; i != symtab.get_symbols_num(); ++i) { - auto tmp = read_symbol(symtab, i); - - if (tmp.type == STT_OBJECT && tmp.sect_idx != SHN_UNDEF) { - const auto addr = tmp.value + (iter ? info->dlpi_addr : 0); - r.emplace(move(tmp.name), make_pair(addr, tmp.size)); - } - } - } - - ++iter; - } - - return 0; - }, - nullptr); - }; - - call_once(f, cons); - if (rebuild) { - cons(); - } - - return r; -} - -void associate_code_object_symbols_with_host_allocation(const elfio& reader, - section* code_object_dynsym, - hsa_agent_t agent, - hsa_executable_t executable) { - if (!code_object_dynsym) return; - - const auto undefined_symbols = - copy_names_of_undefined_symbols(symbol_section_accessor{reader, code_object_dynsym}); - - for (auto&& x : undefined_symbols) { - if (globals().find(x) != globals().cend()) return; - - const auto it1 = symbol_addresses().find(x); - - if (it1 == symbol_addresses().cend()) { - throw runtime_error{"Global symbol: " + x + " is undefined."}; - } - - static mutex mtx; - lock_guard lck{mtx}; - - if (globals().find(x) != globals().cend()) return; - globals().emplace(x, (void*)(it1->second.first)); - void* p = nullptr; - hsa_amd_memory_lock(reinterpret_cast(it1->second.first), it1->second.second, - nullptr, // All agents. - 0, &p); - - hsa_executable_agent_global_variable_define(executable, agent, x.c_str(), p); - } -} - -vector code_object_blob_for_process() { - static constexpr const char self[] = "/proc/self/exe"; - static constexpr const char kernel_section[] = ".kernel"; - - elfio reader; - - if (!reader.load(self)) { - throw runtime_error{"Failed to load ELF file for current process."}; - } - - auto kernels = - find_section_if(reader, [](const section* x) { return x->get_name() == kernel_section; }); - - vector r; - if (kernels) { - r.insert(r.end(), kernels->get_data(), kernels->get_data() + kernels->get_size()); - } - - return r; -} - -const unordered_map>>& code_object_blobs(bool rebuild = false) { - static unordered_map>> r; - static once_flag f; - - auto cons = [rebuild]() { - // names of shared libraries who .kernel sections already loaded - static unordered_set lib_names; - static vector> blobs{code_object_blob_for_process()}; - - if (rebuild) { - r.clear(); - blobs.clear(); - } - - dl_iterate_phdr( - [](dl_phdr_info* info, std::size_t, void*) { - elfio tmp; - if ((lib_names.find(info->dlpi_name) == lib_names.end()) && - (tmp.load(info->dlpi_name))) { - const auto it = find_section_if( - tmp, [](const section* x) { return x->get_name() == ".kernel"; }); - - if (it) { - blobs.emplace_back( - it->get_data(), it->get_data() + it->get_size()); - // register the shared library as already loaded - lib_names.emplace(info->dlpi_name); - } - } - return 0; - }, - nullptr); - - for (auto&& blob : blobs) { - for (auto sub_blob = blob.begin(); sub_blob != blob.end(); ) { - Bundled_code_header tmp(sub_blob, blob.end()); - if (valid(tmp)) { - for (auto&& bundle : bundles(tmp)) { - r[triple_to_hsa_isa(bundle.triple)].push_back(bundle.blob); - } - sub_blob+=tmp.bundled_code_size; - } - else { - break; - } - } - } - }; - - - call_once(f, cons); - if (rebuild) { - cons(); - } - - return r; -} - -vector> function_names_for(const elfio& reader, section* symtab) { - vector> r; - symbol_section_accessor symbols{reader, symtab}; - - for (auto i = 0u; i != symbols.get_symbols_num(); ++i) { - // TODO: this is boyscout code, caching the temporaries - // may be of worth. - auto tmp = read_symbol(symbols, i); - - if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) { - r.emplace_back(tmp.value, tmp.name); - } - } - - return r; -} - -const vector>& function_names_for_process(bool rebuild = false) { - static constexpr const char self[] = "/proc/self/exe"; - - static vector> r; - static once_flag f; - - auto cons = [rebuild]() { - elfio reader; - - if (!reader.load(self)) { - throw runtime_error{"Failed to load the ELF file for the current process."}; - } - - auto symtab = - find_section_if(reader, [](const section* x) { return x->get_type() == SHT_SYMTAB; }); - - if (symtab) r = function_names_for(reader, symtab); - }; - - call_once(f, cons); - if (rebuild) { - cons(); - } - - return r; -} - -const unordered_map>& kernels(bool rebuild = false) { - static unordered_map> r; - static once_flag f; - - auto cons = [rebuild]() { - if (rebuild) { - r.clear(); - executables(rebuild); - } - - static const auto copy_kernels = [](hsa_executable_t, hsa_agent_t, - hsa_executable_symbol_t s, void*) { - if (type(s) == HSA_SYMBOL_KIND_KERNEL) r[name(s)].push_back(s); - - return HSA_STATUS_SUCCESS; - }; - - for (auto&& agent_executables : executables()) { - for (auto&& executable : agent_executables.second) { - hsa_executable_iterate_agent_symbols(executable, agent_executables.first, - copy_kernels, nullptr); - } - } - }; - - call_once(f, cons); - if (rebuild) { - cons(); - } - - return r; -} - -void load_code_object_and_freeze_executable( - const string& file, hsa_agent_t agent, - hsa_executable_t - executable) { // TODO: the following sequence is inefficient, should be refactored - // into a single load of the file and subsequent ELFIO - // processing. - static const auto cor_deleter = [](hsa_code_object_reader_t* p) { - if (p) { - hsa_code_object_reader_destroy(*p); - delete p; - } - }; - - using RAII_code_reader = unique_ptr; - - if (!file.empty()) { - RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter}; - hsa_code_object_reader_create_from_memory(file.data(), file.size(), tmp.get()); - - hsa_executable_load_agent_code_object(executable, agent, *tmp, nullptr, nullptr); - - hsa_executable_freeze(executable, nullptr); - - static vector code_readers; - static mutex mtx; - - lock_guard lck{mtx}; - code_readers.push_back(move(tmp)); - } -} - -size_t parse_args( - const string& metadata, - size_t f, - size_t l, - vector>& size_align) { - if (f == l) return f; - if (!size_align.empty()) return l; - - do { - static constexpr size_t size_sz{5}; - f = metadata.find("Size:", f) + size_sz; - - if (l <= f) return f; - - auto size = strtoul(&metadata[f], nullptr, 10); - - static constexpr size_t align_sz{6}; - f = metadata.find("Align:", f) + align_sz; - - char* l{}; - auto align = strtoul(&metadata[f], &l, 10); - - f += (l - &metadata[f]) + 1; - - size_align.emplace_back(size, align); - } while (true); -} - -void read_kernarg_metadata( - elfio& reader, - unordered_map>>& kernargs) -{ // TODO: this is inefficient. - auto it = find_section_if( - reader, [](const section* x) { return x->get_type() == SHT_NOTE; }); - - if (!it) return; - - const note_section_accessor acc{reader, it}; - for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { - ELFIO::Elf_Word type{}; - string name{}; - void* desc{}; - Elf_Word desc_size{}; - - acc.get_note(i, type, name, desc, desc_size); - - if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. - - string tmp{ - static_cast(desc), static_cast(desc) + desc_size}; - - auto dx = tmp.find("Kernels:"); - - if (dx == string::npos) continue; - - static constexpr decltype(tmp.size()) kernels_sz{8}; - dx += kernels_sz; - - do { - dx = tmp.find("Name:", dx); - - if (dx == string::npos) break; - - static constexpr decltype(tmp.size()) name_sz{5}; - dx = tmp.find_first_not_of(" '", dx + name_sz); - - auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); - dx += fn.size(); - - auto dx1 = tmp.find("CodeProps", dx); - dx = tmp.find("Args:", dx); - - if (dx1 < dx) { - dx = dx1; - continue; - } - if (dx == string::npos) break; - - static constexpr decltype(tmp.size()) args_sz{5}; - dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]); - } while (true); - } -} -} // namespace - -namespace hip_impl { -const unordered_map>& -executables(bool rebuild) { // TODO: This leaks the hsa_executable_ts, it should use RAII. - static unordered_map> r; - static once_flag f; - - auto cons = [rebuild]() { - static const auto accelerators = hc::accelerator::get_all(); - - if (rebuild) { - // do NOT clear r so we reuse instances of hsa_executable_t - // created previously - code_object_blobs(rebuild); - } - - for (auto&& acc : accelerators) { - auto agent = static_cast(acc.get_hsa_agent()); - - if (!agent || !acc.is_hsa_accelerator()) continue; - - hsa_agent_iterate_isas(*agent, - [](hsa_isa_t x, void* pa) { - const auto it = code_object_blobs().find(x); - - if (it != code_object_blobs().cend()) { - hsa_agent_t a = *static_cast(pa); - - for (auto&& blob : it->second) { - hsa_executable_t tmp = {}; - - hsa_executable_create_alt( - HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr, - &tmp); - - // TODO: this is massively inefficient and only - // meant for illustration. - string blob_to_str{blob.cbegin(), blob.cend()}; - tmp = load_executable(blob_to_str, tmp, a); - - if (tmp.handle) r[a].push_back(tmp); - } - } - - return HSA_STATUS_SUCCESS; - }, - agent); - } - }; - - call_once(f, cons); - if (rebuild) { - cons(); - } - - return r; -} - -const unordered_map& function_names(bool rebuild) { - static unordered_map r{function_names_for_process().cbegin(), - function_names_for_process().cend()}; - static once_flag f; - - auto cons = [rebuild]() { - if (rebuild) { - r.clear(); - function_names_for_process(rebuild); - r.insert(function_names_for_process().cbegin(), - function_names_for_process().cend()); - } - - dl_iterate_phdr( - [](dl_phdr_info* info, size_t, void*) { - elfio tmp; - if (tmp.load(info->dlpi_name)) { - const auto it = find_section_if( - tmp, [](const section* x) { return x->get_type() == SHT_SYMTAB; }); - - if (it) { - auto n = function_names_for(tmp, it); - - for (auto&& f : n) f.first += info->dlpi_addr; - - r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end())); - } - } - - return 0; - }, - nullptr); - }; - - call_once(f, cons); - if (rebuild) { - static mutex mtx; - lock_guard lck{mtx}; - cons(); - } - - return r; -} - -const unordered_map>>& functions(bool rebuild) { - static unordered_map>> r; - static once_flag f; - - auto cons = [rebuild]() { - if (rebuild) { - // do NOT clear r so we reuse instances of pair - // created previously - - function_names(rebuild); - kernargs(rebuild); - kernels(rebuild); - globals(rebuild); - } - - for (auto&& function : function_names()) { - const auto it = kernels().find(function.second); - - if (it != kernels().cend()) { - for (auto&& kernel_symbol : it->second) { - r[function.first].emplace_back( - agent(kernel_symbol), - Kernel_descriptor{kernel_object(kernel_symbol), it->first}); - } - } - } - }; - - call_once(f, cons); - if (rebuild) { - static mutex mtx; - lock_guard lck{mtx}; - cons(); - } - - return r; -} - -unordered_map& globals(bool rebuild) { - static unordered_map r; - static once_flag f; - auto cons =[rebuild]() { - if (rebuild) { - r.clear(); - symbol_addresses(rebuild); - } - - r.reserve(symbol_addresses().size()); - }; - - call_once(f, cons); - if (rebuild) { - cons(); - } - - return r; -} - -const unordered_map>>& kernargs( - bool rebuild) { - static unordered_map>> r; - static once_flag f; - - static const auto build_map = [](decltype(r)& x) { - for (auto&& isa_blobs : code_object_blobs()) { - for (auto&& blob : isa_blobs.second) { - stringstream tmp{std::string{blob.cbegin(), blob.cend()}}; - - elfio reader; - if (!reader.load(tmp)) continue; - - read_kernarg_metadata(reader, x); - } - } - }; - call_once(f, []() { r.reserve(function_names().size()); build_map(r); }); - - if (rebuild) { - static mutex mtx; - thread_local static decltype(r) tmp; - - { - lock_guard lck{mtx}; - - tmp.insert(r.cbegin(), r.cend()); // Should use merge in C++17. - } - - build_map(tmp); - - lock_guard lck{mtx}; - - r.insert(tmp.cbegin(), tmp.cend()); - } - - return r; -} - -hsa_executable_t load_executable(const string& file, hsa_executable_t executable, - hsa_agent_t agent) { - elfio reader; - stringstream tmp{file}; - - if (!reader.load(tmp)) return hsa_executable_t{}; - - const auto code_object_dynsym = find_section_if( - reader, [](const ELFIO::section* x) { return x->get_type() == SHT_DYNSYM; }); - - associate_code_object_symbols_with_host_allocation(reader, code_object_dynsym, agent, - executable); - - load_code_object_and_freeze_executable(file, agent, executable); - - return executable; -} - -// HIP startup kernel loader logic -// When enabled HIP_STARTUP_LOADER, HIP will load the kernels and setup -// the function symbol map on program startup -extern "C" void __attribute__((constructor)) __startup_kernel_loader_init() { - int hip_startup_loader=0; - if (std::getenv("HIP_STARTUP_LOADER")) - hip_startup_loader = atoi(std::getenv("HIP_STARTUP_LOADER")); - if (hip_startup_loader) functions(true); -} - -extern "C" void __attribute__((destructor)) __startup_kernel_loader_fini() { -} - -} // Namespace hip_impl.