diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-runtime/core/CMakeLists.txt index ec0816ca19..ee066d132b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/CMakeLists.txt @@ -133,6 +133,7 @@ set ( CORE_SRCS ${CORE_SRCS} runtime/amd_gpu_agent.cpp ) set ( CORE_SRCS ${CORE_SRCS} runtime/amd_aql_queue.cpp ) set ( CORE_SRCS ${CORE_SRCS} runtime/amd_loader_context.cpp ) set ( CORE_SRCS ${CORE_SRCS} runtime/hsa_ven_amd_loaded_code_object.cpp ) +set ( CORE_SRCS ${CORE_SRCS} runtime/hsa_ven_amd_loader.cpp ) set ( CORE_SRCS ${CORE_SRCS} runtime/amd_memory_region.cpp ) set ( CORE_SRCS ${CORE_SRCS} runtime/amd_topology.cpp ) set ( CORE_SRCS ${CORE_SRCS} runtime/default_signal.cpp ) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_elf_image.hpp b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_elf_image.hpp index 8bc811e173..763c5c831c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_elf_image.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_elf_image.hpp @@ -103,6 +103,7 @@ namespace amd { virtual uint64_t imageSize() const = 0; virtual uint64_t vaddr() const = 0; virtual uint64_t flags() const = 0; + virtual uint64_t offset() const = 0; virtual const char* data() const = 0; virtual uint16_t getSegmentIndex() = 0; virtual bool updateAddSection(Section *section) = 0; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp index 5b9cd4d92e..251df841ac 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp @@ -47,6 +47,7 @@ #include #include "hsa.h" #include "hsa_ext_image.h" +#include "hsa_ven_amd_loader.h" #include "amd_hsa_elf.h" #include #include @@ -317,6 +318,13 @@ public: void 
*data), void *data) = 0; + virtual size_t GetNumSegmentDescriptors() = 0; + + virtual size_t QuerySegmentDescriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t total_num_segment_descriptors, + size_t first_empty_segment_descriptor) = 0; + virtual uint64_t FindHostAddress(uint64_t device_address) = 0; virtual void Print(std::ostream& out) = 0; @@ -368,6 +376,11 @@ public: void *data), void *data) = 0; + /// @brief same as hsa_ven_amd_loader_query_segment_descriptors. + virtual hsa_status_t QuerySegmentDescriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors) = 0; + /// @brief Returns host address given @p device_address. If @p device_address /// is already host address, returns null pointer. If @p device_address is /// invalid address, returns null pointer. diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp index 8e8f0ecebf..6b4cd50b71 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa.cpp @@ -56,6 +56,7 @@ #include "core/inc/interrupt_signal.h" #include "core/inc/amd_loader_context.hpp" #include "inc/hsa_ven_amd_loaded_code_object.h" +#include "inc/hsa_ven_amd_loader.h" using namespace amd::hsa::code; @@ -168,9 +169,7 @@ hsa_status_t uint16_t version_minor, bool* result) { IS_OPEN(); - if ((extension > HSA_EXTENSION_AMD_PROFILER && - extension != HSA_EXTENSION_AMD_LOADED_CODE_OBJECT) || - (result == NULL)) { + if (extension >= HSA_EXTENSION_COUNT || result == NULL) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } @@ -213,9 +212,6 @@ hsa_status_t } if (supported) { - ExtTable& runtime_ext_table = - core::Runtime::runtime_singleton_->extensions_.table; - if (extension == HSA_EXTENSION_IMAGES) { // Currently there is only version 1.00. 
hsa_ext_images_1_00_pfn_t* ext_table = @@ -253,6 +249,17 @@ hsa_status_t hsa_ven_amd_loaded_code_object_query_host_address; return HSA_STATUS_SUCCESS; + } else if (extension == HSA_EXTENSION_AMD_LOADER) { + // Currently there is only version 1.00. + hsa_ven_amd_loader_1_00_pfn_t* ext_table = + reinterpret_cast<hsa_ven_amd_loader_1_00_pfn_t*>(table); + ext_table->hsa_ven_amd_loader_query_segment_descriptors = + hsa_ven_amd_loader_query_segment_descriptors; + ext_table->hsa_ven_amd_loader_query_host_address = + hsa_ven_amd_loader_query_host_address; + + // Report success explicitly, exactly like every other supported-extension branch. + return HSA_STATUS_SUCCESS; } else { return HSA_STATUS_ERROR; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp new file mode 100644 index 0000000000..15ea754b3a --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp @@ -0,0 +1,82 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers.
+// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include "hsa_ven_amd_loader.h" + +#include "core/inc/amd_hsa_loader.hpp" +#include "core/inc/runtime.h" + +using namespace core; + +hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors) { + if (false == core::Runtime::runtime_singleton_->IsOpen()) { + return HSA_STATUS_ERROR_NOT_INITIALIZED; + } + + // Arguments are checked by the loader. 
+ return Runtime::runtime_singleton_->loader()->QuerySegmentDescriptors(segment_descriptors, num_segment_descriptors); +} + +hsa_status_t HSA_API hsa_ven_amd_loader_query_host_address( + const void *device_address, + const void **host_address) { + if (false == core::Runtime::runtime_singleton_->IsOpen()) { + return HSA_STATUS_ERROR_NOT_INITIALIZED; + } + if (nullptr == device_address) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + if (nullptr == host_address) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + uint64_t udaddr = reinterpret_cast<uint64_t>(device_address); + uint64_t uhaddr = Runtime::runtime_singleton_->loader()->FindHostAddress(udaddr); + if (0 == uhaddr) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + *host_address = reinterpret_cast<const void*>(uhaddr); + return HSA_STATUS_SUCCESS; +} diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h index f80768dbf1..c4de41fbf4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h @@ -462,9 +462,17 @@ typedef enum { */ HSA_EXTENSION_AMD_PROFILER = 2, /** - * Loaded code object extension. + * @deprecated Loaded code object extension. */ - HSA_EXTENSION_AMD_LOADED_CODE_OBJECT = 3 + HSA_EXTENSION_AMD_LOADED_CODE_OBJECT = 3, + /** + * Loader extension. + */ + HSA_EXTENSION_AMD_LOADER = 4, + /** + * Extension count. + */ + HSA_EXTENSION_COUNT } hsa_extension_t; /** diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h new file mode 100644 index 0000000000..a566f2ee25 --- /dev/null +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h @@ -0,0 +1,249 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2015, Advanced Micro Devices, Inc.
All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +// HSA AMD extension for additional loader functionality. 
+ +#ifndef HSA_VEN_AMD_LOADER_H +#define HSA_VEN_AMD_LOADER_H + +#include "hsa.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @brief The storage type of the code object that is backing loaded memory + * segment. + */ +typedef enum { + /** + * Loaded memory segment is not backed by any code object (anonymous), as the + * case would be with BSS (uninitialized data). + */ + HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0, + /** + * Loaded memory segment is backed by the code object that is stored in the + * file. + */ + HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1, + /** + * Loaded memory segment is backed by the code object that is stored in the + * memory. + */ + HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2 +} hsa_ven_amd_loader_code_object_storage_type_t; + +/** + * @brief Loaded memory segment descriptor. + * + * + * @details Loaded memory segment descriptor describes underlying loaded memory + * segment. Loaded memory segment is created/allocated by the executable during + * the loading of the code object that is backing underlying memory segment. + * + * The lifetime of underlying memory segment is limited by the lifetime of the + * executable that is managing underlying memory segment. + */ +typedef struct hsa_ven_amd_loader_segment_descriptor_s { + /** + * Agent underlying memory segment is allocated on. If the code object that is + * backing underlying memory segment is program code object, then 0. + */ + hsa_agent_t agent; + /** + * Executable that is managing this underlying memory segment. + */ + hsa_executable_t executable; + /** + * Storage type of the code object that is backing underlying memory segment. 
+ */ + hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type; + /** + * If the storage type of the code object that is backing underlying memory + * segment is: + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null; + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated + * filepath to the code object; + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host + * accessible pointer to the first byte of the code object. + */ + const void *code_object_storage_base; + /** + * If the storage type of the code object that is backing underlying memory + * segment is: + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0; + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of + * the filepath to the code object (including null-terminating character); + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in + * bytes, of the memory occupied by the code object. + */ + size_t code_object_storage_size; + /** + * If the storage type of the code object that is backing underlying memory + * segment is: + * - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0; + * - other, then offset, in bytes, from the beginning of the code object to + * the first byte in the code object data is copied from. + */ + size_t code_object_storage_offset; + /** + * Starting address of the underlying memory segment. + */ + const void *segment_base; + /** + * Size, in bytes, of the underlying memory segment. + */ + size_t segment_size; +} hsa_ven_amd_loader_segment_descriptor_t; + +/** + * @brief Either queries loaded memory segment descriptors, or total number of + * loaded memory segment descriptors. + * + * + * @details If @p segment_descriptors is not null and @p num_segment_descriptors + * points to number that exactly matches total number of loaded memory segment + * descriptors, then queries loaded memory segment descriptors, and records them + * in @p segment_descriptors. 
If @p segment_descriptors is null and @p + * num_segment_descriptors points to zero, then queries total number of loaded + * memory segment descriptors, and records it in @p num_segment_descriptors. In + * all other cases returns appropriate error code (see below). + * + * The caller of this function is responsible for the allocation/deallocation + * and the lifetime of @p segment_descriptors and @p num_segment_descriptors. + * + * The lifetime of loaded memory segments that are described by queried loaded + * memory segment descriptors is limited by the lifetime of the executable that + * is managing loaded memory segments. + * + * Queried loaded memory segment descriptors are always self-consistent: they + * describe a complete set of loaded memory segments that are being backed by + * fully loaded code objects that are present at the time (i.e. this function + * is blocked until all executable manipulations are fully complete). + * + * + * @param[out] segment_descriptors Pointer to application-allocated buffer to + * record queried loaded memory segment descriptors in. Can be null if @p + * num_segment_descriptors points to zero. + * + * @param[in,out] num_segment_descriptors Pointer to application-allocated + * buffer that contains either total number of loaded memory segment descriptors + * or zero. + * + * + * @retval HSA_STATUS_SUCCESS Function is executed successfully. + * + * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized. + * + * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null + * while @p num_segment_descriptors points to non-zero number, @p + * segment_descriptors is not null while @p num_segment_descriptors points to + * zero, or @p num_segment_descriptors is null. + * + * @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors + * does not point to number that exactly matches total number of loaded memory + * segment descriptors. 
+ */ +hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors); + +/** + * @brief Queries equivalent host address for given @p device_address, and + * records it in @p host_address. + * + * + * @details Contents of memory pointed to by @p host_address would be identical + * to contents of memory pointed to by @p device_address. Only difference + * between the two is host accessibility: @p host_address is always accessible + * from host, @p device_address might not be accessible from host. + * + * If @p device_address already points to host accessible memory, then the value + * of @p device_address is simply copied into @p host_address. + * + * The lifetime of @p host_address is the same as the lifetime of @p + * device_address, and both lifetimes are limited by the lifetime of the + * executable that is managing these addresses. + * + * + * @param[in] device_address Device address to query equivalent host address + * for. + * + * @param[out] host_address Pointer to application-allocated buffer to record + * queried equivalent host address in. + * + * + * @retval HSA_STATUS_SUCCESS Function is executed successfully. + * + * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized. + * + * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or + * null, or @p host_address is null. + */ +hsa_status_t HSA_API hsa_ven_amd_loader_query_host_address( + const void *device_address, + const void **host_address); + +/** + * @brief Extension version. + */ +#define hsa_ven_amd_loader 001000 + +/** + * @brief Extension function table. 
+ */ +typedef struct hsa_ven_amd_loader_1_00_pfn_s { + hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors); + + hsa_status_t (*hsa_ven_amd_loader_query_host_address)( + const void *device_address, + const void **host_address); +} hsa_ven_amd_loader_1_00_pfn_t; + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* HSA_VEN_AMD_LOADER_H */ diff --git a/projects/rocr-runtime/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp b/projects/rocr-runtime/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp index 3f735c82be..fb36d6234c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp @@ -456,6 +456,7 @@ namespace amd { uint64_t imageSize() const override { return phdr.p_filesz; } uint64_t vaddr() const override { return phdr.p_vaddr; } uint64_t flags() const override { return phdr.p_flags; } + uint64_t offset() const override { return phdr.p_offset; } const char* data() const override; uint16_t getSegmentIndex() override; bool updateAddSection(Section *section) override; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp index 45c6abd619..17eb7c1ca9 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp @@ -137,7 +137,7 @@ void Loader::Destroy(Loader *loader) Executable* AmdHsaCodeLoader::CreateExecutable( hsa_profile_t profile, const char *options) { - std::lock_guard lock(executables_mutex); + WriterLockGuard writer_lock(rw_lock_); executables.push_back(new ExecutableImpl(profile, context, executables.size())); return executables.back(); @@ -145,7 +145,8 @@ Executable* AmdHsaCodeLoader::CreateExecutable( void AmdHsaCodeLoader::DestroyExecutable(Executable 
*executable) { - std::lock_guard lock(executables_mutex); + WriterLockGuard writer_lock(rw_lock_); + executables[((ExecutableImpl*)executable)->id()] = nullptr; delete executable; } @@ -156,7 +157,7 @@ hsa_status_t AmdHsaCodeLoader::IterateExecutables( void *data), void *data) { - std::lock_guard lock(executables_mutex); + WriterLockGuard writer_lock(rw_lock_); assert(callback); for (auto &exec : executables) { @@ -169,12 +170,57 @@ hsa_status_t AmdHsaCodeLoader::IterateExecutables( return HSA_STATUS_SUCCESS; } +hsa_status_t AmdHsaCodeLoader::QuerySegmentDescriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors) +{ + if (!num_segment_descriptors) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + if (*num_segment_descriptors == 0 && segment_descriptors) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + if (*num_segment_descriptors != 0 && !segment_descriptors) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + this->EnableReadOnlyMode(); + + size_t actual_num_segment_descriptors = 0; + for (auto &executable : executables) { + if (executable) { + actual_num_segment_descriptors += executable->GetNumSegmentDescriptors(); + } + } + + if (*num_segment_descriptors == 0) { + *num_segment_descriptors = actual_num_segment_descriptors; + this->DisableReadOnlyMode(); + return HSA_STATUS_SUCCESS; + } + if (*num_segment_descriptors != actual_num_segment_descriptors) { + this->DisableReadOnlyMode(); + return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS; + } + + size_t i = 0; + for (auto &executable : executables) { + if (executable) { + i += executable->QuerySegmentDescriptors(segment_descriptors, actual_num_segment_descriptors, i); + } + } + + this->DisableReadOnlyMode(); + return HSA_STATUS_SUCCESS; +} + uint64_t AmdHsaCodeLoader::FindHostAddress(uint64_t device_address) { + ReaderLockGuard reader_lock(rw_lock_); if (device_address == 0) { return 0; } - std::lock_guard lock(executables_mutex); + for (auto &exec : 
executables) { if (exec != nullptr) { uint64_t host_address = exec->FindHostAddress(device_address); @@ -186,6 +232,26 @@ uint64_t AmdHsaCodeLoader::FindHostAddress(uint64_t device_address) return 0; } +void AmdHsaCodeLoader::EnableReadOnlyMode() +{ + rw_lock_.ReaderLock(); + for (auto &executable : executables) { + if (executable) { + ((ExecutableImpl*)executable)->EnableReadOnlyMode(); + } + } +} + +void AmdHsaCodeLoader::DisableReadOnlyMode() +{ + for (auto &executable : executables) { + if (executable) { + ((ExecutableImpl*)executable)->DisableReadOnlyMode(); + } + } + rw_lock_.ReaderUnlock(); // released last: this reader lock is what keeps `executables` unchanged while the loop above walks it +} + //===----------------------------------------------------------------------===// // SymbolImpl. // //===----------------------------------------------------------------------===// @@ -754,6 +820,44 @@ hsa_status_t ExecutableImpl::IterateLoadedCodeObjects( return HSA_STATUS_SUCCESS; } +size_t ExecutableImpl::GetNumSegmentDescriptors() +{ + // assuming we are in readonly mode. + size_t actual_num_segment_descriptors = 0; + for (auto &obj : loaded_code_objects) { + actual_num_segment_descriptors += obj->LoadedSegments().size(); + } + return actual_num_segment_descriptors; +} + +size_t ExecutableImpl::QuerySegmentDescriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t total_num_segment_descriptors, + size_t first_empty_segment_descriptor) +{ + // assuming we are in readonly mode.
+ assert(segment_descriptors); + assert(first_empty_segment_descriptor <= total_num_segment_descriptors); // '==' is legal: an executable with no loaded segments may be visited after the buffer is already full + + size_t i = first_empty_segment_descriptor; + for (auto &obj : loaded_code_objects) { + for (auto &seg : obj->LoadedSegments()) { + assert(i < total_num_segment_descriptors); // bounds-check each slot right before it is written + segment_descriptors[i].agent = seg->Agent(); + segment_descriptors[i].executable = Executable::Handle(seg->Owner()); + segment_descriptors[i].code_object_storage_type = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY; + segment_descriptors[i].code_object_storage_base = obj->ElfData(); + segment_descriptors[i].code_object_storage_size = obj->ElfSize(); + segment_descriptors[i].code_object_storage_offset = seg->StorageOffset(); + segment_descriptors[i].segment_base = seg->Address(seg->VAddr()); + segment_descriptors[i].segment_size = seg->Size(); + ++i; + } + } + + return i - first_empty_segment_descriptor; +} + uint64_t ExecutableImpl::FindHostAddress(uint64_t device_address) { for (auto &obj : loaded_code_objects) { @@ -771,6 +875,16 @@ uint64_t ExecutableImpl::FindHostAddress(uint64_t device_address) return 0; } +void ExecutableImpl::EnableReadOnlyMode() +{ + rw_lock_.ReaderLock(); +} + +void ExecutableImpl::DisableReadOnlyMode() +{ + rw_lock_.ReaderUnlock(); +} + #define HSAERRCHECK(hsc) \ if (hsc != HSA_STATUS_SUCCESS) { \ assert(false); \ @@ -854,7 +968,7 @@ hsa_status_t ExecutableImpl::LoadCodeObject( } if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpCode()->is_set()) { - if (!code->SaveToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "co", dumpNum))) { + if (!code->SaveToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "hsaco", dumpNum))) { // Ignore error.
} } @@ -946,7 +1060,7 @@ hsa_status_t ExecutableImpl::LoadSegmentV1(hsa_agent_t agent, code::Segment* s) if (need_alloc) { void* ptr = context_->SegmentAlloc(segment, agent, s->memSize(), s->align(), true); if (!ptr) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } - new_seg = new Segment(this, agent, segment, ptr, s->memSize(), s->vaddr()); + new_seg = new Segment(this, agent, segment, ptr, s->memSize(), s->vaddr(), s->offset()); new_seg->Copy(s->vaddr(), s->data(), s->imageSize()); objects.push_back(new_seg); @@ -1422,7 +1536,7 @@ hsa_status_t ExecutableImpl::LoadSegmentV2(hsa_agent_t agent, code::Segment* s, void* ptr = context_->SegmentAlloc(segment, agent, s->memSize(), s->align(), true); if (!ptr) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } - Segment *new_seg = new Segment(this, agent, segment, ptr, s->memSize(), s->vaddr()); + Segment *new_seg = new Segment(this, agent, segment, ptr, s->memSize(), s->vaddr(), s->offset()); new_seg->Copy(s->vaddr(), s->data(), s->imageSize()); objects.push_back(new_seg); assert(new_seg); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp index 6801be698b..478a03ffdb 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp @@ -273,16 +273,18 @@ private: size_t size; uint64_t vaddr; bool frozen; + size_t storage_offset; public: - Segment(ExecutableImpl *owner_, hsa_agent_t agent_, amdgpu_hsa_elf_segment_t segment_, void* ptr_, size_t size_, uint64_t vaddr_) + Segment(ExecutableImpl *owner_, hsa_agent_t agent_, amdgpu_hsa_elf_segment_t segment_, void* ptr_, size_t size_, uint64_t vaddr_, size_t storage_offset_) : ExecutableObject(owner_, agent_), segment(segment_), - ptr(ptr_), size(size_), vaddr(vaddr_), frozen(false) { } + ptr(ptr_), size(size_), vaddr(vaddr_), frozen(false), storage_offset(storage_offset_) { } amdgpu_hsa_elf_segment_t ElfSegment() 
const { return segment; } void* Ptr() const { return ptr; } size_t Size() const { return size; } uint64_t VAddr() const { return vaddr; } + size_t StorageOffset() const { return storage_offset; } bool GetInfo(amd_loaded_segment_info_t attribute, void *value) override; @@ -399,8 +401,18 @@ public: void *data), void *data); + size_t GetNumSegmentDescriptors() override; + + size_t QuerySegmentDescriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t total_num_segment_descriptors, + size_t first_empty_segment_descriptor) override; + uint64_t FindHostAddress(uint64_t device_address) override; + void EnableReadOnlyMode(); + void DisableReadOnlyMode(); + void Print(std::ostream& out) override; bool PrintToFile(const std::string& filename) override; @@ -455,7 +467,7 @@ class AmdHsaCodeLoader : public Loader { private: Context* context; std::vector executables; - std::mutex executables_mutex; + amd::hsa::common::ReaderWriterLock rw_lock_; public: AmdHsaCodeLoader(Context* context_) @@ -473,7 +485,14 @@ public: void *data), void *data) override; + hsa_status_t QuerySegmentDescriptors( + hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors, + size_t *num_segment_descriptors) override; + uint64_t FindHostAddress(uint64_t device_address) override; + + void EnableReadOnlyMode(); + void DisableReadOnlyMode(); }; } // namespace loader