From eb2efb83d1a6990fb251abe6bfa2a33a3a0c03ed Mon Sep 17 00:00:00 2001
From: Ramesh Errabolu
Date: Wed, 5 Oct 2016 00:15:13 -0500
Subject: [PATCH] Initial set of changes for ThreadTrace

Change-Id: I07ce31f9b4f508cef0fc9ca6dadcf26b6c90361e
---
 runtime/hsa-runtime-tools/CMakeLists.txt     | 13 ++++++------
 .../hsa-runtime/core/inc/amd_hsa_loader.hpp  |  4 ++++
 runtime/hsa-runtime/core/runtime/hsa.cpp     |  1 +
 .../core/runtime/hsa_ven_amd_loader.cpp      | 21 +++++++++++++++++++
 runtime/hsa-runtime/core/util/flag.h         | 12 +++++++++++
 runtime/hsa-runtime/inc/hsa_ven_amd_loader.h | 20 ++++++++++++++++++
 runtime/hsa-runtime/loader/executable.cpp    | 19 +++++++++++++++++
 runtime/hsa-runtime/loader/executable.hpp    |  2 ++
 8 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/runtime/hsa-runtime-tools/CMakeLists.txt b/runtime/hsa-runtime-tools/CMakeLists.txt
index 1c93c3ff1c..559094ccd7 100755
--- a/runtime/hsa-runtime-tools/CMakeLists.txt
+++ b/runtime/hsa-runtime-tools/CMakeLists.txt
@@ -146,16 +146,17 @@ set ( INTERCEPT_SRC ${TOOLS_SOURCE_DIR}/intercept/amd_sw_aql_command_processor
                     ${TOOLS_SOURCE_DIR}/intercept/hsa_amd_tools.cpp
                     ${TOOLS_SOURCE_DIR}/intercept/profiler.cpp )
 
-set ( PROFILER_SRC ${TOOLS_SOURCE_DIR}/profiler/ci_blockinfo.cpp
-                   ${TOOLS_SOURCE_DIR}/profiler/ci_pmu.cpp
-                   ${TOOLS_SOURCE_DIR}/profiler/gpu_countergroup.cpp
+set ( PROFILER_SRC ${TOOLS_SOURCE_DIR}/profiler/gpu_countergroup.cpp
                    ${TOOLS_SOURCE_DIR}/profiler/gpu_counter.cpp
-                   ${TOOLS_SOURCE_DIR}/profiler/hsa_ext_profiler.cpp
+                   ${TOOLS_SOURCE_DIR}/profiler/var_data.cpp
                    ${TOOLS_SOURCE_DIR}/profiler/info_set.cpp
                    ${TOOLS_SOURCE_DIR}/profiler/parameter_set.cpp
-                   ${TOOLS_SOURCE_DIR}/profiler/var_data.cpp
+                   ${TOOLS_SOURCE_DIR}/profiler/thread_trace.cpp
+                   ${TOOLS_SOURCE_DIR}/profiler/ci_blockinfo.cpp
+                   ${TOOLS_SOURCE_DIR}/profiler/ci_pmu.cpp
                    ${TOOLS_SOURCE_DIR}/profiler/vi_blockinfo.cpp
-                   ${TOOLS_SOURCE_DIR}/profiler/vi_pmu.cpp )
+                   ${TOOLS_SOURCE_DIR}/profiler/vi_pmu.cpp
+                   ${TOOLS_SOURCE_DIR}/profiler/hsa_ext_profiler.cpp )
 
 set ( SP3_R1000_SRC ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-asic.c
                     ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-dispatch.c
diff --git a/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp b/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp
index 364cad54c3..5934612348 100644
--- a/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp
+++ b/runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp
@@ -399,6 +399,10 @@ public:
     hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
     size_t *num_segment_descriptors) = 0;
 
+  /// @brief Finds the handle of the executable to which @p device_address
+  /// belongs. Returns a null (zero) handle if @p device_address is invalid.
+  virtual hsa_executable_t FindExecutable(uint64_t device_address) = 0;
+
   /// @brief Returns host address given @p device_address. If @p device_address
   /// is already host address, returns null pointer. If @p device_address is
   /// invalid address, returns null pointer.
diff --git a/runtime/hsa-runtime/core/runtime/hsa.cpp b/runtime/hsa-runtime/core/runtime/hsa.cpp
index cd14411c6c..66927b5fe3 100644
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
@@ -396,6 +396,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
     ext_table.hsa_ven_amd_loader_query_host_address = hsa_ven_amd_loader_query_host_address;
     ext_table.hsa_ven_amd_loader_query_segment_descriptors =
         hsa_ven_amd_loader_query_segment_descriptors;
+    ext_table.hsa_ven_amd_loader_query_executable = hsa_ven_amd_loader_query_executable;
 
     memcpy(table, &ext_table, Min(sizeof(ext_table), table_length));
 
diff --git a/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp b/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp
index ba951053e3..c95b62d141 100644
--- a/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp
@@ -80,3 +80,24 @@ hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors(
   // Arguments are checked by the loader.
   return Runtime::runtime_singleton_->loader()->QuerySegmentDescriptors(segment_descriptors, num_segment_descriptors);
 }
+
+hsa_status_t HSA_API hsa_ven_amd_loader_query_executable(
+  const void *device_address,
+  hsa_executable_t *executable) {
+
+  if (false == core::Runtime::runtime_singleton_->IsOpen()) {
+    return HSA_STATUS_ERROR_NOT_INITIALIZED;
+  }
+  if ((nullptr == device_address) || (nullptr == executable)) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  uint64_t udaddr = reinterpret_cast<uint64_t>(device_address);
+  hsa_executable_t exec = Runtime::runtime_singleton_->loader()->FindExecutable(udaddr);
+  if (0 == exec.handle) {  // null handle: the loader found no executable for this address
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  *executable = exec;
+  return HSA_STATUS_SUCCESS;
+}
diff --git a/runtime/hsa-runtime/core/util/flag.h b/runtime/hsa-runtime/core/util/flag.h
index 2e1fb88672..1e1fe0f833 100644
--- a/runtime/hsa-runtime/core/util/flag.h
+++ b/runtime/hsa-runtime/core/util/flag.h
@@ -69,6 +69,12 @@ class Flag {
     var = os::GetEnvVar("HSA_ENABLE_INTERRUPT");
     enable_interrupt_ = (var == "0") ? false : true;
 
+    var = os::GetEnvVar("HSA_ENABLE_THREAD_TRACE");
+    enable_thread_trace_ = (var == "1") ? true : false;
+
+    var = os::GetEnvVar("HSA_THREAD_TRACE_MEM_SIZE");
+    thread_trace_buff_size_ = atoi(var.c_str());  // atoi yields 0 when the value is not numeric
+
     var = os::GetEnvVar("HSA_ENABLE_SDMA");
     enable_sdma_ = (var == "0") ? false : true;
 
@@ -98,6 +104,9 @@ class Flag {
 
   bool enable_interrupt() const { return enable_interrupt_; }
 
+  bool enable_thread_trace() const { return enable_thread_trace_; }
+  size_t thread_trace_buff_size() const { return thread_trace_buff_size_; }
+
   bool enable_sdma() const { return enable_sdma_; }
 
   bool emulate_aql() const { return emulate_aql_; }
@@ -122,6 +131,9 @@ class Flag {
   bool sdma_wait_idle_;
   bool enable_queue_fault_message_;
 
+  bool enable_thread_trace_;
+  size_t thread_trace_buff_size_;
+
   uint32_t max_queues_;
 
   size_t scratch_mem_size_;
diff --git a/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h b/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h
index 804a360a2b..cab0881c69 100644
--- a/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h
+++ b/runtime/hsa-runtime/inc/hsa_ven_amd_loader.h
@@ -224,6 +224,22 @@ hsa_status_t HSA_API hsa_ven_amd_loader_query_segment_descriptors(
   hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
   size_t *num_segment_descriptors);
 
+/**
+ * @brief Obtains the handle of the executable to which the device address
+ * belongs. This method should not be used to obtain an executable handle by
+ * using a host address.
+ *
+ * @retval HSA_STATUS_SUCCESS Function is executed successfully.
+ *
+ * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
+ *
+ * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
+ * is no executable found for this device address.
+ */
+hsa_status_t HSA_API hsa_ven_amd_loader_query_executable(
+  const void *device_address,
+  hsa_executable_t *executable);
+
 /**
  * @brief Extension version.
  */
@@ -240,6 +256,10 @@ typedef struct hsa_ven_amd_loader_1_00_pfn_s {
   hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
     hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
     size_t *num_segment_descriptors);
+
+  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
+    const void *device_address,
+    hsa_executable_t *executable);
 } hsa_ven_amd_loader_1_00_pfn_t;
 
 #ifdef __cplusplus
diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp
index b154790020..c1465da948 100644
--- a/runtime/hsa-runtime/loader/executable.cpp
+++ b/runtime/hsa-runtime/loader/executable.cpp
@@ -915,6 +915,25 @@ size_t ExecutableImpl::QuerySegmentDescriptors(
   return i - first_empty_segment_descriptor;
 }
 
+hsa_executable_t AmdHsaCodeLoader::FindExecutable(uint64_t device_address)
+{
+  hsa_executable_t execHandle = {0};
+  if (device_address == 0) {  // null address: answer without taking the lock
+    return execHandle;
+  }
+
+  ReaderLockGuard reader_lock(rw_lock_);
+  for (auto &exec : executables) {
+    if (exec != nullptr) {
+      uint64_t host_address = exec->FindHostAddress(device_address);
+      if (host_address != 0) {  // first executable that resolves the address wins
+        return Executable::Handle(exec);
+      }
+    }
+  }
+  return execHandle;
+}
+
 uint64_t ExecutableImpl::FindHostAddress(uint64_t device_address)
 {
   for (auto &obj : loaded_code_objects) {
diff --git a/runtime/hsa-runtime/loader/executable.hpp b/runtime/hsa-runtime/loader/executable.hpp
index 79fcdabfd5..ff897f3323 100644
--- a/runtime/hsa-runtime/loader/executable.hpp
+++ b/runtime/hsa-runtime/loader/executable.hpp
@@ -522,6 +522,8 @@ public:
     hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
     size_t *num_segment_descriptors) override;
 
+  hsa_executable_t FindExecutable(uint64_t device_address) override;
+
   uint64_t FindHostAddress(uint64_t device_address) override;
 
   void EnableReadOnlyMode();