diff --git a/cmake/Findroctracer.cmake b/cmake/Findroctracer.cmake index c35c5c9cbb..cce0225dc0 100644 --- a/cmake/Findroctracer.cmake +++ b/cmake/Findroctracer.cmake @@ -27,6 +27,14 @@ find_path(roctracer_INCLUDE_DIR mark_as_advanced(roctracer_INCLUDE_DIR) +find_path(roctracer_hsa_INCLUDE_DIR + NAMES hsa.h + HINTS ${roctracer_ROOT_DIR} ${_ROCM_PATHS} + PATHS ${roctracer_ROOT_DIR} ${_ROCM_PATHS} + PATH_SUFFIXES include include/hsa) + +mark_as_advanced(roctracer_hsa_INCLUDE_DIR) + #----------------------------------------------------------------------------------------# find_library(roctracer_LIBRARY @@ -64,6 +72,7 @@ mark_as_advanced(roctracer_LIBRARY roctracer_roctx_LIBRARY) find_package_handle_standard_args(roctracer DEFAULT_MSG roctracer_ROOT_DIR roctracer_INCLUDE_DIR + roctracer_hsa_INCLUDE_DIR roctracer_LIBRARY roctracer_roctx_LIBRARY) @@ -72,12 +81,12 @@ find_package_handle_standard_args(roctracer DEFAULT_MSG if(roctracer_FOUND) add_library(roctracer::roctracer INTERFACE IMPORTED) add_library(roctracer::roctx INTERFACE IMPORTED) - set(roctracer_INCLUDE_DIRS ${roctracer_INCLUDE_DIR}) + set(roctracer_INCLUDE_DIRS ${roctracer_INCLUDE_DIR} ${roctracer_hsa_INCLUDE_DIR}) set(roctracer_LIBRARIES ${roctracer_LIBRARY} ${roctracer_roctx_LIBRARY}) set(roctracer_LIBRARY_DIRS ${roctracer_LIBRARY_DIR}) - target_include_directories(roctracer::roctracer INTERFACE ${roctracer_INCLUDE_DIR}) - target_include_directories(roctracer::roctx INTERFACE ${roctracer_INCLUDE_DIR}) + target_include_directories(roctracer::roctracer INTERFACE ${roctracer_INCLUDE_DIR} ${roctracer_hsa_INCLUDE_DIR}) + target_include_directories(roctracer::roctx INTERFACE ${roctracer_INCLUDE_DIR} ${roctracer_hsa_INCLUDE_DIR}) target_link_libraries(roctracer::roctracer INTERFACE ${roctracer_LIBRARY}) target_link_libraries(roctracer::roctx INTERFACE ${roctracer_roctx_LIBRARY}) diff --git a/external/timemory b/external/timemory index cc0cfd6101..acc0c0a98e 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit cc0cfd610169a65d36b6e9058b9166e7e1647e74 +Subproject commit acc0c0a98e286bb8f9b878b01257d629cb34df94 diff --git a/src/hosttrace.cpp b/src/hosttrace.cpp index 56ae45b553..a998a310cc 100644 --- a/src/hosttrace.cpp +++ b/src/hosttrace.cpp @@ -59,6 +59,9 @@ static std::string modfunc_dump_dir = "hosttrace-module-functions"; std::string get_absolute_exe_filepath(std::string exe_name); +std::string +get_absolute_lib_filepath(std::string lib_name); + //======================================================================================// // // entry point @@ -798,8 +801,9 @@ main(int argc, char** argv) bool result = false; // track the tried library names string_t _tried_libs; - for(const auto& _libname : _libnames) + for(auto _libname : _libnames) { + _libname = get_absolute_lib_filepath(_libname); _tried_libs += string_t("|") + _libname; verbprintf(0, "loading library: '%s'...\n", _libname.c_str()); result = (addr_space->loadLibrary(_libname.c_str()) != nullptr); @@ -2218,6 +2222,41 @@ get_absolute_exe_filepath(std::string exe_name) return exe_name; } +//======================================================================================// +// +std::string +get_absolute_lib_filepath(std::string lib_name) +{ + auto file_exists = [](const std::string& name) { + struct stat buffer; + return (stat(name.c_str(), &buffer) == 0); + }; + + if(!lib_name.empty() && (!file_exists(lib_name) || + std::regex_match(lib_name, std::regex("^[A-Za-z0-9].*")))) + { + auto _lib_orig = lib_name; + auto _paths = tim::delimit(tim::get_env("LD_LIBRARY_PATH", ""), ":"); + for(auto& pitr : _paths) + { + if(file_exists(TIMEMORY_JOIN('/', pitr, lib_name))) + { + lib_name = TIMEMORY_JOIN('/', pitr, lib_name); + verbprintf(0, "Resolved '%s' to '%s'...\n", _lib_orig.c_str(), + lib_name.c_str()); + break; + } + } + + if(!file_exists(lib_name)) + { + verbprintf(0, "Warning! File path to '%s' could not be determined...\n", + lib_name.c_str()); + } + } + return lib_name; +} + //======================================================================================// // inline void diff --git a/src/roctracer.cpp b/src/roctracer.cpp index e8c0b1fe30..608bf0ec3b 100644 --- a/src/roctracer.cpp +++ b/src/roctracer.cpp @@ -3,8 +3,16 @@ #include "library.hpp" #include +#include #include #include +#include + +#define AMD_INTERNAL_BUILD 1 +#include +#include + +#include // Macro to check ROC-tracer calls status #define ROCTRACER_CALL(call) \ @@ -23,40 +31,170 @@ namespace units = tim::units; namespace { -static auto& +auto& get_roctracer_kernels() { static auto _v = std::unordered_set{}; return _v; } -static auto& -get_roctracer_data_map() +auto& +get_roctracer_hip_data() { static auto _v = std::unordered_map{}; return _v; } -static auto& -get_roctracer_key_map() +auto& +get_roctracer_key_data() { static auto _v = std::unordered_map{}; return _v; } -using data_type_mutex_t = std::decay_t; +using data_type_mutex_t = std::decay_t; +using hsa_timer_t = hsa_rt_utils::Timer; +using timestamp_t = hsa_timer_t::timestamp_t; + +auto& +get_hsa_timer() +{ + static auto _v = std::unique_ptr{}; + return _v; +} + +// HSA API callback function +void +hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) +{ + if(get_state() != State::Active || !trait::runtime_enabled::get()) + return; + + (void) arg; + static auto _scope = scope::config{} + scope::flat{}; + const hsa_api_data_t* data = reinterpret_cast(callback_data); + HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n", + roctracer_op_string(domain, cid, 0), cid, data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); + + static thread_local timestamp_t hsa_begin_timestamp = 0; + static auto& timer = get_hsa_timer(); + + if(!timer) + return; + + switch(cid) + { + case HSA_API_ID_hsa_init: + case HSA_API_ID_hsa_shut_down: + case HSA_API_ID_hsa_agent_get_exception_policies: + case HSA_API_ID_hsa_agent_get_info: + case HSA_API_ID_hsa_amd_agent_iterate_memory_pools: + case HSA_API_ID_hsa_amd_agent_memory_pool_get_info: + case HSA_API_ID_hsa_amd_coherency_get_type: + case HSA_API_ID_hsa_amd_memory_pool_get_info: + case HSA_API_ID_hsa_amd_pointer_info: + case HSA_API_ID_hsa_amd_pointer_info_set_userdata: + case HSA_API_ID_hsa_amd_profiling_async_copy_enable: + case HSA_API_ID_hsa_amd_profiling_get_async_copy_time: + case HSA_API_ID_hsa_amd_profiling_get_dispatch_time: + case HSA_API_ID_hsa_amd_profiling_set_profiler_enabled: + case HSA_API_ID_hsa_cache_get_info: + case HSA_API_ID_hsa_code_object_get_info: + case HSA_API_ID_hsa_code_object_get_symbol: + case HSA_API_ID_hsa_code_object_get_symbol_from_name: + case HSA_API_ID_hsa_code_object_reader_create_from_memory: + case HSA_API_ID_hsa_code_symbol_get_info: + case HSA_API_ID_hsa_executable_create_alt: + case HSA_API_ID_hsa_executable_freeze: + case HSA_API_ID_hsa_executable_get_info: + case HSA_API_ID_hsa_executable_get_symbol: + case HSA_API_ID_hsa_executable_get_symbol_by_name: + case HSA_API_ID_hsa_executable_symbol_get_info: + case HSA_API_ID_hsa_extension_get_name: + case HSA_API_ID_hsa_ext_image_data_get_info: + case HSA_API_ID_hsa_ext_image_data_get_info_with_layout: + case HSA_API_ID_hsa_ext_image_get_capability: + case HSA_API_ID_hsa_ext_image_get_capability_with_layout: + case HSA_API_ID_hsa_isa_get_exception_policies: + case HSA_API_ID_hsa_isa_get_info: + case HSA_API_ID_hsa_isa_get_info_alt: + case HSA_API_ID_hsa_isa_get_round_method: + case HSA_API_ID_hsa_region_get_info: + case HSA_API_ID_hsa_system_extension_supported: + case HSA_API_ID_hsa_system_get_extension_table: + case HSA_API_ID_hsa_system_get_info: + case HSA_API_ID_hsa_system_get_major_extension_table: + case HSA_API_ID_hsa_wavefront_get_info: break; + default: { + if(data->phase == ACTIVITY_API_PHASE_ENTER) + { + hsa_begin_timestamp = timer->timestamp_fn_ns(); + } + else + { + auto _name = roctracer_op_string(domain, cid, 0); + const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) + ? hsa_begin_timestamp + : timer->timestamp_fn_ns(); + + if(get_use_perfetto()) + { + TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name }, + hsa_begin_timestamp); + TRACE_EVENT_END("device", end_timestamp); + } + + // timemory is disabled in this callback because collecting data in this + // thread causes strange segmentation faults + } + } + } +} + +void +hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) +{ + static const char* copy_op_name = "hsa_async_copy"; + static const char* dispatch_op_name = "hsa_dispatch"; + static const char* barrier_op_name = "hsa_barrier"; + const char** _name = nullptr; + + switch(op) + { + case HSA_OP_ID_DISPATCH: _name = &dispatch_op_name; break; + case HSA_OP_ID_COPY: _name = ©_op_name; break; + case HSA_OP_ID_BARRIER: _name = &barrier_op_name; break; + default: break; + } + + if(!_name) + return; + + if(get_use_perfetto()) + { + TRACE_EVENT_BEGIN("device", perfetto::StaticString{ *_name }, record->begin_ns); + TRACE_EVENT_END("device", record->end_ns); + } + + // timemory is disabled in this callback because collecting data in this thread + // causes strange segmentation faults + tim::consume_parameters(arg); +} // HIP API callback function void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { - if(!trait::runtime_enabled::get()) + if(get_state() != State::Active || !trait::runtime_enabled::get()) return; - const hip_api_data_t* data = reinterpret_cast(callback_data); + + static auto _scope = scope::flat() + scope::timeline(); + const hip_api_data_t* data = reinterpret_cast(callback_data); HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n", - roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, - data->correlation_id, + roctracer_op_string(domain, cid, 0), cid, data->correlation_id, (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); + if(data->phase == ACTIVITY_API_PHASE_ENTER) { switch(cid) @@ -71,13 +209,13 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* get_roctracer_kernels().emplace(data->correlation_id); if(get_use_perfetto()) { - get_roctracer_key_map().emplace(data->correlation_id, _name); + get_roctracer_key_data().emplace(data->correlation_id, _name); } if(get_use_timemory()) { - get_roctracer_data_map().emplace( + get_roctracer_hip_data().emplace( data->correlation_id, - roctracer_bundle_t{ tim::static_string{ _name }() }); + roctracer_bundle_t{ tim::static_string{ _name }(), _scope }); } break; } @@ -87,41 +225,41 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* get_roctracer_kernels().emplace(data->correlation_id); if(get_use_perfetto()) { - get_roctracer_key_map().emplace(data->correlation_id, _name); + get_roctracer_key_data().emplace(data->correlation_id, _name); } if(get_use_timemory()) { - get_roctracer_data_map().emplace( + get_roctracer_hip_data().emplace( data->correlation_id, - roctracer_bundle_t{ tim::static_string{ _name }(), - tim::scope::get_default() }); + roctracer_bundle_t{ tim::static_string{ _name }(), _scope }); } break; } default: { tim::auto_lock_t _lk{ tim::type_mutex() }; - const char* _name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0); + const char* _name = roctracer_op_string(domain, cid, 0); if(get_use_perfetto()) { - get_roctracer_key_map().emplace(data->correlation_id, _name); + get_roctracer_key_data().emplace(data->correlation_id, _name); } if(get_use_timemory()) { - get_roctracer_data_map().emplace( + get_roctracer_hip_data().emplace( data->correlation_id, - roctracer_bundle_t{ tim::static_string{ _name }(), - tim::scope::get_default() }); + roctracer_bundle_t{ tim::static_string{ _name }(), _scope }); } break; } } } + else if(data->phase == ACTIVITY_API_PHASE_EXIT) + {} tim::consume_parameters(domain, arg); } // Activity tracing callback void -activity_callback(const char* begin, const char* end, void*) +hip_activity_callback(const char* begin, const char* end, void*) { if(!trait::runtime_enabled::get()) return; @@ -132,8 +270,8 @@ activity_callback(const char* begin, const char* end, void*) std::unordered_set _indexes{}; tim::auto_lock_t _lk{ tim::type_mutex() }; - auto& _data = get_roctracer_data_map(); - auto& _keys = get_roctracer_key_map(); + auto& _data = get_roctracer_hip_data(); + auto& _keys = get_roctracer_key_data(); auto& _kernels = get_roctracer_kernels(); HOSTTRACE_DEBUG("Activity records:\n"); @@ -175,7 +313,7 @@ activity_callback(const char* begin, const char* end, void*) .store(std::plus{}, static_cast(record->end_ns - record->begin_ns)) .stop(); - _indexes.emplace(kitr->first); + _indexes.emplace(itr->first); } // code ROCTRACER_CALL(roctracer_next_record(record, &record)); @@ -206,9 +344,41 @@ roctracer_is_setup() } // namespace #if !defined(HOSTTRACE_ROCTRACER_LIBKFDWRAPPER) -# define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "libkfdwrapper64.so" +# define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "/opt/rocm/roctracer/lib/libkfdwrapper64.so" #endif +struct dynamic_library +{ + dynamic_library() = delete; + dynamic_library(const dynamic_library&) = delete; + dynamic_library(dynamic_library&&) noexcept = default; + dynamic_library& operator=(const dynamic_library&) = delete; + dynamic_library& operator=(dynamic_library&&) noexcept = default; + + dynamic_library(const char* _env, const char* _fname, + int _flags = (RTLD_NOW | RTLD_GLOBAL), bool _store = false) + : envname{ _env } + , filename{ tim::get_env(_env, _fname, _store) } + , flags{ _flags } + { + handle = dlopen(filename.c_str(), flags); + if(!handle) + fprintf(stderr, "%s\n", dlerror()); + dlerror(); // Clear any existing error + } + + ~dynamic_library() + { + if(handle) + dlclose(handle); + } + + std::string envname = {}; + std::string filename = {}; + int flags = 0; + void* handle = nullptr; +}; + namespace tim { namespace component @@ -233,38 +403,22 @@ roctracer::setup() roctracer_is_setup() = true; HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__); - auto libpath = tim::get_env("HOSTTRACE_ROCTRACER_LIBKFDWRAPPER", - HOSTTRACE_ROCTRACER_LIBKFDWRAPPER, false); - auto libhandle = dlopen(libpath.c_str(), RTLD_NOW | RTLD_GLOBAL); - if(!libhandle) - fprintf(stderr, "%s\n", dlerror()); - dlerror(); // Clear any existing error + tim::set_env("HSA_TOOLS_LIB", "libhosttrace.so", 0); + + auto _kfdwrapper = dynamic_library{ "HOSTTRACE_ROCTRACER_LIBKFDWRAPPER", + HOSTTRACE_ROCTRACER_LIBKFDWRAPPER }; // Allocating tracing pool roctracer_properties_t properties{}; properties.buffer_size = 0x1000; - properties.buffer_callback_fun = activity_callback; + properties.buffer_callback_fun = hip_activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); ROCTRACER_CALL(roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr)); // Enable API callbacks, all domains ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, nullptr)); - /*ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, - HIP_API_ID_hipModuleLaunchKernel, - hip_api_callback, nullptr)); - ROCTRACER_CALL(roctracer_enable_op_callback( - ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipLaunchKernel, hip_api_callback, nullptr)); - ROCTRACER_CALL(roctracer_enable_op_callback( - ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipMalloc, hip_api_callback, nullptr)); - ROCTRACER_CALL(roctracer_enable_op_callback( - ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipMemcpy, hip_api_callback, nullptr)); - ROCTRACER_CALL(roctracer_enable_op_callback( - ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipFree, hip_api_callback, nullptr));*/ // Enable activity tracing, all domains ROCTRACER_CALL(roctracer_enable_activity()); - - if(libhandle) - dlclose(libhandle); } void @@ -278,21 +432,21 @@ roctracer::tear_down() // flush all the activity ROCTRACER_CALL(roctracer_flush_activity()); + + // flush all buffers + roctracer_flush_buf(); + + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); + + ROCTRACER_CALL( + roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); + // Disable tracing and closing the pool ROCTRACER_CALL(roctracer_disable_callback()); - /*ROCTRACER_CALL(roctracer_disable_op_callback(ACTIVITY_DOMAIN_HIP_API, - HIP_API_ID_hipModuleLaunchKernel)); - ROCTRACER_CALL(roctracer_disable_op_callback(ACTIVITY_DOMAIN_HIP_API, - HIP_API_ID_hipLaunchKernel)); - ROCTRACER_CALL( - roctracer_disable_op_callback(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipMemcpy)); - ROCTRACER_CALL( - roctracer_disable_op_callback(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipMalloc)); - ROCTRACER_CALL( - roctracer_disable_op_callback(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipFree)); - */ ROCTRACER_CALL(roctracer_disable_activity()); - ROCTRACER_CALL(roctracer_close_pool()); + + // closing the pool with HSA enabled causes segfaults + // ROCTRACER_CALL(roctracer_close_pool()); } void @@ -315,3 +469,86 @@ roctracer::stop() TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer, false, void) TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double) + +// HSA-runtime tool on-load method +extern "C" TIMEMORY_VISIBILITY("default") bool OnLoad( + HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) +{ + puts(__FUNCTION__); + tim::consume_parameters(table, runtime_version, failed_tool_count, failed_tool_names); + + // ONLOAD_TRACE_BEG(); + // on_exit(exit_handler, nullptr); + + get_hsa_timer() = std::make_unique(table->core_->hsa_system_get_info_fn); + + // const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); + const char* output_prefix = nullptr; + + // App begin timestamp begin_ts_file.txt + // begin_ts_file_handle = open_output_file(output_prefix, "begin_ts_file.txt"); + // const timestamp_t app_start_time = timer->timestamp_fn_ns(); + // fprintf(begin_ts_file_handle, "%lu\n", app_start_time); + + bool trace_hsa_api = tim::get_env("HOSTTRACE_ROCTRACER_HSA_API", true); + std::vector hsa_api_vec = + tim::delimit(tim::get_env("HOSTTRACE_ROCTRACER_HSA_API_TYPES", "")); + + // Enable HSA API callbacks/activity + if(trace_hsa_api) + { + // hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt"); + + // initialize HSA tracing + roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*) table); + + fprintf(stdout, " HSA-trace("); + fflush(stdout); + if(!hsa_api_vec.empty()) + { + for(unsigned i = 0; i < hsa_api_vec.size(); ++i) + { + uint32_t cid = HSA_API_ID_NUMBER; + const char* api = hsa_api_vec[i].c_str(); + ROCTRACER_CALL( + roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr)); + ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, + hsa_api_callback, nullptr)); + printf(" %s", api); + } + } + else + { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, + hsa_api_callback, nullptr)); + } + printf(")\n"); + } + + bool trace_hsa_activity = tim::get_env("HOSTTRACE_ROCTRACER_HSA_ACTIVITY", true); + // Enable HSA GPU activity + if(trace_hsa_activity) + { + // initialize HSA tracing + roctracer::hsa_ops_properties_t ops_properties{ + table, reinterpret_cast(hsa_activity_callback), + nullptr, output_prefix + }; + roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); + + fprintf(stdout, " HSA-activity-trace()\n"); + fflush(stdout); + ROCTRACER_CALL( + roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); + } + + return true; +} + +// HSA-runtime on-unload method +extern "C" TIMEMORY_VISIBILITY("default") void OnUnload() +{ + puts(__FUNCTION__); + // ONLOAD_TRACE(""); +}