diff --git a/cmake/MacroUtilities.cmake b/cmake/MacroUtilities.cmake index e6976f5688..77c9921079 100644 --- a/cmake/MacroUtilities.cmake +++ b/cmake/MacroUtilities.cmake @@ -118,20 +118,14 @@ function(OMNITRACE_STRIP_TARGET _TARGET) TARGET ${_TARGET} POST_BUILD COMMAND - ${CMAKE_STRIP} --keep-symbol="omnitrace_init" + ${CMAKE_STRIP} -w --keep-symbol="omnitrace_init" --keep-symbol="omnitrace_finalize" --keep-symbol="omnitrace_push_trace" --keep-symbol="omnitrace_pop_trace" --keep-symbol="omnitrace_push_region" --keep-symbol="omnitrace_pop_region" --keep-symbol="omnitrace_set_env" - --keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_user_configure" - --keep-symbol="omnitrace_user_get_callbacks" - --keep-symbol="omnitrace_user_error_string" - --keep-symbol="omnitrace_user_start_trace" - --keep-symbol="omnitrace_user_stop_trace" - --keep-symbol="omnitrace_user_start_thread_trace" - --keep-symbol="omnitrace_user_stop_thread_trace" - --keep-symbol="omnitrace_user_push_region" - --keep-symbol="omnitrace_user_pop_region" --keep-symbol="ompt_start_tool" - ${ARGN} $ + --keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_user_*" + --keep-symbol="ompt_start_tool" --keep-symbol="kokkosp_*" + --keep-symbol="OnLoad" --keep-symbol="OnUnload" ${ARGN} + $ WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMENT "Stripping ${_TARGET}...") endif() diff --git a/scripts/dl-gen.py b/scripts/dl-gen.py new file mode 100755 index 0000000000..60e63cd335 --- /dev/null +++ b/scripts/dl-gen.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +import os +import sys +import glob + +""" +This script reads in function prototypes and generates the implementation pieces +needed to dlsym the function in libomnitrace + +Example input file: + + bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names); + void OnUnload(); + +generates: + + ##### declaration: + + bool OnLoad(HsaApiTable*, uint64_t, uint64_t, const char* const*) 
OMNITRACE_PUBLIC_API; + void OnUnload() OMNITRACE_PUBLIC_API; + + ##### dlsym: + + OMNITRACE_DLSYM(OnLoad_f, m_omnihandle, "OnLoad"); + OMNITRACE_DLSYM(OnUnload_f, m_omnihandle, "OnUnload"); + + ##### member variables: + + bool (*OnLoad_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr; + void (*OnUnload_f)() = nullptr; + + ##### callers: + + bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) + { + return OMNITRACE_DL_INVOKE(get_indirect().OnLoad_f, table, runtime_version, failed_tool_count, failed_tool_names); + } + + void OnUnload() + { + return OMNITRACE_DL_INVOKE(get_indirect().OnUnload_f); + } +""" + + +class function: + def __init__(self, _f): + self.return_type = _f.split(" ", 1)[0] + _f = "".join(_f.split(" ", 1)[1:]) + self.func_name = _f.split("(", 1)[0] + _f = "".join(_f.split("(", 1)[1:]).rstrip(")") + self.params = [x.strip() for x in _f.split(",")] + self.param_types = [] + self.param_names = [] + for itr in self.params: + _fields = itr.split(" ") + _len = len(_fields) + self.param_types.append(" ".join(_fields[0 : (_len - 1)])) + self.param_names.append(_fields[-1]) + + def valid(self): + return len(self.func_name) > 0 + + def member_variables(self): + return " {} (*{}_f)({}) = nullptr;".format( + self.return_type, self.func_name, ", ".join(self.param_types) + ) + + def function_decl(self): + return " {} {}({}) OMNITRACE_PUBLIC_API;".format( + self.return_type, self.func_name, ", ".join(self.param_types) + ) + + def dlsym_function(self): + return ' OMNITRACE_DLSYM({0}_f, m_omnihandle, "{0}");'.format(self.func_name) + + def call_dlsym_function(self): + _param_names = ", ".join(self.param_names) + if _param_names and _param_names != ", ": + _param_names = f", {_param_names}" + return " {} {}({})\n {}\n return OMNITRACE_DL_INVOKE(get_indirect().{}_f{});\n {}".format( + self.return_type, + self.func_name, + ", ".join(self.params), + "{", + self.func_name, + 
_param_names, + "}", + ) + + +def run(fname): + with open(fname, "r") as f: + _str = "" + for itr in f.read(): + _str += itr.replace("\n", " ") + + # collapse every run of whitespace into a single space + _str = " ".join(_str.split()) + data = [x.strip(" ") for x in _str.split(";")] + + funcs = [] + for itr in data: + f = function(itr) + if f.valid(): + funcs.append(f) + + return funcs + + +if __name__ == "__main__": + funcs = [] + for inp in sys.argv[1:]: + if os.path.exists(inp): + funcs += run(inp) + else: + _matches = [x for x in glob.glob(f"{inp}*") if os.path.exists(x)] + if not _matches: + print(f"No file matched {inp}*", file=sys.stderr) + for itr in _matches: + funcs += run(itr) + + if funcs: + print(f"\n##### declaration:\n") + for itr in funcs: + print("{}".format(itr.function_decl())) + + print(f"\n##### dlsym:\n") + for itr in funcs: + print("{}".format(itr.dlsym_function())) + + print(f"\n##### member variables:\n") + for itr in funcs: + print("{}".format(itr.member_variables())) + + print(f"\n##### callers:") + for itr in funcs: + print("") + print("{}".format(itr.call_dlsym_function())) + + print("") diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index eaa6eaa8e1..d5e69c6ecd 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -55,6 +55,13 @@ //--------------------------------------------------------------------------------------// +std::ostream& +operator<<(std::ostream& _os, const SpaceHandle& _handle) +{ + _os << _handle.name; + return _os; +} + namespace omnitrace { inline namespace dl @@ -206,6 +213,57 @@ struct OMNITRACE_HIDDEN_API indirect OMNITRACE_DLSYM(omnitrace_register_coverage_f, m_omnihandle, "omnitrace_register_coverage"); + OMNITRACE_DLSYM(kokkosp_print_help_f, m_omnihandle, "kokkosp_print_help"); + OMNITRACE_DLSYM(kokkosp_parse_args_f, m_omnihandle, "kokkosp_parse_args"); + OMNITRACE_DLSYM(kokkosp_declare_metadata_f, m_omnihandle, + "kokkosp_declare_metadata"); + OMNITRACE_DLSYM(kokkosp_request_tool_settings_f, m_omnihandle, + "kokkosp_request_tool_settings"); + 
OMNITRACE_DLSYM(kokkosp_init_library_f, m_omnihandle, "kokkosp_init_library"); + OMNITRACE_DLSYM(kokkosp_finalize_library_f, m_omnihandle, + "kokkosp_finalize_library"); + OMNITRACE_DLSYM(kokkosp_begin_parallel_for_f, m_omnihandle, + "kokkosp_begin_parallel_for"); + OMNITRACE_DLSYM(kokkosp_end_parallel_for_f, m_omnihandle, + "kokkosp_end_parallel_for"); + OMNITRACE_DLSYM(kokkosp_begin_parallel_reduce_f, m_omnihandle, + "kokkosp_begin_parallel_reduce"); + OMNITRACE_DLSYM(kokkosp_end_parallel_reduce_f, m_omnihandle, + "kokkosp_end_parallel_reduce"); + OMNITRACE_DLSYM(kokkosp_begin_parallel_scan_f, m_omnihandle, + "kokkosp_begin_parallel_scan"); + OMNITRACE_DLSYM(kokkosp_end_parallel_scan_f, m_omnihandle, + "kokkosp_end_parallel_scan"); + OMNITRACE_DLSYM(kokkosp_begin_fence_f, m_omnihandle, "kokkosp_begin_fence"); + OMNITRACE_DLSYM(kokkosp_end_fence_f, m_omnihandle, "kokkosp_end_fence"); + OMNITRACE_DLSYM(kokkosp_push_profile_region_f, m_omnihandle, + "kokkosp_push_profile_region"); + OMNITRACE_DLSYM(kokkosp_pop_profile_region_f, m_omnihandle, + "kokkosp_pop_profile_region"); + OMNITRACE_DLSYM(kokkosp_create_profile_section_f, m_omnihandle, + "kokkosp_create_profile_section"); + OMNITRACE_DLSYM(kokkosp_destroy_profile_section_f, m_omnihandle, + "kokkosp_destroy_profile_section"); + OMNITRACE_DLSYM(kokkosp_start_profile_section_f, m_omnihandle, + "kokkosp_start_profile_section"); + OMNITRACE_DLSYM(kokkosp_stop_profile_section_f, m_omnihandle, + "kokkosp_stop_profile_section"); + OMNITRACE_DLSYM(kokkosp_allocate_data_f, m_omnihandle, "kokkosp_allocate_data"); + OMNITRACE_DLSYM(kokkosp_deallocate_data_f, m_omnihandle, + "kokkosp_deallocate_data"); + OMNITRACE_DLSYM(kokkosp_begin_deep_copy_f, m_omnihandle, + "kokkosp_begin_deep_copy"); + OMNITRACE_DLSYM(kokkosp_end_deep_copy_f, m_omnihandle, "kokkosp_end_deep_copy"); + OMNITRACE_DLSYM(kokkosp_profile_event_f, m_omnihandle, "kokkosp_profile_event"); + OMNITRACE_DLSYM(kokkosp_dual_view_sync_f, m_omnihandle, 
"kokkosp_dual_view_sync"); + OMNITRACE_DLSYM(kokkosp_dual_view_modify_f, m_omnihandle, + "kokkosp_dual_view_modify"); + +#if OMNITRACE_USE_ROCTRACER > 0 + OMNITRACE_DLSYM(hsa_on_load_f, m_omnihandle, "OnLoad"); + OMNITRACE_DLSYM(hsa_on_unload_f, m_omnihandle, "OnUnload"); +#endif + #if OMNITRACE_USE_OMPT == 0 _warn_verbose = 5; #else @@ -256,6 +314,7 @@ struct OMNITRACE_HIDDEN_API indirect } public: + // omnitrace functions void (*omnitrace_init_library_f)(void) = nullptr; void (*omnitrace_init_f)(const char*, bool, const char*) = nullptr; void (*omnitrace_finalize_f)(void) = nullptr; @@ -269,6 +328,48 @@ public: int (*omnitrace_push_region_f)(const char*) = nullptr; int (*omnitrace_pop_region_f)(const char*) = nullptr; int (*omnitrace_user_configure_f)(int, void*, void*) = nullptr; + + // KokkosP functions + void (*kokkosp_print_help_f)(char*) = nullptr; + void (*kokkosp_parse_args_f)(int, char**) = nullptr; + void (*kokkosp_declare_metadata_f)(const char*, const char*) = nullptr; + void (*kokkosp_request_tool_settings_f)(const uint32_t, + Kokkos_Tools_ToolSettings*) = nullptr; + void (*kokkosp_init_library_f)(const int, const uint64_t, const uint32_t, + void*) = nullptr; + void (*kokkosp_finalize_library_f)() = nullptr; + void (*kokkosp_begin_parallel_for_f)(const char*, uint32_t, uint64_t*) = nullptr; + void (*kokkosp_end_parallel_for_f)(uint64_t) = nullptr; + void (*kokkosp_begin_parallel_reduce_f)(const char*, uint32_t, uint64_t*) = nullptr; + void (*kokkosp_end_parallel_reduce_f)(uint64_t) = nullptr; + void (*kokkosp_begin_parallel_scan_f)(const char*, uint32_t, uint64_t*) = nullptr; + void (*kokkosp_end_parallel_scan_f)(uint64_t) = nullptr; + void (*kokkosp_begin_fence_f)(const char*, uint32_t, uint64_t*) = nullptr; + void (*kokkosp_end_fence_f)(uint64_t) = nullptr; + void (*kokkosp_push_profile_region_f)(const char*) = nullptr; + void (*kokkosp_pop_profile_region_f)() = nullptr; + void (*kokkosp_create_profile_section_f)(const char*, uint32_t*) = nullptr; + 
void (*kokkosp_destroy_profile_section_f)(uint32_t) = nullptr; + void (*kokkosp_start_profile_section_f)(uint32_t) = nullptr; + void (*kokkosp_stop_profile_section_f)(uint32_t) = nullptr; + void (*kokkosp_allocate_data_f)(const SpaceHandle, const char*, const void* const, + const uint64_t) = nullptr; + void (*kokkosp_deallocate_data_f)(const SpaceHandle, const char*, const void* const, + const uint64_t) = nullptr; + void (*kokkosp_begin_deep_copy_f)(SpaceHandle, const char*, const void*, SpaceHandle, + const char*, const void*, uint64_t) = nullptr; + void (*kokkosp_end_deep_copy_f)() = nullptr; + void (*kokkosp_profile_event_f)(const char*) = nullptr; + void (*kokkosp_dual_view_sync_f)(const char*, const void* const, bool) = nullptr; + void (*kokkosp_dual_view_modify_f)(const char*, const void* const, bool) = nullptr; + + // HSA functions +#if OMNITRACE_USE_ROCTRACER > 0 + bool (*hsa_on_load_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr; + void (*hsa_on_unload_f)() = nullptr; +#endif + + // OpenMP functions #if defined(OMNITRACE_USE_OMPT) && OMNITRACE_USE_OMPT > 0 ompt_start_tool_result_t* (*ompt_start_tool_f)(unsigned int, const char*); #endif @@ -556,6 +657,190 @@ extern "C" return OMNITRACE_DL_INVOKE(get_indirect().omnitrace_pop_region_f, name); } + //----------------------------------------------------------------------------------// + // + // KokkosP + // + //----------------------------------------------------------------------------------// + + void kokkosp_print_help(char* argv0) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_print_help_f, argv0); + } + + void kokkosp_parse_args(int argc, char** argv) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_parse_args_f, argc, argv); + } + + void kokkosp_declare_metadata(const char* key, const char* value) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_declare_metadata_f, key, value); + } + + void kokkosp_request_tool_settings(const uint32_t version, + 
Kokkos_Tools_ToolSettings* settings) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_request_tool_settings_f, + version, settings); + } + + void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, void* deviceInfo) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_init_library_f, loadSeq, + interfaceVer, devInfoCount, deviceInfo); + } + + void kokkosp_finalize_library() + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_finalize_library_f); + } + + void kokkosp_begin_parallel_for(const char* name, uint32_t devid, uint64_t* kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_for_f, name, + devid, kernid); + } + + void kokkosp_end_parallel_for(uint64_t kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_for_f, kernid); + } + + void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid, uint64_t* kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_reduce_f, name, + devid, kernid); + } + + void kokkosp_end_parallel_reduce(uint64_t kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_reduce_f, kernid); + } + + void kokkosp_begin_parallel_scan(const char* name, uint32_t devid, uint64_t* kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_scan_f, name, + devid, kernid); + } + + void kokkosp_end_parallel_scan(uint64_t kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_scan_f, kernid); + } + + void kokkosp_begin_fence(const char* name, uint32_t devid, uint64_t* kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_fence_f, name, devid, + kernid); + } + + void kokkosp_end_fence(uint64_t kernid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_fence_f, kernid); + } + + void kokkosp_push_profile_region(const char* name) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_push_profile_region_f, name); + } + + void 
kokkosp_pop_profile_region() + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_pop_profile_region_f); + } + + void kokkosp_create_profile_section(const char* name, uint32_t* secid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_create_profile_section_f, name, + secid); + } + + void kokkosp_destroy_profile_section(uint32_t secid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_destroy_profile_section_f, + secid); + } + + void kokkosp_start_profile_section(uint32_t secid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_start_profile_section_f, secid); + } + + void kokkosp_stop_profile_section(uint32_t secid) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_stop_profile_section_f, secid); + } + + void kokkosp_allocate_data(const SpaceHandle space, const char* label, + const void* const ptr, const uint64_t size) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_allocate_data_f, space, label, + ptr, size); + } + + void kokkosp_deallocate_data(const SpaceHandle space, const char* label, + const void* const ptr, const uint64_t size) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_deallocate_data_f, space, label, + ptr, size); + } + + void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char* dst_name, + const void* dst_ptr, SpaceHandle src_handle, + const char* src_name, const void* src_ptr, uint64_t size) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_deep_copy_f, dst_handle, + dst_name, dst_ptr, src_handle, src_name, src_ptr, + size); + } + + void kokkosp_end_deep_copy() + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_deep_copy_f); + } + + void kokkosp_profile_event(const char* name) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_profile_event_f, name); + } + + void kokkosp_dual_view_sync(const char* label, const void* const data, bool is_device) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_dual_view_sync_f, label, data, + is_device); + } + + void 
kokkosp_dual_view_modify(const char* label, const void* const data, + bool is_device) + { + return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_dual_view_modify_f, label, data, + is_device); + } + + //----------------------------------------------------------------------------------// + // + // HSA + // + //----------------------------------------------------------------------------------// + +#if OMNITRACE_USE_ROCTRACER > 0 + bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) + { + return OMNITRACE_DL_INVOKE(get_indirect().hsa_on_load_f, table, runtime_version, + failed_tool_count, failed_tool_names); + } + + void OnUnload() { return OMNITRACE_DL_INVOKE(get_indirect().hsa_on_unload_f); } +#endif + + //----------------------------------------------------------------------------------// + // + // OMPT + // + //----------------------------------------------------------------------------------// #if OMNITRACE_USE_OMPT > 0 ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, const char* runtime_version) diff --git a/source/lib/omnitrace-dl/dl.hpp b/source/lib/omnitrace-dl/dl.hpp index 3bcb9c288a..e8e3677877 100644 --- a/source/lib/omnitrace-dl/dl.hpp +++ b/source/lib/omnitrace-dl/dl.hpp @@ -53,6 +53,10 @@ # define OMNITRACE_USE_OMPT 0 #endif +#if !defined(OMNITRACE_USE_ROCTRACER) +# define OMNITRACE_USE_ROCTRACER 0 +#endif + //--------------------------------------------------------------------------------------// // // omnitrace symbols @@ -87,12 +91,71 @@ extern "C" int omnitrace_user_push_region_dl(const char*) OMNITRACE_HIDDEN_API; int omnitrace_user_pop_region_dl(const char*) OMNITRACE_HIDDEN_API; + // KokkosP + struct OMNITRACE_HIDDEN_API SpaceHandle + { + char name[64]; + }; + + struct OMNITRACE_HIDDEN_API Kokkos_Tools_ToolSettings + { + bool requires_global_fencing; + bool padding[255]; + }; + + void kokkosp_print_help(char*) OMNITRACE_PUBLIC_API; + void 
kokkosp_parse_args(int, char**) OMNITRACE_PUBLIC_API; + void kokkosp_declare_metadata(const char*, const char*) OMNITRACE_PUBLIC_API; + void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings*) OMNITRACE_PUBLIC_API; + void kokkosp_init_library(const int, const uint64_t, const uint32_t, + void*) OMNITRACE_PUBLIC_API; + void kokkosp_finalize_library() OMNITRACE_PUBLIC_API; + void kokkosp_begin_parallel_for(const char*, uint32_t, + uint64_t*) OMNITRACE_PUBLIC_API; + void kokkosp_end_parallel_for(uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_begin_parallel_reduce(const char*, uint32_t, + uint64_t*) OMNITRACE_PUBLIC_API; + void kokkosp_end_parallel_reduce(uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_begin_parallel_scan(const char*, uint32_t, + uint64_t*) OMNITRACE_PUBLIC_API; + void kokkosp_end_parallel_scan(uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_begin_fence(const char*, uint32_t, uint64_t*) OMNITRACE_PUBLIC_API; + void kokkosp_end_fence(uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_push_profile_region(const char*) OMNITRACE_PUBLIC_API; + void kokkosp_pop_profile_region() OMNITRACE_PUBLIC_API; + void kokkosp_create_profile_section(const char*, uint32_t*) OMNITRACE_PUBLIC_API; + void kokkosp_destroy_profile_section(uint32_t) OMNITRACE_PUBLIC_API; + void kokkosp_start_profile_section(uint32_t) OMNITRACE_PUBLIC_API; + void kokkosp_stop_profile_section(uint32_t) OMNITRACE_PUBLIC_API; + void kokkosp_allocate_data(const SpaceHandle, const char*, const void* const, + const uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_deallocate_data(const SpaceHandle, const char*, const void* const, + const uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_begin_deep_copy(SpaceHandle, const char*, const void*, SpaceHandle, + const char*, const void*, uint64_t) OMNITRACE_PUBLIC_API; + void kokkosp_end_deep_copy() OMNITRACE_PUBLIC_API; + void kokkosp_profile_event(const char*) OMNITRACE_PUBLIC_API; + void kokkosp_dual_view_sync(const char*, const void* const, + 
bool) OMNITRACE_PUBLIC_API; + void kokkosp_dual_view_modify(const char*, const void* const, + bool) OMNITRACE_PUBLIC_API; + + // OpenMP Tools (OMPT) # if OMNITRACE_USE_OMPT > 0 struct ompt_start_tool_result_t; ompt_start_tool_result_t* ompt_start_tool(unsigned int, const char*) OMNITRACE_PUBLIC_API; # endif + +# if OMNITRACE_USE_ROCTRACER > 0 + // HSA + struct HsaApiTable; + bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) OMNITRACE_PUBLIC_API; + void OnUnload() OMNITRACE_PUBLIC_API; +# endif #endif } diff --git a/source/lib/omnitrace/library/kokkosp.cpp b/source/lib/omnitrace/library/kokkosp.cpp index d0714eeef5..6dc0578cf5 100644 --- a/source/lib/omnitrace/library/kokkosp.cpp +++ b/source/lib/omnitrace/library/kokkosp.cpp @@ -28,9 +28,11 @@ #include "library/components/user_region.hpp" #include "library/config.hpp" #include "library/debug.hpp" +#include "library/perfetto.hpp" #include "library/runtime.hpp" #include +#include namespace kokkosp = tim::kokkosp; @@ -80,6 +82,19 @@ std::vector _initialize_arguments = {}; extern "C" { + struct Kokkos_Tools_ToolSettings + { + bool requires_global_fencing; + bool padding[255]; + }; + + void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings*) OMNITRACE_PUBLIC_API; + void kokkosp_dual_view_sync(const char*, const void* const, + bool) OMNITRACE_PUBLIC_API; + void kokkosp_dual_view_modify(const char*, const void* const, + bool) OMNITRACE_PUBLIC_API; + void kokkosp_print_help(char*) {} void kokkosp_parse_args(int argc, char** argv) @@ -108,6 +123,12 @@ extern "C" tim::manager::add_metadata(key, value); } + void kokkosp_request_tool_settings(const uint32_t _version, + Kokkos_Tools_ToolSettings* _settings) + { + if(_version > 0) _settings->requires_global_fencing = false; + } + void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { @@ -117,7 +138,11 @@ extern 
"C" if(_standalone_initialized || (!omnitrace::config::settings_are_configured() && omnitrace::get_state() < omnitrace::State::Active)) { - OMNITRACE_BASIC_VERBOSE_F(0, "Initializing omnitrace...\n"); + OMNITRACE_BASIC_VERBOSE_F(0, + "Initializing kokkos omnitrace connector " + "(standalone, sequence %d, version: %llu)...\n", + loadSeq, (unsigned long long) interfaceVer); + OMNITRACE_BASIC_VERBOSE_F(0, "Initializing omnitrace (standalone)... "); auto _mode = tim::get_env("OMNITRACE_MODE", "trace"); auto _arg0 = (_initialize_arguments.empty()) ? std::string{ "unknown" } : _initialize_arguments.at(0); @@ -125,19 +150,23 @@ extern "C" _standalone_initialized = true; omnitrace_set_mpi_hidden(false, false); omnitrace_init_hidden(_mode.c_str(), false, _arg0.c_str()); - omnitrace_push_trace("kokkos_main"); + omnitrace_push_trace_hidden("kokkos_main"); + } + else + { + OMNITRACE_VERBOSE_F(0, + "Initializing kokkos omnitrace connector " + "(sequence %d, version: %llu)... ", + loadSeq, (unsigned long long) interfaceVer); } - - OMNITRACE_VERBOSE_F(0, - "Initializing connector (sequence is %d, version: %llu)...", - loadSeq, (unsigned long long) interfaceVer); setup_kernel_logger(); tim::trait::runtime_enabled::set( omnitrace::config::get_use_timemory()); - if(omnitrace::get_verbose() >= 0) fprintf(stderr, "Done\n"); + if(_standalone_initialized && omnitrace::get_verbose() >= 0) + fprintf(stderr, "Done\n"); } void kokkosp_finalize_library() @@ -145,13 +174,14 @@ extern "C" OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); if(_standalone_initialized) { - omnitrace_pop_trace("kokkos_main"); - OMNITRACE_VERBOSE_F(0, "Finalizing connector (standalone)...\n"); + omnitrace_pop_trace_hidden("kokkos_main"); + OMNITRACE_VERBOSE_F( + 0, "Finalizing kokkos omnitrace connector (standalone)...\n"); omnitrace_finalize_hidden(); } else { - OMNITRACE_VERBOSE_F(0, "Finalizing connector... "); + OMNITRACE_VERBOSE_F(0, "Finalizing kokkos omnitrace connector... 
"); kokkosp::cleanup(); if(omnitrace::get_verbose() >= 0) fprintf(stderr, "Done\n"); } @@ -253,6 +283,7 @@ extern "C" void kokkosp_push_profile_region(const char* name) { + if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(1, __FUNCTION__, name); kokkosp::get_profiler_stack().push_back( @@ -262,6 +293,7 @@ extern "C" void kokkosp_pop_profile_region() { + if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__); if(kokkosp::get_profiler_stack().empty()) @@ -274,6 +306,7 @@ extern "C" void kokkosp_create_profile_section(const char* name, uint32_t* secid) { + if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); *secid = kokkosp::get_unique_id(); auto pname = TIMEMORY_JOIN(" ", "[kokkos]", name); @@ -282,6 +315,7 @@ extern "C" void kokkosp_destroy_profile_section(uint32_t secid) { + if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::destroy_profiler(secid); } @@ -290,6 +324,7 @@ extern "C" void kokkosp_start_profile_section(uint32_t secid) { + if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(1, __FUNCTION__, secid); kokkosp::start_profiler(secid); @@ -297,6 +332,7 @@ extern "C" void kokkosp_stop_profile_section(uint32_t secid) { + if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); kokkosp::logger_t{}.mark(-1, __FUNCTION__, secid); - kokkosp::start_profiler(secid); + kokkosp::stop_profiler(secid); // stop hook must stop the section, not restart it @@ -369,6 +405,32 @@ extern "C" } //----------------------------------------------------------------------------------// + + void 
kokkosp_dual_view_sync(const char* label, const void* const, bool is_device) + { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + if(omnitrace::config::get_use_perfetto()) + { + auto _name = tim::get_hash_identifier_fast( + tim::add_hash_id(TIMEMORY_JOIN(" ", "[kokkos][dual_view_sync]", label))); + TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _name.data() }, + "target", (is_device) ? "device" : "host"); + } + } + + void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device) + { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + if(omnitrace::config::get_use_perfetto()) + { + auto _name = tim::get_hash_identifier_fast(tim::add_hash_id( + TIMEMORY_JOIN(" ", "[kokkos][dual_view_modify]", label))); + TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _name.data() }, + "target", (is_device) ? "device" : "host"); + } + } + + //----------------------------------------------------------------------------------// } TIMEMORY_INITIALIZE_STORAGE(kokkosp::memory_tracker)