Support omnitrace-dl as kokkos profile library (#37)

- add OnLoad and OnUnload to omnitrace-dl
- disable global fence for kokkos profiling tools
- tweak omnitrace_strip_target to use wildcards
- added dl-gen.py script for generating dlopen bindings
- added support for kokkosp_request_tool_settings
- added support for kokkosp_dual_view_sync
- added support for kokkosp_dual_view_modify
This commit is contained in:
Jonathan R. Madsen
2022-06-10 18:54:22 -05:00
committato da GitHub
parent b50a13c87e
commit ab395f86c4
5 ha cambiato i file con 568 aggiunte e 21 eliminazioni
+5 -11
Vedi File
@@ -118,20 +118,14 @@ function(OMNITRACE_STRIP_TARGET _TARGET)
TARGET ${_TARGET}
POST_BUILD
COMMAND
${CMAKE_STRIP} --keep-symbol="omnitrace_init"
${CMAKE_STRIP} -w --keep-symbol="omnitrace_init"
--keep-symbol="omnitrace_finalize" --keep-symbol="omnitrace_push_trace"
--keep-symbol="omnitrace_pop_trace" --keep-symbol="omnitrace_push_region"
--keep-symbol="omnitrace_pop_region" --keep-symbol="omnitrace_set_env"
--keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_user_configure"
--keep-symbol="omnitrace_user_get_callbacks"
--keep-symbol="omnitrace_user_error_string"
--keep-symbol="omnitrace_user_start_trace"
--keep-symbol="omnitrace_user_stop_trace"
--keep-symbol="omnitrace_user_start_thread_trace"
--keep-symbol="omnitrace_user_stop_thread_trace"
--keep-symbol="omnitrace_user_push_region"
--keep-symbol="omnitrace_user_pop_region" --keep-symbol="ompt_start_tool"
${ARGN} $<TARGET_FILE:${_TARGET}>
--keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_user_*"
--keep-symbol="ompt_start_tool" --keep-symbol="kokkosp_*"
--keep-symbol="OnLoad" --keep-symbol="OnUnload" ${ARGN}
$<TARGET_FILE:${_TARGET}>
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Stripping ${_TARGET}...")
endif()
+143
Vedi File
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
import os
import sys
import glob
"""
This script reads in function prototypes and generates the implementation pieces
needed to dlsym the functions in libomnitrace.
Example input file:
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names);
void OnUnload();
generates:
##### declaration:
bool OnLoad(HsaApiTable*, uint64_t, uint64_t, const char* const*) OMNITRACE_PUBLIC_API;
void OnUnload() OMNITRACE_PUBLIC_API;
##### dlsym:
OMNITRACE_DLSYM(OnLoad_f, m_omnihandle, "OnLoad");
OMNITRACE_DLSYM(OnUnload_f, m_omnihandle, "OnUnload");
##### member variables:
bool (*OnLoad_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr;
void (*OnUnload_f)() = nullptr;
##### callers:
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names)
{
return OMNITRACE_DL_INVOKE(get_indirect().OnLoad_f, table, runtime_version, failed_tool_count, failed_tool_names);
}
void OnUnload()
{
return OMNITRACE_DL_INVOKE(get_indirect().OnUnload_f);
}
"""
class function:
    """Parsed form of a single C function prototype (without the trailing ';').

    The prototype text is cut at the first space (return type), then at the
    first '(' (function name); the remainder, minus trailing ')' characters,
    is split on ',' into parameters. For each parameter the last
    space-separated token is taken as its name and everything before it as
    its type.

    Attributes:
        return_type: text before the first space of the prototype.
        func_name: text between that space and the first '('.
        params: stripped parameter strings ("type name").
        param_types: type portion of each parameter.
        param_names: name portion of each parameter.
    """

    def __init__(self, _f):
        return_type, _, remainder = _f.partition(" ")
        func_name, _, arg_text = remainder.partition("(")

        self.return_type = return_type
        self.func_name = func_name
        self.params = [piece.strip() for piece in arg_text.rstrip(")").split(",")]

        self.param_types = []
        self.param_names = []
        for param in self.params:
            # split at the last space: left side is the type, right side the name
            type_part, _, name_part = param.rpartition(" ")
            self.param_types.append(type_part)
            self.param_names.append(name_part)

    def valid(self):
        """Return True when a function name was extracted from the prototype."""
        return self.func_name != ""

    def member_variables(self):
        """Return the function-pointer member declaration, initialized to nullptr."""
        type_list = ", ".join(self.param_types)
        return f" {self.return_type} (*{self.func_name}_f)({type_list}) = nullptr;"

    def function_decl(self):
        """Return the public forward declaration using parameter types only."""
        type_list = ", ".join(self.param_types)
        return f" {self.return_type} {self.func_name}({type_list}) OMNITRACE_PUBLIC_API;"

    def dlsym_function(self):
        """Return the OMNITRACE_DLSYM line that binds the '<name>_f' pointer."""
        return f' OMNITRACE_DLSYM({self.func_name}_f, m_omnihandle, "{self.func_name}");'

    def call_dlsym_function(self):
        """Return the wrapper definition that forwards to the '<name>_f' pointer."""
        joined_names = ", ".join(self.param_names)
        # no trailing argument list when there are no (named) parameters
        forwarded = "" if joined_names in ("", ", ") else f", {joined_names}"
        signature = ", ".join(self.params)
        return (
            f" {self.return_type} {self.func_name}({signature})\n"
            " {\n"
            f" return OMNITRACE_DL_INVOKE(get_indirect().{self.func_name}_f{forwarded});\n"
            " }"
        )
def run(fname):
    """Parse every ';'-terminated prototype in `fname`.

    Args:
        fname: path of a text file containing C function prototypes, each
            terminated by ';' and possibly spanning several lines.

    Returns:
        A list of `function` objects, one per prototype for which a function
        name could be extracted, in file order.
    """
    with open(fname, "r") as src:
        text = src.read()

    # str.split() with no separator splits on any run of whitespace, so this
    # flattens newlines/tabs and collapses repeated blanks in a single pass
    flattened = " ".join(text.split())

    funcs = []
    for chunk in flattened.split(";"):
        parsed = function(chunk.strip(" "))
        if parsed.valid():
            funcs.append(parsed)
    return funcs
if __name__ == "__main__":
    # Every command-line argument is either an existing path or a prefix that
    # is expanded with a trailing '*' wildcard.
    funcs = []
    for inp in sys.argv[1:]:
        if os.path.exists(inp):
            funcs += run(inp)
            continue

        matches = [path for path in glob.glob(f"{inp}*") if os.path.exists(path)]
        if not matches:
            # report on stderr so the generated code on stdout stays clean
            print(f"No file matched {inp}", file=sys.stderr)
        for path in matches:
            funcs += run(path)

    if funcs:
        print("\n##### declaration:\n")
        for itr in funcs:
            print(itr.function_decl())

        print("\n##### dlsym:\n")
        for itr in funcs:
            print(itr.dlsym_function())

        print("\n##### member variables:\n")
        for itr in funcs:
            print(itr.member_variables())

        print("\n##### callers:")
        for itr in funcs:
            print("")
            print(itr.call_dlsym_function())
        print("")
+285
Vedi File
@@ -55,6 +55,13 @@
//--------------------------------------------------------------------------------------//
// Writes the handle's `name` character array to the stream and returns the stream.
std::ostream&
operator<<(std::ostream& _os, const SpaceHandle& _handle)
{
    return _os << _handle.name;
}
namespace omnitrace
{
inline namespace dl
@@ -206,6 +213,57 @@ struct OMNITRACE_HIDDEN_API indirect
OMNITRACE_DLSYM(omnitrace_register_coverage_f, m_omnihandle,
"omnitrace_register_coverage");
OMNITRACE_DLSYM(kokkosp_print_help_f, m_omnihandle, "kokkosp_print_help");
OMNITRACE_DLSYM(kokkosp_parse_args_f, m_omnihandle, "kokkosp_parse_args");
OMNITRACE_DLSYM(kokkosp_declare_metadata_f, m_omnihandle,
"kokkosp_declare_metadata");
OMNITRACE_DLSYM(kokkosp_request_tool_settings_f, m_omnihandle,
"kokkosp_request_tool_settings");
OMNITRACE_DLSYM(kokkosp_init_library_f, m_omnihandle, "kokkosp_init_library");
OMNITRACE_DLSYM(kokkosp_finalize_library_f, m_omnihandle,
"kokkosp_finalize_library");
OMNITRACE_DLSYM(kokkosp_begin_parallel_for_f, m_omnihandle,
"kokkosp_begin_parallel_for");
OMNITRACE_DLSYM(kokkosp_end_parallel_for_f, m_omnihandle,
"kokkosp_end_parallel_for");
OMNITRACE_DLSYM(kokkosp_begin_parallel_reduce_f, m_omnihandle,
"kokkosp_begin_parallel_reduce");
OMNITRACE_DLSYM(kokkosp_end_parallel_reduce_f, m_omnihandle,
"kokkosp_end_parallel_reduce");
OMNITRACE_DLSYM(kokkosp_begin_parallel_scan_f, m_omnihandle,
"kokkosp_begin_parallel_scan");
OMNITRACE_DLSYM(kokkosp_end_parallel_scan_f, m_omnihandle,
"kokkosp_end_parallel_scan");
OMNITRACE_DLSYM(kokkosp_begin_fence_f, m_omnihandle, "kokkosp_begin_fence");
OMNITRACE_DLSYM(kokkosp_end_fence_f, m_omnihandle, "kokkosp_end_fence");
OMNITRACE_DLSYM(kokkosp_push_profile_region_f, m_omnihandle,
"kokkosp_push_profile_region");
OMNITRACE_DLSYM(kokkosp_pop_profile_region_f, m_omnihandle,
"kokkosp_pop_profile_region");
OMNITRACE_DLSYM(kokkosp_create_profile_section_f, m_omnihandle,
"kokkosp_create_profile_section");
OMNITRACE_DLSYM(kokkosp_destroy_profile_section_f, m_omnihandle,
"kokkosp_destroy_profile_section");
OMNITRACE_DLSYM(kokkosp_start_profile_section_f, m_omnihandle,
"kokkosp_start_profile_section");
OMNITRACE_DLSYM(kokkosp_stop_profile_section_f, m_omnihandle,
"kokkosp_stop_profile_section");
OMNITRACE_DLSYM(kokkosp_allocate_data_f, m_omnihandle, "kokkosp_allocate_data");
OMNITRACE_DLSYM(kokkosp_deallocate_data_f, m_omnihandle,
"kokkosp_deallocate_data");
OMNITRACE_DLSYM(kokkosp_begin_deep_copy_f, m_omnihandle,
"kokkosp_begin_deep_copy");
OMNITRACE_DLSYM(kokkosp_end_deep_copy_f, m_omnihandle, "kokkosp_end_deep_copy");
OMNITRACE_DLSYM(kokkosp_profile_event_f, m_omnihandle, "kokkosp_profile_event");
OMNITRACE_DLSYM(kokkosp_dual_view_sync_f, m_omnihandle, "kokkosp_dual_view_sync");
OMNITRACE_DLSYM(kokkosp_dual_view_modify_f, m_omnihandle,
"kokkosp_dual_view_modify");
#if OMNITRACE_USE_ROCTRACER > 0
OMNITRACE_DLSYM(hsa_on_load_f, m_omnihandle, "OnLoad");
OMNITRACE_DLSYM(hsa_on_unload_f, m_omnihandle, "OnUnload");
#endif
#if OMNITRACE_USE_OMPT == 0
_warn_verbose = 5;
#else
@@ -256,6 +314,7 @@ struct OMNITRACE_HIDDEN_API indirect
}
public:
// omnitrace functions
void (*omnitrace_init_library_f)(void) = nullptr;
void (*omnitrace_init_f)(const char*, bool, const char*) = nullptr;
void (*omnitrace_finalize_f)(void) = nullptr;
@@ -269,6 +328,48 @@ public:
int (*omnitrace_push_region_f)(const char*) = nullptr;
int (*omnitrace_pop_region_f)(const char*) = nullptr;
int (*omnitrace_user_configure_f)(int, void*, void*) = nullptr;
// KokkosP functions
void (*kokkosp_print_help_f)(char*) = nullptr;
void (*kokkosp_parse_args_f)(int, char**) = nullptr;
void (*kokkosp_declare_metadata_f)(const char*, const char*) = nullptr;
void (*kokkosp_request_tool_settings_f)(const uint32_t,
Kokkos_Tools_ToolSettings*) = nullptr;
void (*kokkosp_init_library_f)(const int, const uint64_t, const uint32_t,
void*) = nullptr;
void (*kokkosp_finalize_library_f)() = nullptr;
void (*kokkosp_begin_parallel_for_f)(const char*, uint32_t, uint64_t*) = nullptr;
void (*kokkosp_end_parallel_for_f)(uint64_t) = nullptr;
void (*kokkosp_begin_parallel_reduce_f)(const char*, uint32_t, uint64_t*) = nullptr;
void (*kokkosp_end_parallel_reduce_f)(uint64_t) = nullptr;
void (*kokkosp_begin_parallel_scan_f)(const char*, uint32_t, uint64_t*) = nullptr;
void (*kokkosp_end_parallel_scan_f)(uint64_t) = nullptr;
void (*kokkosp_begin_fence_f)(const char*, uint32_t, uint64_t*) = nullptr;
void (*kokkosp_end_fence_f)(uint64_t) = nullptr;
void (*kokkosp_push_profile_region_f)(const char*) = nullptr;
void (*kokkosp_pop_profile_region_f)() = nullptr;
void (*kokkosp_create_profile_section_f)(const char*, uint32_t*) = nullptr;
void (*kokkosp_destroy_profile_section_f)(uint32_t) = nullptr;
void (*kokkosp_start_profile_section_f)(uint32_t) = nullptr;
void (*kokkosp_stop_profile_section_f)(uint32_t) = nullptr;
void (*kokkosp_allocate_data_f)(const SpaceHandle, const char*, const void* const,
const uint64_t) = nullptr;
void (*kokkosp_deallocate_data_f)(const SpaceHandle, const char*, const void* const,
const uint64_t) = nullptr;
void (*kokkosp_begin_deep_copy_f)(SpaceHandle, const char*, const void*, SpaceHandle,
const char*, const void*, uint64_t) = nullptr;
void (*kokkosp_end_deep_copy_f)() = nullptr;
void (*kokkosp_profile_event_f)(const char*) = nullptr;
void (*kokkosp_dual_view_sync_f)(const char*, const void* const, bool) = nullptr;
void (*kokkosp_dual_view_modify_f)(const char*, const void* const, bool) = nullptr;
// HSA functions
#if OMNITRACE_USE_ROCTRACER > 0
bool (*hsa_on_load_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr;
void (*hsa_on_unload_f)() = nullptr;
#endif
// OpenMP functions
#if defined(OMNITRACE_USE_OMPT) && OMNITRACE_USE_OMPT > 0
ompt_start_tool_result_t* (*ompt_start_tool_f)(unsigned int, const char*);
#endif
@@ -556,6 +657,190 @@ extern "C"
return OMNITRACE_DL_INVOKE(get_indirect().omnitrace_pop_region_f, name);
}
//----------------------------------------------------------------------------------//
//
// KokkosP
//
// Every kokkosp_* function in this section is a forwarding wrapper: it hands its
// arguments, unchanged and in declaration order, to OMNITRACE_DL_INVOKE together
// with the identically named `<function>_f` member of get_indirect(). All of
// them return void.
//
// NOTE(review): what happens when a `*_f` pointer was never resolved is decided
// by OMNITRACE_DL_INVOKE, which is defined outside this section -- confirm there.
//
//----------------------------------------------------------------------------------//
void kokkosp_print_help(char* argv0)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_print_help_f, argv0);
}
void kokkosp_parse_args(int argc, char** argv)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_parse_args_f, argc, argv);
}
void kokkosp_declare_metadata(const char* key, const char* value)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_declare_metadata_f, key, value);
}
void kokkosp_request_tool_settings(const uint32_t version,
Kokkos_Tools_ToolSettings* settings)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_request_tool_settings_f,
version, settings);
}
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t devInfoCount, void* deviceInfo)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_init_library_f, loadSeq,
interfaceVer, devInfoCount, deviceInfo);
}
void kokkosp_finalize_library()
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_finalize_library_f);
}
// begin/end pairs for parallel_for, parallel_reduce, parallel_scan and fence:
// the begin_* wrappers take (name, devid, kernid*), the end_* wrappers take kernid
void kokkosp_begin_parallel_for(const char* name, uint32_t devid, uint64_t* kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_for_f, name,
devid, kernid);
}
void kokkosp_end_parallel_for(uint64_t kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_for_f, kernid);
}
void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid, uint64_t* kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_reduce_f, name,
devid, kernid);
}
void kokkosp_end_parallel_reduce(uint64_t kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_reduce_f, kernid);
}
void kokkosp_begin_parallel_scan(const char* name, uint32_t devid, uint64_t* kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_scan_f, name,
devid, kernid);
}
void kokkosp_end_parallel_scan(uint64_t kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_scan_f, kernid);
}
void kokkosp_begin_fence(const char* name, uint32_t devid, uint64_t* kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_fence_f, name, devid,
kernid);
}
void kokkosp_end_fence(uint64_t kernid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_fence_f, kernid);
}
// profile regions (push/pop) and profile sections (create/destroy/start/stop)
void kokkosp_push_profile_region(const char* name)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_push_profile_region_f, name);
}
void kokkosp_pop_profile_region()
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_pop_profile_region_f);
}
void kokkosp_create_profile_section(const char* name, uint32_t* secid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_create_profile_section_f, name,
secid);
}
void kokkosp_destroy_profile_section(uint32_t secid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_destroy_profile_section_f,
secid);
}
void kokkosp_start_profile_section(uint32_t secid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_start_profile_section_f, secid);
}
void kokkosp_stop_profile_section(uint32_t secid)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_stop_profile_section_f, secid);
}
// memory events: SpaceHandle arguments are taken and forwarded by value
void kokkosp_allocate_data(const SpaceHandle space, const char* label,
const void* const ptr, const uint64_t size)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_allocate_data_f, space, label,
ptr, size);
}
void kokkosp_deallocate_data(const SpaceHandle space, const char* label,
const void* const ptr, const uint64_t size)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_deallocate_data_f, space, label,
ptr, size);
}
void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char* dst_name,
const void* dst_ptr, SpaceHandle src_handle,
const char* src_name, const void* src_ptr, uint64_t size)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_deep_copy_f, dst_handle,
dst_name, dst_ptr, src_handle, src_name, src_ptr,
size);
}
void kokkosp_end_deep_copy()
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_deep_copy_f);
}
void kokkosp_profile_event(const char* name)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_profile_event_f, name);
}
// dual-view notifications: (label, data pointer, is_device flag)
void kokkosp_dual_view_sync(const char* label, const void* const data, bool is_device)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_dual_view_sync_f, label, data,
is_device);
}
void kokkosp_dual_view_modify(const char* label, const void* const data,
bool is_device)
{
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_dual_view_modify_f, label, data,
is_device);
}
//----------------------------------------------------------------------------------//
//
// HSA
//
// OnLoad / OnUnload are only compiled when OMNITRACE_USE_ROCTRACER > 0. They
// forward to the hsa_on_load_f / hsa_on_unload_f members of get_indirect()
// through OMNITRACE_DL_INVOKE.
//
//----------------------------------------------------------------------------------//
#if OMNITRACE_USE_ROCTRACER > 0
// Forwards all four arguments unchanged and returns the result of the invocation.
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names)
{
return OMNITRACE_DL_INVOKE(get_indirect().hsa_on_load_f, table, runtime_version,
failed_tool_count, failed_tool_names);
}
// Takes no arguments; forwards to hsa_on_unload_f.
void OnUnload() { return OMNITRACE_DL_INVOKE(get_indirect().hsa_on_unload_f); }
#endif
//----------------------------------------------------------------------------------//
//
// OMPT
//
//----------------------------------------------------------------------------------//
#if OMNITRACE_USE_OMPT > 0
ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version,
const char* runtime_version)
+63
Vedi File
@@ -53,6 +53,10 @@
# define OMNITRACE_USE_OMPT 0
#endif
#if !defined(OMNITRACE_USE_ROCTRACER)
# define OMNITRACE_USE_ROCTRACER 0
#endif
//--------------------------------------------------------------------------------------//
//
// omnitrace symbols
@@ -87,12 +91,71 @@ extern "C"
int omnitrace_user_push_region_dl(const char*) OMNITRACE_HIDDEN_API;
int omnitrace_user_pop_region_dl(const char*) OMNITRACE_HIDDEN_API;
// KokkosP
// Argument type of the kokkosp_allocate_data / kokkosp_deallocate_data /
// kokkosp_begin_deep_copy declarations in this header. Its only member is a
// fixed 64-char buffer.
// NOTE(review): presumably mirrors the layout of the space handle that Kokkos
// passes to these callbacks -- confirm against the Kokkos tools interface.
struct OMNITRACE_HIDDEN_API SpaceHandle
{
char name[64];
};
// Settings object handed to kokkosp_request_tool_settings.
// One flag followed by 255 padding bools, i.e. 256 bool members in total.
// NOTE(review): presumably kept layout-compatible with the struct of the same
// name in the Kokkos tools C interface -- confirm before changing any member.
struct OMNITRACE_HIDDEN_API Kokkos_Tools_ToolSettings
{
bool requires_global_fencing;
bool padding[255];
};
void kokkosp_print_help(char*) OMNITRACE_PUBLIC_API;
void kokkosp_parse_args(int, char**) OMNITRACE_PUBLIC_API;
void kokkosp_declare_metadata(const char*, const char*) OMNITRACE_PUBLIC_API;
void kokkosp_request_tool_settings(const uint32_t,
Kokkos_Tools_ToolSettings*) OMNITRACE_PUBLIC_API;
void kokkosp_init_library(const int, const uint64_t, const uint32_t,
void*) OMNITRACE_PUBLIC_API;
void kokkosp_finalize_library() OMNITRACE_PUBLIC_API;
void kokkosp_begin_parallel_for(const char*, uint32_t,
uint64_t*) OMNITRACE_PUBLIC_API;
void kokkosp_end_parallel_for(uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_begin_parallel_reduce(const char*, uint32_t,
uint64_t*) OMNITRACE_PUBLIC_API;
void kokkosp_end_parallel_reduce(uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_begin_parallel_scan(const char*, uint32_t,
uint64_t*) OMNITRACE_PUBLIC_API;
void kokkosp_end_parallel_scan(uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_begin_fence(const char*, uint32_t, uint64_t*) OMNITRACE_PUBLIC_API;
void kokkosp_end_fence(uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_push_profile_region(const char*) OMNITRACE_PUBLIC_API;
void kokkosp_pop_profile_region() OMNITRACE_PUBLIC_API;
void kokkosp_create_profile_section(const char*, uint32_t*) OMNITRACE_PUBLIC_API;
void kokkosp_destroy_profile_section(uint32_t) OMNITRACE_PUBLIC_API;
void kokkosp_start_profile_section(uint32_t) OMNITRACE_PUBLIC_API;
void kokkosp_stop_profile_section(uint32_t) OMNITRACE_PUBLIC_API;
void kokkosp_allocate_data(const SpaceHandle, const char*, const void* const,
const uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_deallocate_data(const SpaceHandle, const char*, const void* const,
const uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_begin_deep_copy(SpaceHandle, const char*, const void*, SpaceHandle,
const char*, const void*, uint64_t) OMNITRACE_PUBLIC_API;
void kokkosp_end_deep_copy() OMNITRACE_PUBLIC_API;
void kokkosp_profile_event(const char*) OMNITRACE_PUBLIC_API;
void kokkosp_dual_view_sync(const char*, const void* const,
bool) OMNITRACE_PUBLIC_API;
void kokkosp_dual_view_modify(const char*, const void* const,
bool) OMNITRACE_PUBLIC_API;
// OpenMP Tools (OMPT)
# if OMNITRACE_USE_OMPT > 0
struct ompt_start_tool_result_t;
ompt_start_tool_result_t* ompt_start_tool(unsigned int,
const char*) OMNITRACE_PUBLIC_API;
# endif
# if OMNITRACE_USE_ROCTRACER > 0
// HSA
struct HsaApiTable;
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) OMNITRACE_PUBLIC_API;
void OnUnload() OMNITRACE_PUBLIC_API;
# endif
#endif
}
+72 -10
Vedi File
@@ -28,9 +28,11 @@
#include "library/components/user_region.hpp"
#include "library/config.hpp"
#include "library/debug.hpp"
#include "library/perfetto.hpp"
#include "library/runtime.hpp"
#include <timemory/api/kokkosp.hpp>
#include <timemory/hash/types.hpp>
namespace kokkosp = tim::kokkosp;
@@ -80,6 +82,19 @@ std::vector<std::string> _initialize_arguments = {};
extern "C"
{
// Settings object received by kokkosp_request_tool_settings.
// One flag followed by 255 padding bools, i.e. 256 bool members in total.
// NOTE(review): presumably kept layout-compatible with the struct of the same
// name in the Kokkos tools C interface -- confirm before changing any member.
struct Kokkos_Tools_ToolSettings
{
bool requires_global_fencing;
bool padding[255];
};
void kokkosp_request_tool_settings(const uint32_t,
Kokkos_Tools_ToolSettings*) OMNITRACE_PUBLIC_API;
void kokkosp_dual_view_sync(const char*, const void* const,
bool) OMNITRACE_PUBLIC_API;
void kokkosp_dual_view_modify(const char*, const void* const,
bool) OMNITRACE_PUBLIC_API;
void kokkosp_print_help(char*) {}
void kokkosp_parse_args(int argc, char** argv)
@@ -108,6 +123,12 @@ extern "C"
tim::manager::add_metadata(key, value);
}
// Responds to a tool-settings request by turning off `requires_global_fencing`.
//
// _version   settings version supplied by the caller; the flag is only written
//            when this is greater than zero.
// _settings  settings object to update; a null pointer is ignored.
void kokkosp_request_tool_settings(const uint32_t             _version,
                                   Kokkos_Tools_ToolSettings* _settings)
{
    // never dereference a missing settings object
    if(_settings == nullptr) return;
    if(_version > 0) _settings->requires_global_fencing = false;
}
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t devInfoCount, void* deviceInfo)
{
@@ -117,7 +138,11 @@ extern "C"
if(_standalone_initialized || (!omnitrace::config::settings_are_configured() &&
omnitrace::get_state() < omnitrace::State::Active))
{
OMNITRACE_BASIC_VERBOSE_F(0, "Initializing omnitrace...\n");
OMNITRACE_BASIC_VERBOSE_F(0,
"Initializing kokkos omnitrace connector "
"(standalone, sequence %d, version: %llu)...\n",
loadSeq, (unsigned long long) interfaceVer);
OMNITRACE_BASIC_VERBOSE_F(0, "Initializing omnitrace (standalone)... ");
auto _mode = tim::get_env<std::string>("OMNITRACE_MODE", "trace");
auto _arg0 = (_initialize_arguments.empty()) ? std::string{ "unknown" }
: _initialize_arguments.at(0);
@@ -125,19 +150,23 @@ extern "C"
_standalone_initialized = true;
omnitrace_set_mpi_hidden(false, false);
omnitrace_init_hidden(_mode.c_str(), false, _arg0.c_str());
omnitrace_push_trace("kokkos_main");
omnitrace_push_trace_hidden("kokkos_main");
}
else
{
OMNITRACE_VERBOSE_F(0,
"Initializing kokkos omnitrace connector "
"(sequence %d, version: %llu)... ",
loadSeq, (unsigned long long) interfaceVer);
}
OMNITRACE_VERBOSE_F(0,
"Initializing connector (sequence is %d, version: %llu)...",
loadSeq, (unsigned long long) interfaceVer);
setup_kernel_logger();
tim::trait::runtime_enabled<kokkosp::memory_tracker>::set(
omnitrace::config::get_use_timemory());
if(omnitrace::get_verbose() >= 0) fprintf(stderr, "Done\n");
if(_standalone_initialized && omnitrace::get_verbose() >= 0)
fprintf(stderr, "Done\n");
}
void kokkosp_finalize_library()
@@ -145,13 +174,14 @@ extern "C"
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
if(_standalone_initialized)
{
omnitrace_pop_trace("kokkos_main");
OMNITRACE_VERBOSE_F(0, "Finalizing connector (standalone)...\n");
omnitrace_pop_trace_hidden("kokkos_main");
OMNITRACE_VERBOSE_F(
0, "Finalizing kokkos omnitrace connector (standalone)...\n");
omnitrace_finalize_hidden();
}
else
{
OMNITRACE_VERBOSE_F(0, "Finalizing connector... ");
OMNITRACE_VERBOSE_F(0, "Finalizing kokkos omnitrace connector... ");
kokkosp::cleanup();
if(omnitrace::get_verbose() >= 0) fprintf(stderr, "Done\n");
}
@@ -253,6 +283,7 @@ extern "C"
void kokkosp_push_profile_region(const char* name)
{
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
kokkosp::logger_t{}.mark(1, __FUNCTION__, name);
kokkosp::get_profiler_stack<omnitrace::component::user_region>().push_back(
@@ -262,6 +293,7 @@ extern "C"
void kokkosp_pop_profile_region()
{
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
kokkosp::logger_t{}.mark(-1, __FUNCTION__);
if(kokkosp::get_profiler_stack<omnitrace::component::user_region>().empty())
@@ -274,6 +306,7 @@ extern "C"
void kokkosp_create_profile_section(const char* name, uint32_t* secid)
{
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
*secid = kokkosp::get_unique_id();
auto pname = TIMEMORY_JOIN(" ", "[kokkos]", name);
@@ -282,6 +315,7 @@ extern "C"
void kokkosp_destroy_profile_section(uint32_t secid)
{
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
kokkosp::destroy_profiler<omnitrace::component::user_region>(secid);
}
@@ -290,6 +324,7 @@ extern "C"
void kokkosp_start_profile_section(uint32_t secid)
{
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
kokkosp::logger_t{}.mark(1, __FUNCTION__, secid);
kokkosp::start_profiler<omnitrace::component::user_region>(secid);
@@ -297,6 +332,7 @@ extern "C"
void kokkosp_stop_profile_section(uint32_t secid)
{
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
kokkosp::logger_t{}.mark(-1, __FUNCTION__, secid);
kokkosp::start_profiler<omnitrace::component::user_region>(secid);
@@ -369,6 +405,32 @@ extern "C"
}
//----------------------------------------------------------------------------------//
// Records a dual-view sync as a perfetto instant event in the "user" category.
// The event name is "[kokkos][dual_view_sync]" joined with `label` by a space and
// the "target" annotation is "device" or "host" depending on `is_device`.
// Nothing is recorded when perfetto is not in use.
void kokkosp_dual_view_sync(const char* label, const void* const, bool is_device)
{
    OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
    if(!omnitrace::config::get_use_perfetto()) return;

    auto _event_name = tim::get_hash_identifier_fast(
        tim::add_hash_id(TIMEMORY_JOIN(" ", "[kokkos][dual_view_sync]", label)));
    const char* _target = (is_device) ? "device" : "host";
    TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _event_name.data() },
                        "target", _target);
}
// Records a dual-view modify as a perfetto instant event in the "user" category.
// The event name is "[kokkos][dual_view_modify]" joined with `label` by a space and
// the "target" annotation is "device" or "host" depending on `is_device`.
// Nothing is recorded when perfetto is not in use.
void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device)
{
    OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
    if(!omnitrace::config::get_use_perfetto()) return;

    auto _event_name = tim::get_hash_identifier_fast(
        tim::add_hash_id(TIMEMORY_JOIN(" ", "[kokkos][dual_view_modify]", label)));
    const char* _target = (is_device) ? "device" : "host";
    TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _event_name.data() },
                        "target", _target);
}
//----------------------------------------------------------------------------------//
}
TIMEMORY_INITIALIZE_STORAGE(kokkosp::memory_tracker)