Support omnitrace-dl as kokkos profile library (#37)
- add OnLoad and OnUnload to omnitrace-dl
- disable global fence for kokkos profiling tools
- tweak omnitrace_strip_target to use wildcards
- added dl-gen.py script for generating dlopen bindings
- added support for kokkosp_request_tool_settings
- added support for kokkosp_dual_view_sync
- added support for kokkosp_dual_view_modify
[ROCm/rocprofiler-systems commit: ab395f86c4]
This commit is contained in:
committed by
GitHub
parent
1b91d1ad11
commit
ad9fd4b7ec
@@ -118,20 +118,14 @@ function(OMNITRACE_STRIP_TARGET _TARGET)
|
||||
TARGET ${_TARGET}
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_STRIP} --keep-symbol="omnitrace_init"
|
||||
${CMAKE_STRIP} -w --keep-symbol="omnitrace_init"
|
||||
--keep-symbol="omnitrace_finalize" --keep-symbol="omnitrace_push_trace"
|
||||
--keep-symbol="omnitrace_pop_trace" --keep-symbol="omnitrace_push_region"
|
||||
--keep-symbol="omnitrace_pop_region" --keep-symbol="omnitrace_set_env"
|
||||
--keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_user_configure"
|
||||
--keep-symbol="omnitrace_user_get_callbacks"
|
||||
--keep-symbol="omnitrace_user_error_string"
|
||||
--keep-symbol="omnitrace_user_start_trace"
|
||||
--keep-symbol="omnitrace_user_stop_trace"
|
||||
--keep-symbol="omnitrace_user_start_thread_trace"
|
||||
--keep-symbol="omnitrace_user_stop_thread_trace"
|
||||
--keep-symbol="omnitrace_user_push_region"
|
||||
--keep-symbol="omnitrace_user_pop_region" --keep-symbol="ompt_start_tool"
|
||||
${ARGN} $<TARGET_FILE:${_TARGET}>
|
||||
--keep-symbol="omnitrace_set_mpi" --keep-symbol="omnitrace_user_*"
|
||||
--keep-symbol="ompt_start_tool" --keep-symbol="kokkosp_*"
|
||||
--keep-symbol="OnLoad" --keep-symbol="OnUnload" ${ARGN}
|
||||
$<TARGET_FILE:${_TARGET}>
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Stripping ${_TARGET}...")
|
||||
endif()
|
||||
|
||||
+143
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
|
||||
"""
|
||||
This script reads in function prototypes and generates the implementation pieces
|
||||
needed to dlsym the function in libomnitrace
|
||||
|
||||
Example input file:
|
||||
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names);
|
||||
void OnUnload();
|
||||
|
||||
generates:
|
||||
|
||||
##### declaration:
|
||||
|
||||
bool OnLoad(HsaApiTable*, uint64_t, uint64_t, const char* const*) OMNITRACE_PUBLIC_API;
|
||||
void OnUnload() OMNITRACE_PUBLIC_API;
|
||||
|
||||
##### dlsym:
|
||||
|
||||
OMNITRACE_DLSYM(OnLoad_f, m_omnihandle, "OnLoad");
|
||||
OMNITRACE_DLSYM(OnUnload_f, m_omnihandle, "OnUnload");
|
||||
|
||||
##### member variables:
|
||||
|
||||
bool (*OnLoad_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr;
|
||||
void (*OnUnload_f)() = nullptr;
|
||||
|
||||
##### callers:
|
||||
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().OnLoad_f, table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
}
|
||||
|
||||
void OnUnload()
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().OnUnload_f);
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class function:
    """Parsed form of one C/C++ function prototype (text without the trailing ';').

    Attributes:
        return_type: text preceding the function name, e.g. "const char*".
        func_name:   function identifier; empty when the text is not a prototype.
        params:      stripped parameter declarations exactly as written.
        param_types: parameter declarations with the parameter name removed.
        param_names: parameter identifiers ("" for an unnamed parameter).
    """

    # C/C++ words that can end a declaration but are never an identifier
    _TYPE_WORDS = frozenset(
        "const volatile void bool char short int long float double signed unsigned".split()
    )

    @classmethod
    def _split_declarator(cls, _decl):
        """Split "<type> <name>" into (type, name); name is "" when absent."""
        _fields = _decl.split()
        if len(_fields) < 2:
            # a lone token ("void", "char*", "") can only be a type
            return (" ".join(_fields), "")
        # "char *argv" style: pointer/reference markers belong to the type
        _name = _fields[-1].lstrip("*&")
        _markers = _fields[-1][: len(_fields[-1]) - len(_name)]
        if not _name.isidentifier() or _name in cls._TYPE_WORDS:
            # the trailing token is still part of the type -> unnamed declaration
            return (" ".join(_fields), "")
        return (" ".join(_fields[:-1]) + _markers, _name)

    def __init__(self, _f):
        _head, _paren, _tail = _f.partition("(")
        if not _paren:
            # no parameter list at all -> not a function prototype
            self.return_type, self.func_name = (_head.strip(), "")
        else:
            # the name is the LAST token before "(", so multi-word return
            # types such as "const char*" stay intact
            self.return_type, self.func_name = self._split_declarator(_head)
        # keep only the text inside the parameter list; anything following the
        # closing ")" (e.g. an attribute macro) is ignored
        _args = _tail.rpartition(")")[0] if ")" in _tail else _tail
        self.params = [x.strip() for x in _args.split(",")]
        self.param_types = []
        self.param_names = []
        for itr in self.params:
            _type, _name = self._split_declarator(itr)
            self.param_types.append(_type)
            self.param_names.append(_name)

    def valid(self):
        """Return True when a function name was found."""
        return len(self.func_name) > 0

    def member_variables(self):
        """Return the function-pointer member declaration, initialised to nullptr."""
        return " {} (*{}_f)({}) = nullptr;".format(
            self.return_type, self.func_name, ", ".join(self.param_types)
        )

    def function_decl(self):
        """Return the public declaration using parameter types only."""
        return " {} {}({}) OMNITRACE_PUBLIC_API;".format(
            self.return_type, self.func_name, ", ".join(self.param_types)
        )

    def dlsym_function(self):
        """Return the OMNITRACE_DLSYM statement that binds the function pointer."""
        return ' OMNITRACE_DLSYM({0}_f, m_omnihandle, "{0}");'.format(self.func_name)

    def call_dlsym_function(self):
        """Return the wrapper definition that forwards to the bound pointer."""
        # every forwarded argument is prefixed with ", " because it follows the
        # function pointer; unnamed parameters cannot be forwarded and are skipped
        _param_names = "".join(f", {x}" for x in self.param_names if x)
        return " {} {}({})\n {}\n return OMNITRACE_DL_INVOKE(get_indirect().{}_f{});\n {}".format(
            self.return_type,
            self.func_name,
            ", ".join(self.params),
            "{",
            self.func_name,
            _param_names,
            "}",
        )
|
||||
|
||||
|
||||
def run(fname):
    """Parse every ';'-terminated prototype in ``fname``.

    The file content is flattened to a single line with all whitespace runs
    collapsed to one space, split on ';', and each piece is handed to
    ``function``. Only entries for which ``valid()`` is true are returned.
    """
    with open(fname, "r") as f:
        # split() without a separator treats newlines, tabs and runs of spaces
        # alike, so re-joining on one space normalises the text in a single
        # pass (the replace-in-a-loop form never terminates when the search
        # and replacement strings are identical)
        _str = " ".join(f.read().split())

    funcs = []
    for itr in _str.split(";"):
        _func = function(itr.strip(" "))
        if _func.valid():
            funcs.append(_func)

    return funcs
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Each command-line argument is either an existing file or a filename
    # prefix that is expanded with a trailing wildcard.
    funcs = []
    for inp in sys.argv[1:]:
        if os.path.exists(inp):
            _matches = [inp]
        else:
            # sorted so the generated output does not depend on directory order
            _matches = sorted(x for x in glob.glob(f"{inp}*") if os.path.exists(x))
        if not _matches:
            # reported on stderr so redirected output holds only generated code
            print(f"No file matched {inp}", file=sys.stderr)
        for itr in _matches:
            funcs += run(itr)

    if funcs:
        print("\n##### declaration:\n")
        for itr in funcs:
            print("{}".format(itr.function_decl()))

        print("\n##### dlsym:\n")
        for itr in funcs:
            print("{}".format(itr.dlsym_function()))

        print("\n##### member variables:\n")
        for itr in funcs:
            print("{}".format(itr.member_variables()))

        print("\n##### callers:")
        for itr in funcs:
            print("")
            print("{}".format(itr.call_dlsym_function()))

        print("")
|
||||
@@ -55,6 +55,13 @@
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& _os, const SpaceHandle& _handle)
|
||||
{
|
||||
_os << _handle.name;
|
||||
return _os;
|
||||
}
|
||||
|
||||
namespace omnitrace
|
||||
{
|
||||
inline namespace dl
|
||||
@@ -206,6 +213,57 @@ struct OMNITRACE_HIDDEN_API indirect
|
||||
OMNITRACE_DLSYM(omnitrace_register_coverage_f, m_omnihandle,
|
||||
"omnitrace_register_coverage");
|
||||
|
||||
OMNITRACE_DLSYM(kokkosp_print_help_f, m_omnihandle, "kokkosp_print_help");
|
||||
OMNITRACE_DLSYM(kokkosp_parse_args_f, m_omnihandle, "kokkosp_parse_args");
|
||||
OMNITRACE_DLSYM(kokkosp_declare_metadata_f, m_omnihandle,
|
||||
"kokkosp_declare_metadata");
|
||||
OMNITRACE_DLSYM(kokkosp_request_tool_settings_f, m_omnihandle,
|
||||
"kokkosp_request_tool_settings");
|
||||
OMNITRACE_DLSYM(kokkosp_init_library_f, m_omnihandle, "kokkosp_init_library");
|
||||
OMNITRACE_DLSYM(kokkosp_finalize_library_f, m_omnihandle,
|
||||
"kokkosp_finalize_library");
|
||||
OMNITRACE_DLSYM(kokkosp_begin_parallel_for_f, m_omnihandle,
|
||||
"kokkosp_begin_parallel_for");
|
||||
OMNITRACE_DLSYM(kokkosp_end_parallel_for_f, m_omnihandle,
|
||||
"kokkosp_end_parallel_for");
|
||||
OMNITRACE_DLSYM(kokkosp_begin_parallel_reduce_f, m_omnihandle,
|
||||
"kokkosp_begin_parallel_reduce");
|
||||
OMNITRACE_DLSYM(kokkosp_end_parallel_reduce_f, m_omnihandle,
|
||||
"kokkosp_end_parallel_reduce");
|
||||
OMNITRACE_DLSYM(kokkosp_begin_parallel_scan_f, m_omnihandle,
|
||||
"kokkosp_begin_parallel_scan");
|
||||
OMNITRACE_DLSYM(kokkosp_end_parallel_scan_f, m_omnihandle,
|
||||
"kokkosp_end_parallel_scan");
|
||||
OMNITRACE_DLSYM(kokkosp_begin_fence_f, m_omnihandle, "kokkosp_begin_fence");
|
||||
OMNITRACE_DLSYM(kokkosp_end_fence_f, m_omnihandle, "kokkosp_end_fence");
|
||||
OMNITRACE_DLSYM(kokkosp_push_profile_region_f, m_omnihandle,
|
||||
"kokkosp_push_profile_region");
|
||||
OMNITRACE_DLSYM(kokkosp_pop_profile_region_f, m_omnihandle,
|
||||
"kokkosp_pop_profile_region");
|
||||
OMNITRACE_DLSYM(kokkosp_create_profile_section_f, m_omnihandle,
|
||||
"kokkosp_create_profile_section");
|
||||
OMNITRACE_DLSYM(kokkosp_destroy_profile_section_f, m_omnihandle,
|
||||
"kokkosp_destroy_profile_section");
|
||||
OMNITRACE_DLSYM(kokkosp_start_profile_section_f, m_omnihandle,
|
||||
"kokkosp_start_profile_section");
|
||||
OMNITRACE_DLSYM(kokkosp_stop_profile_section_f, m_omnihandle,
|
||||
"kokkosp_stop_profile_section");
|
||||
OMNITRACE_DLSYM(kokkosp_allocate_data_f, m_omnihandle, "kokkosp_allocate_data");
|
||||
OMNITRACE_DLSYM(kokkosp_deallocate_data_f, m_omnihandle,
|
||||
"kokkosp_deallocate_data");
|
||||
OMNITRACE_DLSYM(kokkosp_begin_deep_copy_f, m_omnihandle,
|
||||
"kokkosp_begin_deep_copy");
|
||||
OMNITRACE_DLSYM(kokkosp_end_deep_copy_f, m_omnihandle, "kokkosp_end_deep_copy");
|
||||
OMNITRACE_DLSYM(kokkosp_profile_event_f, m_omnihandle, "kokkosp_profile_event");
|
||||
OMNITRACE_DLSYM(kokkosp_dual_view_sync_f, m_omnihandle, "kokkosp_dual_view_sync");
|
||||
OMNITRACE_DLSYM(kokkosp_dual_view_modify_f, m_omnihandle,
|
||||
"kokkosp_dual_view_modify");
|
||||
|
||||
#if OMNITRACE_USE_ROCTRACER > 0
|
||||
OMNITRACE_DLSYM(hsa_on_load_f, m_omnihandle, "OnLoad");
|
||||
OMNITRACE_DLSYM(hsa_on_unload_f, m_omnihandle, "OnUnload");
|
||||
#endif
|
||||
|
||||
#if OMNITRACE_USE_OMPT == 0
|
||||
_warn_verbose = 5;
|
||||
#else
|
||||
@@ -256,6 +314,7 @@ struct OMNITRACE_HIDDEN_API indirect
|
||||
}
|
||||
|
||||
public:
|
||||
// omnitrace functions
|
||||
void (*omnitrace_init_library_f)(void) = nullptr;
|
||||
void (*omnitrace_init_f)(const char*, bool, const char*) = nullptr;
|
||||
void (*omnitrace_finalize_f)(void) = nullptr;
|
||||
@@ -269,6 +328,48 @@ public:
|
||||
int (*omnitrace_push_region_f)(const char*) = nullptr;
|
||||
int (*omnitrace_pop_region_f)(const char*) = nullptr;
|
||||
int (*omnitrace_user_configure_f)(int, void*, void*) = nullptr;
|
||||
|
||||
// KokkosP functions
|
||||
void (*kokkosp_print_help_f)(char*) = nullptr;
|
||||
void (*kokkosp_parse_args_f)(int, char**) = nullptr;
|
||||
void (*kokkosp_declare_metadata_f)(const char*, const char*) = nullptr;
|
||||
void (*kokkosp_request_tool_settings_f)(const uint32_t,
|
||||
Kokkos_Tools_ToolSettings*) = nullptr;
|
||||
void (*kokkosp_init_library_f)(const int, const uint64_t, const uint32_t,
|
||||
void*) = nullptr;
|
||||
void (*kokkosp_finalize_library_f)() = nullptr;
|
||||
void (*kokkosp_begin_parallel_for_f)(const char*, uint32_t, uint64_t*) = nullptr;
|
||||
void (*kokkosp_end_parallel_for_f)(uint64_t) = nullptr;
|
||||
void (*kokkosp_begin_parallel_reduce_f)(const char*, uint32_t, uint64_t*) = nullptr;
|
||||
void (*kokkosp_end_parallel_reduce_f)(uint64_t) = nullptr;
|
||||
void (*kokkosp_begin_parallel_scan_f)(const char*, uint32_t, uint64_t*) = nullptr;
|
||||
void (*kokkosp_end_parallel_scan_f)(uint64_t) = nullptr;
|
||||
void (*kokkosp_begin_fence_f)(const char*, uint32_t, uint64_t*) = nullptr;
|
||||
void (*kokkosp_end_fence_f)(uint64_t) = nullptr;
|
||||
void (*kokkosp_push_profile_region_f)(const char*) = nullptr;
|
||||
void (*kokkosp_pop_profile_region_f)() = nullptr;
|
||||
void (*kokkosp_create_profile_section_f)(const char*, uint32_t*) = nullptr;
|
||||
void (*kokkosp_destroy_profile_section_f)(uint32_t) = nullptr;
|
||||
void (*kokkosp_start_profile_section_f)(uint32_t) = nullptr;
|
||||
void (*kokkosp_stop_profile_section_f)(uint32_t) = nullptr;
|
||||
void (*kokkosp_allocate_data_f)(const SpaceHandle, const char*, const void* const,
|
||||
const uint64_t) = nullptr;
|
||||
void (*kokkosp_deallocate_data_f)(const SpaceHandle, const char*, const void* const,
|
||||
const uint64_t) = nullptr;
|
||||
void (*kokkosp_begin_deep_copy_f)(SpaceHandle, const char*, const void*, SpaceHandle,
|
||||
const char*, const void*, uint64_t) = nullptr;
|
||||
void (*kokkosp_end_deep_copy_f)() = nullptr;
|
||||
void (*kokkosp_profile_event_f)(const char*) = nullptr;
|
||||
void (*kokkosp_dual_view_sync_f)(const char*, const void* const, bool) = nullptr;
|
||||
void (*kokkosp_dual_view_modify_f)(const char*, const void* const, bool) = nullptr;
|
||||
|
||||
// HSA functions
|
||||
#if OMNITRACE_USE_ROCTRACER > 0
|
||||
bool (*hsa_on_load_f)(HsaApiTable*, uint64_t, uint64_t, const char* const*) = nullptr;
|
||||
void (*hsa_on_unload_f)() = nullptr;
|
||||
#endif
|
||||
|
||||
// OpenMP functions
|
||||
#if defined(OMNITRACE_USE_OMPT) && OMNITRACE_USE_OMPT > 0
|
||||
ompt_start_tool_result_t* (*ompt_start_tool_f)(unsigned int, const char*);
|
||||
#endif
|
||||
@@ -556,6 +657,190 @@ extern "C"
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().omnitrace_pop_region_f, name);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// KokkosP
|
||||
//
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
void kokkosp_print_help(char* argv0)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_print_help_f, argv0);
|
||||
}
|
||||
|
||||
void kokkosp_parse_args(int argc, char** argv)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_parse_args_f, argc, argv);
|
||||
}
|
||||
|
||||
void kokkosp_declare_metadata(const char* key, const char* value)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_declare_metadata_f, key, value);
|
||||
}
|
||||
|
||||
void kokkosp_request_tool_settings(const uint32_t version,
|
||||
Kokkos_Tools_ToolSettings* settings)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_request_tool_settings_f,
|
||||
version, settings);
|
||||
}
|
||||
|
||||
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
|
||||
const uint32_t devInfoCount, void* deviceInfo)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_init_library_f, loadSeq,
|
||||
interfaceVer, devInfoCount, deviceInfo);
|
||||
}
|
||||
|
||||
void kokkosp_finalize_library()
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_finalize_library_f);
|
||||
}
|
||||
|
||||
void kokkosp_begin_parallel_for(const char* name, uint32_t devid, uint64_t* kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_for_f, name,
|
||||
devid, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_end_parallel_for(uint64_t kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_for_f, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_begin_parallel_reduce(const char* name, uint32_t devid, uint64_t* kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_reduce_f, name,
|
||||
devid, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_end_parallel_reduce(uint64_t kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_reduce_f, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_begin_parallel_scan(const char* name, uint32_t devid, uint64_t* kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_parallel_scan_f, name,
|
||||
devid, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_end_parallel_scan(uint64_t kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_parallel_scan_f, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_begin_fence(const char* name, uint32_t devid, uint64_t* kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_fence_f, name, devid,
|
||||
kernid);
|
||||
}
|
||||
|
||||
void kokkosp_end_fence(uint64_t kernid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_fence_f, kernid);
|
||||
}
|
||||
|
||||
void kokkosp_push_profile_region(const char* name)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_push_profile_region_f, name);
|
||||
}
|
||||
|
||||
void kokkosp_pop_profile_region()
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_pop_profile_region_f);
|
||||
}
|
||||
|
||||
void kokkosp_create_profile_section(const char* name, uint32_t* secid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_create_profile_section_f, name,
|
||||
secid);
|
||||
}
|
||||
|
||||
void kokkosp_destroy_profile_section(uint32_t secid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_destroy_profile_section_f,
|
||||
secid);
|
||||
}
|
||||
|
||||
void kokkosp_start_profile_section(uint32_t secid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_start_profile_section_f, secid);
|
||||
}
|
||||
|
||||
void kokkosp_stop_profile_section(uint32_t secid)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_stop_profile_section_f, secid);
|
||||
}
|
||||
|
||||
void kokkosp_allocate_data(const SpaceHandle space, const char* label,
|
||||
const void* const ptr, const uint64_t size)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_allocate_data_f, space, label,
|
||||
ptr, size);
|
||||
}
|
||||
|
||||
void kokkosp_deallocate_data(const SpaceHandle space, const char* label,
|
||||
const void* const ptr, const uint64_t size)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_deallocate_data_f, space, label,
|
||||
ptr, size);
|
||||
}
|
||||
|
||||
void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char* dst_name,
|
||||
const void* dst_ptr, SpaceHandle src_handle,
|
||||
const char* src_name, const void* src_ptr, uint64_t size)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_begin_deep_copy_f, dst_handle,
|
||||
dst_name, dst_ptr, src_handle, src_name, src_ptr,
|
||||
size);
|
||||
}
|
||||
|
||||
void kokkosp_end_deep_copy()
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_end_deep_copy_f);
|
||||
}
|
||||
|
||||
void kokkosp_profile_event(const char* name)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_profile_event_f, name);
|
||||
}
|
||||
|
||||
void kokkosp_dual_view_sync(const char* label, const void* const data, bool is_device)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_dual_view_sync_f, label, data,
|
||||
is_device);
|
||||
}
|
||||
|
||||
void kokkosp_dual_view_modify(const char* label, const void* const data,
|
||||
bool is_device)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().kokkosp_dual_view_modify_f, label, data,
|
||||
is_device);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// HSA
|
||||
//
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
#if OMNITRACE_USE_ROCTRACER > 0
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names)
|
||||
{
|
||||
return OMNITRACE_DL_INVOKE(get_indirect().hsa_on_load_f, table, runtime_version,
|
||||
failed_tool_count, failed_tool_names);
|
||||
}
|
||||
|
||||
void OnUnload() { return OMNITRACE_DL_INVOKE(get_indirect().hsa_on_unload_f); }
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// OMPT
|
||||
//
|
||||
//----------------------------------------------------------------------------------//
|
||||
#if OMNITRACE_USE_OMPT > 0
|
||||
ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version,
|
||||
const char* runtime_version)
|
||||
|
||||
@@ -53,6 +53,10 @@
|
||||
# define OMNITRACE_USE_OMPT 0
|
||||
#endif
|
||||
|
||||
#if !defined(OMNITRACE_USE_ROCTRACER)
|
||||
# define OMNITRACE_USE_ROCTRACER 0
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
//
|
||||
// omnitrace symbols
|
||||
@@ -87,12 +91,71 @@ extern "C"
|
||||
int omnitrace_user_push_region_dl(const char*) OMNITRACE_HIDDEN_API;
|
||||
int omnitrace_user_pop_region_dl(const char*) OMNITRACE_HIDDEN_API;
|
||||
|
||||
// KokkosP
|
||||
struct OMNITRACE_HIDDEN_API SpaceHandle
|
||||
{
|
||||
char name[64];
|
||||
};
|
||||
|
||||
struct OMNITRACE_HIDDEN_API Kokkos_Tools_ToolSettings
|
||||
{
|
||||
bool requires_global_fencing;
|
||||
bool padding[255];
|
||||
};
|
||||
|
||||
void kokkosp_print_help(char*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_parse_args(int, char**) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_declare_metadata(const char*, const char*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_request_tool_settings(const uint32_t,
|
||||
Kokkos_Tools_ToolSettings*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_init_library(const int, const uint64_t, const uint32_t,
|
||||
void*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_finalize_library() OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_begin_parallel_for(const char*, uint32_t,
|
||||
uint64_t*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_end_parallel_for(uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_begin_parallel_reduce(const char*, uint32_t,
|
||||
uint64_t*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_end_parallel_reduce(uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_begin_parallel_scan(const char*, uint32_t,
|
||||
uint64_t*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_end_parallel_scan(uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_begin_fence(const char*, uint32_t, uint64_t*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_end_fence(uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_push_profile_region(const char*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_pop_profile_region() OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_create_profile_section(const char*, uint32_t*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_destroy_profile_section(uint32_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_start_profile_section(uint32_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_stop_profile_section(uint32_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_allocate_data(const SpaceHandle, const char*, const void* const,
|
||||
const uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_deallocate_data(const SpaceHandle, const char*, const void* const,
|
||||
const uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_begin_deep_copy(SpaceHandle, const char*, const void*, SpaceHandle,
|
||||
const char*, const void*, uint64_t) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_end_deep_copy() OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_profile_event(const char*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_dual_view_sync(const char*, const void* const,
|
||||
bool) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_dual_view_modify(const char*, const void* const,
|
||||
bool) OMNITRACE_PUBLIC_API;
|
||||
|
||||
// OpenMP Tools (OMPT)
|
||||
# if OMNITRACE_USE_OMPT > 0
|
||||
struct ompt_start_tool_result_t;
|
||||
|
||||
ompt_start_tool_result_t* ompt_start_tool(unsigned int,
|
||||
const char*) OMNITRACE_PUBLIC_API;
|
||||
# endif
|
||||
|
||||
# if OMNITRACE_USE_ROCTRACER > 0
|
||||
// HSA
|
||||
struct HsaApiTable;
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) OMNITRACE_PUBLIC_API;
|
||||
void OnUnload() OMNITRACE_PUBLIC_API;
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -28,9 +28,11 @@
|
||||
#include "library/components/user_region.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/perfetto.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
|
||||
#include <timemory/api/kokkosp.hpp>
|
||||
#include <timemory/hash/types.hpp>
|
||||
|
||||
namespace kokkosp = tim::kokkosp;
|
||||
|
||||
@@ -80,6 +82,19 @@ std::vector<std::string> _initialize_arguments = {};
|
||||
|
||||
extern "C"
|
||||
{
|
||||
struct Kokkos_Tools_ToolSettings
|
||||
{
|
||||
bool requires_global_fencing;
|
||||
bool padding[255];
|
||||
};
|
||||
|
||||
void kokkosp_request_tool_settings(const uint32_t,
|
||||
Kokkos_Tools_ToolSettings*) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_dual_view_sync(const char*, const void* const,
|
||||
bool) OMNITRACE_PUBLIC_API;
|
||||
void kokkosp_dual_view_modify(const char*, const void* const,
|
||||
bool) OMNITRACE_PUBLIC_API;
|
||||
|
||||
void kokkosp_print_help(char*) {}
|
||||
|
||||
void kokkosp_parse_args(int argc, char** argv)
|
||||
@@ -108,6 +123,12 @@ extern "C"
|
||||
tim::manager::add_metadata(key, value);
|
||||
}
|
||||
|
||||
void kokkosp_request_tool_settings(const uint32_t _version,
|
||||
Kokkos_Tools_ToolSettings* _settings)
|
||||
{
|
||||
if(_version > 0) _settings->requires_global_fencing = false;
|
||||
}
|
||||
|
||||
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
|
||||
const uint32_t devInfoCount, void* deviceInfo)
|
||||
{
|
||||
@@ -117,7 +138,11 @@ extern "C"
|
||||
if(_standalone_initialized || (!omnitrace::config::settings_are_configured() &&
|
||||
omnitrace::get_state() < omnitrace::State::Active))
|
||||
{
|
||||
OMNITRACE_BASIC_VERBOSE_F(0, "Initializing omnitrace...\n");
|
||||
OMNITRACE_BASIC_VERBOSE_F(0,
|
||||
"Initializing kokkos omnitrace connector "
|
||||
"(standalone, sequence %d, version: %llu)...\n",
|
||||
loadSeq, (unsigned long long) interfaceVer);
|
||||
OMNITRACE_BASIC_VERBOSE_F(0, "Initializing omnitrace (standalone)... ");
|
||||
auto _mode = tim::get_env<std::string>("OMNITRACE_MODE", "trace");
|
||||
auto _arg0 = (_initialize_arguments.empty()) ? std::string{ "unknown" }
|
||||
: _initialize_arguments.at(0);
|
||||
@@ -125,19 +150,23 @@ extern "C"
|
||||
_standalone_initialized = true;
|
||||
omnitrace_set_mpi_hidden(false, false);
|
||||
omnitrace_init_hidden(_mode.c_str(), false, _arg0.c_str());
|
||||
omnitrace_push_trace("kokkos_main");
|
||||
omnitrace_push_trace_hidden("kokkos_main");
|
||||
}
|
||||
else
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(0,
|
||||
"Initializing kokkos omnitrace connector "
|
||||
"(sequence %d, version: %llu)... ",
|
||||
loadSeq, (unsigned long long) interfaceVer);
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE_F(0,
|
||||
"Initializing connector (sequence is %d, version: %llu)...",
|
||||
loadSeq, (unsigned long long) interfaceVer);
|
||||
|
||||
setup_kernel_logger();
|
||||
|
||||
tim::trait::runtime_enabled<kokkosp::memory_tracker>::set(
|
||||
omnitrace::config::get_use_timemory());
|
||||
|
||||
if(omnitrace::get_verbose() >= 0) fprintf(stderr, "Done\n");
|
||||
if(_standalone_initialized && omnitrace::get_verbose() >= 0)
|
||||
fprintf(stderr, "Done\n");
|
||||
}
|
||||
|
||||
void kokkosp_finalize_library()
|
||||
@@ -145,13 +174,14 @@ extern "C"
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
if(_standalone_initialized)
|
||||
{
|
||||
omnitrace_pop_trace("kokkos_main");
|
||||
OMNITRACE_VERBOSE_F(0, "Finalizing connector (standalone)...\n");
|
||||
omnitrace_pop_trace_hidden("kokkos_main");
|
||||
OMNITRACE_VERBOSE_F(
|
||||
0, "Finalizing kokkos omnitrace connector (standalone)...\n");
|
||||
omnitrace_finalize_hidden();
|
||||
}
|
||||
else
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(0, "Finalizing connector... ");
|
||||
OMNITRACE_VERBOSE_F(0, "Finalizing kokkos omnitrace connector... ");
|
||||
kokkosp::cleanup();
|
||||
if(omnitrace::get_verbose() >= 0) fprintf(stderr, "Done\n");
|
||||
}
|
||||
@@ -253,6 +283,7 @@ extern "C"
|
||||
|
||||
void kokkosp_push_profile_region(const char* name)
|
||||
{
|
||||
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
kokkosp::logger_t{}.mark(1, __FUNCTION__, name);
|
||||
kokkosp::get_profiler_stack<omnitrace::component::user_region>().push_back(
|
||||
@@ -262,6 +293,7 @@ extern "C"
|
||||
|
||||
void kokkosp_pop_profile_region()
|
||||
{
|
||||
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
kokkosp::logger_t{}.mark(-1, __FUNCTION__);
|
||||
if(kokkosp::get_profiler_stack<omnitrace::component::user_region>().empty())
|
||||
@@ -274,6 +306,7 @@ extern "C"
|
||||
|
||||
void kokkosp_create_profile_section(const char* name, uint32_t* secid)
|
||||
{
|
||||
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
*secid = kokkosp::get_unique_id();
|
||||
auto pname = TIMEMORY_JOIN(" ", "[kokkos]", name);
|
||||
@@ -282,6 +315,7 @@ extern "C"
|
||||
|
||||
void kokkosp_destroy_profile_section(uint32_t secid)
|
||||
{
|
||||
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
kokkosp::destroy_profiler<omnitrace::component::user_region>(secid);
|
||||
}
|
||||
@@ -290,6 +324,7 @@ extern "C"
|
||||
|
||||
void kokkosp_start_profile_section(uint32_t secid)
|
||||
{
|
||||
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
kokkosp::logger_t{}.mark(1, __FUNCTION__, secid);
|
||||
kokkosp::start_profiler<omnitrace::component::user_region>(secid);
|
||||
@@ -297,6 +332,7 @@ extern "C"
|
||||
|
||||
void kokkosp_stop_profile_section(uint32_t secid)
|
||||
{
|
||||
if(omnitrace::get_use_perfetto()) return; // perfetto doesn't support regions
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
kokkosp::logger_t{}.mark(-1, __FUNCTION__, secid);
|
||||
kokkosp::start_profiler<omnitrace::component::user_region>(secid);
|
||||
@@ -369,6 +405,32 @@ extern "C"
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
void kokkosp_dual_view_sync(const char* label, const void* const, bool is_device)
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
if(omnitrace::config::get_use_perfetto())
|
||||
{
|
||||
auto _name = tim::get_hash_identifier_fast(
|
||||
tim::add_hash_id(TIMEMORY_JOIN(" ", "[kokkos][dual_view_sync]", label)));
|
||||
TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _name.data() },
|
||||
"target", (is_device) ? "device" : "host");
|
||||
}
|
||||
}
|
||||
|
||||
void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device)
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
if(omnitrace::config::get_use_perfetto())
|
||||
{
|
||||
auto _name = tim::get_hash_identifier_fast(tim::add_hash_id(
|
||||
TIMEMORY_JOIN(" ", "[kokkos][dual_view_modify]", label)));
|
||||
TRACE_EVENT_INSTANT("user", ::perfetto::StaticString{ _name.data() },
|
||||
"target", (is_device) ? "device" : "host");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
}
|
||||
|
||||
TIMEMORY_INITIALIZE_STORAGE(kokkosp::memory_tracker)
|
||||
|
||||
Reference in New Issue
Block a user