rocprofv3: Update rocprofv3 command line for ATT (#201)

* rocprofv3: suppress agent info when no data collected

* Update output config serialization

- full serialization of output configuration

* Update rocprofiler-sdk-att/tests

- add version and soversion
- change output directory
- generate libatt_decoder_summary
- disable tests instead of removing them

* Update rocprofv3 command-line

- make --att-library-path hidden by default
- simplify check_att_capability
- reorder pc sampling options
- add hidden --echo option
- remove ROCPROF_LIST_AVAIL_TOOL_LIBRARY from preload

* Add new rocprofv3 tests for specifying the ATT library path

* Tweak to rocprofv3-test-hsa-multiqueue-att tests

* Update rocprofv3 tool to enable output with att

* Fix standalone test installation

* Revert from fetchcontent_makeavailable back to fetchcontent_populate

* Revert tests/common/CMakeLists.txt

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>

[ROCm/rocprofiler-sdk commit: 59b41ab5aa]
Этот коммит содержится в:
Madsen, Jonathan
2025-02-11 18:10:48 -06:00
коммит произвёл GitHub
родитель 5cc6244389
Коммит 81250fa3d4
12 изменённых файлов: 401 добавлений и 191 удалений
+118 -116
Просмотреть файл
@@ -107,16 +107,21 @@ def search_path(path_list):
def check_att_capability(args):
path = []
ROCPROFV3_DIR = os.path.dirname(os.path.realpath(__file__))
ROCM_DIR = os.path.dirname(ROCPROFV3_DIR)
support_input = {}
tmp_parser = argparse.ArgumentParser(add_help=False)
ld_library_paths = []
for itr in os.environ.get("LD_LIBRARY_PATH", "").split(":") + [f"{ROCM_DIR}/lib"]:
# don't add duplicates
if itr not in ld_library_paths:
ld_library_paths += [itr]
tmp_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
tmp_parser.add_argument(
"--att-library-path",
default=os.environ.get(
"ATT_LIBRARY_PATH", os.environ.get("LD_LIBRARY_PATH", None)
),
"ROCPROF_ATT_LIBRARY_PATH", ":".join(ld_library_paths)
).split(":"),
nargs="+",
type=str,
required=False,
)
@@ -129,52 +134,28 @@ def check_att_capability(args):
required=False,
)
tmp_data = {}
att_args, unparsed_args = tmp_parser.parse_known_args(args)
tmp_keys = list(att_args.__dict__.keys())
att_args, _ = tmp_parser.parse_known_args(args)
for itr in tmp_keys:
if has_set_attr(att_args, itr):
tmp_data[itr] = getattr(att_args, itr)
data = dotdict(tmp_data)
if data.input:
support = search_path(att_args.att_library_path)
support_input = {}
if att_args.input:
# If index of a pass in input file is a key in the support_input dict, then that pass has att-library-path arg
args_list = parse_input(data.input)
args_list = parse_input(att_args.input)
for index, itr in enumerate(args_list):
if itr.att_library_path:
library_path = []
if ":" in itr.att_library_path:
library_path.extend(itr.att_library_path.split(":"))
else:
library_path.append(itr.att_library_path)
support = search_path(library_path)
# If the att-library-path in the input file for a pass is valid, then the value of index key in the dict, support_input, is updated to that valid path
if support:
support_input[index] = set(support)
else:
# If the att-library-path in the input file for a pass is invalid, then the value of index key in the dict, support_input, is empty
support_input[index] = []
if data.att_library_path:
if ":" in data.att_library_path:
path.extend(data.att_library_path.split(":"))
else:
path.append(data.att_library_path)
else:
path.append(f"{ROCM_DIR}/lib")
path.append(f"{ROCM_DIR}/lib64")
library_path = (
itr.att_library_path.split(":")
if isinstance(itr.att_library_path, str)
else itr.att_library_path
)
_support = search_path(library_path)
# If the att-library-path in the input file for a pass is valid, then the value of index key in the dict,
# support_input, is updated to that valid path
# If the att-library-path in the input file for a pass is invalid, then the value of index key in the dict,
# support_input, is empty
support_input[index] = set(_support) if support else []
support = search_path(set(path))
if support:
if len(path) == 1:
os.environ["ATT_LIBRARY_PATH"] = path[0]
os.environ["ROCPROF_ATT_LIBRARY_PATH"] = path[0]
else:
os.environ["ATT_LIBRARY_PATH"] = ":".join(path)
os.environ["ROCPROF_ATT_LIBRARY_PATH"] = ":".join(path)
return support, support_input
return None, support_input
return (att_args.att_library_path, set(support), support_input)
class booleanArgAction(argparse.Action):
@@ -279,36 +260,6 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.",
)
pc_sampling_options = parser.add_argument_group("PC sampling options")
add_parser_bool_argument(
pc_sampling_options,
"--pc-sampling-beta-enabled",
help="enable pc sampling support; beta version",
)
pc_sampling_options.add_argument(
"--pc-sampling-unit",
help="",
default=None,
type=str.lower,
choices=("instructions", "cycles", "time"),
)
pc_sampling_options.add_argument(
"--pc-sampling-method",
help="",
default=None,
type=str.lower,
choices=("stochastic", "host_trap"),
)
pc_sampling_options.add_argument(
"--pc-sampling-interval",
help="",
default=None,
type=int,
)
basic_tracing_options = parser.add_argument_group("Basic tracing options")
# Add the arguments
@@ -408,6 +359,37 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
nargs="*",
)
pc_sampling_options = parser.add_argument_group("PC sampling options")
add_parser_bool_argument(
pc_sampling_options,
"--pc-sampling-beta-enabled",
help="enable pc sampling support; beta version",
)
pc_sampling_options.add_argument(
"--pc-sampling-unit",
help="",
default=None,
type=str.lower,
choices=("instructions", "cycles", "time"),
)
pc_sampling_options.add_argument(
"--pc-sampling-method",
help="",
default=None,
type=str.lower,
choices=("stochastic", "host_trap"),
)
pc_sampling_options.add_argument(
"--pc-sampling-interval",
help="",
default=None,
type=int,
)
post_processing_options = parser.add_argument_group("Post-processing tracing options")
add_parser_bool_argument(
@@ -564,13 +546,6 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
nargs="*",
)
advanced_options.add_argument(
"--att-library-path",
default=os.environ.get(
"ATT_LIBRARY_PATH", os.environ.get("LD_LIBRARY_PATH", None)
),
help="ATT library path to find decoder library",
)
# below is available for CI because LD_PRELOADing a library linked to a sanitizer library
# causes issues in apps where HIP is part of shared library.
add_parser_bool_argument(
@@ -579,6 +554,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
help=argparse.SUPPRESS,
)
# just echo the command line
add_parser_bool_argument(
advanced_options,
"--echo",
help=argparse.SUPPRESS,
)
if args is None:
args = sys.argv[1:]
@@ -592,22 +574,35 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
app_args = args[(idx + 1) :]
break
supported_list, is_support_input = check_att_capability(rocp_args)
if supported_list or len(is_support_input) != 0:
default_att_lib_path, att_support_args, att_support_inp = check_att_capability(
rocp_args
)
if att_support_args or len(att_support_inp) != 0:
choice_list = []
for keys, values in is_support_input.items():
for keys, values in att_support_inp.items():
choice_list.extend(values)
if supported_list:
choice_list.extend(list(supported_list))
if att_support_args:
choice_list.extend(list(att_support_args))
att_options = parser.add_argument_group("Advanced Thread Trace")
# remove duplicates
choice_list = list(set(choice_list))
att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options")
add_parser_bool_argument(
att_options,
"--advanced-thread-trace",
"--att",
help="Enable ATT",
)
att_options.add_argument(
"--att-library-path",
help="Search path(s) to decoder library/libraries",
default=default_att_lib_path if not att_support_inp else None,
nargs="+",
)
att_options.add_argument(
"--att-target-cu",
help="ATT target compute unit",
@@ -639,7 +634,9 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
att_options.add_argument(
"--att-parse",
type=str.lower,
default=None,
default=(
choice_list[0] if len(choice_list) == 1 and not att_support_inp else None
),
help="Select ATT Parse method from the choices",
choices=set(choice_list),
)
@@ -651,7 +648,7 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
help="Serialize all kernels",
)
return (parser.parse_args(rocp_args), app_args, supported_list, is_support_input)
return (parser.parse_args(rocp_args), app_args, att_support_args, att_support_inp)
def parse_yaml(yaml_file):
@@ -792,7 +789,9 @@ def get_args(cmd_args, inp_args):
and has_set_attr(inp_args, itr)
and getattr(cmd_args, itr) != getattr(inp_args, itr)
):
raise RuntimeError(f"conflicting value for {itr}")
raise RuntimeError(
f"conflicting value for {itr} : {getattr(cmd_args, itr)} vs {getattr(inp_args, itr)}"
)
else:
data[itr] = get_attr(itr)
@@ -889,7 +888,6 @@ def run(app_args, args, **kwargs):
prepend_preload = [itr for itr in args.preload if itr]
append_preload = [
ROCPROF_TOOL_LIBRARY,
ROCPROF_LIST_AVAIL_TOOL_LIBRARY,
ROCPROF_SDK_LIBRARY,
]
@@ -1054,11 +1052,14 @@ def run(app_args, args, **kwargs):
args.truncate_kernels,
overwrite_if_true=True,
)
update_env(
"ROCPROF_LIST_AVAIL",
args.list_avail,
overwrite_if_true=True,
)
if args.list_avail:
update_env(
"ROCPROF_LIST_AVAIL_TOOL_LIBRARY",
ROCPROF_LIST_AVAIL_TOOL_LIBRARY,
overwrite_if_true=True,
)
if args.collection_period:
factors = {
"hour": 60 * 60 * 1e9,
@@ -1136,11 +1137,11 @@ def run(app_args, args, **kwargs):
update_env("ROCPROFILER_PC_SAMPLING_BETA_ENABLED", "on")
path = os.path.join(f"{ROCM_DIR}", "bin/rocprofv3_avail")
if app_args:
exit_code = subprocess.check_call(["python3", path], env=app_env)
exit_code = subprocess.check_call([sys.executable, path], env=app_env)
else:
app_args = ["python3", path]
app_args = [sys.executable, path]
elif not app_args:
elif not app_args and not args.echo:
log_config(app_env)
fatal_error("No application provided")
@@ -1169,11 +1170,6 @@ def run(app_args, args, **kwargs):
update_env(
"ROCPROF_COUNTERS", "pmc: {}".format(" ".join(args.pmc)), overwrite=True
)
else:
update_env("ROCPROF_COUNTER_COLLECTION", False, overwrite=True)
if args.log_level in ("info", "trace", "env"):
log_config(app_env)
if args.pc_sampling_unit or args.pc_sampling_method or args.pc_sampling_interval:
@@ -1256,17 +1252,10 @@ def run(app_args, args, **kwargs):
args.att_serialize_all,
overwrite=True,
)
if args.att_library_path:
update_env(
"ROCPROF_ATT_LIBRARY_PATH",
args.att_library_path,
overwrite=True,
)
update_env(
"ATT_LIBRARY_PATH",
args.att_library_path,
":".join(args.att_library_path),
overwrite=True,
)
if args.att_percounters:
@@ -1276,10 +1265,23 @@ def run(app_args, args, **kwargs):
overwrite=True,
)
if args.log_level in ("info", "trace", "env"):
log_config(app_env)
if use_execv:
# does not return
os.execvpe(app_args[0], app_args, env=app_env)
if args.echo:
sys.stderr.flush()
print(f"command: {app_args}")
sys.stdout.flush()
else:
# does not return
os.execvpe(app_args[0], app_args, env=app_env)
else:
if args.echo:
sys.stderr.flush()
print(f"command: {app_args}")
sys.stdout.flush()
return 0
try:
exit_code = subprocess.check_call(app_args, env=app_env)
if exit_code != 0:
+7
Просмотреть файл
@@ -121,6 +121,13 @@ output_config::save(ArchiveT& ar) const
CFG_SERIALIZE_NAMED_MEMBER("summary_unit", stats_summary_unit);
CFG_SERIALIZE_NAMED_MEMBER("summary_file", stats_summary_file);
CFG_SERIALIZE_MEMBER(csv_output);
CFG_SERIALIZE_MEMBER(json_output);
CFG_SERIALIZE_MEMBER(pftrace_output);
CFG_SERIALIZE_MEMBER(otf2_output);
CFG_SERIALIZE_MEMBER(summary_output);
CFG_SERIALIZE_MEMBER(kernel_rename);
#undef CFG_SERIALIZE_MEMBER
#undef CFG_SERIALIZE_NAMED_MEMBER
}
-8
Просмотреть файл
@@ -210,14 +210,6 @@ read_tmp_file(domain_type type)
auto& _fs = filebuf->file.stream;
if(_fs.is_open()) _fs.close();
filebuf->file.open(std::ios::binary | std::ios::in);
// for(auto itr : filebuf->file.file_pos)
// {
// _fs.seekg(itr); // set to the absolute position
// if(_fs.eof()) break;
// auto _buffer = ring_buffer_t<Tp>{};
// _buffer.load(_fs);
// _data.emplace_back(std::move(_buffer));
// }
}
} // namespace tool
} // namespace rocprofiler
+27 -12
Просмотреть файл
@@ -1,19 +1,19 @@
#
#
#
project(rocprofiler-att-parser-tests LANGUAGES CXX)
rocprofiler_deactivate_clang_tidy()
add_executable(att-parser-tool-v3)
target_sources(att-parser-tool-v3 PRIVATE standalone_tool_main.cpp)
target_link_libraries(
att-parser-tool-v3
PRIVATE rocprofiler-sdk::rocprofiler-sdk-att-parser
rocprofiler-sdk::rocprofiler-sdk-json
rocprofiler-sdk::rocprofiler-sdk-common-library)
target_sources(att-parser-tool-v3 PRIVATE standalone_tool_main.cpp)
add_executable(att-decoder-test)
target_sources(att-decoder-test PRIVATE att_decoder_test.cpp)
target_link_libraries(
att-decoder-test
PRIVATE rocprofiler-sdk::rocprofiler-sdk-att-parser
@@ -23,19 +23,34 @@ target_link_libraries(
rocprofiler-sdk::rocprofiler-sdk-static-library
GTest::gtest
GTest::gtest_main)
target_sources(att-decoder-test PRIVATE att_decoder_test.cpp)
set(env-att-lib "ROCPROF_ATT_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}/../lib")
add_library(att_decoder_testing SHARED)
add_library(rocprofiler-sdk::att-decoder-testing ALIAS att_decoder_testing)
target_sources(att_decoder_testing PRIVATE dummy_decoder.cpp)
set_target_properties(att_decoder_testing PROPERTIES VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR})
add_library(att_decoder_summary SHARED)
add_library(rocprofiler-sdk::att-decoder-summary ALIAS att_decoder_summary)
target_sources(att_decoder_summary PRIVATE dummy_decoder.cpp)
set_target_properties(
att_decoder_summary
PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/att
VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR})
if(NOT ROCPROFILER_MEMCHECK)
gtest_add_tests(
TARGET att-decoder-test
SOURCES att_decoder_test.cpp
TEST_LIST att-decoder-test_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
set_tests_properties(
${att-decoder-test_TESTS} PROPERTIES ENVIRONMENT "${env-att-lib}" TIMEOUT 10
LABELS "unittests")
set(IS_MEMCHECK OFF)
else()
set(IS_MEMCHECK ON)
endif()
set(env-att-lib "ROCPROF_ATT_LIBRARY_PATH=${PROJECT_BINARY_DIR}/lib")
gtest_add_tests(
TARGET att-decoder-test
SOURCES att_decoder_test.cpp
TEST_LIST att-decoder-test_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
set_tests_properties(
${att-decoder-test_TESTS} PROPERTIES ENVIRONMENT "${env-att-lib}" TIMEOUT 10 LABELS
"unittests" DISABLED "${IS_MEMCHECK}")
-1
Просмотреть файл
@@ -1,7 +1,6 @@
#
# Waitcnt ISA tests
#
project(rocprofiler-att-parser-waitcnt-tests LANGUAGES CXX)
add_executable(att-decoder-waitcnt-test)
target_link_libraries(
+52 -9
Просмотреть файл
@@ -68,6 +68,9 @@ struct att_perfcounter
{
std::string counter_name = {};
uint32_t simd_mask = 0xf;
template <typename ArchiveT>
void save(ArchiveT&) const;
};
struct config : output_config
@@ -79,6 +82,9 @@ struct config : output_config
uint64_t delay = 0;
uint64_t duration = 0;
uint64_t repeat = 0;
template <typename ArchiveT>
void save(ArchiveT& ar) const;
};
config();
@@ -114,10 +120,9 @@ struct config : output_config
rocprofiler_pc_sampling_method_t pc_sampling_method_value = ROCPROFILER_PC_SAMPLING_METHOD_NONE;
rocprofiler_pc_sampling_unit_t pc_sampling_unit_value = ROCPROFILER_PC_SAMPLING_UNIT_NONE;
std::string stats_summary_unit = get_env("ROCPROF_STATS_SUMMARY_UNITS", "nsec");
int mpi_size = get_mpi_size();
int mpi_rank = get_mpi_rank();
uint64_t att_param_shader_engine_mask =
int mpi_size = get_mpi_size();
int mpi_rank = get_mpi_rank();
uint64_t att_param_shader_engine_mask =
get_env<uint64_t>("ROCPROF_ATT_PARAM_SHADER_ENGINE_MASK", 0x1);
uint64_t att_param_buffer_size = get_env<uint64_t>("ROCPROF_ATT_PARAM_BUFFER_SIZE", 0x6000000);
uint64_t att_param_simd_select = get_env<uint64_t>("ROCPROF_ATT_PARAM_SIMD_SELECT", 0xF);
@@ -144,13 +149,30 @@ struct config : output_config
{}
};
#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR))
#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR))
template <typename ArchiveT>
void
att_perfcounter::save(ArchiveT& ar) const
{
CFG_SERIALIZE_MEMBER(counter_name);
CFG_SERIALIZE_MEMBER(simd_mask);
}
template <typename ArchiveT>
void
config::CollectionPeriod::save(ArchiveT& ar) const
{
CFG_SERIALIZE_MEMBER(delay);
CFG_SERIALIZE_MEMBER(duration);
CFG_SERIALIZE_MEMBER(repeat);
}
template <typename ArchiveT>
void
config::save(ArchiveT& ar) const
{
#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR))
#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR))
CFG_SERIALIZE_MEMBER(kernel_trace);
CFG_SERIALIZE_MEMBER(hsa_core_api_trace);
CFG_SERIALIZE_MEMBER(hsa_amd_ext_api_trace);
@@ -163,19 +185,40 @@ config::save(ArchiveT& ar) const
CFG_SERIALIZE_MEMBER(counter_collection);
CFG_SERIALIZE_MEMBER(hip_runtime_api_trace);
CFG_SERIALIZE_MEMBER(hip_compiler_api_trace);
CFG_SERIALIZE_MEMBER(kernel_rename);
CFG_SERIALIZE_MEMBER(rccl_api_trace);
CFG_SERIALIZE_MEMBER(rocdecode_api_trace);
CFG_SERIALIZE_MEMBER(mpi_rank);
CFG_SERIALIZE_MEMBER(mpi_size);
CFG_SERIALIZE_MEMBER(collection_periods);
CFG_SERIALIZE_MEMBER(counters);
CFG_SERIALIZE_MEMBER(extra_counters_contents);
CFG_SERIALIZE_MEMBER(kernel_filter_include);
CFG_SERIALIZE_MEMBER(kernel_filter_exclude);
CFG_SERIALIZE_MEMBER(kernel_filter_range);
CFG_SERIALIZE_MEMBER(demangle);
CFG_SERIALIZE_MEMBER(truncate);
CFG_SERIALIZE_MEMBER(pc_sampling_method);
CFG_SERIALIZE_MEMBER(pc_sampling_unit);
CFG_SERIALIZE_MEMBER(pc_sampling_interval);
CFG_SERIALIZE_MEMBER(pc_sampling_method_value);
CFG_SERIALIZE_MEMBER(pc_sampling_unit_value);
CFG_SERIALIZE_MEMBER(advanced_thread_trace);
CFG_SERIALIZE_MEMBER(att_serialize_all);
CFG_SERIALIZE_MEMBER(att_param_shader_engine_mask);
CFG_SERIALIZE_MEMBER(att_param_buffer_size);
CFG_SERIALIZE_MEMBER(att_param_simd_select);
CFG_SERIALIZE_MEMBER(att_param_target_cu);
CFG_SERIALIZE_MEMBER(att_capability);
CFG_SERIALIZE_MEMBER(att_param_perfcounters);
static_cast<const base_type&>(*this).save(ar);
}
#undef CFG_SERIALIZE_MEMBER
#undef CFG_SERIALIZE_NAMED_MEMBER
}
template <config_context ContextT>
config&
+41 -27
Просмотреть файл
@@ -1606,10 +1606,16 @@ using domain_stats_vec_t = tool::domain_stats_vec_t;
template <typename Tp, domain_type DomainT>
void
generate_output(tool::buffered_output<Tp, DomainT>& output_v, domain_stats_vec_t& contributions_v)
generate_output(tool::buffered_output<Tp, DomainT>& output_v,
uint64_t& num_output_v,
domain_stats_vec_t& contributions_v)
{
if(!output_v) return;
// if it has reached this point, the generator is not empty
num_output_v += 1;
// opens temporary file and sets read position to beginning
output_v.read();
if(tool::get_config().stats || tool::get_config().summary_output)
@@ -1668,38 +1674,46 @@ tool_fini(void* /*tool_data*/)
auto pc_sampling_host_trap_output =
tool::pc_sampling_host_trap_buffered_output_t{tool::get_config().pc_sampling_host_trap};
auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; };
auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; };
auto agents_output = CHECK_NOTNULL(tool_metadata)->agents;
std::sort(agents_output.begin(), agents_output.end(), node_id_sort);
auto _agents = CHECK_NOTNULL(tool_metadata)->agents;
std::sort(_agents.begin(), _agents.end(), node_id_sort);
uint64_t num_output = 0;
auto contributions = domain_stats_vec_t{};
if(tool::get_config().csv_output)
generate_output(kernel_dispatch_output, num_output, contributions);
generate_output(hsa_output, num_output, contributions);
generate_output(hip_output, num_output, contributions);
generate_output(memory_copy_output, num_output, contributions);
generate_output(memory_allocation_output, num_output, contributions);
generate_output(marker_output, num_output, contributions);
generate_output(rccl_output, num_output, contributions);
generate_output(counters_output, num_output, contributions);
generate_output(scratch_memory_output, num_output, contributions);
generate_output(rocdecode_output, num_output, contributions);
generate_output(pc_sampling_host_trap_output, num_output, contributions);
if(tool::get_config().advanced_thread_trace && !tool::get_config().att_capability.empty() &&
!tool_metadata->att_filenames.empty())
{
tool::generate_csv(tool::get_config(), *tool_metadata, _agents);
num_output += 1;
}
auto contributions = domain_stats_vec_t{};
ROCP_INFO << "Number of services generating output: " << num_output;
generate_output(kernel_dispatch_output, contributions);
generate_output(hsa_output, contributions);
generate_output(hip_output, contributions);
generate_output(memory_copy_output, contributions);
generate_output(memory_allocation_output, contributions);
generate_output(marker_output, contributions);
generate_output(rccl_output, contributions);
generate_output(counters_output, contributions);
generate_output(scratch_memory_output, contributions);
generate_output(rocdecode_output, contributions);
generate_output(pc_sampling_host_trap_output, contributions);
if(tool::get_config().csv_output && num_output > 0)
{
tool::generate_csv(tool::get_config(), *tool_metadata, agents_output);
}
if(tool::get_config().stats && tool::get_config().csv_output)
if(tool::get_config().stats && tool::get_config().csv_output && num_output > 0)
{
tool::generate_csv(tool::get_config(), *tool_metadata, contributions);
}
if(tool::get_config().advanced_thread_trace)
{
std::unordered_map<std::string_view, rocprofiler::att_wrapper::tool_att_capability_t>
const std::unordered_map<std::string_view, rocprofiler::att_wrapper::tool_att_capability_t>
tool_att_capability_map = {
{"testing", rocprofiler::att_wrapper::ATT_CAPABILITIES_TESTING},
{"summary", rocprofiler::att_wrapper::ATT_CAPABILITIES_SUMMARY},
@@ -1711,7 +1725,7 @@ tool_fini(void* /*tool_data*/)
auto att_capability_value = tool_att_capability_map.at(tool::get_config().att_capability);
auto decoder = rocprofiler::att_wrapper::ATTDecoder(att_capability_value);
ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found at ROCPORF_ATT_LIBRARY_PATH";
ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found at ROCPROF_ATT_LIBRARY_PATH";
auto codeobj = tool_metadata->get_code_object_load_info();
auto output_path = tool::format_path(tool::get_config().output_path);
for(auto& [dispatch_id, att_filename_data] : tool_metadata->att_filenames)
@@ -1731,7 +1745,7 @@ tool_fini(void* /*tool_data*/)
}
}
if(tool::get_config().json_output)
if(tool::get_config().json_output && num_output > 0)
{
auto json_ar = tool::open_json(tool::get_config());
@@ -1757,11 +1771,11 @@ tool_fini(void* /*tool_data*/)
tool::close_json(json_ar);
}
if(tool::get_config().pftrace_output)
if(tool::get_config().pftrace_output && num_output > 0)
{
tool::write_perfetto(tool::get_config(),
*tool_metadata,
_agents,
agents_output,
hip_output.get_generator(),
hsa_output.get_generator(),
kernel_dispatch_output.get_generator(),
@@ -1773,7 +1787,7 @@ tool_fini(void* /*tool_data*/)
rocdecode_output.get_generator());
}
if(tool::get_config().otf2_output)
if(tool::get_config().otf2_output && num_output > 0)
{
auto hip_elem_data = hip_output.load_all();
auto hsa_elem_data = hsa_output.load_all();
@@ -1788,7 +1802,7 @@ tool_fini(void* /*tool_data*/)
tool::write_otf2(tool::get_config(),
*tool_metadata,
getpid(),
_agents,
agents_output,
&hip_elem_data,
&hsa_elem_data,
&kernel_dispatch_elem_data,
@@ -1800,7 +1814,7 @@ tool_fini(void* /*tool_data*/)
&rocdecode_elem_data);
}
if(tool::get_config().summary_output)
if(tool::get_config().summary_output && num_output > 0)
{
tool::generate_stats(tool::get_config(), *tool_metadata, contributions);
}
+6
Просмотреть файл
@@ -33,6 +33,12 @@ include(GNUInstallDirs)
# always use lib instead of lib64
set(CMAKE_INSTALL_LIBDIR "lib")
# define the library output directory
if(PROJECT_IS_TOP_LEVEL)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}")
endif()
# needed for validation
find_package(Python3 REQUIRED)
+8 -1
Просмотреть файл
@@ -36,6 +36,14 @@ set(AMDGPU_TARGETS
"GPU targets to compile for AMDGPUs (update GPU_TARGETS, not this variable)"
FORCE)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
cmake_policy(SET CMP0135 NEW)
endif()
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30)
cmake_policy(SET CMP0167 NEW)
endif()
# build flags
add_library(rocprofiler-sdk-tests-build-flags INTERFACE)
add_library(rocprofiler-sdk::tests-build-flags ALIAS rocprofiler-sdk-tests-build-flags)
@@ -78,7 +86,6 @@ if(NOT TARGET rocprofiler-sdk::rocprofiler-sdk-cereal)
${PROJECT_BINARY_DIR}/external/build/cereal-build SUBBUILD_DIR
${PROJECT_BINARY_DIR}/external/build/cereal-subdir)
# This particular version of projD requires workarounds
fetchcontent_getproperties(cereal)
if(NOT cereal_POPULATED)
+109 -14
Просмотреть файл
@@ -21,8 +21,8 @@ find_package(rocprofiler-sdk REQUIRED)
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --advanced-thread-trace 1
--att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing
--att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
out --output-format json ${PRELOAD_ARGS} --
@@ -31,14 +31,13 @@ add_test(
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH}
FAIL_REGULAR_EXPRESSION "HSA_API|HIP_API")
LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH})
# hsa multiqueue dependency test
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --advanced-thread-trace 1
--att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing
--att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
out --output-format json ${PRELOAD_ARGS} --
@@ -47,22 +46,20 @@ add_test(
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
ATT_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib FAIL_REGULAR_EXPRESSION
"HSA_API|HIP_API")
ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
# hsa multiqueue dependency test
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-json-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att-library-path
${CMAKE_BINARY_DIR}/lib -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --att-library-path
${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -d
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
${CMAKE_CURRENT_BINARY_DIR}/att_input.json ${PRELOAD_ARGS} --
$<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-json-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" FAIL_REGULAR_EXPRESSION
"HSA_API|HIP_API")
set_tests_properties(rocprofv3-test-hsa-multiqueue-att-json-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests")
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-cmd-validate
@@ -98,3 +95,101 @@ set_tests_properties(
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
"rocprofv3-test-hsa-multiqueue-att-json-execute" FAIL_REGULAR_EXPRESSION
"AssertionError")
if(TARGET rocprofiler-sdk::att-decoder-testing AND TARGET
rocprofiler-sdk::att-decoder-summary)
set(MISSING_TEST_DECODER_LIBS OFF)
else()
set(MISSING_TEST_DECODER_LIBS ON)
endif()
function(configure_att_input _FILENAME _OUTDIR)
set(LIBRARY_OUTPUT_DIR ${_OUTDIR})
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/att_input.yml.in
${CMAKE_CURRENT_BINARY_DIR}/${_FILENAME} @ONLY)
endfunction()
configure_att_input(att_input.yml "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
configure_att_input(att_input_will_fail.yml "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
#
# Uses att_library_path in YAML input to specify ATT library path
#
add_test(
NAME rocprofv3-test-att-library-path-yaml-input
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo -- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-yaml-input
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED
"${MISSING_TEST_DECODER_LIBS}")
add_test(
NAME rocprofv3-test-att-library-path-yaml-input-will-fail
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
${CMAKE_CURRENT_BINARY_DIR}/att_input_will_fail.yml --log-level env --echo --
sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-yaml-input-will-fail
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED
"${MISSING_TEST_DECODER_LIBS}")
#
# Uses --att-library-path to specify ATT library path
#
add_test(
NAME rocprofv3-test-att-library-path-cmd-line
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse summary --log-level env --echo
-- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-cmd-line
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED
"${MISSING_TEST_DECODER_LIBS}")
add_test(
NAME rocprofv3-test-att-library-path-cmd-line-will-fail
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing --log-level env --echo
-- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-cmd-line-will-fail
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED
"${MISSING_TEST_DECODER_LIBS}")
#
# Uses ROCPROF_ATT_LIBRARY_PATH to specify ATT library path
#
add_test(NAME rocprofv3-test-att-library-path-env-var
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-parse summary
--log-level env --echo -- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-env-var
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
"ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" DISABLED
"${MISSING_TEST_DECODER_LIBS}")
add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-parse testing
--log-level env --echo -- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-env-var-will-fail
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
ENVIRONMENT
"ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att"
WILL_FAIL
ON
DISABLED
"${MISSING_TEST_DECODER_LIBS}")
+3 -3
Просмотреть файл
@@ -9,9 +9,9 @@
],
"truncate_kernels": true,
"advanced_thread_trace": true,
"att_parse" : "testing",
"att_target_cu" : 1,
"att_shader_engine_mask" : "0x11",
"att_parse": "testing",
"att_target_cu": 1,
"att_shader_engine_mask": "0x11",
"att_simd_select": "0x3",
"att_buffer_size": "0x6000000",
"att_perfcounters": "SQ_WAVES:0x1 SQ_INSTS_VALU:0x3 SQ_INSTS_SALU:0xF"
+30
Просмотреть файл
@@ -0,0 +1,30 @@
jobs:
- advanced_thread_trace: True
att_parse: summary
att_library_path:
- @LIBRARY_OUTPUT_DIR@/att
- advanced_thread_trace: True
att_parse: summary
att_library_path:
- @LIBRARY_OUTPUT_DIR@/att
- @LIBRARY_OUTPUT_DIR@
- advanced_thread_trace: True
att_parse: testing
att_library_path:
- @LIBRARY_OUTPUT_DIR@/att
- @LIBRARY_OUTPUT_DIR@
- advanced_thread_trace: True
att_parse: testing
- advanced_thread_trace: True
att_parse: testing
att_library_path:
- @LIBRARY_OUTPUT_DIR@
- advanced_thread_trace: True
att_parse: testing
att_library_path:
- @LIBRARY_OUTPUT_DIR@