diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3.py b/projects/rocprofiler-sdk/source/bin/rocprofv3.py index 9ef5f62f51..e27f02665d 100755 --- a/projects/rocprofiler-sdk/source/bin/rocprofv3.py +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3.py @@ -107,16 +107,21 @@ def search_path(path_list): def check_att_capability(args): - path = [] ROCPROFV3_DIR = os.path.dirname(os.path.realpath(__file__)) ROCM_DIR = os.path.dirname(ROCPROFV3_DIR) - support_input = {} - tmp_parser = argparse.ArgumentParser(add_help=False) + ld_library_paths = [] + for itr in os.environ.get("LD_LIBRARY_PATH", "").split(":") + [f"{ROCM_DIR}/lib"]: + # don't add duplicates + if itr not in ld_library_paths: + ld_library_paths += [itr] + + tmp_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False) tmp_parser.add_argument( "--att-library-path", default=os.environ.get( - "ATT_LIBRARY_PATH", os.environ.get("LD_LIBRARY_PATH", None) - ), + "ROCPROF_ATT_LIBRARY_PATH", ":".join(ld_library_paths) + ).split(":"), + nargs="+", type=str, required=False, ) @@ -129,52 +134,28 @@ def check_att_capability(args): required=False, ) - tmp_data = {} - att_args, unparsed_args = tmp_parser.parse_known_args(args) - tmp_keys = list(att_args.__dict__.keys()) + att_args, _ = tmp_parser.parse_known_args(args) - for itr in tmp_keys: - if has_set_attr(att_args, itr): - tmp_data[itr] = getattr(att_args, itr) - - data = dotdict(tmp_data) - if data.input: + support = search_path(att_args.att_library_path) + support_input = {} + if att_args.input: # If index of a pass in input file is a key in the support_input dict, then that pass has att-library-path arg - args_list = parse_input(data.input) + args_list = parse_input(att_args.input) for index, itr in enumerate(args_list): if itr.att_library_path: - library_path = [] - if ":" in itr.att_library_path: - library_path.extend(itr.att_library_path.split(":")) - else: - library_path.append(itr.att_library_path) - support = search_path(library_path) - # If the att-library-path in the input file for a pass is valid, then the value of index key in the dict, support_input, is updated to that valid path - if support: - support_input[index] = set(support) - else: - # If the att-library-path in the input file for a pass is invalid, then the value of index key in the dict, support_input, is empty - support_input[index] = [] - if data.att_library_path: - if ":" in data.att_library_path: - path.extend(data.att_library_path.split(":")) - else: - path.append(data.att_library_path) - else: - path.append(f"{ROCM_DIR}/lib") - path.append(f"{ROCM_DIR}/lib64") + library_path = ( + itr.att_library_path.split(":") + if isinstance(itr.att_library_path, str) + else itr.att_library_path + ) + _support = search_path(library_path) + # If the att-library-path in the input file for a pass is valid, then the value of index key in the dict, + # support_input, is updated to that valid path + # If the att-library-path in the input file for a pass is invalid, then the value of index key in the dict, + # support_input, is empty + support_input[index] = set(_support) if support else [] - support = search_path(set(path)) - if support: - if len(path) == 1: - os.environ["ATT_LIBRARY_PATH"] = path[0] - os.environ["ROCPROF_ATT_LIBRARY_PATH"] = path[0] - else: - os.environ["ATT_LIBRARY_PATH"] = ":".join(path) - os.environ["ROCPROF_ATT_LIBRARY_PATH"] = ":".join(path) - return support, support_input - - return None, support_input + return (att_args.att_library_path, set(support), support_input) class booleanArgAction(argparse.Action): @@ -279,36 +260,6 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, ROCDecode API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.", ) - pc_sampling_options = parser.add_argument_group("PC sampling options") - - add_parser_bool_argument( - pc_sampling_options, - "--pc-sampling-beta-enabled", - help="enable pc sampling support; beta version", - ) - - pc_sampling_options.add_argument( - "--pc-sampling-unit", - help="", - default=None, - type=str.lower, - choices=("instructions", "cycles", "time"), - ) - - pc_sampling_options.add_argument( - "--pc-sampling-method", - help="", - default=None, - type=str.lower, - choices=("stochastic", "host_trap"), - ) - - pc_sampling_options.add_argument( - "--pc-sampling-interval", - help="", - default=None, - type=int, - ) basic_tracing_options = parser.add_argument_group("Basic tracing options") # Add the arguments @@ -408,6 +359,37 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins nargs="*", ) + pc_sampling_options = parser.add_argument_group("PC sampling options") + + add_parser_bool_argument( + pc_sampling_options, + "--pc-sampling-beta-enabled", + help="enable pc sampling support; beta version", + ) + + pc_sampling_options.add_argument( + "--pc-sampling-unit", + help="", + default=None, + type=str.lower, + choices=("instructions", "cycles", "time"), + ) + + pc_sampling_options.add_argument( + "--pc-sampling-method", + help="", + default=None, + type=str.lower, + choices=("stochastic", "host_trap"), + ) + + pc_sampling_options.add_argument( + "--pc-sampling-interval", + help="", + default=None, + type=int, + ) + post_processing_options = parser.add_argument_group("Post-processing tracing options") add_parser_bool_argument( @@ -564,13 +546,6 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins nargs="*", ) - advanced_options.add_argument( - "--att-library-path", - default=os.environ.get( - "ATT_LIBRARY_PATH", os.environ.get("LD_LIBRARY_PATH", None) - ), - help="ATT library path to find decoder library", - ) # below is available for CI because LD_PRELOADing a library linked to a sanitizer library # causes issues in apps where HIP is part of shared library. add_parser_bool_argument( @@ -579,6 +554,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins help=argparse.SUPPRESS, ) + # just echo the command line + add_parser_bool_argument( + advanced_options, + "--echo", + help=argparse.SUPPRESS, + ) + if args is None: args = sys.argv[1:] @@ -592,22 +574,35 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins app_args = args[(idx + 1) :] break - supported_list, is_support_input = check_att_capability(rocp_args) - if supported_list or len(is_support_input) != 0: + default_att_lib_path, att_support_args, att_support_inp = check_att_capability( + rocp_args + ) + if att_support_args or len(att_support_inp) != 0: choice_list = [] - for keys, values in is_support_input.items(): + for keys, values in att_support_inp.items(): choice_list.extend(values) - if supported_list: - choice_list.extend(list(supported_list)) + if att_support_args: + choice_list.extend(list(att_support_args)) - att_options = parser.add_argument_group("Advanced Thread Trace") + # remove duplicates + choice_list = list(set(choice_list)) + + att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options") add_parser_bool_argument( att_options, "--advanced-thread-trace", + "--att", help="Enable ATT", ) + att_options.add_argument( + "--att-library-path", + help="Search path(s) to decoder library/libraries", + default=default_att_lib_path if not att_support_inp else None, + nargs="+", + ) + att_options.add_argument( "--att-target-cu", help="ATT target compute unit", @@ -639,7 +634,9 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins att_options.add_argument( "--att-parse", type=str.lower, - default=None, + default=( + choice_list[0] if len(choice_list) == 1 and not att_support_inp else None + ), help="Select ATT Parse method from the choices", choices=set(choice_list), ) @@ -651,7 +648,7 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins help="Serialize all kernels", ) - return (parser.parse_args(rocp_args), app_args, supported_list, is_support_input) + return (parser.parse_args(rocp_args), app_args, att_support_args, att_support_inp) def parse_yaml(yaml_file): @@ -792,7 +789,9 @@ def get_args(cmd_args, inp_args): and has_set_attr(inp_args, itr) and getattr(cmd_args, itr) != getattr(inp_args, itr) ): - raise RuntimeError(f"conflicting value for {itr}") + raise RuntimeError( + f"conflicting value for {itr} : {getattr(cmd_args, itr)} vs {getattr(inp_args, itr)}" + ) else: data[itr] = get_attr(itr) @@ -889,7 +888,6 @@ def run(app_args, args, **kwargs): prepend_preload = [itr for itr in args.preload if itr] append_preload = [ ROCPROF_TOOL_LIBRARY, - ROCPROF_LIST_AVAIL_TOOL_LIBRARY, ROCPROF_SDK_LIBRARY, ] @@ -1054,11 +1052,14 @@ def run(app_args, args, **kwargs): args.truncate_kernels, overwrite_if_true=True, ) - update_env( - "ROCPROF_LIST_AVAIL", - args.list_avail, - overwrite_if_true=True, - ) + + if args.list_avail: + update_env( + "ROCPROF_LIST_AVAIL_TOOL_LIBRARY", + ROCPROF_LIST_AVAIL_TOOL_LIBRARY, + overwrite_if_true=True, + ) + if args.collection_period: factors = { "hour": 60 * 60 * 1e9, @@ -1136,11 +1137,11 @@ def run(app_args, args, **kwargs): update_env("ROCPROFILER_PC_SAMPLING_BETA_ENABLED", "on") path = os.path.join(f"{ROCM_DIR}", "bin/rocprofv3_avail") if app_args: - exit_code = subprocess.check_call(["python3", path], env=app_env) + exit_code = subprocess.check_call([sys.executable, path], env=app_env) else: - app_args = ["python3", path] + app_args = [sys.executable, path] - elif not app_args: + elif not app_args and not args.echo: log_config(app_env) fatal_error("No application provided") @@ -1169,11 +1170,6 @@ def run(app_args, args, **kwargs): update_env( "ROCPROF_COUNTERS", "pmc: {}".format(" ".join(args.pmc)), overwrite=True ) - else: - update_env("ROCPROF_COUNTER_COLLECTION", False, overwrite=True) - - if args.log_level in ("info", "trace", "env"): - log_config(app_env) if args.pc_sampling_unit or args.pc_sampling_method or args.pc_sampling_interval: @@ -1256,17 +1252,10 @@ def run(app_args, args, **kwargs): args.att_serialize_all, overwrite=True, ) - if args.att_library_path: - update_env( "ROCPROF_ATT_LIBRARY_PATH", - args.att_library_path, - overwrite=True, - ) - update_env( - "ATT_LIBRARY_PATH", - args.att_library_path, + ":".join(args.att_library_path), overwrite=True, ) if args.att_percounters: @@ -1276,10 +1265,23 @@ def run(app_args, args, **kwargs): overwrite=True, ) + if args.log_level in ("info", "trace", "env"): + log_config(app_env) + if use_execv: - # does not return - os.execvpe(app_args[0], app_args, env=app_env) + if args.echo: + sys.stderr.flush() + print(f"command: {app_args}") + sys.stdout.flush() + else: + # does not return + os.execvpe(app_args[0], app_args, env=app_env) else: + if args.echo: + sys.stderr.flush() + print(f"command: {app_args}") + sys.stdout.flush() + return 0 try: exit_code = subprocess.check_call(app_args, env=app_env) if exit_code != 0: diff --git a/projects/rocprofiler-sdk/source/lib/output/output_config.hpp b/projects/rocprofiler-sdk/source/lib/output/output_config.hpp index fd7838449f..afa80f8cb6 100644 --- a/projects/rocprofiler-sdk/source/lib/output/output_config.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/output_config.hpp @@ -121,6 +121,13 @@ output_config::save(ArchiveT& ar) const CFG_SERIALIZE_NAMED_MEMBER("summary_unit", stats_summary_unit); CFG_SERIALIZE_NAMED_MEMBER("summary_file", stats_summary_file); + CFG_SERIALIZE_MEMBER(csv_output); + CFG_SERIALIZE_MEMBER(json_output); + CFG_SERIALIZE_MEMBER(pftrace_output); + CFG_SERIALIZE_MEMBER(otf2_output); + CFG_SERIALIZE_MEMBER(summary_output); + CFG_SERIALIZE_MEMBER(kernel_rename); + #undef CFG_SERIALIZE_MEMBER #undef CFG_SERIALIZE_NAMED_MEMBER } diff --git a/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp b/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp index e671d810bf..c76b79e9c6 100644 --- a/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/tmp_file_buffer.hpp @@ -210,14 +210,6 @@ read_tmp_file(domain_type type) auto& _fs = filebuf->file.stream; if(_fs.is_open()) _fs.close(); filebuf->file.open(std::ios::binary | std::ios::in); - // for(auto itr : filebuf->file.file_pos) - // { - // _fs.seekg(itr); // set to the absolute position - // if(_fs.eof()) break; - // auto _buffer = ring_buffer_t{}; - // _buffer.load(_fs); - // _data.emplace_back(std::move(_buffer)); - // } } } // namespace tool } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/tests/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/tests/CMakeLists.txt index ce93a71770..55d64f5617 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/tests/CMakeLists.txt @@ -1,19 +1,19 @@ # # # -project(rocprofiler-att-parser-tests LANGUAGES CXX) rocprofiler_deactivate_clang_tidy() add_executable(att-parser-tool-v3) +target_sources(att-parser-tool-v3 PRIVATE standalone_tool_main.cpp) target_link_libraries( att-parser-tool-v3 PRIVATE rocprofiler-sdk::rocprofiler-sdk-att-parser rocprofiler-sdk::rocprofiler-sdk-json rocprofiler-sdk::rocprofiler-sdk-common-library) -target_sources(att-parser-tool-v3 PRIVATE standalone_tool_main.cpp) add_executable(att-decoder-test) +target_sources(att-decoder-test PRIVATE att_decoder_test.cpp) target_link_libraries( att-decoder-test PRIVATE rocprofiler-sdk::rocprofiler-sdk-att-parser @@ -23,19 +23,34 @@ target_link_libraries( rocprofiler-sdk::rocprofiler-sdk-static-library GTest::gtest GTest::gtest_main) -target_sources(att-decoder-test PRIVATE att_decoder_test.cpp) -set(env-att-lib "ROCPROF_ATT_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}/../lib") add_library(att_decoder_testing SHARED) +add_library(rocprofiler-sdk::att-decoder-testing ALIAS att_decoder_testing) target_sources(att_decoder_testing PRIVATE dummy_decoder.cpp) +set_target_properties(att_decoder_testing PROPERTIES VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR}) + +add_library(att_decoder_summary SHARED) +add_library(rocprofiler-sdk::att-decoder-summary ALIAS att_decoder_summary) +target_sources(att_decoder_summary PRIVATE dummy_decoder.cpp) +set_target_properties( + att_decoder_summary + PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/att + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR}) if(NOT ROCPROFILER_MEMCHECK) - gtest_add_tests( - TARGET att-decoder-test - SOURCES att_decoder_test.cpp - TEST_LIST att-decoder-test_TESTS - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_tests_properties( - ${att-decoder-test_TESTS} PROPERTIES ENVIRONMENT "${env-att-lib}" TIMEOUT 10 - LABELS "unittests") + set(IS_MEMCHECK OFF) +else() + set(IS_MEMCHECK ON) endif() + +set(env-att-lib "ROCPROF_ATT_LIBRARY_PATH=${PROJECT_BINARY_DIR}/lib") +gtest_add_tests( + TARGET att-decoder-test + SOURCES att_decoder_test.cpp + TEST_LIST att-decoder-test_TESTS + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +set_tests_properties( + ${att-decoder-test_TESTS} PROPERTIES ENVIRONMENT "${env-att-lib}" TIMEOUT 10 LABELS + "unittests" DISABLED "${IS_MEMCHECK}") diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/waitcnt/tests/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/waitcnt/tests/CMakeLists.txt index b2958be3c2..8a13e80cbe 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/waitcnt/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/waitcnt/tests/CMakeLists.txt @@ -1,7 +1,6 @@ # # Waitcnt ISA tests # -project(rocprofiler-att-parser-waitcnt-tests LANGUAGES CXX) add_executable(att-decoder-waitcnt-test) target_link_libraries( diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp index 989f4c7330..7915baa61a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp @@ -68,6 +68,9 @@ struct att_perfcounter { std::string counter_name = {}; uint32_t simd_mask = 0xf; + + template + void save(ArchiveT&) const; }; struct config : output_config @@ -79,6 +82,9 @@ struct config : output_config uint64_t delay = 0; uint64_t duration = 0; uint64_t repeat = 0; + + template + void save(ArchiveT& ar) const; }; config(); @@ -114,10 +120,9 @@ struct config : output_config rocprofiler_pc_sampling_method_t pc_sampling_method_value = ROCPROFILER_PC_SAMPLING_METHOD_NONE; rocprofiler_pc_sampling_unit_t pc_sampling_unit_value = ROCPROFILER_PC_SAMPLING_UNIT_NONE; - std::string stats_summary_unit = get_env("ROCPROF_STATS_SUMMARY_UNITS", "nsec"); - int mpi_size = get_mpi_size(); - int mpi_rank = get_mpi_rank(); - uint64_t att_param_shader_engine_mask = + int mpi_size = get_mpi_size(); + int mpi_rank = get_mpi_rank(); + uint64_t att_param_shader_engine_mask = get_env("ROCPROF_ATT_PARAM_SHADER_ENGINE_MASK", 0x1); uint64_t att_param_buffer_size = get_env("ROCPROF_ATT_PARAM_BUFFER_SIZE", 0x6000000); uint64_t att_param_simd_select = get_env("ROCPROF_ATT_PARAM_SIMD_SELECT", 0xF); @@ -144,13 +149,30 @@ struct config : output_config {} }; +#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR)) +#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR)) + +template +void +att_perfcounter::save(ArchiveT& ar) const +{ + CFG_SERIALIZE_MEMBER(counter_name); + CFG_SERIALIZE_MEMBER(simd_mask); +} + +template +void +config::CollectionPeriod::save(ArchiveT& ar) const +{ + CFG_SERIALIZE_MEMBER(delay); + CFG_SERIALIZE_MEMBER(duration); + CFG_SERIALIZE_MEMBER(repeat); +} + template void config::save(ArchiveT& ar) const { -#define CFG_SERIALIZE_MEMBER(VAR) ar(cereal::make_nvp(#VAR, VAR)) -#define CFG_SERIALIZE_NAMED_MEMBER(NAME, VAR) ar(cereal::make_nvp(NAME, VAR)) - CFG_SERIALIZE_MEMBER(kernel_trace); CFG_SERIALIZE_MEMBER(hsa_core_api_trace); CFG_SERIALIZE_MEMBER(hsa_amd_ext_api_trace); @@ -163,19 +185,40 @@ config::save(ArchiveT& ar) const CFG_SERIALIZE_MEMBER(counter_collection); CFG_SERIALIZE_MEMBER(hip_runtime_api_trace); CFG_SERIALIZE_MEMBER(hip_compiler_api_trace); - CFG_SERIALIZE_MEMBER(kernel_rename); + CFG_SERIALIZE_MEMBER(rccl_api_trace); + CFG_SERIALIZE_MEMBER(rocdecode_api_trace); + + CFG_SERIALIZE_MEMBER(mpi_rank); + CFG_SERIALIZE_MEMBER(mpi_size); + CFG_SERIALIZE_MEMBER(collection_periods); CFG_SERIALIZE_MEMBER(counters); + CFG_SERIALIZE_MEMBER(extra_counters_contents); CFG_SERIALIZE_MEMBER(kernel_filter_include); CFG_SERIALIZE_MEMBER(kernel_filter_exclude); CFG_SERIALIZE_MEMBER(kernel_filter_range); CFG_SERIALIZE_MEMBER(demangle); CFG_SERIALIZE_MEMBER(truncate); + CFG_SERIALIZE_MEMBER(pc_sampling_method); + CFG_SERIALIZE_MEMBER(pc_sampling_unit); + CFG_SERIALIZE_MEMBER(pc_sampling_interval); + CFG_SERIALIZE_MEMBER(pc_sampling_method_value); + CFG_SERIALIZE_MEMBER(pc_sampling_unit_value); + + CFG_SERIALIZE_MEMBER(advanced_thread_trace); + CFG_SERIALIZE_MEMBER(att_serialize_all); + CFG_SERIALIZE_MEMBER(att_param_shader_engine_mask); + CFG_SERIALIZE_MEMBER(att_param_buffer_size); + CFG_SERIALIZE_MEMBER(att_param_simd_select); + CFG_SERIALIZE_MEMBER(att_param_target_cu); + CFG_SERIALIZE_MEMBER(att_capability); + CFG_SERIALIZE_MEMBER(att_param_perfcounters); + static_cast(*this).save(ar); +} #undef CFG_SERIALIZE_MEMBER #undef CFG_SERIALIZE_NAMED_MEMBER -} template config& diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp index ed220f16b8..6bf7418073 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -1606,10 +1606,16 @@ using domain_stats_vec_t = tool::domain_stats_vec_t; template void -generate_output(tool::buffered_output& output_v, domain_stats_vec_t& contributions_v) +generate_output(tool::buffered_output& output_v, + uint64_t& num_output_v, + domain_stats_vec_t& contributions_v) { if(!output_v) return; + // if it has reached this point, the generator is not empty + num_output_v += 1; + + // opens temporary file and sets read position to beginning output_v.read(); if(tool::get_config().stats || tool::get_config().summary_output) @@ -1668,38 +1674,46 @@ tool_fini(void* /*tool_data*/) auto pc_sampling_host_trap_output = tool::pc_sampling_host_trap_buffered_output_t{tool::get_config().pc_sampling_host_trap}; - auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; }; + auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; }; + auto agents_output = CHECK_NOTNULL(tool_metadata)->agents; + std::sort(agents_output.begin(), agents_output.end(), node_id_sort); - auto _agents = CHECK_NOTNULL(tool_metadata)->agents; - std::sort(_agents.begin(), _agents.end(), node_id_sort); + uint64_t num_output = 0; + auto contributions = domain_stats_vec_t{}; - if(tool::get_config().csv_output) + generate_output(kernel_dispatch_output, num_output, contributions); + generate_output(hsa_output, num_output, contributions); + generate_output(hip_output, num_output, contributions); + generate_output(memory_copy_output, num_output, contributions); + generate_output(memory_allocation_output, num_output, contributions); + generate_output(marker_output, num_output, contributions); + generate_output(rccl_output, num_output, contributions); + generate_output(counters_output, num_output, contributions); + generate_output(scratch_memory_output, num_output, contributions); + generate_output(rocdecode_output, num_output, contributions); + generate_output(pc_sampling_host_trap_output, num_output, contributions); + + if(tool::get_config().advanced_thread_trace && !tool::get_config().att_capability.empty() && + !tool_metadata->att_filenames.empty()) { - tool::generate_csv(tool::get_config(), *tool_metadata, _agents); + num_output += 1; } - auto contributions = domain_stats_vec_t{}; + ROCP_INFO << "Number of services generating output: " << num_output; - generate_output(kernel_dispatch_output, contributions); - generate_output(hsa_output, contributions); - generate_output(hip_output, contributions); - generate_output(memory_copy_output, contributions); - generate_output(memory_allocation_output, contributions); - generate_output(marker_output, contributions); - generate_output(rccl_output, contributions); - generate_output(counters_output, contributions); - generate_output(scratch_memory_output, contributions); - generate_output(rocdecode_output, contributions); - generate_output(pc_sampling_host_trap_output, contributions); + if(tool::get_config().csv_output && num_output > 0) + { + tool::generate_csv(tool::get_config(), *tool_metadata, agents_output); + } - if(tool::get_config().stats && tool::get_config().csv_output) + if(tool::get_config().stats && tool::get_config().csv_output && num_output > 0) { tool::generate_csv(tool::get_config(), *tool_metadata, contributions); } if(tool::get_config().advanced_thread_trace) { - std::unordered_map + const std::unordered_map tool_att_capability_map = { {"testing", rocprofiler::att_wrapper::ATT_CAPABILITIES_TESTING}, {"summary", rocprofiler::att_wrapper::ATT_CAPABILITIES_SUMMARY}, @@ -1711,7 +1725,7 @@ tool_fini(void* /*tool_data*/) auto att_capability_value = tool_att_capability_map.at(tool::get_config().att_capability); auto decoder = rocprofiler::att_wrapper::ATTDecoder(att_capability_value); - ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found at ROCPORF_ATT_LIBRARY_PATH"; + ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found at ROCPROF_ATT_LIBRARY_PATH"; auto codeobj = tool_metadata->get_code_object_load_info(); auto output_path = tool::format_path(tool::get_config().output_path); for(auto& [dispatch_id, att_filename_data] : tool_metadata->att_filenames) @@ -1731,7 +1745,7 @@ tool_fini(void* /*tool_data*/) } } - if(tool::get_config().json_output) + if(tool::get_config().json_output && num_output > 0) { auto json_ar = tool::open_json(tool::get_config()); @@ -1757,11 +1771,11 @@ tool_fini(void* /*tool_data*/) tool::close_json(json_ar); } - if(tool::get_config().pftrace_output) + if(tool::get_config().pftrace_output && num_output > 0) { tool::write_perfetto(tool::get_config(), *tool_metadata, - _agents, + agents_output, hip_output.get_generator(), hsa_output.get_generator(), kernel_dispatch_output.get_generator(), @@ -1773,7 +1787,7 @@ tool_fini(void* /*tool_data*/) rocdecode_output.get_generator()); } - if(tool::get_config().otf2_output) + if(tool::get_config().otf2_output && num_output > 0) { auto hip_elem_data = hip_output.load_all(); auto hsa_elem_data = hsa_output.load_all(); @@ -1788,7 +1802,7 @@ tool_fini(void* /*tool_data*/) tool::write_otf2(tool::get_config(), *tool_metadata, getpid(), - _agents, + agents_output, &hip_elem_data, &hsa_elem_data, &kernel_dispatch_elem_data, @@ -1800,7 +1814,7 @@ tool_fini(void* /*tool_data*/) &rocdecode_elem_data); } - if(tool::get_config().summary_output) + if(tool::get_config().summary_output && num_output > 0) { tool::generate_stats(tool::get_config(), *tool_metadata, contributions); } diff --git a/projects/rocprofiler-sdk/tests/CMakeLists.txt b/projects/rocprofiler-sdk/tests/CMakeLists.txt index ef26a2975e..642b3075e5 100644 --- a/projects/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/CMakeLists.txt @@ -33,6 +33,12 @@ include(GNUInstallDirs) # always use lib instead of lib64 set(CMAKE_INSTALL_LIBDIR "lib") +# define the library output directory +if(PROJECT_IS_TOP_LEVEL) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}") + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}") +endif() + # needed for validation find_package(Python3 REQUIRED) diff --git a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt index 3a4942cf0b..bd2acb4115 100644 --- a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt @@ -36,6 +36,14 @@ set(AMDGPU_TARGETS "GPU targets to compile for AMDGPUs (update GPU_TARGETS, not this variable)" FORCE) +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24) + cmake_policy(SET CMP0135 NEW) +endif() + +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30) + cmake_policy(SET CMP0167 NEW) +endif() + # build flags add_library(rocprofiler-sdk-tests-build-flags INTERFACE) add_library(rocprofiler-sdk::tests-build-flags ALIAS rocprofiler-sdk-tests-build-flags) @@ -78,7 +86,6 @@ if(NOT TARGET rocprofiler-sdk::rocprofiler-sdk-cereal) ${PROJECT_BINARY_DIR}/external/build/cereal-build SUBBUILD_DIR ${PROJECT_BINARY_DIR}/external/build/cereal-subdir) - # This particular version of projD requires workarounds fetchcontent_getproperties(cereal) if(NOT cereal_POPULATED) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt index cfe9f89c11..7365ec2ec1 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt @@ -21,8 +21,8 @@ find_package(rocprofiler-sdk REQUIRED) add_test( NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute COMMAND - $ --advanced-thread-trace 1 - --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD + $ --log-level env --advanced-thread-trace + 1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD --att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing --att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json ${PRELOAD_ARGS} -- @@ -31,14 +31,13 @@ add_test( set_tests_properties( rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH} - FAIL_REGULAR_EXPRESSION "HSA_API|HIP_API") + LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH}) # hsa multiqueue dependency test add_test( NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute COMMAND - $ --advanced-thread-trace 1 - --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD + $ --log-level env --advanced-thread-trace + 1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD --att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing --att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json ${PRELOAD_ARGS} -- @@ -47,22 +46,20 @@ add_test( set_tests_properties( rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT - ATT_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib FAIL_REGULAR_EXPRESSION - "HSA_API|HIP_API") + ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) # hsa multiqueue dependency test add_test( NAME rocprofv3-test-hsa-multiqueue-att-json-execute COMMAND - $ --att-library-path - ${CMAKE_BINARY_DIR}/lib -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i + $ --log-level env --att-library-path + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -d + ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i ${CMAKE_CURRENT_BINARY_DIR}/att_input.json ${PRELOAD_ARGS} -- $) -set_tests_properties( - rocprofv3-test-hsa-multiqueue-att-json-execute - PROPERTIES TIMEOUT 45 LABELS "integration-tests" FAIL_REGULAR_EXPRESSION - "HSA_API|HIP_API") +set_tests_properties(rocprofv3-test-hsa-multiqueue-att-json-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests") add_test( NAME rocprofv3-test-hsa-multiqueue-att-cmd-validate @@ -98,3 +95,101 @@ set_tests_properties( PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS "rocprofv3-test-hsa-multiqueue-att-json-execute" FAIL_REGULAR_EXPRESSION "AssertionError") + +if(TARGET rocprofiler-sdk::att-decoder-testing AND TARGET + rocprofiler-sdk::att-decoder-summary) + set(MISSING_TEST_DECODER_LIBS OFF) +else() + set(MISSING_TEST_DECODER_LIBS ON) +endif() + +function(configure_att_input _FILENAME _OUTDIR) + set(LIBRARY_OUTPUT_DIR ${_OUTDIR}) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/att_input.yml.in + ${CMAKE_CURRENT_BINARY_DIR}/${_FILENAME} @ONLY) +endfunction() + +configure_att_input(att_input.yml "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") +configure_att_input(att_input_will_fail.yml "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + +# +# Uses att_library_path in YAML input to specify ATT library path +# +add_test( + NAME rocprofv3-test-att-library-path-yaml-input + COMMAND $ -i + ${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo -- sleep 0) + +set_tests_properties( + rocprofv3-test-att-library-path-yaml-input + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED + "${MISSING_TEST_DECODER_LIBS}") + +add_test( + NAME rocprofv3-test-att-library-path-yaml-input-will-fail + COMMAND + $ -i + ${CMAKE_CURRENT_BINARY_DIR}/att_input_will_fail.yml --log-level env --echo -- + sleep 0) + +set_tests_properties( + rocprofv3-test-att-library-path-yaml-input-will-fail + PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED + "${MISSING_TEST_DECODER_LIBS}") + +# +# Uses --att-library-path to specify ATT library path +# +add_test( + NAME rocprofv3-test-att-library-path-cmd-line + COMMAND + $ --att --att-library-path + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse summary --log-level env --echo + -- sleep 0) + +set_tests_properties( + rocprofv3-test-att-library-path-cmd-line + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED + "${MISSING_TEST_DECODER_LIBS}") + +add_test( + NAME rocprofv3-test-att-library-path-cmd-line-will-fail + COMMAND + $ --att --att-library-path + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing --log-level env --echo + -- sleep 0) + +set_tests_properties( + rocprofv3-test-att-library-path-cmd-line-will-fail + PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED + "${MISSING_TEST_DECODER_LIBS}") + +# +# Uses ROCPROF_ATT_LIBRARY_PATH to specify ATT library path +# +add_test(NAME rocprofv3-test-att-library-path-env-var + COMMAND $ --att --att-parse summary + --log-level env --echo -- sleep 0) + +set_tests_properties( + rocprofv3-test-att-library-path-env-var + PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT + "ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" DISABLED + "${MISSING_TEST_DECODER_LIBS}") + +add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail + COMMAND $ --att --att-parse testing + --log-level env --echo -- sleep 0) + +set_tests_properties( + rocprofv3-test-att-library-path-env-var-will-fail + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + ENVIRONMENT + "ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" + WILL_FAIL + ON + DISABLED + "${MISSING_TEST_DECODER_LIBS}") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json index 6a27d932d2..03dea9e053 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json @@ -9,9 +9,9 @@ ], "truncate_kernels": true, "advanced_thread_trace": true, - "att_parse" : "testing", - "att_target_cu" : 1, - "att_shader_engine_mask" : "0x11", + "att_parse": "testing", + "att_target_cu": 1, + "att_shader_engine_mask": "0x11", "att_simd_select": "0x3", "att_buffer_size": "0x6000000", "att_perfcounters": "SQ_WAVES:0x1 SQ_INSTS_VALU:0x3 SQ_INSTS_SALU:0xF" diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in new file mode 100644 index 0000000000..c33de7b07d --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in @@ -0,0 +1,30 @@ +jobs: + - advanced_thread_trace: True + att_parse: summary + att_library_path: + - @LIBRARY_OUTPUT_DIR@/att + + - advanced_thread_trace: True + att_parse: summary + att_library_path: + - @LIBRARY_OUTPUT_DIR@/att + - @LIBRARY_OUTPUT_DIR@ + + - advanced_thread_trace: True + att_parse: testing + att_library_path: + - @LIBRARY_OUTPUT_DIR@/att + - @LIBRARY_OUTPUT_DIR@ + + - advanced_thread_trace: True + att_parse: testing + + - advanced_thread_trace: True + att_parse: testing + att_library_path: + - @LIBRARY_OUTPUT_DIR@ + + - advanced_thread_trace: True + att_parse: testing + att_library_path: + - @LIBRARY_OUTPUT_DIR@