From 0d939edbbaa9efa7670221f7d73cc7b9cebc6ff2 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Thu, 22 Feb 2024 00:16:43 -0600 Subject: [PATCH] Updates/fixes for CI, docs, tests, samples, and common library (#528) - .github/workflows/continuous_integration.yml - apt-get update before apt-get install - remove libgtest-dev - actions-comment-pull-request: v2.4.3 -> v2.5.0 - .github/workflows/formatting.yml - create-pull-request: v5 -> v6 - cmake/rocprofiler_options.cmake - remove unused ROCPROFILER_DEBUG_TRACE and ROCPROFILER_LD_AQLPROFILE options - samples/counter_collection/callback_client.cpp - corr_id field renamed to correlation_id - samples/counter_collection/client.cpp - corr_id field renamed to correlation_id - include/rocprofiler-sdk/fwd.h - In rocprofiler_record_counter_t: rename corr_id field to correlation_id - doxygen fixes - lib/common/utility.* - remove get_accurate_clock_id_impl - timestamp_ns() defaults to CLOCK_BOOTTIME - lib/rocprofiler-sdk/counters/core.cpp - fix spelling mistake: extrenal -> external - corr_id field renamed to correlation_id - lib/rocprofiler-sdk-tool/tool.cpp - fix destruction of static tool::output_file before finalization - scripts/update-docs.sh - define PROJECT_NAME - tests/async-copy-tracing/validate.py - init_time and fini_time checks - hip_api_traces, marker_api_traces - tests/common/serialization.hpp - fix save function for rocprofiler_record_counter_t following rename of corr_id to correlation_id - tests/kernel-tracing/validate.py - init_time and fini_time checks - relax test_total_runtime range - tests/rocprofv3/tracing/CMakeLists.txt - remove -M from rocprofv3-test-systrace-execute - exclude test_hsa_api_trace in rocprofv3-test-systrace-validate due to HIP API tracing - tests/rocprofv3/tracing/validate.py - update test_kernel_trace to accept mangled or demangled - tests/tools/json-tool.cpp - remove use of GLOG - include init_time and fini_time - write_json(...) 
function --- .github/workflows/continuous_integration.yml | 10 +- .github/workflows/formatting.yml | 6 +- cmake/rocprofiler_options.cmake | 3 - .../counter_collection/callback_client.cpp | 2 +- samples/counter_collection/client.cpp | 2 +- source/include/rocprofiler-sdk/fwd.h | 23 +- source/lib/common/utility.cpp | 27 +- source/lib/common/utility.hpp | 38 +-- source/lib/rocprofiler-sdk-tool/tool.cpp | 257 +++++++++++------- source/lib/rocprofiler-sdk/counters/core.cpp | 10 +- source/scripts/update-docs.sh | 2 +- tests/async-copy-tracing/validate.py | 45 +-- tests/common/serialization.hpp | 8 +- tests/kernel-tracing/validate.py | 37 ++- tests/rocprofv3/tracing/CMakeLists.txt | 5 +- tests/rocprofv3/tracing/validate.py | 10 +- tests/tools/json-tool.cpp | 40 ++- 17 files changed, 292 insertions(+), 233 deletions(-) diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 260acd4b9f..afaf12fd0f 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -89,7 +89,7 @@ jobs: run: | git config --global --add safe.directory '*' apt-get update - apt-get install -y cmake clang-tidy-11 g++-11 g++-12 libgtest-dev python3-pip + apt-get install -y cmake clang-tidy-11 g++-11 g++-12 python3-pip update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 10 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12 @@ -257,7 +257,8 @@ jobs: shell: bash run: | git config --global --add safe.directory '*' - apt-get install -y cmake libgtest-dev python3-pip gcovr wkhtmltopdf xvfb xfonts-base xfonts-75dpi xfonts-100dpi xfonts-utils xfonts-encodings libfontconfig + apt-get update + apt-get install -y cmake python3-pip gcovr wkhtmltopdf xvfb xfonts-base xfonts-75dpi xfonts-100dpi xfonts-utils xfonts-encodings 
libfontconfig python3 -m pip install -r requirements.txt python3 -m pip install pytest pycobertura @@ -356,7 +357,7 @@ jobs: - name: Write Code Coverage Comment if: github.event_name == 'pull_request' timeout-minutes: 5 - uses: thollander/actions-comment-pull-request@v2.4.3 + uses: thollander/actions-comment-pull-request@v2.5.0 with: comment_tag: codecov-report filePath: .codecov/report.md @@ -471,7 +472,8 @@ jobs: shell: bash run: | git config --global --add safe.directory '*' - apt-get install -y cmake libgtest-dev python3-pip libasan8 libtsan2 software-properties-common + apt-get update + apt-get install -y cmake python3-pip libasan8 libtsan2 software-properties-common python3 -m pip install -r requirements.txt python3 -m pip install pytest add-apt-repository ppa:ubuntu-toolchain-r/test diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index a72bf6b422..daf0e1bc2a 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -48,7 +48,7 @@ jobs: - name: Create pull request if: failure() - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: commit-message: "cmake formatting (cmake-format)" branch: ${{ steps.extract_branch.outputs.branch }}-cmake-format @@ -90,7 +90,7 @@ jobs: - name: Create pull request if: failure() - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: commit-message: "source formatting (clang-format v11)" branch: ${{ steps.extract_branch.outputs.branch }}-clang-format @@ -137,7 +137,7 @@ jobs: - name: Create pull request if: failure() - uses: peter-evans/create-pull-request@v5 + uses: peter-evans/create-pull-request@v6 with: commit-message: "python formatting (black)" branch: ${{ steps.extract_branch.outputs.branch }}-python-format diff --git a/cmake/rocprofiler_options.cmake b/cmake/rocprofiler_options.cmake index 6f6faba343..fe3b073b57 100644 --- a/cmake/rocprofiler_options.cmake +++ 
b/cmake/rocprofiler_options.cmake @@ -56,9 +56,6 @@ if(ROCPROFILER_BUILD_TESTS) "Enable building gtest (Google testing) library internally" ON ADVANCED) endif() -rocprofiler_add_option(ROCPROFILER_DEBUG_TRACE "Enable debug tracing" OFF ADVANCED) -rocprofiler_add_option(ROCPROFILER_LD_AQLPROFILE - "Enable direct loading of AQL-profile HSA extension" OFF ADVANCED) rocprofiler_add_option(ROCPROFILER_ENABLE_CLANG_TIDY "Enable clang-tidy checks" OFF ADVANCED) diff --git a/samples/counter_collection/callback_client.cpp b/samples/counter_collection/callback_client.cpp index 2c3ea18db4..82ad66c547 100644 --- a/samples/counter_collection/callback_client.cpp +++ b/samples/counter_collection/callback_client.cpp @@ -79,7 +79,7 @@ record_callback(rocprofiler_queue_id_t, for(size_t i = 0; i < record_count; ++i) { ss << "(Id: " << record_data[i].id << " Value [D]: " << record_data[i].counter_value - << " Corr_Id: " << record_data[i].corr_id.internal << "),"; + << " Corr_Id: " << record_data[i].correlation_id.internal << "),"; } auto* output_stream = static_cast(callback_data_args); if(!output_stream) throw std::runtime_error{"nullptr to output stream"}; diff --git a/samples/counter_collection/client.cpp b/samples/counter_collection/client.cpp index 24ec2edebc..8e82640887 100644 --- a/samples/counter_collection/client.cpp +++ b/samples/counter_collection/client.cpp @@ -99,7 +99,7 @@ buffered_callback(rocprofiler_context_id_t, // Print the returned counter data. 
auto* record = static_cast(header->payload); ss << "(Id: " << record->id << " Value [D]: " << record->counter_value - << " Corr_Id: " << record->corr_id.internal << "),"; + << " Corr_Id: " << record->correlation_id.internal << "),"; } } diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index e85266f522..fc4e59297d 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -344,8 +344,8 @@ typedef uint64_t rocprofiler_counter_dimension_id_t; */ typedef union rocprofiler_user_data_t { - uint64_t value; - void* ptr; + uint64_t value; ///< usage example: set to process id, thread id, etc. + void* ptr; ///< usage example: set to address of data allocation } rocprofiler_user_data_t; //--------------------------------------------------------------------------------------// @@ -504,10 +504,12 @@ rocprofiler_record_header_compute_hash(uint32_t category, uint32_t kind) */ typedef struct { - const char* name; - size_t instance_size; - rocprofiler_counter_dimension_id_t - id; //<< Id for this dimension used by @ref rocprofiler_query_record_dimension_position + const char* name; + size_t instance_size; + rocprofiler_counter_dimension_id_t id; + + /// @var id + /// @brief Id for this dimension used by @ref rocprofiler_query_record_dimension_position } rocprofiler_record_dimension_info_t; /** @@ -515,9 +517,12 @@ typedef struct */ typedef struct { - rocprofiler_counter_instance_id_t id; - double counter_value; //<< counter value - rocprofiler_correlation_id_t corr_id; + rocprofiler_counter_instance_id_t id; ///< counter identifier + double counter_value; ///< counter value + rocprofiler_correlation_id_t correlation_id; + + /// @var correlation_id + /// @brief Used to correlate the kernel data to an API call } rocprofiler_record_counter_t; /** diff --git a/source/lib/common/utility.cpp b/source/lib/common/utility.cpp index 66ff3ccddf..8ab6bea2dc 100644 --- a/source/lib/common/utility.cpp +++ 
b/source/lib/common/utility.cpp @@ -62,35 +62,10 @@ get_clock_name(clockid_t _id) default: break; } return "CLOCK_UNKNOWN"; +#undef CLOCK_NAME_CASE_STATEMENT } } // namespace -clockid_t -get_accurate_clock_id_impl() -{ - auto clock = CLOCK_MONOTONIC; - utsname kernelInfo; - if(uname(&kernelInfo) == 0) - { - try - { - std::string ver = kernelInfo.release; - size_t idx; - int major = std::stoi(ver, &idx); - int minor = std::stoi(ver.substr(idx + 1)); - if(major > 4 || ((major == 4) && (minor >= 4))) - { - clock = CLOCK_MONOTONIC_RAW; - } - } catch(...) - { - // Kernel version string doesn't conform to the standard pattern. - // Keep using the "safe" (non-RAW) clock. - } - } - return clock; -} - uint64_t get_clock_period_ns_impl(clockid_t _clk_id) { diff --git a/source/lib/common/utility.hpp b/source/lib/common/utility.hpp index 1ddb66d44a..162af7e840 100644 --- a/source/lib/common/utility.hpp +++ b/source/lib/common/utility.hpp @@ -48,9 +48,6 @@ namespace rocprofiler { namespace common { -clockid_t -get_accurate_clock_id_impl(); - uint64_t get_clock_period_ns_impl(clockid_t _clk_id); @@ -62,20 +59,6 @@ get_tid() return _v; } -inline clockid_t -get_accurate_clock_id() -{ - static auto clk_id = get_accurate_clock_id_impl(); - return clk_id; -} - -inline uint64_t -get_accurate_clock_period_ns() -{ - static auto clk_period = get_clock_period_ns_impl(get_accurate_clock_id()); - return clk_period; -} - inline uint64_t get_ticks(clockid_t clk_id_v) noexcept { @@ -92,24 +75,17 @@ get_ticks(clockid_t clk_id_v) noexcept return (static_cast(ts.tv_sec) * nanosec) + static_cast(ts.tv_nsec); } -// this equates to HSA-runtime library implementation of os::ReadAccurateClock() +// CLOCK_MONOTONIC_RAW equates to HSA-runtime library implementation of os::ReadAccurateClock() +// CLOCK_BOOTTIME equates to HSA-runtime library implementation of os::ReadSystemClock() +template inline uint64_t timestamp_ns() { - auto&& clk_period = get_accurate_clock_period_ns(); - 
if(ROCPROFILER_LIKELY(clk_period == 1)) return get_ticks(get_accurate_clock_id()); - return get_ticks(get_accurate_clock_id()) / clk_period; -} + constexpr auto _clk = ClockT; + static auto _clk_period = get_clock_period_ns_impl(_clk); -// this equates to HSA-runtime library implementation of os::ReadSystemClock() -inline uint64_t -system_timestamp_ns() -{ - constexpr auto boottime_clk = CLOCK_BOOTTIME; - static auto boottime_clk_period = get_clock_period_ns_impl(boottime_clk); - - if(ROCPROFILER_LIKELY(boottime_clk_period == 1)) return get_ticks(boottime_clk); - return get_ticks(boottime_clk) / boottime_clk_period; + if(ROCPROFILER_LIKELY(_clk_period == 1)) return get_ticks(_clk); + return get_ticks(_clk) / _clk_period; } std::vector diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 5c60b7f368..c67c48f750 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -52,104 +52,154 @@ #include #include -namespace common = ::rocprofiler::common; -namespace tool = ::rocprofiler::tool; -static const uint32_t lds_block_size = 128 * 4; +namespace common = ::rocprofiler::common; +namespace tool = ::rocprofiler::tool; namespace -{} // namespace +{ +constexpr uint32_t lds_block_size = 128 * 4; -auto& +auto destructors = new std::vector>{}; + +template +Tp& +get_dereference(Tp* ptr) +{ + return *CHECK_NOTNULL(ptr); +} + +template +void +add_destructor(Tp*& ptr) +{ + static auto _mutex = std::mutex{}; + auto _lk = std::unique_lock{_mutex}; + destructors->emplace_back([&ptr]() { + delete ptr; + ptr = nullptr; + }); +} + +#define ADD_DESTRUCTOR(PTR) \ + { \ + static auto _once = std::once_flag{}; \ + std::call_once(_once, []() { add_destructor(PTR); }); \ + } + +tool::output_file*& get_hsa_api_file() { - static auto _v = tool::output_file{"hsa_api_trace", - tool::csv::api_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - 
"Start_Timestamp", - "End_Timestamp"}}; + static auto* _v = new tool::output_file{"hsa_api_trace", + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + ADD_DESTRUCTOR(_v); return _v; } -auto& +tool::output_file*& get_hip_api_file() { - static auto _v = tool::output_file{"hip_api_trace", - tool::csv::api_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; + static auto* _v = new tool::output_file{"hip_api_trace", + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + ADD_DESTRUCTOR(_v); return _v; } -auto& +tool::output_file*& get_kernel_trace_file() { - static auto _v = tool::output_file{"kernel_trace", - tool::csv::kernel_trace_csv_encoder{}, - {"Kind", - "Agent_Id", - "Queue_Id", - "Kernel_Id", - "Kernel_Name", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp", - "Private_Segment_Size", - "Group_Segment_Size", - "Workgroup_Size_X", - "Workgroup_Size_Y", - "Workgroup_Size_Z", - "Grid_Size_X", - "Grid_Size_Y", - "Grid_Size_Z"}}; + static auto* _v = new tool::output_file{"kernel_trace", + tool::csv::kernel_trace_csv_encoder{}, + {"Kind", + "Agent_Id", + "Queue_Id", + "Kernel_Id", + "Kernel_Name", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp", + "Private_Segment_Size", + "Group_Segment_Size", + "Workgroup_Size_X", + "Workgroup_Size_Y", + "Workgroup_Size_Z", + "Grid_Size_X", + "Grid_Size_Y", + "Grid_Size_Z"}}; + ADD_DESTRUCTOR(_v); return _v; } -auto& +tool::output_file*& get_counter_collection_file() { - static auto _v = tool::output_file{"counter_collection", - tool::csv::counter_collection_csv_encoder{}, - {"Counter_Id", - "Agent_Id", - "Queue_Id", - "Process_Id", - "Thread_Id", - "Grid_Size", - "Kernel-Name", - "Workgroup_Size", - "LDS_Block_Size", - 
"Scratch_Size", - "VGPR_Count", - "SGPR_Count", - "Counter_Name", - "Counter_Value"}}; + static auto* _v = new tool::output_file{"counter_collection", + tool::csv::counter_collection_csv_encoder{}, + {"Counter_Id", + "Agent_Id", + "Queue_Id", + "Process_Id", + "Thread_Id", + "Grid_Size", + "Kernel-Name", + "Workgroup_Size", + "LDS_Block_Size", + "Scratch_Size", + "VGPR_Count", + "SGPR_Count", + "Counter_Name", + "Counter_Value"}}; + ADD_DESTRUCTOR(_v); return _v; } -auto& +tool::output_file*& get_memory_copy_trace_file() { - static auto _v = tool::output_file{"memory_copy_trace", - tool::csv::memory_copy_csv_encoder{}, - {"Kind", - "Direction", - "Source_Agent_Id", - "Destination_Agent_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; + static auto* _v = new tool::output_file{"memory_copy_trace", + tool::csv::memory_copy_csv_encoder{}, + {"Kind", + "Direction", + "Source_Agent_Id", + "Destination_Agent_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + ADD_DESTRUCTOR(_v); return _v; } +tool::output_file*& +get_marker_api_file() +{ + static auto* _v = new tool::output_file{"marker_api_trace", + tool::csv::marker_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + ADD_DESTRUCTOR(_v); + return _v; +} + +#undef ADD_DESTRUCTOR + struct marker_entry { uint64_t cid = 0; @@ -159,21 +209,6 @@ struct marker_entry std::string message = {}; }; -auto& -get_marker_api_file() -{ - static auto _v = tool::output_file{"marker_api_trace", - tool::csv::marker_csv_encoder{}, - {"Domain", - "Function", - "Process_Id", - "Thread_Id", - "Correlation_Id", - "Start_Timestamp", - "End_Timestamp"}}; - return _v; -} - struct buffer_ids { rocprofiler_buffer_id_t hsa_api_trace = {}; @@ -283,7 +318,7 @@ cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record, record.correlation_id.internal, user_data->value, ts); - get_marker_api_file() << ss.str(); + 
get_dereference(get_marker_api_file()) << ss.str(); } } } @@ -319,7 +354,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, record.correlation_id.internal, ts, ts); - get_marker_api_file() << ss.str(); + get_dereference(get_marker_api_file()) << ss.str(); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA) @@ -349,7 +384,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, auto ss = std::stringstream{}; tool::csv::marker_csv_encoder::write_row( ss, kind_name, val.message, val.pid, val.tid, val.cid, val.data.value, ts); - get_marker_api_file() << ss.str(); + get_dereference(get_marker_api_file()) << ss.str(); } } else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA) @@ -384,7 +419,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, _entry.cid, _entry.data.value, ts); - get_marker_api_file() << ss.str(); + get_dereference(get_marker_api_file()) << ss.str(); global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id); } @@ -408,7 +443,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, record.correlation_id.internal, user_data->value, ts); - get_marker_api_file() << ss.str(); + get_dereference(get_marker_api_file()) << ss.str(); } } } @@ -504,7 +539,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, record->grid_size.y, record->grid_size.z); - get_kernel_trace_file() << kernel_trace_ss.str(); + get_dereference(get_kernel_trace_file()) << kernel_trace_ss.str(); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API || header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API || @@ -525,7 +560,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, record->start_timestamp, record->end_timestamp); - get_hsa_api_file() << hsa_trace_ss.str(); + get_dereference(get_hsa_api_file()) << hsa_trace_ss.str(); } else if(header->kind == 
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) { @@ -543,7 +578,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, record->start_timestamp, record->end_timestamp); - get_memory_copy_trace_file() << memory_copy_trace_ss.str(); + get_dereference(get_memory_copy_trace_file()) << memory_copy_trace_ss.str(); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API || header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) @@ -562,7 +597,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, record->start_timestamp, record->end_timestamp); - get_hip_api_file() << hip_trace_ss.str(); + get_dereference(get_hip_api_file()) << hip_trace_ss.str(); } else { @@ -575,7 +610,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, { auto* profiler_record = static_cast(header->payload); rocprofiler_tool_kernel_properties_t kernel_properties = - GetKernelProperties(profiler_record->corr_id.internal); + GetKernelProperties(profiler_record->correlation_id.internal); rocprofiler_counter_id_t counter_id; size_t pos; rocprofiler_counter_info_v0_t version; @@ -606,7 +641,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, fmt::format("{}[{}]", version.name, pos), profiler_record->counter_value); - get_counter_collection_file() << counter_collection_ss.str(); + get_dereference(get_counter_collection_file()) << counter_collection_ss.str(); } } } @@ -925,27 +960,45 @@ tool_fini(void* tool_data) flush(); rocprofiler_stop_context(get_client_ctx()); + if(destructors) + { + for(const auto& itr : *destructors) + itr(); + delete destructors; + destructors = nullptr; + } + (void) (tool_data); } +} // namespace extern "C" rocprofiler_tool_configure_result_t* -rocprofiler_configure(uint32_t /*version*/, - const char* /*runtime_version*/, +rocprofiler_configure(uint32_t version, + const char* runtime_version, uint32_t priority, rocprofiler_client_id_t* id) { common::init_logging("ROCPROF_LOG_LEVEL"); FLAGS_colorlogtostderr = true; 
- // only activate if main tool - if(priority > 0) return nullptr; - // set the client name - id->name = "rocprofiler-tool"; + id->name = "rocprofv3"; // store client info client_identifier = id; + // note that rocprofv3 is not the primary tool + LOG_IF(WARNING, priority > 0) << id->name << " has a priority of " << priority + << " (not primary tool)"; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = version % 100; + + LOG(INFO) << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch + << " (" << runtime_version << ")"; + // create configure data static auto cfg = rocprofiler_tool_configure_result_t{ sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr}; diff --git a/source/lib/rocprofiler-sdk/counters/core.cpp b/source/lib/rocprofiler-sdk/counters/core.cpp index 21fe1435d0..e3649ab8d9 100644 --- a/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/core.cpp @@ -265,10 +265,10 @@ queue_cb(const std::shared_ptr& info, if(const auto* _corr_id = correlation_id) { _corr_id_v.internal = _corr_id->internal; - if(const auto* extrenal = + if(const auto* external = rocprofiler::common::get_val(extern_corr_ids, info->internal_context)) { - _corr_id_v.external = *extrenal; + _corr_id_v.external = *external; } } @@ -380,10 +380,10 @@ completed_cb(const std::shared_ptr& info, if(const auto* _corr_id = session.correlation_id) { _corr_id_v.internal = _corr_id->internal; - if(const auto* extrenal = + if(const auto* external = rocprofiler::common::get_val(session.extern_corr_ids, info->internal_context)) { - _corr_id_v.external = *extrenal; + _corr_id_v.external = *external; } } @@ -396,7 +396,7 @@ completed_cb(const std::shared_ptr& info, for(auto& val : *ret) { - val.corr_id = _corr_id_v; + val.correlation_id = _corr_id_v; if(buf) buf->emplace(ROCPROFILER_BUFFER_CATEGORY_COUNTERS, 0, val); else diff 
--git a/source/scripts/update-docs.sh b/source/scripts/update-docs.sh index c6660d8862..09357af319 100755 --- a/source/scripts/update-docs.sh +++ b/source/scripts/update-docs.sh @@ -21,7 +21,7 @@ message "Changing directory to ${WORK_DIR}" cd ${WORK_DIR} message "Generating rocprofiler-sdk.dox" -cmake -DSOURCE_DIR=${SOURCE_DIR} -P ${WORK_DIR}/generate-doxyfile.cmake +cmake -DSOURCE_DIR=${SOURCE_DIR} -DPROJECT_NAME="Rocprofiler SDK" -P ${WORK_DIR}/generate-doxyfile.cmake message "Generating doxygen xml files" doxygen rocprofiler-sdk.dox diff --git a/tests/async-copy-tracing/validate.py b/tests/async-copy-tracing/validate.py index 1e03d16b03..7e435dc362 100644 --- a/tests/async-copy-tracing/validate.py +++ b/tests/async-copy-tracing/validate.py @@ -8,7 +8,8 @@ import pytest def node_exists(name, data, min_len=1): assert name in data assert data[name] is not None - assert len(data[name]) >= min_len + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len def test_data_structure(input_data): @@ -19,6 +20,12 @@ def test_data_structure(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] + node_exists("metadata", sdk_data) + node_exists("pid", sdk_data["metadata"]) + node_exists("main_tid", sdk_data["metadata"]) + node_exists("init_time", sdk_data["metadata"]) + node_exists("fini_time", sdk_data["metadata"]) + node_exists("agents", sdk_data) node_exists("call_stack", sdk_data) node_exists("callback_records", sdk_data) @@ -28,11 +35,15 @@ def test_data_structure(input_data): node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) node_exists("hsa_api_traces", sdk_data["callback_records"]) + node_exists("hip_api_traces", sdk_data["callback_records"], 0) + node_exists("marker_api_traces", sdk_data["callback_records"]) node_exists("names", sdk_data["buffer_records"]) node_exists("kernel_dispatches", sdk_data["buffer_records"]) node_exists("memory_copies", 
sdk_data["buffer_records"], 4) node_exists("hsa_api_traces", sdk_data["buffer_records"]) + node_exists("hip_api_traces", sdk_data["buffer_records"], 0) + node_exists("marker_api_traces", sdk_data["buffer_records"]) def test_timestamps(input_data): @@ -41,7 +52,7 @@ def test_timestamps(input_data): cb_start = {} cb_end = {} - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["callback_records"][titr]: cid = itr["record"]["correlation_id"]["internal"] phase = itr["record"]["phase"] @@ -56,18 +67,20 @@ def test_timestamps(input_data): for itr in sdk_data["buffer_records"][titr]: assert itr["start_timestamp"] <= itr["end_timestamp"] - for itr in sdk_data["buffer_records"]["memory_copies"]: - assert itr["start_timestamp"] <= itr["end_timestamp"] + for titr in ["kernel_dispatches", "memory_copies"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] < itr["end_timestamp"] + assert itr["correlation_id"]["internal"] > 0 + assert itr["correlation_id"]["external"] > 0 + assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"] + assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"] + assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"] + assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"] - for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - assert itr["start_timestamp"] < itr["end_timestamp"] - assert itr["correlation_id"]["internal"] > 0 - assert itr["correlation_id"]["external"] > 0 - - api_start = cb_start[itr["correlation_id"]["internal"]] - api_end = cb_end[itr["correlation_id"]["internal"]] - assert api_start < itr["start_timestamp"] - assert api_end <= itr["end_timestamp"] + api_start = cb_start[itr["correlation_id"]["internal"]] + api_end = cb_end[itr["correlation_id"]["internal"]] + assert api_start < itr["start_timestamp"] + assert api_end <= itr["end_timestamp"] def 
test_internal_correlation_ids(input_data): @@ -75,7 +88,7 @@ def test_internal_correlation_ids(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] api_corr_ids = [] - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["callback_records"][titr]: api_corr_ids.append(itr["record"]["correlation_id"]["internal"]) @@ -101,7 +114,7 @@ def test_external_correlation_ids(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] extern_corr_ids = [] - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["callback_records"][titr]: assert itr["record"]["correlation_id"]["external"] > 0 assert ( @@ -110,7 +123,7 @@ def test_external_correlation_ids(input_data): extern_corr_ids.append(itr["record"]["correlation_id"]["external"]) extern_corr_ids = list(set(sorted(extern_corr_ids))) - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["buffer_records"][titr]: assert itr["correlation_id"]["external"] > 0 assert itr["thread_id"] == itr["correlation_id"]["external"] diff --git a/tests/common/serialization.hpp b/tests/common/serialization.hpp index 389be0899d..efff8acb47 100644 --- a/tests/common/serialization.hpp +++ b/tests/common/serialization.hpp @@ -205,9 +205,9 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_record_t data) { SAVE_DATA_FIELD(context_id); SAVE_DATA_FIELD(thread_id); - SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(kind); SAVE_DATA_FIELD(operation); + SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(phase); } @@ -217,8 +217,8 @@ save_buffer_tracing_api_record(ArchiveT& ar, Tp data) { SAVE_DATA_FIELD(size); SAVE_DATA_FIELD(kind); - SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(operation); + SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(start_timestamp); 
SAVE_DATA_FIELD(end_timestamp); SAVE_DATA_FIELD(thread_id); @@ -237,7 +237,7 @@ save(ArchiveT& ar, rocprofiler_record_counter_t data) { SAVE_DATA_FIELD(id); SAVE_DATA_FIELD(counter_value); - SAVE_DATA_FIELD(corr_id); + SAVE_DATA_FIELD(correlation_id); } template @@ -278,8 +278,8 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data) { SAVE_DATA_FIELD(size); SAVE_DATA_FIELD(kind); - SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(operation); + SAVE_DATA_FIELD(correlation_id); SAVE_DATA_FIELD(start_timestamp); SAVE_DATA_FIELD(end_timestamp); SAVE_DATA_FIELD(dst_agent_id); diff --git a/tests/kernel-tracing/validate.py b/tests/kernel-tracing/validate.py index 08c67553a3..4c6c01360e 100644 --- a/tests/kernel-tracing/validate.py +++ b/tests/kernel-tracing/validate.py @@ -8,7 +8,8 @@ import pytest def node_exists(name, data, min_len=1): assert name in data assert data[name] is not None - assert len(data[name]) >= min_len + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len def test_data_structure(input_data): @@ -19,6 +20,12 @@ def test_data_structure(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] + node_exists("metadata", sdk_data) + node_exists("pid", sdk_data["metadata"]) + node_exists("main_tid", sdk_data["metadata"]) + node_exists("init_time", sdk_data["metadata"]) + node_exists("fini_time", sdk_data["metadata"]) + node_exists("agents", sdk_data) node_exists("call_stack", sdk_data) node_exists("callback_records", sdk_data) @@ -60,18 +67,20 @@ def test_timestamps(input_data): for itr in sdk_data["buffer_records"][titr]: assert itr["start_timestamp"] <= itr["end_timestamp"] - for itr in sdk_data["buffer_records"]["memory_copies"]: - assert itr["start_timestamp"] <= itr["end_timestamp"] + for titr in ["kernel_dispatches", "memory_copies"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] < itr["end_timestamp"] + assert itr["correlation_id"]["internal"] > 0 + assert 
itr["correlation_id"]["external"] > 0 + assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"] + assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"] + assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"] + assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"] - for itr in sdk_data["buffer_records"]["kernel_dispatches"]: - assert itr["start_timestamp"] < itr["end_timestamp"] - assert itr["correlation_id"]["internal"] > 0 - assert itr["correlation_id"]["external"] > 0 - - api_start = cb_start[itr["correlation_id"]["internal"]] - api_end = cb_end[itr["correlation_id"]["internal"]] - assert api_start < itr["start_timestamp"] - assert api_end <= itr["end_timestamp"] + api_start = cb_start[itr["correlation_id"]["internal"]] + api_end = cb_end[itr["correlation_id"]["internal"]] + assert api_start < itr["start_timestamp"] + assert api_end <= itr["end_timestamp"] def test_total_runtime(input_data): @@ -84,8 +93,8 @@ def test_total_runtime(input_data): expected_runtime = 1.0e3 # one second in milliseconds - assert (sum(runtime_data) * 1.0e-6) >= (0.9 * expected_runtime) - assert (sum(runtime_data) * 1.0e-6) <= (1.1 * expected_runtime) + assert (sum(runtime_data) * 1.0e-6) >= (0.8 * expected_runtime) + assert (sum(runtime_data) * 1.0e-6) <= (1.2 * expected_runtime) def test_internal_correlation_ids(input_data): diff --git a/tests/rocprofv3/tracing/CMakeLists.txt b/tests/rocprofv3/tracing/CMakeLists.txt index c9c5601027..5c72cf70a1 100644 --- a/tests/rocprofv3/tracing/CMakeLists.txt +++ b/tests/rocprofv3/tracing/CMakeLists.txt @@ -84,7 +84,7 @@ set_tests_properties( add_test( NAME rocprofv3-test-systrace-execute COMMAND - $ -M --sys-trace -d + $ --sys-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-systrace -o out $) @@ -102,7 +102,8 @@ set_tests_properties( add_test( NAME rocprofv3-test-systrace-validate COMMAND - ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --hsa-input + ${Python3_EXECUTABLE} 
${CMAKE_CURRENT_BINARY_DIR}/validate.py -k + "not test_hsa_api_trace" --hsa-input ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-systrace/out_hsa_api_trace.csv --kernel-input ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-systrace/out_kernel_trace.csv diff --git a/tests/rocprofv3/tracing/validate.py b/tests/rocprofv3/tracing/validate.py index 310b80137a..d7e28faedd 100644 --- a/tests/rocprofv3/tracing/validate.py +++ b/tests/rocprofv3/tracing/validate.py @@ -2,7 +2,6 @@ import sys import pytest -import re def test_hsa_api_trace(hsa_input_data): @@ -37,13 +36,18 @@ def test_hsa_api_trace(hsa_input_data): def test_kernel_trace(kernel_input_data): - mangled_kernel_name = "_Z15matrixTransposePfS_i.kd" + valid_kernel_names = ( + "_Z15matrixTransposePfS_i.kd", + "matrixTranspose(float*, float*, int)", + ) + + assert len(kernel_input_data) == 1 for row in kernel_input_data: assert row["Kind"] == "KERNEL_DISPATCH" assert int(row["Agent_Id"]) > 0 assert int(row["Queue_Id"]) > 0 assert int(row["Kernel_Id"]) > 0 - assert row["Kernel_Name"] == mangled_kernel_name + assert row["Kernel_Name"] in valid_kernel_names assert int(row["Correlation_Id"]) > 0 assert int(row["Workgroup_Size_X"]) == 4 assert int(row["Workgroup_Size_Y"]) == 4 diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index 0c04ffd79c..e6ec8c467c 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -43,8 +43,6 @@ #include #include -#include - #include #include #include @@ -432,7 +430,7 @@ dispatch_callback(rocprofiler_queue_id_t, /*queue_id*/ // Counters we want to collect (here its SQ_WAVES_sum) auto* counters_env = getenv("ROCPROF_COUNTERS"); if(std::string(counters_env) != "SQ_WAVES_sum") - LOG(FATAL) << "Counter not supported in the test tool"; + throw std::runtime_error{"Counter not supported in the test tool"}; std::set counters_to_collect = {"SQ_WAVES_sum"}; // GPU Counter IDs @@ -728,9 +726,16 @@ auto buffers = std::array{&hsa_api_buffered_buffer, auto agents = 
std::vector{}; +rocprofiler_timestamp_t init_time = 0; +rocprofiler_timestamp_t fini_time = 0; +rocprofiler_thread_id_t main_tid = 0; + int tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { + rocprofiler_get_timestamp(&init_time); + rocprofiler_get_thread_id(&main_tid); + assert(tool_data != nullptr); rocprofiler_available_agents_cb_t iterate_cb = @@ -997,6 +1002,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) return 0; } +void +write_json(call_stack_t* _call_stack); + void tool_fini(void* tool_data) { @@ -1006,6 +1014,8 @@ tool_fini(void* tool_data) stop(); flush(); + rocprofiler_get_timestamp(&fini_time); + std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalizing... agents=" << agents.size() << ", code_object_callback_records=" << code_object_records.size() @@ -1027,6 +1037,17 @@ tool_fini(void* tool_data) _call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""}); } + write_json(_call_stack); + + std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalization complete.\n" + << std::flush; + + delete _call_stack; +} + +void +write_json(call_stack_t* _call_stack) +{ auto ofname = std::string{"rocprofiler-tool-results.json"}; if(auto* eofname = getenv("ROCPROFILER_TOOL_OUTPUT_FILE")) ofname = eofname; @@ -1068,6 +1089,14 @@ tool_fini(void* tool_data) json_ar.setNextName("rocprofiler-sdk-json-tool"); json_ar.startNode(); + json_ar.setNextName("metadata"); + json_ar.startNode(); + json_ar(cereal::make_nvp("pid", getpid())); + json_ar(cereal::make_nvp("main_tid", main_tid)); + json_ar(cereal::make_nvp("init_time", init_time)); + json_ar(cereal::make_nvp("fini_time", fini_time)); + json_ar.finishNode(); + json_ar(cereal::make_nvp("agents", agents)); if(_call_stack) json_ar(cereal::make_nvp("call_stack", *_call_stack)); @@ -1114,11 +1143,6 @@ tool_fini(void* tool_data) *ofs << std::flush; if(cleanup) cleanup(ofs); - - std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] 
Finalization complete.\n" - << std::flush; - - delete _call_stack; } void