From b610b5ff56483685d9a8cc6b5d64cf2b81771bd6 Mon Sep 17 00:00:00 2001 From: "Madsen, Jonathan" Date: Thu, 6 Feb 2025 08:34:56 -0600 Subject: [PATCH] SDK: No bg thread if no clients use SDK (#123) * SDK: No bg thread if no clients use SDK * Update CHANGELOG --------- Co-authored-by: Jonathan R. Madsen [ROCm/rocprofiler-sdk commit: 0fbe6cc7b6a522dd5a9c5a0b4266056372db2285] --- projects/rocprofiler-sdk/CHANGELOG.md | 2 + .../counters/tests/device_counting.cpp | 44 +++++++++---------- .../lib/rocprofiler-sdk/registration.cpp | 19 +++++++- .../tests/c-tool/CMakeLists.txt | 6 ++- projects/rocprofiler-sdk/tests/tools/c-tool.c | 28 ++++++++++++ 5 files changed, 73 insertions(+), 26 deletions(-) diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index b5ea5c80b0..a86f43cc4d 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -157,6 +157,8 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec ### Changed +- SDK no longer creates a background thread when every tool returns a nullptr from `rocprofiler_configure`. + ### Resolved issues ### Removed diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/device_counting.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/device_counting.cpp index 074446986f..ea9f86c21c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/device_counting.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/device_counting.cpp @@ -77,11 +77,11 @@ findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set ret; const auto* all_counters = counters::getMetricMap(); - ROCP_WARNING << "Looking up counters for " << std::string(agent.name()); + ROCP_INFO << "Looking up counters for " << std::string(agent.name()); const auto* gfx_metrics = common::get_val(*all_counters, std::string(agent.name())); if(!gfx_metrics) { - ROCP_WARNING << "No counters found for " << std::string(agent.name()); + ROCP_INFO << "No counters found for " << std::string(agent.name()); return ret; } @@ -145,7 +145,7 @@ check_output_created(rocprofiler_context_id_t, break; } found_value = record->user_data.value; - // ROCP_WARNING << fmt::format("Found counter value: {}", record->counter_value); + // ROCP_INFO << fmt::format("Found counter value: {}", record->counter_value); global_recs().wlock([&](auto& data) { data.push_back(*record); }); } } @@ -202,7 +202,7 @@ gen_kernel_pkt(uint64_t obj) packet.kernel_dispatch.kernel_object = obj; packet.kernel_dispatch.kernarg_address = nullptr; packet.kernel_dispatch.completion_signal.handle = 0; - ROCP_WARNING << fmt::format("{:x}", packet.kernel_dispatch.kernel_object); + ROCP_INFO << fmt::format("{:x}", packet.kernel_dispatch.kernel_object); return packet; } @@ -334,7 +334,7 @@ protected: std::vector output_records(10000); // global_recs().clear(); track_metric++; - ROCP_WARNING << "Testing metric " << metric.name(); + ROCP_INFO << "Testing metric " << metric.name(); rocprofiler_context_id_t ctx = {.handle = 0}; ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed"); rocprofiler_buffer_id_t opt_buff_id = {.handle = 0}; @@ -387,8 +387,8 @@ protected: auto status = rocprofiler_start_context(ctx); if(status == ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS) { - ROCP_WARNING << fmt::format("No hardware counters for {}, skipping", - metric.name()); + ROCP_INFO << fmt::format("No hardware counters for {}, skipping", + metric.name()); continue; } else if(status != ROCPROFILER_STATUS_SUCCESS) @@ -516,8 +516,8 @@ protected: test_kernels kernel_loader(gpu_agent); auto kernel_handle = kernel_loader.load_kernel(gpu_agent, "null_kernel"); - ROCP_WARNING << fmt::format("Running test on agent {:x}", - gpu_agent.get_hsa_agent().handle); + ROCP_INFO << fmt::format("Running test on agent {:x}", + gpu_agent.get_hsa_agent().handle); const auto* agent_map = rocprofiler::common::get_val(counters::get_ast_map(), std::string(gpu_agent.name())); @@ -596,19 +596,19 @@ protected: UINT32_MAX, HSA_WAIT_STATE_ACTIVE); - ROCP_WARNING << "Processing Next..."; + ROCP_INFO << "Processing Next..."; auto decoded_pkt = counters::EvaluateAST::read_pkt(&pkt_constructor, *inst_pkts); CHECK(!decoded_pkt.empty()); - ROCP_WARNING << "Decoded Packet:"; + ROCP_INFO << "Decoded Packet:"; for(const auto& [id, data_vec] : decoded_pkt) { - ROCP_WARNING << fmt::format("\t[{} = {}]", id, fmt::join(data_vec, ",")); + ROCP_INFO << fmt::format("\t[{} = {}]", id, fmt::join(data_vec, ",")); } std::vector>> cache; auto* ret = counter_ast.evaluate(decoded_pkt, cache); CHECK(!ret->empty()); - ROCP_WARNING << fmt::format( + ROCP_INFO << fmt::format( "Final Decoded Counter Values: {} (iter={})", fmt::join(*ret, ","), i); CHECK_EQ(ret->size(), expected_values.size()); @@ -648,7 +648,7 @@ TEST_F(device_counting_service_test, sync_grbm_verify) { test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GRBM_COUNT"}, 50000); auto local_recs = global_recs().rlock([](const auto& data) { return data; }); - ROCP_WARNING << local_recs.size(); + ROCP_INFO << local_recs.size(); for(const auto& val : local_recs) { @@ -656,7 +656,7 @@ TEST_F(device_counting_service_test, sync_grbm_verify) rocprofiler_query_record_counter_id(val.id, &id); rocprofiler_counter_info_v0_t info; rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info); - ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); + ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); EXPECT_GT(val.counter_value, 0.0); } } @@ -665,7 +665,7 @@ TEST_F(device_counting_service_test, sync_gpu_util_verify) { test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GPU_UTIL"}, 50000); auto local_recs = global_recs().rlock([](const auto& data) { return data; }); - ROCP_WARNING << local_recs.size(); + ROCP_INFO << local_recs.size(); for(const auto& val : local_recs) { @@ -673,7 +673,7 @@ TEST_F(device_counting_service_test, sync_gpu_util_verify) rocprofiler_query_record_counter_id(val.id, &id); rocprofiler_counter_info_v0_t info; rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info); - ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); + ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); EXPECT_GT(val.counter_value, 0.0); } } @@ -682,7 +682,7 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify) { test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"SQ_WAVES_sum"}, 50000); auto local_recs = global_recs().rlock([](const auto& data) { return data; }); - ROCP_WARNING << local_recs.size(); + ROCP_INFO << local_recs.size(); for(const auto& val : local_recs) { @@ -690,7 +690,7 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify) rocprofiler_query_record_counter_id(val.id, &id); rocprofiler_counter_info_v0_t info; rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info); - ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); + ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); EXPECT_GT(val.counter_value, 0.0); } } @@ -701,14 +701,14 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify_non_intercept) // deamon. if(!counters::counter_collection_has_device_lock()) { - ROCP_WARNING << "Unsupported kernel driver version, skipping test"; + ROCP_INFO << "Unsupported kernel driver version, skipping test"; GTEST_SKIP(); } ROCP_WARNING << "Running non-intercept test"; test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"SQ_WAVES_sum"}, 50000, true); auto local_recs = global_recs().rlock([](const auto& data) { return data; }); - ROCP_WARNING << local_recs.size(); + ROCP_INFO << local_recs.size(); for(const auto& val : local_recs) { @@ -716,7 +716,7 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify_non_intercept) rocprofiler_query_record_counter_id(val.id, &id); rocprofiler_counter_info_v0_t info; rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info); - ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); + ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); EXPECT_GT(val.counter_value, 0.0); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index a129bd1336..ac84a9d80e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -382,6 +382,20 @@ get_clients() return _v; } +uint64_t +get_num_clients() +{ + uint64_t val = 0; + if(get_clients()) + { + for(auto& itr : *get_clients()) + { + if(itr && itr->configure_result != nullptr) val += 1; + } + } + return val; +} + using mutex_t = std::mutex; using scoped_lock_t = std::unique_lock; @@ -605,7 +619,7 @@ initialize() init_logging(); invoke_client_configures(); invoke_client_initializers(); - internal_threading::initialize(); + if(get_num_clients() > 0) internal_threading::initialize(); // initialization is no longer available set_init_status(1); }); @@ -637,6 +651,7 @@ finalize() static auto _once = std::once_flag{}; std::call_once(_once, []() { + auto num_clients = get_num_clients(); set_fini_status(-1); hsa::async_copy_fini(); counters::device_counting_service_finalize(); @@ -653,7 +668,7 @@ finalize() { invoke_client_finalizers(); } - internal_threading::finalize(); + if(num_clients > 0) internal_threading::finalize(); set_fini_status(1); }); diff --git a/projects/rocprofiler-sdk/tests/c-tool/CMakeLists.txt b/projects/rocprofiler-sdk/tests/c-tool/CMakeLists.txt index ee9689bb98..46a29211eb 100644 --- a/projects/rocprofiler-sdk/tests/c-tool/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/c-tool/CMakeLists.txt @@ -36,7 +36,8 @@ set_tests_properties( PASS_REGULAR_EXPRESSION "Test C tool \\(priority=0\\) is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)" FAIL_REGULAR_EXPRESSION - "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + "${ROCPROFILER_DEFAULT_FAIL_REGEX}|Internal thread for rocprofiler-sdk should not be created" + ) # this test uses ROCP_TOOL_LIBRARIES instead of LD_PRELOAD add_test(NAME test-c-tool-rocp-tool-lib-execute COMMAND $ 1) @@ -59,4 +60,5 @@ set_tests_properties( PASS_REGULAR_EXPRESSION "Test C tool \\(priority=0\\) is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)" FAIL_REGULAR_EXPRESSION - "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + "${ROCPROFILER_DEFAULT_FAIL_REGEX}|Internal thread for rocprofiler-sdk should not be created" + ) diff --git a/projects/rocprofiler-sdk/tests/tools/c-tool.c b/projects/rocprofiler-sdk/tests/tools/c-tool.c index d4d87c0f9c..d54a32288d 100644 --- a/projects/rocprofiler-sdk/tests/tools/c-tool.c +++ b/projects/rocprofiler-sdk/tests/tools/c-tool.c @@ -34,6 +34,29 @@ #include #include +#include +#include + +static void +thread_precreate(rocprofiler_runtime_library_t lib, void* tool_data) +{ + uint32_t* priority = (uint32_t*) tool_data; + + if(priority && *priority == 0) + { + fprintf( + stderr, + "Internal thread for rocprofiler-sdk should not be created when all tools return NULL " + "from rocprofiler_configure\n"); + fflush(stderr); + abort(); + } + + (void) lib; +} + +static uint32_t tool_priority = 0; + rocprofiler_tool_configure_result_t* rocprofiler_configure(uint32_t version, const char* runtime_version, @@ -43,6 +66,8 @@ rocprofiler_configure(uint32_t version, // set the client name id->name = "Test C tool"; + tool_priority = priority; + // compute major/minor/patch version info uint32_t major = version / 10000; uint32_t minor = (version % 10000) / 100; @@ -57,6 +82,9 @@ rocprofiler_configure(uint32_t version, patch, runtime_version); + rocprofiler_at_internal_thread_create( + thread_precreate, NULL, ROCPROFILER_LIBRARY, &tool_priority); + // return pointer to configure data return NULL; }