SDK: No bg thread if no clients use SDK (#123)
* SDK: No bg thread if no clients use SDK * Update CHANGELOG --------- Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
90ae424c57
Коммит
0fbe6cc7b6
@@ -157,6 +157,8 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
|
||||
### Changed
|
||||
|
||||
- SDK no longer creates a background thread when every tool returns a nullptr from `rocprofiler_configure`.
|
||||
|
||||
### Resolved issues
|
||||
|
||||
### Removed
|
||||
|
||||
@@ -77,11 +77,11 @@ findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set<std::st
|
||||
std::vector<counters::Metric> ret;
|
||||
const auto* all_counters = counters::getMetricMap();
|
||||
|
||||
ROCP_WARNING << "Looking up counters for " << std::string(agent.name());
|
||||
ROCP_INFO << "Looking up counters for " << std::string(agent.name());
|
||||
const auto* gfx_metrics = common::get_val(*all_counters, std::string(agent.name()));
|
||||
if(!gfx_metrics)
|
||||
{
|
||||
ROCP_WARNING << "No counters found for " << std::string(agent.name());
|
||||
ROCP_INFO << "No counters found for " << std::string(agent.name());
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -145,7 +145,7 @@ check_output_created(rocprofiler_context_id_t,
|
||||
break;
|
||||
}
|
||||
found_value = record->user_data.value;
|
||||
// ROCP_WARNING << fmt::format("Found counter value: {}", record->counter_value);
|
||||
// ROCP_INFO << fmt::format("Found counter value: {}", record->counter_value);
|
||||
global_recs().wlock([&](auto& data) { data.push_back(*record); });
|
||||
}
|
||||
}
|
||||
@@ -202,7 +202,7 @@ gen_kernel_pkt(uint64_t obj)
|
||||
packet.kernel_dispatch.kernel_object = obj;
|
||||
packet.kernel_dispatch.kernarg_address = nullptr;
|
||||
packet.kernel_dispatch.completion_signal.handle = 0;
|
||||
ROCP_WARNING << fmt::format("{:x}", packet.kernel_dispatch.kernel_object);
|
||||
ROCP_INFO << fmt::format("{:x}", packet.kernel_dispatch.kernel_object);
|
||||
return packet;
|
||||
}
|
||||
|
||||
@@ -334,7 +334,7 @@ protected:
|
||||
std::vector<rocprofiler_record_counter_t> output_records(10000);
|
||||
// global_recs().clear();
|
||||
track_metric++;
|
||||
ROCP_WARNING << "Testing metric " << metric.name();
|
||||
ROCP_INFO << "Testing metric " << metric.name();
|
||||
rocprofiler_context_id_t ctx = {.handle = 0};
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed");
|
||||
rocprofiler_buffer_id_t opt_buff_id = {.handle = 0};
|
||||
@@ -387,8 +387,8 @@ protected:
|
||||
auto status = rocprofiler_start_context(ctx);
|
||||
if(status == ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS)
|
||||
{
|
||||
ROCP_WARNING << fmt::format("No hardware counters for {}, skipping",
|
||||
metric.name());
|
||||
ROCP_INFO << fmt::format("No hardware counters for {}, skipping",
|
||||
metric.name());
|
||||
continue;
|
||||
}
|
||||
else if(status != ROCPROFILER_STATUS_SUCCESS)
|
||||
@@ -516,8 +516,8 @@ protected:
|
||||
test_kernels kernel_loader(gpu_agent);
|
||||
auto kernel_handle = kernel_loader.load_kernel(gpu_agent, "null_kernel");
|
||||
|
||||
ROCP_WARNING << fmt::format("Running test on agent {:x}",
|
||||
gpu_agent.get_hsa_agent().handle);
|
||||
ROCP_INFO << fmt::format("Running test on agent {:x}",
|
||||
gpu_agent.get_hsa_agent().handle);
|
||||
|
||||
const auto* agent_map = rocprofiler::common::get_val(counters::get_ast_map(),
|
||||
std::string(gpu_agent.name()));
|
||||
@@ -596,19 +596,19 @@ protected:
|
||||
UINT32_MAX,
|
||||
HSA_WAIT_STATE_ACTIVE);
|
||||
|
||||
ROCP_WARNING << "Processing Next...";
|
||||
ROCP_INFO << "Processing Next...";
|
||||
auto decoded_pkt = counters::EvaluateAST::read_pkt(&pkt_constructor, *inst_pkts);
|
||||
CHECK(!decoded_pkt.empty());
|
||||
ROCP_WARNING << "Decoded Packet:";
|
||||
ROCP_INFO << "Decoded Packet:";
|
||||
for(const auto& [id, data_vec] : decoded_pkt)
|
||||
{
|
||||
ROCP_WARNING << fmt::format("\t[{} = {}]", id, fmt::join(data_vec, ","));
|
||||
ROCP_INFO << fmt::format("\t[{} = {}]", id, fmt::join(data_vec, ","));
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<std::vector<rocprofiler_record_counter_t>>> cache;
|
||||
auto* ret = counter_ast.evaluate(decoded_pkt, cache);
|
||||
CHECK(!ret->empty());
|
||||
ROCP_WARNING << fmt::format(
|
||||
ROCP_INFO << fmt::format(
|
||||
"Final Decoded Counter Values: {} (iter={})", fmt::join(*ret, ","), i);
|
||||
|
||||
CHECK_EQ(ret->size(), expected_values.size());
|
||||
@@ -648,7 +648,7 @@ TEST_F(device_counting_service_test, sync_grbm_verify)
|
||||
{
|
||||
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GRBM_COUNT"}, 50000);
|
||||
auto local_recs = global_recs().rlock([](const auto& data) { return data; });
|
||||
ROCP_WARNING << local_recs.size();
|
||||
ROCP_INFO << local_recs.size();
|
||||
|
||||
for(const auto& val : local_recs)
|
||||
{
|
||||
@@ -656,7 +656,7 @@ TEST_F(device_counting_service_test, sync_grbm_verify)
|
||||
rocprofiler_query_record_counter_id(val.id, &id);
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
|
||||
ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
EXPECT_GT(val.counter_value, 0.0);
|
||||
}
|
||||
}
|
||||
@@ -665,7 +665,7 @@ TEST_F(device_counting_service_test, sync_gpu_util_verify)
|
||||
{
|
||||
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GPU_UTIL"}, 50000);
|
||||
auto local_recs = global_recs().rlock([](const auto& data) { return data; });
|
||||
ROCP_WARNING << local_recs.size();
|
||||
ROCP_INFO << local_recs.size();
|
||||
|
||||
for(const auto& val : local_recs)
|
||||
{
|
||||
@@ -673,7 +673,7 @@ TEST_F(device_counting_service_test, sync_gpu_util_verify)
|
||||
rocprofiler_query_record_counter_id(val.id, &id);
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
|
||||
ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
EXPECT_GT(val.counter_value, 0.0);
|
||||
}
|
||||
}
|
||||
@@ -682,7 +682,7 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify)
|
||||
{
|
||||
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"SQ_WAVES_sum"}, 50000);
|
||||
auto local_recs = global_recs().rlock([](const auto& data) { return data; });
|
||||
ROCP_WARNING << local_recs.size();
|
||||
ROCP_INFO << local_recs.size();
|
||||
|
||||
for(const auto& val : local_recs)
|
||||
{
|
||||
@@ -690,7 +690,7 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify)
|
||||
rocprofiler_query_record_counter_id(val.id, &id);
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
|
||||
ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
EXPECT_GT(val.counter_value, 0.0);
|
||||
}
|
||||
}
|
||||
@@ -701,14 +701,14 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify_non_intercept)
|
||||
// deamon.
|
||||
if(!counters::counter_collection_has_device_lock())
|
||||
{
|
||||
ROCP_WARNING << "Unsupported kernel driver version, skipping test";
|
||||
ROCP_INFO << "Unsupported kernel driver version, skipping test";
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
ROCP_WARNING << "Running non-intercept test";
|
||||
test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"SQ_WAVES_sum"}, 50000, true);
|
||||
auto local_recs = global_recs().rlock([](const auto& data) { return data; });
|
||||
ROCP_WARNING << local_recs.size();
|
||||
ROCP_INFO << local_recs.size();
|
||||
|
||||
for(const auto& val : local_recs)
|
||||
{
|
||||
@@ -716,7 +716,7 @@ TEST_F(device_counting_service_test, sync_sq_waves_verify_non_intercept)
|
||||
rocprofiler_query_record_counter_id(val.id, &id);
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info);
|
||||
ROCP_WARNING << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
ROCP_INFO << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value);
|
||||
EXPECT_GT(val.counter_value, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -382,6 +382,20 @@ get_clients()
|
||||
return _v;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
get_num_clients()
|
||||
{
|
||||
uint64_t val = 0;
|
||||
if(get_clients())
|
||||
{
|
||||
for(auto& itr : *get_clients())
|
||||
{
|
||||
if(itr && itr->configure_result != nullptr) val += 1;
|
||||
}
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
using mutex_t = std::mutex;
|
||||
using scoped_lock_t = std::unique_lock<mutex_t>;
|
||||
|
||||
@@ -605,7 +619,7 @@ initialize()
|
||||
init_logging();
|
||||
invoke_client_configures();
|
||||
invoke_client_initializers();
|
||||
internal_threading::initialize();
|
||||
if(get_num_clients() > 0) internal_threading::initialize();
|
||||
// initialization is no longer available
|
||||
set_init_status(1);
|
||||
});
|
||||
@@ -637,6 +651,7 @@ finalize()
|
||||
|
||||
static auto _once = std::once_flag{};
|
||||
std::call_once(_once, []() {
|
||||
auto num_clients = get_num_clients();
|
||||
set_fini_status(-1);
|
||||
hsa::async_copy_fini();
|
||||
counters::device_counting_service_finalize();
|
||||
@@ -653,7 +668,7 @@ finalize()
|
||||
{
|
||||
invoke_client_finalizers();
|
||||
}
|
||||
internal_threading::finalize();
|
||||
if(num_clients > 0) internal_threading::finalize();
|
||||
set_fini_status(1);
|
||||
});
|
||||
|
||||
|
||||
@@ -36,7 +36,8 @@ set_tests_properties(
|
||||
PASS_REGULAR_EXPRESSION
|
||||
"Test C tool \\(priority=0\\) is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}|Internal thread for rocprofiler-sdk should not be created"
|
||||
)
|
||||
|
||||
# this test uses ROCP_TOOL_LIBRARIES instead of LD_PRELOAD
|
||||
add_test(NAME test-c-tool-rocp-tool-lib-execute COMMAND $<TARGET_FILE:transpose> 1)
|
||||
@@ -59,4 +60,5 @@ set_tests_properties(
|
||||
PASS_REGULAR_EXPRESSION
|
||||
"Test C tool \\(priority=0\\) is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}|Internal thread for rocprofiler-sdk should not be created"
|
||||
)
|
||||
|
||||
@@ -34,6 +34,29 @@
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void
|
||||
thread_precreate(rocprofiler_runtime_library_t lib, void* tool_data)
|
||||
{
|
||||
uint32_t* priority = (uint32_t*) tool_data;
|
||||
|
||||
if(priority && *priority == 0)
|
||||
{
|
||||
fprintf(
|
||||
stderr,
|
||||
"Internal thread for rocprofiler-sdk should not be created when all tools return NULL "
|
||||
"from rocprofiler_configure\n");
|
||||
fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
|
||||
(void) lib;
|
||||
}
|
||||
|
||||
static uint32_t tool_priority = 0;
|
||||
|
||||
rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
@@ -43,6 +66,8 @@ rocprofiler_configure(uint32_t version,
|
||||
// set the client name
|
||||
id->name = "Test C tool";
|
||||
|
||||
tool_priority = priority;
|
||||
|
||||
// compute major/minor/patch version info
|
||||
uint32_t major = version / 10000;
|
||||
uint32_t minor = (version % 10000) / 100;
|
||||
@@ -57,6 +82,9 @@ rocprofiler_configure(uint32_t version,
|
||||
patch,
|
||||
runtime_version);
|
||||
|
||||
rocprofiler_at_internal_thread_create(
|
||||
thread_precreate, NULL, ROCPROFILER_LIBRARY, &tool_priority);
|
||||
|
||||
// return pointer to configure data
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user