Counter API and Samples Updates (#410)
* Update include/rocprofiler-sdk/{counters,profile_config}.h
- use rocprofiler_agent_id_t instead of rocprofiler_agent_t
* Update samples
- use rocprofiler-sdk::rocprofiler-sdk instead of rocprofiler::rocprofiler in cmake
- api_callback_tracing sample roctxProfiler{Pause,Resume}
- api_callback_tracing sample uses ROCTx
- updates to use rocprofiler_agent_id_t
* Update run-ci.py
- exclude rocprofiler-sdk-tool from samples (no sample uses that code)
* Update lib/rocprofiler-sdk-tool/tool.cpp
- Update rocprofiler_iterate_agent_supported_counters to use agent ID
* Update lib/rocprofiler-sdk/counters/core.*
- profile_config has pointer to agent instead of copy
* Update lib/rocprofiler-sdk/agent.*
- provide get_agent(...) func via rocp agent id
* Update lib/rocprofiler-sdk/{buffer,callback}_tracing.cpp
- return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED for enums missing implementation
* Update lib/rocprofiler-sdk/counters.cpp
- update to use rocprofiler_agent_id_t instead of rocprofiler_agent_t
* Update lib/rocprofiler-sdk/profile_config.cpp
- update to use rocprofiler_agent_id_t instead of rocprofiler_agent_t
* Update source/docs
- requirements.txt + install reqs in cmake
* Bump version to 0.1.0
* Update samples/api_callback_tracing/CMakeLists.txt
- LD_LIBRARY_PATH for test
* Update test/rocprofv3/tracing/CMakeLists.txt
- reorder validation files so memory copy comes first
* Update lib/rocprofiler-sdk-tool/tool.cpp
- logging for flushing buffers
- variables for buffer_size and buffer_watermark
- increase the watermark to a full buffer
- use dedicated threads for each buffer
* Update lib/rocprofiler-sdk-tool/CMakeLists.txt
- test sets ROCPROF_LOG_LEVEL and ROCPROFILER_LOG_LEVEL to info
* Remove lib/rocprofiler-sdk-tool/trace_buffer.hpp
* Update lib/rocprofiler-sdk-tool/CMakeLists.txt
- drop log level to warning when leak sanitizer is enabled (produces small memory leak)
Bu işleme şunda yer alıyor:
işlemeyi yapan:
GitHub
ebeveyn
c641749fe6
işleme
9a8b6f6b7b
+1
-1
@@ -1 +1 @@
|
||||
0.0.0
|
||||
0.1.0
|
||||
|
||||
@@ -31,7 +31,7 @@ add_library(buffered-api-tracing-client SHARED)
|
||||
target_sources(buffered-api-tracing-client PRIVATE client.cpp client.hpp)
|
||||
target_link_libraries(
|
||||
buffered-api-tracing-client
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
|
||||
rocprofiler::samples-common-library)
|
||||
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
|
||||
|
||||
@@ -31,28 +31,30 @@ add_library(callback-api-tracing-client SHARED)
|
||||
target_sources(callback-api-tracing-client PRIVATE client.cpp client.hpp)
|
||||
target_link_libraries(
|
||||
callback-api-tracing-client
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
|
||||
rocprofiler::samples-common-library)
|
||||
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
find_package(rocprofiler-sdk-roctx REQUIRED)
|
||||
|
||||
add_executable(callback-api-tracing)
|
||||
target_sources(callback-api-tracing PRIVATE main.cpp)
|
||||
target_link_libraries(
|
||||
callback-api-tracing PRIVATE callback-api-tracing-client Threads::Threads
|
||||
rocprofiler::samples-build-flags)
|
||||
callback-api-tracing
|
||||
PRIVATE callback-api-tracing-client Threads::Threads
|
||||
rocprofiler-sdk-roctx::rocprofiler-sdk-roctx rocprofiler::samples-build-flags)
|
||||
|
||||
add_test(NAME callback-api-tracing COMMAND $<TARGET_FILE:callback-api-tracing>)
|
||||
|
||||
set(callback-api-tracing-env
|
||||
${ROCPROFILER_MEMCHECK_PRELOAD_ENV}
|
||||
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>"
|
||||
"LD_LIBRARY_PATH=$<TARGET_FILE_DIR:rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library>:$ENV{LD_LIBRARY_PATH}"
|
||||
)
|
||||
|
||||
set_tests_properties(
|
||||
callback-api-tracing
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"samples"
|
||||
ENVIRONMENT
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV};HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"threw an exception")
|
||||
PROPERTIES TIMEOUT 45 LABELS "samples" ENVIRONMENT "${callback-api-tracing-env}"
|
||||
FAIL_REGULAR_EXPRESSION "threw an exception")
|
||||
|
||||
@@ -33,6 +33,9 @@
|
||||
|
||||
#include "client.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/context.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
@@ -54,6 +57,7 @@
|
||||
#include <ratio>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
namespace client
|
||||
{
|
||||
@@ -129,6 +133,11 @@ print_call_stack(const call_stack_t& _call_stack)
|
||||
callback_name_info
|
||||
get_callback_id_names()
|
||||
{
|
||||
static auto supported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_API};
|
||||
|
||||
auto cb_name_info = callback_name_info{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
@@ -137,8 +146,7 @@ get_callback_id_names()
|
||||
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<callback_name_info*>(data_v);
|
||||
|
||||
if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API ||
|
||||
kindv == ROCPROFILER_CALLBACK_TRACING_HIP_API)
|
||||
if(supported.count(kindv) > 0)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
|
||||
@@ -160,8 +168,7 @@ get_callback_id_names()
|
||||
"query callback tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API ||
|
||||
kind == ROCPROFILER_CALLBACK_TRACING_HIP_API)
|
||||
if(supported.count(kind) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
|
||||
@@ -177,6 +184,27 @@ get_callback_id_names()
|
||||
return cb_name_info;
|
||||
}
|
||||
|
||||
void
|
||||
tool_tracing_ctrl_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t*,
|
||||
void* client_data)
|
||||
{
|
||||
auto* ctx = static_cast<rocprofiler_context_id_t*>(client_data);
|
||||
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
|
||||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
|
||||
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing client context");
|
||||
}
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
|
||||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
|
||||
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming client context");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
@@ -226,6 +254,58 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
_mutex.unlock();
|
||||
}
|
||||
|
||||
std::vector<uint32_t>
|
||||
tool_control_init(rocprofiler_context_id_t& primary_ctx)
|
||||
{
|
||||
struct RoctxOperations
|
||||
{
|
||||
std::vector<uint32_t> core = {};
|
||||
std::vector<uint32_t> cntrl = {};
|
||||
};
|
||||
|
||||
auto roctx_ops = RoctxOperations();
|
||||
|
||||
// get all the operations for ROCPROFILER_CALLBACK_TRACING_MARKER_API and
|
||||
// separate them into two arrays; one which contains the pause/resume operations
|
||||
// and one with everything else
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
|
||||
[](rocprofiler_callback_tracing_kind_t, uint32_t operation_v, void* data_v) {
|
||||
auto* roctx_ops_v = static_cast<RoctxOperations*>(data_v);
|
||||
if(operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerPause ||
|
||||
operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
|
||||
roctx_ops_v->cntrl.emplace_back(operation_v);
|
||||
else
|
||||
roctx_ops_v->core.emplace_back(operation_v);
|
||||
return 0;
|
||||
},
|
||||
&roctx_ops),
|
||||
"iterating callback tracing kind operations");
|
||||
|
||||
// Create a specialized (throw-away) context for handling ROCTx profiler pause and resume.
|
||||
// A separate context is used because if the context that is associated with roctxProfilerPause
|
||||
// disabled that same context, a call to roctxProfilerResume would be ignored because the
|
||||
// context that enables the callback for that API call is disabled.
|
||||
auto cntrl_ctx = rocprofiler_context_id_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&cntrl_ctx), "control context creation failed");
|
||||
|
||||
// enable callback marker tracing with only the pause/resume operations
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(cntrl_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
|
||||
roctx_ops.cntrl.data(),
|
||||
roctx_ops.cntrl.size(),
|
||||
tool_tracing_ctrl_callback,
|
||||
&primary_ctx),
|
||||
"callback tracing service failed to configure");
|
||||
|
||||
// start the context so that it is always active
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(cntrl_ctx), "start of control context");
|
||||
|
||||
return roctx_ops.core;
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
{
|
||||
@@ -263,6 +343,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation failed");
|
||||
|
||||
// enable the control
|
||||
auto roctx_ops = tool_control_init(client_ctx);
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(client_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_API,
|
||||
@@ -281,6 +364,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
tool_data),
|
||||
"callback tracing service failed to configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(client_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
|
||||
roctx_ops.data(),
|
||||
roctx_ops.size(),
|
||||
tool_tracing_callback,
|
||||
tool_data),
|
||||
"callback tracing service failed to configure");
|
||||
|
||||
int valid_ctx = 0;
|
||||
ROCPROFILER_CALL(rocprofiler_context_is_valid(client_ctx, &valid_ctx),
|
||||
"failure checking context validity");
|
||||
|
||||
@@ -22,7 +22,8 @@
|
||||
|
||||
#include "client.hpp"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <rocprofiler-sdk-roctx/roctx.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
@@ -65,7 +66,7 @@ verify(int* in, int* out, int M, int N);
|
||||
} // namespace
|
||||
|
||||
__global__ void
|
||||
transpose_a(int* in, int* out, int M, int N);
|
||||
transpose_a(const int* in, int* out, int M, int N);
|
||||
|
||||
void
|
||||
run(int rank, int tid, hipStream_t stream, int argc, char** argv);
|
||||
@@ -76,6 +77,8 @@ main(int argc, char** argv)
|
||||
client::setup(); // currently does nothing
|
||||
// client::start(); // currently will fail
|
||||
|
||||
auto range_id = roctxRangeStart("main");
|
||||
|
||||
int rank = 0;
|
||||
for(int i = 1; i < argc; ++i)
|
||||
{
|
||||
@@ -114,18 +117,34 @@ main(int argc, char** argv)
|
||||
{
|
||||
std::vector<std::thread> _threads{};
|
||||
std::vector<hipStream_t> _streams(nthreads);
|
||||
roctxMark("stream creation");
|
||||
for(size_t i = 0; i < nthreads; ++i)
|
||||
HIP_API_CALL(hipStreamCreate(&_streams.at(i)));
|
||||
roctxMark("thread creation");
|
||||
for(size_t i = 1; i < nthreads; ++i)
|
||||
_threads.emplace_back(run, rank, i, _streams.at(i), argc, argv);
|
||||
run(rank, 0, _streams.at(0), argc, argv);
|
||||
roctxMark("thread sync");
|
||||
for(auto& itr : _threads)
|
||||
itr.join();
|
||||
roctxMark("stream destroy");
|
||||
for(size_t i = 0; i < nthreads; ++i)
|
||||
HIP_API_CALL(hipStreamDestroy(_streams.at(i)));
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
|
||||
auto tid = roctx_thread_id_t{};
|
||||
// get the thread id recognized by rocprofiler-sdk from roctx
|
||||
roctxGetThreadId(&tid);
|
||||
// pause API tracing
|
||||
roctxProfilerPause(tid);
|
||||
// would not expect below to show up in profiler (depends on tool)
|
||||
HIP_API_CALL(hipDeviceReset());
|
||||
// resume API tracing
|
||||
roctxProfilerResume(tid);
|
||||
|
||||
roctxRangeStop(range_id);
|
||||
|
||||
client::stop();
|
||||
client::shutdown();
|
||||
@@ -134,7 +153,7 @@ main(int argc, char** argv)
|
||||
}
|
||||
|
||||
__global__ void
|
||||
transpose_a(int* in, int* out, int M, int N)
|
||||
transpose_a(const int* in, int* out, int M, int N)
|
||||
{
|
||||
__shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim];
|
||||
|
||||
@@ -148,6 +167,10 @@ transpose_a(int* in, int* out, int M, int N)
|
||||
void
|
||||
run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
{
|
||||
auto run_name = std::stringstream{};
|
||||
run_name << __FUNCTION__ << "(" << rank << ", " << tid << ")";
|
||||
roctxRangePush(run_name.str().c_str());
|
||||
|
||||
unsigned int M = 4960 * 2;
|
||||
unsigned int N = 4960 * 2;
|
||||
if(argc > 2) nitr = atoll(argv[2]);
|
||||
@@ -157,8 +180,9 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
std::cout << "[" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl;
|
||||
_lk.unlock();
|
||||
|
||||
std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)};
|
||||
std::uniform_int_distribution<int> _dist{0, 1000};
|
||||
auto _seed = std::random_device{}() * (rank + 1) * (tid + 1);
|
||||
auto _engine = std::default_random_engine{_seed};
|
||||
auto _dist = std::uniform_int_distribution<int>{0, 1000};
|
||||
|
||||
size_t size = sizeof(int) * M * N;
|
||||
int* inp_matrix = new int[size];
|
||||
@@ -210,6 +234,8 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
|
||||
delete[] inp_matrix;
|
||||
delete[] out_matrix;
|
||||
|
||||
roctxRangePop();
|
||||
}
|
||||
|
||||
namespace
|
||||
|
||||
@@ -31,7 +31,7 @@ add_library(code-object-tracing-client SHARED)
|
||||
target_sources(code-object-tracing-client PRIVATE client.cpp)
|
||||
target_link_libraries(
|
||||
code-object-tracing-client
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
|
||||
rocprofiler::samples-common-library)
|
||||
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
|
||||
|
||||
@@ -32,7 +32,7 @@ target_sources(counter-collection-buffer-client PRIVATE client.cpp client.hpp)
|
||||
target_link_libraries(
|
||||
counter-collection-buffer-client
|
||||
PUBLIC rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library)
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library)
|
||||
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
|
||||
add_executable(counter-collection-buffer)
|
||||
@@ -60,7 +60,7 @@ target_sources(counter-collection-functional-counter-client
|
||||
target_link_libraries(
|
||||
counter-collection-functional-counter-client
|
||||
PUBLIC rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library)
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library)
|
||||
|
||||
add_executable(counter-collection-print-functional-counters)
|
||||
target_sources(counter-collection-print-functional-counters PRIVATE main.cpp)
|
||||
|
||||
@@ -157,20 +157,22 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
|
||||
std::vector<rocprofiler_counter_id_t> gpu_counters;
|
||||
|
||||
// Iterate through the agents and get the counters available on that agent
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_iterate_agent_supported_counters(
|
||||
*agent,
|
||||
[](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) {
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
vec->push_back(counters[i]);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&gpu_counters)),
|
||||
"Could not fetch supported counters");
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
agent->id,
|
||||
[](rocprofiler_agent_id_t,
|
||||
rocprofiler_counter_id_t* counters,
|
||||
size_t num_counters,
|
||||
void* user_data) {
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
vec->push_back(counters[i]);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&gpu_counters)),
|
||||
"Could not fetch supported counters");
|
||||
|
||||
std::vector<rocprofiler_counter_id_t> collect_counters;
|
||||
// Look for the counters contained in counters_to_collect in gpu_counters
|
||||
@@ -190,7 +192,7 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
|
||||
// Create a colleciton profile for the counters
|
||||
rocprofiler_profile_config_id_t profile;
|
||||
ROCPROFILER_CALL(rocprofiler_create_profile_config(
|
||||
*agent, collect_counters.data(), collect_counters.size(), &profile),
|
||||
agent->id, collect_counters.data(), collect_counters.size(), &profile),
|
||||
"Could not construct profile cfg");
|
||||
|
||||
profile_cache.emplace(agent->id.handle, profile);
|
||||
|
||||
@@ -127,25 +127,27 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
|
||||
if(cap.expected.empty())
|
||||
{
|
||||
std::vector<rocprofiler_counter_id_t> counters_needed;
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_iterate_agent_supported_counters(
|
||||
*agent,
|
||||
[](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) {
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
vec->push_back(counters[i]);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&counters_needed)),
|
||||
"Could not fetch supported counters");
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
agent->id,
|
||||
[](rocprofiler_agent_id_t,
|
||||
rocprofiler_counter_id_t* counters,
|
||||
size_t num_counters,
|
||||
void* user_data) {
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
vec->push_back(counters[i]);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&counters_needed)),
|
||||
"Could not fetch supported counters");
|
||||
|
||||
for(auto& found_counter : counters_needed)
|
||||
{
|
||||
size_t expected = 0;
|
||||
rocprofiler_query_counter_instance_count(*agent, found_counter, &expected);
|
||||
rocprofiler_query_counter_instance_count(agent->id, found_counter, &expected);
|
||||
cap.remaining.push_back(found_counter);
|
||||
cap.expected.emplace(found_counter.handle, expected);
|
||||
const char* name;
|
||||
@@ -165,7 +167,7 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
|
||||
|
||||
// Select the next counter to collect.
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_create_profile_config(*agent, &(cap.remaining.back()), 1, &profile),
|
||||
rocprofiler_create_profile_config(agent->id, &(cap.remaining.back()), 1, &profile),
|
||||
"Could not construct profile cfg");
|
||||
|
||||
cap.remaining.pop_back();
|
||||
|
||||
@@ -31,7 +31,7 @@ add_library(intercept-table-client SHARED)
|
||||
target_sources(intercept-table-client PRIVATE client.cpp client.hpp)
|
||||
target_link_libraries(
|
||||
intercept-table-client
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
|
||||
rocprofiler::samples-common-library)
|
||||
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
|
||||
|
||||
@@ -282,9 +282,10 @@ rocprofiler_configure(uint32_t version,
|
||||
client_tool_data->emplace_back(
|
||||
client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
|
||||
|
||||
rocprofiler_at_runtime_api_registration(client::api_registration_callback,
|
||||
ROCPROFILER_HSA_LIBRARY,
|
||||
static_cast<void*>(client_tool_data));
|
||||
ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration(client::api_registration_callback,
|
||||
ROCPROFILER_HSA_LIBRARY,
|
||||
static_cast<void*>(client_tool_data)),
|
||||
"runtime api registration");
|
||||
|
||||
// create configure data
|
||||
static auto cfg =
|
||||
|
||||
@@ -40,25 +40,27 @@ function(DOCS_EXECUTE_PROCESS)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda)
|
||||
set(CONDA_ROOT ${PROJECT_BINARY_DIR}/external/miniconda)
|
||||
|
||||
if(NOT EXISTS ${CONDA_ROOT})
|
||||
docs_execute_process(${SHELL_CMD} ${PROJECT_BINARY_DIR}/external/miniconda.sh -b -p
|
||||
${PROJECT_BINARY_DIR}/external/miniconda)
|
||||
docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda config --set
|
||||
always_yes yes)
|
||||
docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda update -c
|
||||
defaults -n base conda)
|
||||
${CONDA_ROOT})
|
||||
docs_execute_process(${CONDA_ROOT}/bin/conda config --set always_yes yes)
|
||||
docs_execute_process(${CONDA_ROOT}/bin/conda update -c defaults -n base conda)
|
||||
endif()
|
||||
|
||||
if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda/envs/rocprofiler-docs)
|
||||
docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda env create -n
|
||||
rocprofiler-docs -f ${CMAKE_CURRENT_LIST_DIR}/environment.yml)
|
||||
if(NOT EXISTS ${CONDA_ROOT}/envs/rocprofiler-docs)
|
||||
docs_execute_process(${CONDA_ROOT}/bin/conda env create -n rocprofiler-docs -f
|
||||
${CMAKE_CURRENT_LIST_DIR}/environment.yml)
|
||||
docs_execute_process(${CONDA_ROOT}/envs/rocprofiler-docs/bin/python -m pip install -r
|
||||
${CMAKE_CURRENT_LIST_DIR}/requirements.txt)
|
||||
endif()
|
||||
|
||||
file(
|
||||
WRITE "${CMAKE_CURRENT_BINARY_DIR}/build-docs.sh"
|
||||
"#!${SHELL_CMD} -e
|
||||
|
||||
PATH=${PROJECT_BINARY_DIR}/external/miniconda/bin:\${PATH}
|
||||
PATH=${CONDA_ROOT}/bin:\${PATH}
|
||||
export PATH
|
||||
|
||||
source activate
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
doxysphinx
|
||||
@@ -103,7 +103,7 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char**
|
||||
* instance counting information. The reason for this restriction is that HSA
|
||||
* is not yet loaded on tool_init.
|
||||
*
|
||||
* @param [in] agent rocprofiler agent
|
||||
* @param [in] agent_id rocprofiler agent identifier
|
||||
* @param [in] counter_id counter id (obtained from iterate_agent_supported_counters)
|
||||
* @param [out] instance_count number of instances the counter has
|
||||
* @return ::rocprofiler_status_t
|
||||
@@ -111,7 +111,7 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char**
|
||||
* @retval ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND if counter not found
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_counter_id_t counter_id,
|
||||
size_t* instance_count) ROCPROFILER_NONNULL(3);
|
||||
|
||||
@@ -119,6 +119,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
* @brief Callback that gives a list of counters available on an agent. The
|
||||
* counters variable is owned by rocprofiler and should not be free'd.
|
||||
*
|
||||
* @param [in] agent_id Agent ID of the current callback
|
||||
* @param [in] counters An array of counters that are avialable on the agent
|
||||
* @ref rocprofiler_iterate_agent_supported_counters was called on.
|
||||
* @param [in] num_counters Number of counters contained in counters
|
||||
@@ -126,6 +127,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
* @ref rocprofiler_iterate_agent_supported_counters
|
||||
*/
|
||||
typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)(
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_counter_id_t* counters,
|
||||
size_t num_counters,
|
||||
void* user_data);
|
||||
@@ -133,7 +135,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)(
|
||||
/**
|
||||
* @brief Query Agent Counters Availability.
|
||||
*
|
||||
* @param [in] agent GPU agent
|
||||
* @param [in] agent_id GPU agent identifier
|
||||
* @param [in] cb callback to caller to get counters
|
||||
* @param [in] user_data data to pass into the callback
|
||||
* @return ::rocprofiler_status_t
|
||||
@@ -141,7 +143,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)(
|
||||
* @retval ROCPROFILER_STATUS_ERROR if no counters found for agent
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t agent,
|
||||
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_available_counters_cb_t cb,
|
||||
void* user_data) ROCPROFILER_NONNULL(2);
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ ROCPROFILER_EXTERN_C_INIT
|
||||
* counters for an agent can be queried using
|
||||
* @ref rocprofiler_iterate_agent_supported_counters.
|
||||
*
|
||||
* @param [in] agent Agent identifier
|
||||
* @param [in] agent_id Agent identifier
|
||||
* @param [in] counters_list List of GPU counters
|
||||
* @param [in] counters_count Size of counters list
|
||||
* @param [out] config_id Identifier for GPU counters group
|
||||
@@ -52,7 +52,7 @@ ROCPROFILER_EXTERN_C_INIT
|
||||
*
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_create_profile_config(rocprofiler_agent_t agent,
|
||||
rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_counter_id_t* counters_list,
|
||||
size_t counters_count,
|
||||
rocprofiler_profile_config_id_t* config_id)
|
||||
|
||||
@@ -31,10 +31,11 @@
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "rocprofiler-sdk/marker/api_id.h"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/internal_threading.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <glog/logging.h>
|
||||
@@ -229,10 +230,17 @@ get_client_ctx()
|
||||
void
|
||||
flush()
|
||||
{
|
||||
LOG(INFO) << "flushing buffers...";
|
||||
for(auto itr : get_buffers().as_array())
|
||||
{
|
||||
if(itr.handle > 0) ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
|
||||
if(itr.handle > 0)
|
||||
{
|
||||
LOG(INFO) << "flushing buffer " << itr.handle;
|
||||
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
|
||||
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "Buffers flushed";
|
||||
}
|
||||
|
||||
void
|
||||
@@ -634,36 +642,38 @@ get_agent_profile(const rocprofiler_agent_t* agent)
|
||||
},
|
||||
[agent, &profile](agent_counter_map_t& data_v) {
|
||||
auto counters_v = counter_vec_t{};
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_iterate_agent_supported_counters(
|
||||
*agent,
|
||||
[](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) {
|
||||
auto* vec = static_cast<counter_vec_t*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
size_t len = 0;
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
agent->id,
|
||||
[](rocprofiler_agent_id_t,
|
||||
rocprofiler_counter_id_t* counters,
|
||||
size_t num_counters,
|
||||
void* user_data) {
|
||||
auto* vec = static_cast<counter_vec_t*>(user_data);
|
||||
for(size_t i = 0; i < num_counters; i++)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
size_t len = 0;
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_query_counter_name(counters[i], &name, &len),
|
||||
"Could not query name");
|
||||
ROCPROFILER_CALL(rocprofiler_query_counter_name(
|
||||
counters[i], &name, &len),
|
||||
"Could not query name");
|
||||
|
||||
if(name && len > 0)
|
||||
{
|
||||
if(tool::get_config().counters.count(name) > 0)
|
||||
vec->emplace_back(counters[i]);
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&counters_v)),
|
||||
"iterate agent supported counters");
|
||||
if(name && len > 0)
|
||||
{
|
||||
if(tool::get_config().counters.count(name) > 0)
|
||||
vec->emplace_back(counters[i]);
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&counters_v)),
|
||||
"iterate agent supported counters");
|
||||
|
||||
if(!counters_v.empty())
|
||||
{
|
||||
auto profile_v = rocprofiler_profile_config_id_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_profile_config(
|
||||
*agent, counters_v.data(), counters_v.size(), &profile_v),
|
||||
agent->id, counters_v.data(), counters_v.size(), &profile_v),
|
||||
"Could not construct profile cfg");
|
||||
profile = profile_v;
|
||||
}
|
||||
@@ -734,6 +744,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
{
|
||||
client_finalizer = fini_func;
|
||||
|
||||
constexpr uint64_t buffer_size = 4096;
|
||||
constexpr uint64_t buffer_watermark = 4096;
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed");
|
||||
|
||||
auto code_obj_ctx = rocprofiler_context_id_t{};
|
||||
@@ -794,8 +807,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
if(tool::get_config().kernel_trace)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
4096,
|
||||
2048,
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_tracing_callback,
|
||||
tool_data,
|
||||
@@ -814,8 +827,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
if(tool::get_config().memory_copy_trace)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
4096,
|
||||
2048,
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_tracing_callback,
|
||||
nullptr,
|
||||
@@ -834,8 +847,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
if(tool::get_config().hsa_api_trace)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
4096,
|
||||
2048,
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_tracing_callback,
|
||||
tool_data,
|
||||
@@ -854,8 +867,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
if(tool::get_config().hip_api_trace || tool::get_config().hip_compiler_api_trace)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
4096,
|
||||
2048,
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_tracing_callback,
|
||||
tool_data,
|
||||
@@ -888,8 +901,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
if(tool::get_config().counter_collection)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
4096,
|
||||
2048,
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
buffered_tracing_callback,
|
||||
nullptr,
|
||||
@@ -902,11 +915,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
"Could not setup buffered service");
|
||||
}
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed");
|
||||
for(auto itr : get_buffers().as_array())
|
||||
{
|
||||
if(itr.handle > 0)
|
||||
{
|
||||
auto cb_thread = rocprofiler_callback_thread_t{};
|
||||
|
||||
std::atexit([]() {
|
||||
if(client_finalizer && client_identifier) client_finalizer(*client_identifier);
|
||||
});
|
||||
LOG(INFO) << "creating dedicated callback thread for buffer " << itr.handle;
|
||||
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_thread),
|
||||
"creating callback thread");
|
||||
|
||||
LOG(INFO) << "assigning buffer " << itr.handle << " to callback thread "
|
||||
<< cb_thread.handle;
|
||||
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr, cb_thread),
|
||||
"assigning callback thread");
|
||||
}
|
||||
}
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,318 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
class TraceBufferBase
|
||||
{
|
||||
public:
|
||||
static void FlushAll()
|
||||
{
|
||||
std::lock_guard lock(mutex_);
|
||||
|
||||
for(auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_)
|
||||
trace_buffer->Flush();
|
||||
}
|
||||
|
||||
static void Register(TraceBufferBase* elem)
|
||||
{
|
||||
std::lock_guard lock(mutex_);
|
||||
|
||||
auto** prev_ptr = &head_;
|
||||
while(*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_)
|
||||
prev_ptr = &(*prev_ptr)->next_;
|
||||
|
||||
elem->next_ = *prev_ptr;
|
||||
*prev_ptr = elem;
|
||||
}
|
||||
|
||||
static void Unregister(TraceBufferBase* elem)
|
||||
{
|
||||
std::lock_guard lock(mutex_);
|
||||
|
||||
auto** prev_ptr = &head_;
|
||||
while(*prev_ptr != nullptr && *prev_ptr != elem)
|
||||
prev_ptr = &(*prev_ptr)->next_;
|
||||
|
||||
assert(*prev_ptr != nullptr && "elem is not in the list");
|
||||
*prev_ptr = elem->next_;
|
||||
}
|
||||
|
||||
TraceBufferBase(std::string name, int priority)
|
||||
: name_(std::move(name))
|
||||
, priority_(priority)
|
||||
, next_(nullptr)
|
||||
{}
|
||||
|
||||
TraceBufferBase(const TraceBufferBase&) = delete;
|
||||
TraceBufferBase& operator=(const TraceBufferBase&) = delete;
|
||||
|
||||
virtual ~TraceBufferBase() { Unregister(this); }
|
||||
|
||||
virtual void Flush() = 0;
|
||||
|
||||
std::string name() && { return std::move(name_); }
|
||||
const std::string& name() const& { return name_; }
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
const int priority_;
|
||||
TraceBufferBase* next_;
|
||||
|
||||
static TraceBufferBase* head_;
|
||||
static std::mutex mutex_;
|
||||
};
|
||||
|
||||
enum TraceEntryState
|
||||
{
|
||||
TRACE_ENTRY_INVALID = 0,
|
||||
TRACE_ENTRY_INIT = 1,
|
||||
TRACE_ENTRY_COMPLETE = 2
|
||||
};
|
||||
|
||||
template <typename Entry, typename Allocator = std::allocator<Entry>>
|
||||
class TraceBuffer : protected TraceBufferBase
|
||||
{
|
||||
public:
|
||||
using callback_t = std::function<void(Entry*)>;
|
||||
|
||||
TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0)
|
||||
: TraceBufferBase(std::move(name), priority)
|
||||
, flush_callback_(std::move(flush_callback))
|
||||
, size_(size)
|
||||
{
|
||||
assert(size_ != 0 && "cannot create an empty trace buffer");
|
||||
|
||||
Entry* write_buffer = allocator_.allocate(size_);
|
||||
assert(write_buffer != nullptr);
|
||||
buffer_list_.push_back(write_buffer);
|
||||
|
||||
read_index_ = 0;
|
||||
write_index_ = {0, write_buffer};
|
||||
|
||||
AllocateFreeBuffer();
|
||||
|
||||
// Add this instance to the link list of all trace buffers in the process.
|
||||
Register(this);
|
||||
}
|
||||
|
||||
~TraceBuffer() override
|
||||
{
|
||||
// Flush the remaining records. After flushing, there should not be any records left in the
|
||||
// trace buffer.
|
||||
Flush();
|
||||
assert(read_index_ == write_index_.load().index);
|
||||
|
||||
// Acquire both the writer and worker lock as we are accessing shared variables they
|
||||
// protect.
|
||||
std::unique_lock writer_lock(write_mutex_, std::defer_lock);
|
||||
std::unique_lock worker_lock(worker_mutex_, std::defer_lock);
|
||||
std::lock(writer_lock, worker_lock);
|
||||
|
||||
// Deallocate the buffers.
|
||||
allocator_.deallocate(write_index_.load().buffer, size_);
|
||||
allocator_.deallocate(free_buffer_, size_);
|
||||
|
||||
// Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional
|
||||
// after waking up, and exits if it does not have a value.
|
||||
if(worker_thread_)
|
||||
{
|
||||
std::thread worker_thread = std::move(worker_thread_.value());
|
||||
{
|
||||
// Tell the worker thread loop to exit.
|
||||
worker_thread_.reset();
|
||||
free_buffer_ = nullptr;
|
||||
worker_cond_.notify_one();
|
||||
}
|
||||
// Release the worker lock to allow the worker thread to exit.
|
||||
worker_lock.unlock();
|
||||
worker_thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
// Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
|
||||
// monotonically increasing indices, with read_pointer % size always indexing inside the first
|
||||
// buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
|
||||
// the next invocation after changing its state to 'complete'.
|
||||
void Flush() override
|
||||
{
|
||||
std::lock_guard lock(write_mutex_);
|
||||
auto write_index = write_index_.load(std::memory_order_relaxed);
|
||||
|
||||
for(auto it = buffer_list_.begin(); it != buffer_list_.end();)
|
||||
{
|
||||
auto end_of_buffer = read_index_ - read_index_ % size_ + size_;
|
||||
|
||||
while(read_index_ < std::min(write_index.index, end_of_buffer))
|
||||
{
|
||||
Entry* entry = &(*it)[read_index_ % size_];
|
||||
|
||||
// The entry is not yet complete, stop flushing here.
|
||||
if(entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
|
||||
|
||||
flush_callback_(entry);
|
||||
entry->~Entry();
|
||||
|
||||
++read_index_;
|
||||
}
|
||||
|
||||
// The buffer is still in use or the read pointer did not reach the end of the buffer.
|
||||
if(*it == write_index.buffer || read_index_ != end_of_buffer) return;
|
||||
|
||||
// All entries in the current buffer are now processed. Destroy the buffer and move onto
|
||||
// the next buffer in the list.
|
||||
allocator_.deallocate(*it, size_);
|
||||
it = buffer_list_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
Entry& Emplace(Args... args)
|
||||
{
|
||||
return *new(GetEntry()) Entry(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
Entry* GetEntry()
|
||||
{
|
||||
auto current = write_index_.load(std::memory_order_relaxed);
|
||||
|
||||
while(true)
|
||||
{
|
||||
// If the pointer is at the end of the current buffer, switch to the available free
|
||||
// buffer and notify the worker thread to allocate a new buffer.
|
||||
if(current.index != 0 && current.index % size_ == 0)
|
||||
{
|
||||
std::lock_guard lock(write_mutex_);
|
||||
|
||||
// If the worker thread wasn't already started, start it now. This avoids starting a
|
||||
// new thread when the trace buffer is created.
|
||||
if(!worker_thread_)
|
||||
{
|
||||
std::promise<void> ready;
|
||||
auto future = ready.get_future();
|
||||
{
|
||||
std::lock_guard worker_lock(worker_mutex_);
|
||||
worker_thread_.emplace(
|
||||
&TraceBuffer::WorkerThreadLoop, this, std::move(ready));
|
||||
}
|
||||
future.wait();
|
||||
}
|
||||
|
||||
// Re-check the pointer overflow under the writer lock, another thread could have
|
||||
// beaten us to it and already bumped the write_index_.
|
||||
current = write_index_.load(std::memory_order_relaxed);
|
||||
if(current.index % size_ == 0)
|
||||
{
|
||||
std::unique_lock worker_lock(worker_mutex_);
|
||||
|
||||
// Wait for the free buffer to become available.
|
||||
worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; });
|
||||
|
||||
current.buffer = free_buffer_;
|
||||
buffer_list_.push_back(current.buffer);
|
||||
write_index_.store({current.index + 1, current.buffer},
|
||||
std::memory_order_relaxed);
|
||||
|
||||
// Tell the worker thread to allocate a new free buffer.
|
||||
free_buffer_ = nullptr;
|
||||
worker_cond_.notify_one();
|
||||
|
||||
// We successfully allocated a new buffer, return the first element.
|
||||
return ¤t.buffer[0];
|
||||
}
|
||||
}
|
||||
|
||||
if(write_index_.compare_exchange_weak(
|
||||
current, {current.index + 1, current.buffer}, std::memory_order_relaxed))
|
||||
return ¤t.buffer[current.index % size_];
|
||||
}
|
||||
}
|
||||
|
||||
void AllocateFreeBuffer()
|
||||
{
|
||||
assert(free_buffer_ == nullptr);
|
||||
|
||||
free_buffer_ = allocator_.allocate(size_);
|
||||
assert(free_buffer_ != nullptr);
|
||||
|
||||
for(size_t i = 0; i < size_; ++i)
|
||||
free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void WorkerThreadLoop(std::promise<void> ready)
|
||||
{
|
||||
std::unique_lock lock(worker_mutex_);
|
||||
|
||||
// This worker thread is now ready to accept work.
|
||||
ready.set_value();
|
||||
|
||||
while(true)
|
||||
{
|
||||
worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; });
|
||||
if(!worker_thread_) break;
|
||||
AllocateFreeBuffer();
|
||||
worker_cond_.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
// The WriteIndex is used to store both the index and the buffer associated with that index (the
|
||||
// buffer contains the trace buffer records at [index - index % size, index - index % size_t +
|
||||
// size_ - 1]) in a single atomic variable.
|
||||
struct WriteIndex
|
||||
{
|
||||
uint64_t index;
|
||||
Entry* buffer;
|
||||
};
|
||||
|
||||
const callback_t flush_callback_;
|
||||
const uint64_t size_;
|
||||
|
||||
uint64_t read_index_; // The index of the next record to flush.
|
||||
std::atomic<WriteIndex> write_index_; // The index of the next record that could be written.
|
||||
Entry* free_buffer_{nullptr}; // The next available free buffer.
|
||||
|
||||
std::optional<std::thread> worker_thread_;
|
||||
std::mutex worker_mutex_;
|
||||
std::condition_variable worker_cond_;
|
||||
|
||||
std::mutex write_mutex_;
|
||||
std::list<Entry*> buffer_list_;
|
||||
Allocator allocator_;
|
||||
};
|
||||
|
||||
#define TRACE_BUFFER_INSTANTIATE() \
|
||||
TraceBufferBase* TraceBufferBase::head_ = nullptr; \
|
||||
std::mutex TraceBufferBase::mutex_;
|
||||
@@ -671,6 +671,16 @@ get_agents()
|
||||
return pointers;
|
||||
}
|
||||
|
||||
const rocprofiler_agent_t*
|
||||
get_agent(rocprofiler_agent_id_t id)
|
||||
{
|
||||
for(const auto& itr : get_agents())
|
||||
{
|
||||
if(itr && itr->id.handle == id.handle) return itr;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
construct_agent_cache(::HsaApiTable* table)
|
||||
{
|
||||
|
||||
@@ -39,6 +39,9 @@ namespace agent
|
||||
std::vector<const rocprofiler_agent_t*>
|
||||
get_agents();
|
||||
|
||||
const rocprofiler_agent_t*
|
||||
get_agent(rocprofiler_agent_id_t id);
|
||||
|
||||
void
|
||||
construct_agent_cache(::HsaApiTable* table);
|
||||
|
||||
|
||||
@@ -96,6 +96,12 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
|
||||
if(rocprofiler::registration::get_init_status() > -1)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
static auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
|
||||
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,
|
||||
ROCPROFILER_BUFFER_TRACING_EXTERNAL_CORRELATION};
|
||||
if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
|
||||
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
@@ -92,6 +92,10 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t
|
||||
if(rocprofiler::registration::get_init_status() > -1)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
static auto unsupported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
|
||||
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH};
|
||||
if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
|
||||
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
|
||||
@@ -43,7 +44,7 @@ extern "C" {
|
||||
* @param [out] size
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_status_t
|
||||
rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** name, size_t* size)
|
||||
{
|
||||
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getMetricIdMap());
|
||||
@@ -71,11 +72,16 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char**
|
||||
* @param [out] instance_count number of instances the counter has
|
||||
* @return rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
rocprofiler_status_t
|
||||
rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_counter_id_t counter_id,
|
||||
size_t* instance_count)
|
||||
{
|
||||
const rocprofiler_agent_t* agent = rocprofiler::agent::get_agent(agent_id);
|
||||
|
||||
if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
|
||||
if(agent->type != ROCPROFILER_AGENT_TYPE_GPU) return ROCPROFILER_STATUS_ERROR;
|
||||
|
||||
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getMetricIdMap());
|
||||
const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle);
|
||||
if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
|
||||
@@ -93,19 +99,9 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
// For derived metrics, this can be more than one counter. In that case,
|
||||
// we return the maximum instance count among all underlying counters.
|
||||
auto req_counters = rocprofiler::counters::get_required_hardware_counters(
|
||||
rocprofiler::counters::get_ast_map(), std::string(agent.name), *metric_ptr);
|
||||
rocprofiler::counters::get_ast_map(), std::string(agent->name), *metric_ptr);
|
||||
if(!req_counters) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
|
||||
|
||||
// NOTE: to look up instance information, we require HSA be init'd. Reason
|
||||
// for this is the call to get instance information is an HSA call.
|
||||
const auto* maybe_agent = rocprofiler::common::get_val(
|
||||
rocprofiler::hsa::get_queue_controller().get_supported_agents(), agent.id.handle);
|
||||
if(!maybe_agent)
|
||||
{
|
||||
LOG(ERROR) << "HSA must be loaded to obtain instance information.";
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
for(const auto& counter : *req_counters)
|
||||
{
|
||||
if(!counter.special().empty())
|
||||
@@ -116,7 +112,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
|
||||
try
|
||||
{
|
||||
auto dims = rocprofiler::counters::getBlockDimensions(maybe_agent->name(), counter);
|
||||
auto dims = rocprofiler::counters::getBlockDimensions(agent->name, counter);
|
||||
for(const auto& dim : dims)
|
||||
{
|
||||
*instance_count = std::max(static_cast<size_t>(dim.size()), *instance_count);
|
||||
@@ -138,12 +134,15 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
* @param [out] counters_count
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t agent,
|
||||
rocprofiler_status_t
|
||||
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_available_counters_cb_t cb,
|
||||
void* user_data)
|
||||
{
|
||||
auto metrics = rocprofiler::counters::getMetricsForAgent(agent.name);
|
||||
const auto* agent = rocprofiler::agent::get_agent(agent_id);
|
||||
if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
|
||||
|
||||
auto metrics = rocprofiler::counters::getMetricsForAgent(agent->name);
|
||||
std::vector<rocprofiler_counter_id_t> ids;
|
||||
ids.reserve(metrics.size());
|
||||
for(const auto& metric : metrics)
|
||||
@@ -151,7 +150,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t
|
||||
ids.push_back({.handle = metric.id()});
|
||||
}
|
||||
|
||||
return cb(ids.data(), ids.size(), user_data);
|
||||
return cb(agent_id, ids.data(), ids.size(), user_data);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -161,7 +160,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t
|
||||
* @param [out] counter_id counter id associated with the record
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_status_t
|
||||
rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id,
|
||||
rocprofiler_counter_id_t* counter_id)
|
||||
{
|
||||
@@ -170,7 +169,7 @@ rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id,
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_status_t
|
||||
rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t id,
|
||||
rocprofiler_counter_dimension_id_t dim,
|
||||
size_t* pos)
|
||||
@@ -180,7 +179,7 @@ rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t i
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_status_t
|
||||
rocprofiler_query_record_dimension_info(rocprofiler_counter_id_t,
|
||||
rocprofiler_counter_dimension_id_t dim,
|
||||
rocprofiler_record_dimension_info_t* info)
|
||||
|
||||
@@ -183,7 +183,7 @@ queue_cb(const std::shared_ptr<counter_callback_info>& info,
|
||||
if(prof_config->reqired_hw_counters.empty())
|
||||
{
|
||||
auto& config = *prof_config;
|
||||
auto agent_name = std::string(config.agent.name);
|
||||
auto agent_name = std::string(config.agent->name);
|
||||
for(const auto& metric : config.metrics)
|
||||
{
|
||||
auto req_counters =
|
||||
@@ -273,7 +273,7 @@ completed_cb(const std::shared_ptr<counter_callback_info>& info,
|
||||
|
||||
auto decoded_pkt = EvaluateAST::read_pkt(prof_config->pkt_generator.get(), *pkt);
|
||||
EvaluateAST::read_special_counters(
|
||||
prof_config->agent, prof_config->required_special_counters, decoded_pkt);
|
||||
*prof_config->agent, prof_config->required_special_counters, decoded_pkt);
|
||||
|
||||
prof_config->packets.wlock([&](auto& pkt_vector) {
|
||||
if(pkt)
|
||||
|
||||
@@ -45,7 +45,7 @@ namespace counters
|
||||
// This profile can be shared among many rocprof contexts.
|
||||
struct profile_config
|
||||
{
|
||||
rocprofiler_agent_t agent{};
|
||||
const rocprofiler_agent_t* agent = nullptr;
|
||||
std::vector<counters::Metric> metrics{};
|
||||
// HW counters that must be collected to compute the above
|
||||
// metrics (derived metrics are broken down into hw counters
|
||||
|
||||
@@ -25,11 +25,13 @@
|
||||
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/core.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
#include "rocprofiler-sdk/fwd.h"
|
||||
|
||||
extern "C" {
|
||||
/**
|
||||
@@ -41,12 +43,15 @@ extern "C" {
|
||||
* @param [out] config_id Identifier for GPU counters group
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_create_profile_config(rocprofiler_agent_t agent,
|
||||
rocprofiler_status_t
|
||||
rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id,
|
||||
rocprofiler_counter_id_t* counters_list,
|
||||
size_t counters_count,
|
||||
rocprofiler_profile_config_id_t* config_id)
|
||||
{
|
||||
const auto* agent = ::rocprofiler::agent::get_agent(agent_id);
|
||||
if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
|
||||
|
||||
std::shared_ptr<rocprofiler::counters::profile_config> config =
|
||||
std::make_shared<rocprofiler::counters::profile_config>();
|
||||
|
||||
@@ -57,7 +62,7 @@ rocprofiler_create_profile_config(rocprofiler_agent_t agent,
|
||||
|
||||
const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle);
|
||||
if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
|
||||
if(!rocprofiler::counters::checkValidMetric(std::string(agent.name), *metric_ptr))
|
||||
if(!rocprofiler::counters::checkValidMetric(std::string(agent->name), *metric_ptr))
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_METRIC_NOT_VALID_FOR_AGENT;
|
||||
}
|
||||
@@ -70,7 +75,7 @@ rocprofiler_create_profile_config(rocprofiler_agent_t agent,
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_status_t
|
||||
rocprofiler_destroy_profile_config(rocprofiler_profile_config_id_t config_id)
|
||||
{
|
||||
rocprofiler::counters::destroy_counter_profile(config_id.handle);
|
||||
|
||||
@@ -100,7 +100,7 @@ def generate_custom(args, cmake_args, ctest_args):
|
||||
".*/counters/parser/.*",
|
||||
]
|
||||
if args.coverage == "samples":
|
||||
codecov_exclude += [".*/lib/common/.*"]
|
||||
codecov_exclude += [".*/lib/common/.*", ".*/lib/rocprofiler-sdk-tool/.*"]
|
||||
|
||||
COVERAGE_EXCLUDE = ";".join(codecov_exclude)
|
||||
|
||||
|
||||
@@ -20,8 +20,16 @@ add_test(
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(tracing-env "${PRELOAD_ENV}"
|
||||
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
|
||||
if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
|
||||
set(LOG_LEVEL "warning") # info produces memory leak
|
||||
else()
|
||||
set(LOG_LEVEL "info")
|
||||
endif()
|
||||
|
||||
set(tracing-env
|
||||
"${PRELOAD_ENV}" "ROCPROF_LOG_LEVEL=${LOG_LEVEL}"
|
||||
"ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}"
|
||||
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
|
||||
|
||||
set_tests_properties(rocprofv3-test-trace-execute PROPERTIES LABELS "integration-tests"
|
||||
ENVIRONMENT "${tracing-env}")
|
||||
@@ -44,9 +52,9 @@ add_test(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv)
|
||||
|
||||
set(VALIDATION_FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_hsa_api_trace.csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_kernel_trace.csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv)
|
||||
|
||||
set_tests_properties(
|
||||
@@ -58,8 +66,6 @@ set_tests_properties(
|
||||
DEPENDS
|
||||
rocprofv3-test-trace-execute
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
REQUIRED_FILES
|
||||
"${VALIDATION_FILES}"
|
||||
"AssertionError|HSA_API|HIP_API|MARKER_API|KERNEL_DISPATCH|CODE_OBJECT"
|
||||
ATTACHED_FILES_ON_FAIL
|
||||
"${VALIDATION_FILES}")
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle