Counter API and Samples Updates (#410)

* Update include/rocprofiler-sdk/{counters,profile_config}.h

- use rocprofiler_agent_id_t instead of rocprofiler_agent_t

* Update samples

- use rocprofiler-sdk::rocprofiler-sdk instead of rocprofiler::rocprofiler in cmake
- api_callback_tracing sample roctxProfiler{Pause,Resume}
- api_callback_tracing sample uses ROCTx
- updates to use rocprofiler_agent_id_t

* Update run-ci.py

- exclude rocprofiler-sdk-tool from samples (no sample uses that code)

* Update lib/rocprofiler-sdk-tool/tool.cpp

- Update rocprofiler_iterate_agent_supported_counters to use agent ID

* Update lib/rocprofiler-sdk/counters/core.*

- profile_config has pointer to agent instead of copy

* Update lib/rocprofiler-sdk/agent.*

- provide get_agent(...) func via rocp agent id

* Update lib/rocprofiler-sdk/{buffer,callback}_tracing.cpp

- return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED for enums missing implementation

* Update lib/rocprofiler-sdk/counters.cpp

- update to use rocprofiler_agent_id_t instead of rocprofiler_agent_t

* Update lib/rocprofiler-sdk/profile_config.cpp

- update to use rocprofiler_agent_id_t instead of rocprofiler_agent_t

* Update source/docs

- requirements.txt + install reqs in cmake

* Bump version to 0.1.0

* Update samples/api_callback_tracing/CMakeLists.txt

- LD_LIBRARY_PATH for test

* Update test/rocprofv3/tracing/CMakeLists.txt

- reorder validation files so memory copy comes first

* Update lib/rocprofiler-sdk-tool/tool.cpp

- logging for flushing buffers
- variables for buffer_size and buffer_watermark
  - increase the watermark to a full buffer
- use dedicated threads for each buffer

* Update lib/rocprofiler-sdk-tool/CMakeLists.txt

- test sets ROCPROF_LOG_LEVEL and ROCPROFILER_LOG_LEVEL to info

* Remove lib/rocprofiler-sdk-tool/trace_buffer.hpp

* Update lib/rocprofiler-sdk-tool/CMakeLists.txt

- drop log level to warning when leak sanitizer is enabled (produces small memory leak)
Bu işleme şunda yer alıyor:
Jonathan R. Madsen
2024-01-25 23:47:40 -06:00
işlemeyi yapan: GitHub
ebeveyn c641749fe6
işleme 9a8b6f6b7b
27 değiştirilmiş dosya ile 341 ekleme ve 470 silme
+1 -1
Dosyayı Görüntüle
@@ -1 +1 @@
0.0.0
0.1.0
+1 -1
Dosyayı Görüntüle
@@ -31,7 +31,7 @@ add_library(buffered-api-tracing-client SHARED)
target_sources(buffered-api-tracing-client PRIVATE client.cpp client.hpp)
target_link_libraries(
buffered-api-tracing-client
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
rocprofiler::samples-common-library)
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
+14 -12
Dosyayı Görüntüle
@@ -31,28 +31,30 @@ add_library(callback-api-tracing-client SHARED)
target_sources(callback-api-tracing-client PRIVATE client.cpp client.hpp)
target_link_libraries(
callback-api-tracing-client
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
rocprofiler::samples-common-library)
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
find_package(Threads REQUIRED)
find_package(rocprofiler-sdk-roctx REQUIRED)
add_executable(callback-api-tracing)
target_sources(callback-api-tracing PRIVATE main.cpp)
target_link_libraries(
callback-api-tracing PRIVATE callback-api-tracing-client Threads::Threads
rocprofiler::samples-build-flags)
callback-api-tracing
PRIVATE callback-api-tracing-client Threads::Threads
rocprofiler-sdk-roctx::rocprofiler-sdk-roctx rocprofiler::samples-build-flags)
add_test(NAME callback-api-tracing COMMAND $<TARGET_FILE:callback-api-tracing>)
set(callback-api-tracing-env
${ROCPROFILER_MEMCHECK_PRELOAD_ENV}
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>"
"LD_LIBRARY_PATH=$<TARGET_FILE_DIR:rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library>:$ENV{LD_LIBRARY_PATH}"
)
set_tests_properties(
callback-api-tracing
PROPERTIES
TIMEOUT
45
LABELS
"samples"
ENVIRONMENT
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV};HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>"
FAIL_REGULAR_EXPRESSION
"threw an exception")
PROPERTIES TIMEOUT 45 LABELS "samples" ENVIRONMENT "${callback-api-tracing-env}"
FAIL_REGULAR_EXPRESSION "threw an exception")
+96 -4
Dosyayı Görüntüle
@@ -33,6 +33,9 @@
#include "client.hpp"
#include <rocprofiler-sdk/context.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/marker/api_id.h>
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
@@ -54,6 +57,7 @@
#include <ratio>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>
namespace client
{
@@ -129,6 +133,11 @@ print_call_stack(const call_stack_t& _call_stack)
callback_name_info
get_callback_id_names()
{
static auto supported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
ROCPROFILER_CALLBACK_TRACING_HSA_API,
ROCPROFILER_CALLBACK_TRACING_HIP_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_API};
auto cb_name_info = callback_name_info{};
//
// callback for each kind operation
@@ -137,8 +146,7 @@ get_callback_id_names()
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
auto* name_info_v = static_cast<callback_name_info*>(data_v);
if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API ||
kindv == ROCPROFILER_CALLBACK_TRACING_HIP_API)
if(supported.count(kindv) > 0)
{
const char* name = nullptr;
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
@@ -160,8 +168,7 @@ get_callback_id_names()
"query callback tracing kind operation name");
if(name) name_info_v->kind_names[kind] = name;
if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API ||
kind == ROCPROFILER_CALLBACK_TRACING_HIP_API)
if(supported.count(kind) > 0)
{
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations(
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
@@ -177,6 +184,27 @@ get_callback_id_names()
return cb_name_info;
}
void
tool_tracing_ctrl_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t*,
void* client_data)
{
auto* ctx = static_cast<rocprofiler_context_id_t*>(client_data);
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause)
{
ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing client context");
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API &&
record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
{
ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming client context");
}
}
void
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
@@ -226,6 +254,58 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
_mutex.unlock();
}
std::vector<uint32_t>
tool_control_init(rocprofiler_context_id_t& primary_ctx)
{
struct RoctxOperations
{
std::vector<uint32_t> core = {};
std::vector<uint32_t> cntrl = {};
};
auto roctx_ops = RoctxOperations();
// get all the operations for ROCPROFILER_CALLBACK_TRACING_MARKER_API and
// separate them into two arrays; one which contains the pause/resume operations
// and one with everything else
ROCPROFILER_CALL(
rocprofiler_iterate_callback_tracing_kind_operations(
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
[](rocprofiler_callback_tracing_kind_t, uint32_t operation_v, void* data_v) {
auto* roctx_ops_v = static_cast<RoctxOperations*>(data_v);
if(operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerPause ||
operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerResume)
roctx_ops_v->cntrl.emplace_back(operation_v);
else
roctx_ops_v->core.emplace_back(operation_v);
return 0;
},
&roctx_ops),
"iterating callback tracing kind operations");
// Create a specialized (throw-away) context for handling ROCTx profiler pause and resume.
// A separate context is used because if the context that is associated with roctxProfilerPause
// disabled that same context, a call to roctxProfilerResume would be ignored because the
// context that enables the callback for that API call is disabled.
auto cntrl_ctx = rocprofiler_context_id_t{};
ROCPROFILER_CALL(rocprofiler_create_context(&cntrl_ctx), "control context creation failed");
// enable callback marker tracing with only the pause/resume operations
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(cntrl_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
roctx_ops.cntrl.data(),
roctx_ops.cntrl.size(),
tool_tracing_ctrl_callback,
&primary_ctx),
"callback tracing service failed to configure");
// start the context so that it is always active
ROCPROFILER_CALL(rocprofiler_start_context(cntrl_ctx), "start of control context");
return roctx_ops.core;
}
int
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
@@ -263,6 +343,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation failed");
// enable the control
auto roctx_ops = tool_control_init(client_ctx);
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(client_ctx,
ROCPROFILER_CALLBACK_TRACING_HSA_API,
@@ -281,6 +364,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
tool_data),
"callback tracing service failed to configure");
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(client_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_API,
roctx_ops.data(),
roctx_ops.size(),
tool_tracing_callback,
tool_data),
"callback tracing service failed to configure");
int valid_ctx = 0;
ROCPROFILER_CALL(rocprofiler_context_is_valid(client_ctx, &valid_ctx),
"failure checking context validity");
+31 -5
Dosyayı Görüntüle
@@ -22,7 +22,8 @@
#include "client.hpp"
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include <rocprofiler-sdk-roctx/roctx.h>
#include <chrono>
#include <cstdio>
@@ -65,7 +66,7 @@ verify(int* in, int* out, int M, int N);
} // namespace
__global__ void
transpose_a(int* in, int* out, int M, int N);
transpose_a(const int* in, int* out, int M, int N);
void
run(int rank, int tid, hipStream_t stream, int argc, char** argv);
@@ -76,6 +77,8 @@ main(int argc, char** argv)
client::setup(); // currently does nothing
// client::start(); // currently will fail
auto range_id = roctxRangeStart("main");
int rank = 0;
for(int i = 1; i < argc; ++i)
{
@@ -114,18 +117,34 @@ main(int argc, char** argv)
{
std::vector<std::thread> _threads{};
std::vector<hipStream_t> _streams(nthreads);
roctxMark("stream creation");
for(size_t i = 0; i < nthreads; ++i)
HIP_API_CALL(hipStreamCreate(&_streams.at(i)));
roctxMark("thread creation");
for(size_t i = 1; i < nthreads; ++i)
_threads.emplace_back(run, rank, i, _streams.at(i), argc, argv);
run(rank, 0, _streams.at(0), argc, argv);
roctxMark("thread sync");
for(auto& itr : _threads)
itr.join();
roctxMark("stream destroy");
for(size_t i = 0; i < nthreads; ++i)
HIP_API_CALL(hipStreamDestroy(_streams.at(i)));
}
HIP_API_CALL(hipDeviceSynchronize());
auto tid = roctx_thread_id_t{};
// get the thread id recognized by rocprofiler-sdk from roctx
roctxGetThreadId(&tid);
// pause API tracing
roctxProfilerPause(tid);
// would not expect below to show up in profiler (depends on tool)
HIP_API_CALL(hipDeviceReset());
// resume API tracing
roctxProfilerResume(tid);
roctxRangeStop(range_id);
client::stop();
client::shutdown();
@@ -134,7 +153,7 @@ main(int argc, char** argv)
}
__global__ void
transpose_a(int* in, int* out, int M, int N)
transpose_a(const int* in, int* out, int M, int N)
{
__shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim];
@@ -148,6 +167,10 @@ transpose_a(int* in, int* out, int M, int N)
void
run(int rank, int tid, hipStream_t stream, int argc, char** argv)
{
auto run_name = std::stringstream{};
run_name << __FUNCTION__ << "(" << rank << ", " << tid << ")";
roctxRangePush(run_name.str().c_str());
unsigned int M = 4960 * 2;
unsigned int N = 4960 * 2;
if(argc > 2) nitr = atoll(argv[2]);
@@ -157,8 +180,9 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
std::cout << "[" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl;
_lk.unlock();
std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)};
std::uniform_int_distribution<int> _dist{0, 1000};
auto _seed = std::random_device{}() * (rank + 1) * (tid + 1);
auto _engine = std::default_random_engine{_seed};
auto _dist = std::uniform_int_distribution<int>{0, 1000};
size_t size = sizeof(int) * M * N;
int* inp_matrix = new int[size];
@@ -210,6 +234,8 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
delete[] inp_matrix;
delete[] out_matrix;
roctxRangePop();
}
namespace
+1 -1
Dosyayı Görüntüle
@@ -31,7 +31,7 @@ add_library(code-object-tracing-client SHARED)
target_sources(code-object-tracing-client PRIVATE client.cpp)
target_link_libraries(
code-object-tracing-client
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
rocprofiler::samples-common-library)
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
+2 -2
Dosyayı Görüntüle
@@ -32,7 +32,7 @@ target_sources(counter-collection-buffer-client PRIVATE client.cpp client.hpp)
target_link_libraries(
counter-collection-buffer-client
PUBLIC rocprofiler::samples-build-flags
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library)
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library)
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
add_executable(counter-collection-buffer)
@@ -60,7 +60,7 @@ target_sources(counter-collection-functional-counter-client
target_link_libraries(
counter-collection-functional-counter-client
PUBLIC rocprofiler::samples-build-flags
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library)
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library)
add_executable(counter-collection-print-functional-counters)
target_sources(counter-collection-print-functional-counters PRIVATE main.cpp)
+17 -15
Dosyayı Görüntüle
@@ -157,20 +157,22 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
std::vector<rocprofiler_counter_id_t> gpu_counters;
// Iterate through the agents and get the counters available on that agent
ROCPROFILER_CALL(
rocprofiler_iterate_agent_supported_counters(
*agent,
[](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) {
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
vec->push_back(counters[i]);
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&gpu_counters)),
"Could not fetch supported counters");
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
agent->id,
[](rocprofiler_agent_id_t,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data) {
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
vec->push_back(counters[i]);
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&gpu_counters)),
"Could not fetch supported counters");
std::vector<rocprofiler_counter_id_t> collect_counters;
// Look for the counters contained in counters_to_collect in gpu_counters
@@ -190,7 +192,7 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
// Create a colleciton profile for the counters
rocprofiler_profile_config_id_t profile;
ROCPROFILER_CALL(rocprofiler_create_profile_config(
*agent, collect_counters.data(), collect_counters.size(), &profile),
agent->id, collect_counters.data(), collect_counters.size(), &profile),
"Could not construct profile cfg");
profile_cache.emplace(agent->id.handle, profile);
+18 -16
Dosyayı Görüntüle
@@ -127,25 +127,27 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
if(cap.expected.empty())
{
std::vector<rocprofiler_counter_id_t> counters_needed;
ROCPROFILER_CALL(
rocprofiler_iterate_agent_supported_counters(
*agent,
[](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) {
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
vec->push_back(counters[i]);
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&counters_needed)),
"Could not fetch supported counters");
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
agent->id,
[](rocprofiler_agent_id_t,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data) {
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
vec->push_back(counters[i]);
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&counters_needed)),
"Could not fetch supported counters");
for(auto& found_counter : counters_needed)
{
size_t expected = 0;
rocprofiler_query_counter_instance_count(*agent, found_counter, &expected);
rocprofiler_query_counter_instance_count(agent->id, found_counter, &expected);
cap.remaining.push_back(found_counter);
cap.expected.emplace(found_counter.handle, expected);
const char* name;
@@ -165,7 +167,7 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/,
// Select the next counter to collect.
ROCPROFILER_CALL(
rocprofiler_create_profile_config(*agent, &(cap.remaining.back()), 1, &profile),
rocprofiler_create_profile_config(agent->id, &(cap.remaining.back()), 1, &profile),
"Could not construct profile cfg");
cap.remaining.pop_back();
+1 -1
Dosyayı Görüntüle
@@ -31,7 +31,7 @@ add_library(intercept-table-client SHARED)
target_sources(intercept-table-client PRIVATE client.cpp client.hpp)
target_link_libraries(
intercept-table-client
PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
rocprofiler::samples-common-library)
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
+4 -3
Dosyayı Görüntüle
@@ -282,9 +282,10 @@ rocprofiler_configure(uint32_t version,
client_tool_data->emplace_back(
client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
rocprofiler_at_runtime_api_registration(client::api_registration_callback,
ROCPROFILER_HSA_LIBRARY,
static_cast<void*>(client_tool_data));
ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration(client::api_registration_callback,
ROCPROFILER_HSA_LIBRARY,
static_cast<void*>(client_tool_data)),
"runtime api registration");
// create configure data
static auto cfg =
+12 -10
Dosyayı Görüntüle
@@ -40,25 +40,27 @@ function(DOCS_EXECUTE_PROCESS)
endif()
endfunction()
if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda)
set(CONDA_ROOT ${PROJECT_BINARY_DIR}/external/miniconda)
if(NOT EXISTS ${CONDA_ROOT})
docs_execute_process(${SHELL_CMD} ${PROJECT_BINARY_DIR}/external/miniconda.sh -b -p
${PROJECT_BINARY_DIR}/external/miniconda)
docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda config --set
always_yes yes)
docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda update -c
defaults -n base conda)
${CONDA_ROOT})
docs_execute_process(${CONDA_ROOT}/bin/conda config --set always_yes yes)
docs_execute_process(${CONDA_ROOT}/bin/conda update -c defaults -n base conda)
endif()
if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda/envs/rocprofiler-docs)
docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda env create -n
rocprofiler-docs -f ${CMAKE_CURRENT_LIST_DIR}/environment.yml)
if(NOT EXISTS ${CONDA_ROOT}/envs/rocprofiler-docs)
docs_execute_process(${CONDA_ROOT}/bin/conda env create -n rocprofiler-docs -f
${CMAKE_CURRENT_LIST_DIR}/environment.yml)
docs_execute_process(${CONDA_ROOT}/envs/rocprofiler-docs/bin/python -m pip install -r
${CMAKE_CURRENT_LIST_DIR}/requirements.txt)
endif()
file(
WRITE "${CMAKE_CURRENT_BINARY_DIR}/build-docs.sh"
"#!${SHELL_CMD} -e
PATH=${PROJECT_BINARY_DIR}/external/miniconda/bin:\${PATH}
PATH=${CONDA_ROOT}/bin:\${PATH}
export PATH
source activate
+1
Dosyayı Görüntüle
@@ -0,0 +1 @@
doxysphinx
+6 -4
Dosyayı Görüntüle
@@ -103,7 +103,7 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char**
* instance counting information. The reason for this restriction is that HSA
* is not yet loaded on tool_init.
*
* @param [in] agent rocprofiler agent
* @param [in] agent_id rocprofiler agent identifier
* @param [in] counter_id counter id (obtained from iterate_agent_supported_counters)
* @param [out] instance_count number of instances the counter has
* @return ::rocprofiler_status_t
@@ -111,7 +111,7 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char**
* @retval ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND if counter not found
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t agent_id,
rocprofiler_counter_id_t counter_id,
size_t* instance_count) ROCPROFILER_NONNULL(3);
@@ -119,6 +119,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
* @brief Callback that gives a list of counters available on an agent. The
* counters variable is owned by rocprofiler and should not be free'd.
*
* @param [in] agent_id Agent ID of the current callback
* @param [in] counters An array of counters that are avialable on the agent
* @ref rocprofiler_iterate_agent_supported_counters was called on.
* @param [in] num_counters Number of counters contained in counters
@@ -126,6 +127,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
* @ref rocprofiler_iterate_agent_supported_counters
*/
typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)(
rocprofiler_agent_id_t agent_id,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data);
@@ -133,7 +135,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)(
/**
* @brief Query Agent Counters Availability.
*
* @param [in] agent GPU agent
* @param [in] agent_id GPU agent identifier
* @param [in] cb callback to caller to get counters
* @param [in] user_data data to pass into the callback
* @return ::rocprofiler_status_t
@@ -141,7 +143,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)(
* @retval ROCPROFILER_STATUS_ERROR if no counters found for agent
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t agent,
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_id_t agent_id,
rocprofiler_available_counters_cb_t cb,
void* user_data) ROCPROFILER_NONNULL(2);
+2 -2
Dosyayı Görüntüle
@@ -42,7 +42,7 @@ ROCPROFILER_EXTERN_C_INIT
* counters for an agent can be queried using
* @ref rocprofiler_iterate_agent_supported_counters.
*
* @param [in] agent Agent identifier
* @param [in] agent_id Agent identifier
* @param [in] counters_list List of GPU counters
* @param [in] counters_count Size of counters list
* @param [out] config_id Identifier for GPU counters group
@@ -52,7 +52,7 @@ ROCPROFILER_EXTERN_C_INIT
*
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_create_profile_config(rocprofiler_agent_t agent,
rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id,
rocprofiler_counter_id_t* counters_list,
size_t counters_count,
rocprofiler_profile_config_id_t* config_id)
+65 -39
Dosyayı Görüntüle
@@ -31,10 +31,11 @@
#include "lib/common/logging.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "rocprofiler-sdk/marker/api_id.h"
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/internal_threading.h>
#include <rocprofiler-sdk/marker/api_id.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <glog/logging.h>
@@ -229,10 +230,17 @@ get_client_ctx()
void
flush()
{
LOG(INFO) << "flushing buffers...";
for(auto itr : get_buffers().as_array())
{
if(itr.handle > 0) ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
if(itr.handle > 0)
{
LOG(INFO) << "flushing buffer " << itr.handle;
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
}
}
LOG(INFO) << "Buffers flushed";
}
void
@@ -634,36 +642,38 @@ get_agent_profile(const rocprofiler_agent_t* agent)
},
[agent, &profile](agent_counter_map_t& data_v) {
auto counters_v = counter_vec_t{};
ROCPROFILER_CALL(
rocprofiler_iterate_agent_supported_counters(
*agent,
[](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) {
auto* vec = static_cast<counter_vec_t*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
const char* name = nullptr;
size_t len = 0;
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
agent->id,
[](rocprofiler_agent_id_t,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data) {
auto* vec = static_cast<counter_vec_t*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
const char* name = nullptr;
size_t len = 0;
ROCPROFILER_CALL(
rocprofiler_query_counter_name(counters[i], &name, &len),
"Could not query name");
ROCPROFILER_CALL(rocprofiler_query_counter_name(
counters[i], &name, &len),
"Could not query name");
if(name && len > 0)
{
if(tool::get_config().counters.count(name) > 0)
vec->emplace_back(counters[i]);
}
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&counters_v)),
"iterate agent supported counters");
if(name && len > 0)
{
if(tool::get_config().counters.count(name) > 0)
vec->emplace_back(counters[i]);
}
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&counters_v)),
"iterate agent supported counters");
if(!counters_v.empty())
{
auto profile_v = rocprofiler_profile_config_id_t{};
ROCPROFILER_CALL(rocprofiler_create_profile_config(
*agent, counters_v.data(), counters_v.size(), &profile_v),
agent->id, counters_v.data(), counters_v.size(), &profile_v),
"Could not construct profile cfg");
profile = profile_v;
}
@@ -734,6 +744,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
client_finalizer = fini_func;
constexpr uint64_t buffer_size = 4096;
constexpr uint64_t buffer_watermark = 4096;
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed");
auto code_obj_ctx = rocprofiler_context_id_t{};
@@ -794,8 +807,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().kernel_trace)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
4096,
2048,
buffer_size,
buffer_watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_tracing_callback,
tool_data,
@@ -814,8 +827,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().memory_copy_trace)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
4096,
2048,
buffer_size,
buffer_watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_tracing_callback,
nullptr,
@@ -834,8 +847,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().hsa_api_trace)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
4096,
2048,
buffer_size,
buffer_watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_tracing_callback,
tool_data,
@@ -854,8 +867,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().hip_api_trace || tool::get_config().hip_compiler_api_trace)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
4096,
2048,
buffer_size,
buffer_watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_tracing_callback,
tool_data,
@@ -888,8 +901,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().counter_collection)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
4096,
2048,
buffer_size,
buffer_watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_tracing_callback,
nullptr,
@@ -902,11 +915,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
"Could not setup buffered service");
}
ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed");
for(auto itr : get_buffers().as_array())
{
if(itr.handle > 0)
{
auto cb_thread = rocprofiler_callback_thread_t{};
std::atexit([]() {
if(client_finalizer && client_identifier) client_finalizer(*client_identifier);
});
LOG(INFO) << "creating dedicated callback thread for buffer " << itr.handle;
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_thread),
"creating callback thread");
LOG(INFO) << "assigning buffer " << itr.handle << " to callback thread "
<< cb_thread.handle;
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr, cb_thread),
"assigning callback thread");
}
}
ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed");
return 0;
}
-318
Dosyayı Görüntüle
@@ -1,318 +0,0 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <atomic>
#include <cassert>
#include <condition_variable>
#include <functional>
#include <future>
#include <iostream>
#include <list>
#include <mutex>
#include <optional>
#include <sstream>
#include <string>
#include <thread>
class TraceBufferBase
{
public:
static void FlushAll()
{
std::lock_guard lock(mutex_);
for(auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_)
trace_buffer->Flush();
}
static void Register(TraceBufferBase* elem)
{
std::lock_guard lock(mutex_);
auto** prev_ptr = &head_;
while(*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_)
prev_ptr = &(*prev_ptr)->next_;
elem->next_ = *prev_ptr;
*prev_ptr = elem;
}
static void Unregister(TraceBufferBase* elem)
{
std::lock_guard lock(mutex_);
auto** prev_ptr = &head_;
while(*prev_ptr != nullptr && *prev_ptr != elem)
prev_ptr = &(*prev_ptr)->next_;
assert(*prev_ptr != nullptr && "elem is not in the list");
*prev_ptr = elem->next_;
}
TraceBufferBase(std::string name, int priority)
: name_(std::move(name))
, priority_(priority)
, next_(nullptr)
{}
TraceBufferBase(const TraceBufferBase&) = delete;
TraceBufferBase& operator=(const TraceBufferBase&) = delete;
virtual ~TraceBufferBase() { Unregister(this); }
virtual void Flush() = 0;
std::string name() && { return std::move(name_); }
const std::string& name() const& { return name_; }
private:
const std::string name_;
const int priority_;
TraceBufferBase* next_;
static TraceBufferBase* head_;
static std::mutex mutex_;
};
enum TraceEntryState
{
TRACE_ENTRY_INVALID = 0,
TRACE_ENTRY_INIT = 1,
TRACE_ENTRY_COMPLETE = 2
};
template <typename Entry, typename Allocator = std::allocator<Entry>>
class TraceBuffer : protected TraceBufferBase
{
public:
using callback_t = std::function<void(Entry*)>;
TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0)
: TraceBufferBase(std::move(name), priority)
, flush_callback_(std::move(flush_callback))
, size_(size)
{
assert(size_ != 0 && "cannot create an empty trace buffer");
Entry* write_buffer = allocator_.allocate(size_);
assert(write_buffer != nullptr);
buffer_list_.push_back(write_buffer);
read_index_ = 0;
write_index_ = {0, write_buffer};
AllocateFreeBuffer();
// Add this instance to the link list of all trace buffers in the process.
Register(this);
}
~TraceBuffer() override
{
// Flush the remaining records. After flushing, there should not be any records left in the
// trace buffer.
Flush();
assert(read_index_ == write_index_.load().index);
// Acquire both the writer and worker lock as we are accessing shared variables they
// protect.
std::unique_lock writer_lock(write_mutex_, std::defer_lock);
std::unique_lock worker_lock(worker_mutex_, std::defer_lock);
std::lock(writer_lock, worker_lock);
// Deallocate the buffers.
allocator_.deallocate(write_index_.load().buffer, size_);
allocator_.deallocate(free_buffer_, size_);
// Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional
// after waking up, and exits if it does not have a value.
if(worker_thread_)
{
std::thread worker_thread = std::move(worker_thread_.value());
{
// Tell the worker thread loop to exit.
worker_thread_.reset();
free_buffer_ = nullptr;
worker_cond_.notify_one();
}
// Release the worker lock to allow the worker thread to exit.
worker_lock.unlock();
worker_thread.join();
}
}
// Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
// monotonically increasing indices, with read_pointer % size always indexing inside the first
// buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
// the next invocation after changing its state to 'complete'.
void Flush() override
{
std::lock_guard lock(write_mutex_);
auto write_index = write_index_.load(std::memory_order_relaxed);
for(auto it = buffer_list_.begin(); it != buffer_list_.end();)
{
auto end_of_buffer = read_index_ - read_index_ % size_ + size_;
while(read_index_ < std::min(write_index.index, end_of_buffer))
{
Entry* entry = &(*it)[read_index_ % size_];
// The entry is not yet complete, stop flushing here.
if(entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
flush_callback_(entry);
entry->~Entry();
++read_index_;
}
// The buffer is still in use or the read pointer did not reach the end of the buffer.
if(*it == write_index.buffer || read_index_ != end_of_buffer) return;
// All entries in the current buffer are now processed. Destroy the buffer and move onto
// the next buffer in the list.
allocator_.deallocate(*it, size_);
it = buffer_list_.erase(it);
}
}
template <typename... Args>
Entry& Emplace(Args... args)
{
return *new(GetEntry()) Entry(std::forward<Args>(args)...);
}
private:
Entry* GetEntry()
{
auto current = write_index_.load(std::memory_order_relaxed);
while(true)
{
// If the pointer is at the end of the current buffer, switch to the available free
// buffer and notify the worker thread to allocate a new buffer.
if(current.index != 0 && current.index % size_ == 0)
{
std::lock_guard lock(write_mutex_);
// If the worker thread wasn't already started, start it now. This avoids starting a
// new thread when the trace buffer is created.
if(!worker_thread_)
{
std::promise<void> ready;
auto future = ready.get_future();
{
std::lock_guard worker_lock(worker_mutex_);
worker_thread_.emplace(
&TraceBuffer::WorkerThreadLoop, this, std::move(ready));
}
future.wait();
}
// Re-check the pointer overflow under the writer lock, another thread could have
// beaten us to it and already bumped the write_index_.
current = write_index_.load(std::memory_order_relaxed);
if(current.index % size_ == 0)
{
std::unique_lock worker_lock(worker_mutex_);
// Wait for the free buffer to become available.
worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; });
current.buffer = free_buffer_;
buffer_list_.push_back(current.buffer);
write_index_.store({current.index + 1, current.buffer},
std::memory_order_relaxed);
// Tell the worker thread to allocate a new free buffer.
free_buffer_ = nullptr;
worker_cond_.notify_one();
// We successfully allocated a new buffer, return the first element.
return &current.buffer[0];
}
}
if(write_index_.compare_exchange_weak(
current, {current.index + 1, current.buffer}, std::memory_order_relaxed))
return &current.buffer[current.index % size_];
}
}
void AllocateFreeBuffer()
{
assert(free_buffer_ == nullptr);
free_buffer_ = allocator_.allocate(size_);
assert(free_buffer_ != nullptr);
for(size_t i = 0; i < size_; ++i)
free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed);
}
void WorkerThreadLoop(std::promise<void> ready)
{
std::unique_lock lock(worker_mutex_);
// This worker thread is now ready to accept work.
ready.set_value();
while(true)
{
worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; });
if(!worker_thread_) break;
AllocateFreeBuffer();
worker_cond_.notify_one();
}
}
// The WriteIndex is used to store both the index and the buffer associated with that index (the
// buffer contains the trace buffer records at [index - index % size, index - index % size_t +
// size_ - 1]) in a single atomic variable.
struct WriteIndex
{
uint64_t index;
Entry* buffer;
};
const callback_t flush_callback_;
const uint64_t size_;
uint64_t read_index_; // The index of the next record to flush.
std::atomic<WriteIndex> write_index_; // The index of the next record that could be written.
Entry* free_buffer_{nullptr}; // The next available free buffer.
std::optional<std::thread> worker_thread_;
std::mutex worker_mutex_;
std::condition_variable worker_cond_;
std::mutex write_mutex_;
std::list<Entry*> buffer_list_;
Allocator allocator_;
};
#define TRACE_BUFFER_INSTANTIATE() \
TraceBufferBase* TraceBufferBase::head_ = nullptr; \
std::mutex TraceBufferBase::mutex_;
+10
Dosyayı Görüntüle
@@ -671,6 +671,16 @@ get_agents()
return pointers;
}
const rocprofiler_agent_t*
get_agent(rocprofiler_agent_id_t id)
{
for(const auto& itr : get_agents())
{
if(itr && itr->id.handle == id.handle) return itr;
}
return nullptr;
}
void
construct_agent_cache(::HsaApiTable* table)
{
+3
Dosyayı Görüntüle
@@ -39,6 +39,9 @@ namespace agent
std::vector<const rocprofiler_agent_t*>
get_agents();
const rocprofiler_agent_t*
get_agent(rocprofiler_agent_id_t id);
void
construct_agent_cache(::HsaApiTable* table);
+6
Dosyayı Görüntüle
@@ -96,6 +96,12 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
static auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,
ROCPROFILER_BUFFER_TRACING_EXTERNAL_CORRELATION};
if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
+4
Dosyayı Görüntüle
@@ -92,6 +92,10 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
static auto unsupported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH};
if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
+21 -22
Dosyayı Görüntüle
@@ -26,6 +26,7 @@
#include <fmt/core.h>
#include "lib/common/synchronized.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
@@ -43,7 +44,7 @@ extern "C" {
* @param [out] size
* @return ::rocprofiler_status_t
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_status_t
rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** name, size_t* size)
{
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getMetricIdMap());
@@ -71,11 +72,16 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char**
* @param [out] instance_count number of instances the counter has
* @return rocprofiler_status_t
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
rocprofiler_status_t
rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t agent_id,
rocprofiler_counter_id_t counter_id,
size_t* instance_count)
{
const rocprofiler_agent_t* agent = rocprofiler::agent::get_agent(agent_id);
if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
if(agent->type != ROCPROFILER_AGENT_TYPE_GPU) return ROCPROFILER_STATUS_ERROR;
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getMetricIdMap());
const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle);
if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
@@ -93,19 +99,9 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
// For derived metrics, this can be more than one counter. In that case,
// we return the maximum instance count among all underlying counters.
auto req_counters = rocprofiler::counters::get_required_hardware_counters(
rocprofiler::counters::get_ast_map(), std::string(agent.name), *metric_ptr);
rocprofiler::counters::get_ast_map(), std::string(agent->name), *metric_ptr);
if(!req_counters) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
// NOTE: to look up instance information, we require HSA be init'd. Reason
// for this is the call to get instance information is an HSA call.
const auto* maybe_agent = rocprofiler::common::get_val(
rocprofiler::hsa::get_queue_controller().get_supported_agents(), agent.id.handle);
if(!maybe_agent)
{
LOG(ERROR) << "HSA must be loaded to obtain instance information.";
return ROCPROFILER_STATUS_ERROR;
}
for(const auto& counter : *req_counters)
{
if(!counter.special().empty())
@@ -116,7 +112,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
try
{
auto dims = rocprofiler::counters::getBlockDimensions(maybe_agent->name(), counter);
auto dims = rocprofiler::counters::getBlockDimensions(agent->name, counter);
for(const auto& dim : dims)
{
*instance_count = std::max(static_cast<size_t>(dim.size()), *instance_count);
@@ -138,12 +134,15 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
* @param [out] counters_count
* @return ::rocprofiler_status_t
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t agent,
rocprofiler_status_t
rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_id_t agent_id,
rocprofiler_available_counters_cb_t cb,
void* user_data)
{
auto metrics = rocprofiler::counters::getMetricsForAgent(agent.name);
const auto* agent = rocprofiler::agent::get_agent(agent_id);
if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
auto metrics = rocprofiler::counters::getMetricsForAgent(agent->name);
std::vector<rocprofiler_counter_id_t> ids;
ids.reserve(metrics.size());
for(const auto& metric : metrics)
@@ -151,7 +150,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t
ids.push_back({.handle = metric.id()});
}
return cb(ids.data(), ids.size(), user_data);
return cb(agent_id, ids.data(), ids.size(), user_data);
}
/**
@@ -161,7 +160,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t
* @param [out] counter_id counter id associated with the record
* @return ::rocprofiler_status_t
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_status_t
rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id,
rocprofiler_counter_id_t* counter_id)
{
@@ -170,7 +169,7 @@ rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id,
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t ROCPROFILER_API
rocprofiler_status_t
rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t id,
rocprofiler_counter_dimension_id_t dim,
size_t* pos)
@@ -180,7 +179,7 @@ rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t i
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t ROCPROFILER_API
rocprofiler_status_t
rocprofiler_query_record_dimension_info(rocprofiler_counter_id_t,
rocprofiler_counter_dimension_id_t dim,
rocprofiler_record_dimension_info_t* info)
+2 -2
Dosyayı Görüntüle
@@ -183,7 +183,7 @@ queue_cb(const std::shared_ptr<counter_callback_info>& info,
if(prof_config->reqired_hw_counters.empty())
{
auto& config = *prof_config;
auto agent_name = std::string(config.agent.name);
auto agent_name = std::string(config.agent->name);
for(const auto& metric : config.metrics)
{
auto req_counters =
@@ -273,7 +273,7 @@ completed_cb(const std::shared_ptr<counter_callback_info>& info,
auto decoded_pkt = EvaluateAST::read_pkt(prof_config->pkt_generator.get(), *pkt);
EvaluateAST::read_special_counters(
prof_config->agent, prof_config->required_special_counters, decoded_pkt);
*prof_config->agent, prof_config->required_special_counters, decoded_pkt);
prof_config->packets.wlock([&](auto& pkt_vector) {
if(pkt)
+1 -1
Dosyayı Görüntüle
@@ -45,7 +45,7 @@ namespace counters
// This profile can be shared among many rocprof contexts.
struct profile_config
{
rocprofiler_agent_t agent{};
const rocprofiler_agent_t* agent = nullptr;
std::vector<counters::Metric> metrics{};
// HW counters that must be collected to compute the above
// metrics (derived metrics are broken down into hw counters
+9 -4
Dosyayı Görüntüle
@@ -25,11 +25,13 @@
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
#include "lib/rocprofiler-sdk/counters/core.hpp"
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
#include "rocprofiler-sdk/fwd.h"
extern "C" {
/**
@@ -41,12 +43,15 @@ extern "C" {
* @param [out] config_id Identifier for GPU counters group
* @return ::rocprofiler_status_t
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_create_profile_config(rocprofiler_agent_t agent,
rocprofiler_status_t
rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id,
rocprofiler_counter_id_t* counters_list,
size_t counters_count,
rocprofiler_profile_config_id_t* config_id)
{
const auto* agent = ::rocprofiler::agent::get_agent(agent_id);
if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND;
std::shared_ptr<rocprofiler::counters::profile_config> config =
std::make_shared<rocprofiler::counters::profile_config>();
@@ -57,7 +62,7 @@ rocprofiler_create_profile_config(rocprofiler_agent_t agent,
const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle);
if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
if(!rocprofiler::counters::checkValidMetric(std::string(agent.name), *metric_ptr))
if(!rocprofiler::counters::checkValidMetric(std::string(agent->name), *metric_ptr))
{
return ROCPROFILER_STATUS_ERROR_METRIC_NOT_VALID_FOR_AGENT;
}
@@ -70,7 +75,7 @@ rocprofiler_create_profile_config(rocprofiler_agent_t agent,
return ROCPROFILER_STATUS_SUCCESS;
}
rocprofiler_status_t ROCPROFILER_API
rocprofiler_status_t
rocprofiler_destroy_profile_config(rocprofiler_profile_config_id_t config_id)
{
rocprofiler::counters::destroy_counter_profile(config_id.handle);
+1 -1
Dosyayı Görüntüle
@@ -100,7 +100,7 @@ def generate_custom(args, cmake_args, ctest_args):
".*/counters/parser/.*",
]
if args.coverage == "samples":
codecov_exclude += [".*/lib/common/.*"]
codecov_exclude += [".*/lib/common/.*", ".*/lib/rocprofiler-sdk-tool/.*"]
COVERAGE_EXCLUDE = ";".join(codecov_exclude)
+12 -6
Dosyayı Görüntüle
@@ -20,8 +20,16 @@ add_test(
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(tracing-env "${PRELOAD_ENV}"
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
set(LOG_LEVEL "warning") # info produces memory leak
else()
set(LOG_LEVEL "info")
endif()
set(tracing-env
"${PRELOAD_ENV}" "ROCPROF_LOG_LEVEL=${LOG_LEVEL}"
"ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}"
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
set_tests_properties(rocprofv3-test-trace-execute PROPERTIES LABELS "integration-tests"
ENVIRONMENT "${tracing-env}")
@@ -44,9 +52,9 @@ add_test(
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv)
set(VALIDATION_FILES
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_hsa_api_trace.csv
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_kernel_trace.csv
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv)
set_tests_properties(
@@ -58,8 +66,6 @@ set_tests_properties(
DEPENDS
rocprofv3-test-trace-execute
FAIL_REGULAR_EXPRESSION
"AssertionError"
REQUIRED_FILES
"${VALIDATION_FILES}"
"AssertionError|HSA_API|HIP_API|MARKER_API|KERNEL_DISPATCH|CODE_OBJECT"
ATTACHED_FILES_ON_FAIL
"${VALIDATION_FILES}")