diff --git a/VERSION b/VERSION index 77d6f4ca23..6e8bf73aa5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.0 +0.1.0 diff --git a/samples/api_buffered_tracing/CMakeLists.txt b/samples/api_buffered_tracing/CMakeLists.txt index bf1913a02b..e3cf49a342 100644 --- a/samples/api_buffered_tracing/CMakeLists.txt +++ b/samples/api_buffered_tracing/CMakeLists.txt @@ -31,7 +31,7 @@ add_library(buffered-api-tracing-client SHARED) target_sources(buffered-api-tracing-client PRIVATE client.cpp client.hpp) target_link_libraries( buffered-api-tracing-client - PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) diff --git a/samples/api_callback_tracing/CMakeLists.txt b/samples/api_callback_tracing/CMakeLists.txt index 0e794e662a..65c652e2f7 100644 --- a/samples/api_callback_tracing/CMakeLists.txt +++ b/samples/api_callback_tracing/CMakeLists.txt @@ -31,28 +31,30 @@ add_library(callback-api-tracing-client SHARED) target_sources(callback-api-tracing-client PRIVATE client.cpp client.hpp) target_link_libraries( callback-api-tracing-client - PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) + find_package(Threads REQUIRED) +find_package(rocprofiler-sdk-roctx REQUIRED) add_executable(callback-api-tracing) target_sources(callback-api-tracing PRIVATE main.cpp) target_link_libraries( - callback-api-tracing PRIVATE callback-api-tracing-client Threads::Threads - rocprofiler::samples-build-flags) + callback-api-tracing + PRIVATE callback-api-tracing-client Threads::Threads + rocprofiler-sdk-roctx::rocprofiler-sdk-roctx rocprofiler::samples-build-flags) add_test(NAME callback-api-tracing COMMAND $) +set(callback-api-tracing-env + ${ROCPROFILER_MEMCHECK_PRELOAD_ENV} + "HSA_TOOLS_LIB=$" + "LD_LIBRARY_PATH=$:$ENV{LD_LIBRARY_PATH}" + ) + set_tests_properties( callback-api-tracing - PROPERTIES - TIMEOUT - 45 - LABELS - "samples" - ENVIRONMENT - "${ROCPROFILER_MEMCHECK_PRELOAD_ENV};HSA_TOOLS_LIB=$" - FAIL_REGULAR_EXPRESSION - "threw an exception") + PROPERTIES TIMEOUT 45 LABELS "samples" ENVIRONMENT "${callback-api-tracing-env}" + FAIL_REGULAR_EXPRESSION "threw an exception") diff --git a/samples/api_callback_tracing/client.cpp b/samples/api_callback_tracing/client.cpp index 5fe963ba84..c2e70a712d 100644 --- a/samples/api_callback_tracing/client.cpp +++ b/samples/api_callback_tracing/client.cpp @@ -33,6 +33,9 @@ #include "client.hpp" +#include +#include +#include #include #include @@ -54,6 +57,7 @@ #include #include #include +#include #include namespace client { @@ -129,6 +133,11 @@ print_call_stack(const call_stack_t& _call_stack) callback_name_info get_callback_id_names() { + static auto supported = std::unordered_set{ + ROCPROFILER_CALLBACK_TRACING_HSA_API, + ROCPROFILER_CALLBACK_TRACING_HIP_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_API}; + auto cb_name_info = callback_name_info{}; // // callback for each kind operation @@ -137,8 +146,7 @@ get_callback_id_names() [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { auto* name_info_v = static_cast(data_v); - if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API || - kindv == ROCPROFILER_CALLBACK_TRACING_HIP_API) + if(supported.count(kindv) > 0) { const char* name = nullptr; ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( @@ -160,8 +168,7 @@ get_callback_id_names() "query callback tracing kind operation name"); if(name) name_info_v->kind_names[kind] = name; - if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API || - kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) + if(supported.count(kind) > 0) { ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations( kind, tracing_kind_operation_cb, static_cast(data)), @@ -177,6 +184,27 @@ get_callback_id_names() return cb_name_info; } +void +tool_tracing_ctrl_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t*, + void* client_data) +{ + auto* ctx = static_cast(client_data); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER && + record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API && + record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause) + { + ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing client context"); + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && + record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API && + record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume) + { + ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming client context"); + } +} + void tool_tracing_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, @@ -226,6 +254,58 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, _mutex.unlock(); } +std::vector +tool_control_init(rocprofiler_context_id_t& primary_ctx) +{ + struct RoctxOperations + { + std::vector core = {}; + std::vector cntrl = {}; + }; + + auto roctx_ops = RoctxOperations(); + + // get all the operations for ROCPROFILER_CALLBACK_TRACING_MARKER_API and + // separate them into two arrays; one which contains the pause/resume operations + // and one with everything else + ROCPROFILER_CALL( + rocprofiler_iterate_callback_tracing_kind_operations( + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + [](rocprofiler_callback_tracing_kind_t, uint32_t operation_v, void* data_v) { + auto* roctx_ops_v = static_cast(data_v); + if(operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerPause || + operation_v == ROCPROFILER_MARKER_API_ID_roctxProfilerResume) + roctx_ops_v->cntrl.emplace_back(operation_v); + else + roctx_ops_v->core.emplace_back(operation_v); + return 0; + }, + &roctx_ops), + "iterating callback tracing kind operations"); + + // Create a specialized (throw-away) context for handling ROCTx profiler pause and resume. + // A separate context is used because if the context that is associated with roctxProfilerPause + // disabled that same context, a call to roctxProfilerResume would be ignored because the + // context that enables the callback for that API call is disabled. + auto cntrl_ctx = rocprofiler_context_id_t{}; + ROCPROFILER_CALL(rocprofiler_create_context(&cntrl_ctx), "control context creation failed"); + + // enable callback marker tracing with only the pause/resume operations + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(cntrl_ctx, + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + roctx_ops.cntrl.data(), + roctx_ops.cntrl.size(), + tool_tracing_ctrl_callback, + &primary_ctx), + "callback tracing service failed to configure"); + + // start the context so that it is always active + ROCPROFILER_CALL(rocprofiler_start_context(cntrl_ctx), "start of control context"); + + return roctx_ops.core; +} + int tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { @@ -263,6 +343,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation failed"); + // enable the control + auto roctx_ops = tool_control_init(client_ctx); + ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(client_ctx, ROCPROFILER_CALLBACK_TRACING_HSA_API, @@ -281,6 +364,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) tool_data), "callback tracing service failed to configure"); + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(client_ctx, + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + roctx_ops.data(), + roctx_ops.size(), + tool_tracing_callback, + tool_data), + "callback tracing service failed to configure"); + int valid_ctx = 0; ROCPROFILER_CALL(rocprofiler_context_is_valid(client_ctx, &valid_ctx), "failure checking context validity"); diff --git a/samples/api_callback_tracing/main.cpp b/samples/api_callback_tracing/main.cpp index 3c15e0f6d5..99d385263c 100644 --- a/samples/api_callback_tracing/main.cpp +++ b/samples/api_callback_tracing/main.cpp @@ -22,7 +22,8 @@ #include "client.hpp" -#include "hip/hip_runtime.h" +#include +#include #include #include @@ -65,7 +66,7 @@ verify(int* in, int* out, int M, int N); } // namespace __global__ void -transpose_a(int* in, int* out, int M, int N); +transpose_a(const int* in, int* out, int M, int N); void run(int rank, int tid, hipStream_t stream, int argc, char** argv); @@ -76,6 +77,8 @@ main(int argc, char** argv) client::setup(); // currently does nothing // client::start(); // currently will fail + auto range_id = roctxRangeStart("main"); + int rank = 0; for(int i = 1; i < argc; ++i) { @@ -114,18 +117,34 @@ main(int argc, char** argv) { std::vector _threads{}; std::vector _streams(nthreads); + roctxMark("stream creation"); for(size_t i = 0; i < nthreads; ++i) HIP_API_CALL(hipStreamCreate(&_streams.at(i))); + roctxMark("thread creation"); for(size_t i = 1; i < nthreads; ++i) _threads.emplace_back(run, rank, i, _streams.at(i), argc, argv); run(rank, 0, _streams.at(0), argc, argv); + roctxMark("thread sync"); for(auto& itr : _threads) itr.join(); + roctxMark("stream destroy"); for(size_t i = 0; i < nthreads; ++i) HIP_API_CALL(hipStreamDestroy(_streams.at(i))); } + HIP_API_CALL(hipDeviceSynchronize()); + + auto tid = roctx_thread_id_t{}; + // get the thread id recognized by rocprofiler-sdk from roctx + roctxGetThreadId(&tid); + // pause API tracing + roctxProfilerPause(tid); + // would not expect below to show up in profiler (depends on tool) HIP_API_CALL(hipDeviceReset()); + // resume API tracing + roctxProfilerResume(tid); + + roctxRangeStop(range_id); client::stop(); client::shutdown(); @@ -134,7 +153,7 @@ main(int argc, char** argv) } __global__ void -transpose_a(int* in, int* out, int M, int N) +transpose_a(const int* in, int* out, int M, int N) { __shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim]; @@ -148,6 +167,10 @@ transpose_a(int* in, int* out, int M, int N) void run(int rank, int tid, hipStream_t stream, int argc, char** argv) { + auto run_name = std::stringstream{}; + run_name << __FUNCTION__ << "(" << rank << ", " << tid << ")"; + roctxRangePush(run_name.str().c_str()); + unsigned int M = 4960 * 2; unsigned int N = 4960 * 2; if(argc > 2) nitr = atoll(argv[2]); @@ -157,8 +180,9 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) std::cout << "[" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl; _lk.unlock(); - std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)}; - std::uniform_int_distribution _dist{0, 1000}; + auto _seed = std::random_device{}() * (rank + 1) * (tid + 1); + auto _engine = std::default_random_engine{_seed}; + auto _dist = std::uniform_int_distribution{0, 1000}; size_t size = sizeof(int) * M * N; int* inp_matrix = new int[size]; @@ -210,6 +234,8 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) delete[] inp_matrix; delete[] out_matrix; + + roctxRangePop(); } namespace diff --git a/samples/code_object_tracing/CMakeLists.txt b/samples/code_object_tracing/CMakeLists.txt index 4fdbf787da..5561feee93 100644 --- a/samples/code_object_tracing/CMakeLists.txt +++ b/samples/code_object_tracing/CMakeLists.txt @@ -31,7 +31,7 @@ add_library(code-object-tracing-client SHARED) target_sources(code-object-tracing-client PRIVATE client.cpp) target_link_libraries( code-object-tracing-client - PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) diff --git a/samples/counter_collection/CMakeLists.txt b/samples/counter_collection/CMakeLists.txt index e447bc29e8..59a8987d71 100644 --- a/samples/counter_collection/CMakeLists.txt +++ b/samples/counter_collection/CMakeLists.txt @@ -32,7 +32,7 @@ target_sources(counter-collection-buffer-client PRIVATE client.cpp client.hpp) target_link_libraries( counter-collection-buffer-client PUBLIC rocprofiler::samples-build-flags - PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library) + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) add_executable(counter-collection-buffer) @@ -60,7 +60,7 @@ target_sources(counter-collection-functional-counter-client target_link_libraries( counter-collection-functional-counter-client PUBLIC rocprofiler::samples-build-flags - PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library) + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library) add_executable(counter-collection-print-functional-counters) target_sources(counter-collection-print-functional-counters PRIVATE main.cpp) diff --git a/samples/counter_collection/client.cpp b/samples/counter_collection/client.cpp index e1b516a4ed..df69c8ec89 100644 --- a/samples/counter_collection/client.cpp +++ b/samples/counter_collection/client.cpp @@ -157,20 +157,22 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/, std::vector gpu_counters; // Iterate through the agents and get the counters available on that agent - ROCPROFILER_CALL( - rocprofiler_iterate_agent_supported_counters( - *agent, - [](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) { - std::vector* vec = - static_cast*>(user_data); - for(size_t i = 0; i < num_counters; i++) - { - vec->push_back(counters[i]); - } - return ROCPROFILER_STATUS_SUCCESS; - }, - static_cast(&gpu_counters)), - "Could not fetch supported counters"); + ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( + agent->id, + [](rocprofiler_agent_id_t, + rocprofiler_counter_id_t* counters, + size_t num_counters, + void* user_data) { + std::vector* vec = + static_cast*>(user_data); + for(size_t i = 0; i < num_counters; i++) + { + vec->push_back(counters[i]); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + static_cast(&gpu_counters)), + "Could not fetch supported counters"); std::vector collect_counters; // Look for the counters contained in counters_to_collect in gpu_counters @@ -190,7 +192,7 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/, // Create a colleciton profile for the counters rocprofiler_profile_config_id_t profile; ROCPROFILER_CALL(rocprofiler_create_profile_config( - *agent, collect_counters.data(), collect_counters.size(), &profile), + agent->id, collect_counters.data(), collect_counters.size(), &profile), "Could not construct profile cfg"); profile_cache.emplace(agent->id.handle, profile); diff --git a/samples/counter_collection/print_functional_counters.cpp b/samples/counter_collection/print_functional_counters.cpp index 6ea69db229..297169977a 100644 --- a/samples/counter_collection/print_functional_counters.cpp +++ b/samples/counter_collection/print_functional_counters.cpp @@ -127,25 +127,27 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/, if(cap.expected.empty()) { std::vector counters_needed; - ROCPROFILER_CALL( - rocprofiler_iterate_agent_supported_counters( - *agent, - [](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) { - std::vector* vec = - static_cast*>(user_data); - for(size_t i = 0; i < num_counters; i++) - { - vec->push_back(counters[i]); - } - return ROCPROFILER_STATUS_SUCCESS; - }, - static_cast(&counters_needed)), - "Could not fetch supported counters"); + ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( + agent->id, + [](rocprofiler_agent_id_t, + rocprofiler_counter_id_t* counters, + size_t num_counters, + void* user_data) { + std::vector* vec = + static_cast*>(user_data); + for(size_t i = 0; i < num_counters; i++) + { + vec->push_back(counters[i]); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + static_cast(&counters_needed)), + "Could not fetch supported counters"); for(auto& found_counter : counters_needed) { size_t expected = 0; - rocprofiler_query_counter_instance_count(*agent, found_counter, &expected); + rocprofiler_query_counter_instance_count(agent->id, found_counter, &expected); cap.remaining.push_back(found_counter); cap.expected.emplace(found_counter.handle, expected); const char* name; @@ -165,7 +167,7 @@ dispatch_callback(rocprofiler_queue_id_t /*queue_id*/, // Select the next counter to collect. ROCPROFILER_CALL( - rocprofiler_create_profile_config(*agent, &(cap.remaining.back()), 1, &profile), + rocprofiler_create_profile_config(agent->id, &(cap.remaining.back()), 1, &profile), "Could not construct profile cfg"); cap.remaining.pop_back(); diff --git a/samples/intercept_table/CMakeLists.txt b/samples/intercept_table/CMakeLists.txt index 2f6636f630..e4dd185672 100644 --- a/samples/intercept_table/CMakeLists.txt +++ b/samples/intercept_table/CMakeLists.txt @@ -31,7 +31,7 @@ add_library(intercept-table-client SHARED) target_sources(intercept-table-client PRIVATE client.cpp client.hpp) target_link_libraries( intercept-table-client - PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) diff --git a/samples/intercept_table/client.cpp b/samples/intercept_table/client.cpp index f7c12d92d1..132cc4c8fc 100644 --- a/samples/intercept_table/client.cpp +++ b/samples/intercept_table/client.cpp @@ -282,9 +282,10 @@ rocprofiler_configure(uint32_t version, client_tool_data->emplace_back( client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); - rocprofiler_at_runtime_api_registration(client::api_registration_callback, - ROCPROFILER_HSA_LIBRARY, - static_cast(client_tool_data)); + ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration(client::api_registration_callback, + ROCPROFILER_HSA_LIBRARY, + static_cast(client_tool_data)), + "runtime api registration"); // create configure data static auto cfg = diff --git a/source/docs/CMakeLists.txt b/source/docs/CMakeLists.txt index 68f5571ab9..b58e130613 100644 --- a/source/docs/CMakeLists.txt +++ b/source/docs/CMakeLists.txt @@ -40,25 +40,27 @@ function(DOCS_EXECUTE_PROCESS) endif() endfunction() -if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda) +set(CONDA_ROOT ${PROJECT_BINARY_DIR}/external/miniconda) + +if(NOT EXISTS ${CONDA_ROOT}) docs_execute_process(${SHELL_CMD} ${PROJECT_BINARY_DIR}/external/miniconda.sh -b -p - ${PROJECT_BINARY_DIR}/external/miniconda) - docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda config --set - always_yes yes) - docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda update -c - defaults -n base conda) + ${CONDA_ROOT}) + docs_execute_process(${CONDA_ROOT}/bin/conda config --set always_yes yes) + docs_execute_process(${CONDA_ROOT}/bin/conda update -c defaults -n base conda) endif() -if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda/envs/rocprofiler-docs) - docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda env create -n - rocprofiler-docs -f ${CMAKE_CURRENT_LIST_DIR}/environment.yml) +if(NOT EXISTS ${CONDA_ROOT}/envs/rocprofiler-docs) + docs_execute_process(${CONDA_ROOT}/bin/conda env create -n rocprofiler-docs -f + ${CMAKE_CURRENT_LIST_DIR}/environment.yml) + docs_execute_process(${CONDA_ROOT}/envs/rocprofiler-docs/bin/python -m pip install -r + ${CMAKE_CURRENT_LIST_DIR}/requirements.txt) endif() file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/build-docs.sh" "#!${SHELL_CMD} -e -PATH=${PROJECT_BINARY_DIR}/external/miniconda/bin:\${PATH} +PATH=${CONDA_ROOT}/bin:\${PATH} export PATH source activate diff --git a/source/docs/requirements.txt b/source/docs/requirements.txt new file mode 100644 index 0000000000..efc23a7e9f --- /dev/null +++ b/source/docs/requirements.txt @@ -0,0 +1 @@ +doxysphinx diff --git a/source/include/rocprofiler-sdk/counters.h b/source/include/rocprofiler-sdk/counters.h index 80bd77041a..5ea937de87 100644 --- a/source/include/rocprofiler-sdk/counters.h +++ b/source/include/rocprofiler-sdk/counters.h @@ -103,7 +103,7 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** * instance counting information. The reason for this restriction is that HSA * is not yet loaded on tool_init. * - * @param [in] agent rocprofiler agent + * @param [in] agent_id rocprofiler agent identifier * @param [in] counter_id counter id (obtained from iterate_agent_supported_counters) * @param [out] instance_count number of instances the counter has * @return ::rocprofiler_status_t @@ -111,7 +111,7 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** * @retval ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND if counter not found */ rocprofiler_status_t ROCPROFILER_API -rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, +rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t agent_id, rocprofiler_counter_id_t counter_id, size_t* instance_count) ROCPROFILER_NONNULL(3); @@ -119,6 +119,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, * @brief Callback that gives a list of counters available on an agent. The * counters variable is owned by rocprofiler and should not be free'd. * + * @param [in] agent_id Agent ID of the current callback * @param [in] counters An array of counters that are avialable on the agent * @ref rocprofiler_iterate_agent_supported_counters was called on. * @param [in] num_counters Number of counters contained in counters @@ -126,6 +127,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, * @ref rocprofiler_iterate_agent_supported_counters */ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)( + rocprofiler_agent_id_t agent_id, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data); @@ -133,7 +135,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)( /** * @brief Query Agent Counters Availability. * - * @param [in] agent GPU agent + * @param [in] agent_id GPU agent identifier * @param [in] cb callback to caller to get counters * @param [in] user_data data to pass into the callback * @return ::rocprofiler_status_t @@ -141,7 +143,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)( * @retval ROCPROFILER_STATUS_ERROR if no counters found for agent */ rocprofiler_status_t ROCPROFILER_API -rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t agent, +rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_id_t agent_id, rocprofiler_available_counters_cb_t cb, void* user_data) ROCPROFILER_NONNULL(2); diff --git a/source/include/rocprofiler-sdk/profile_config.h b/source/include/rocprofiler-sdk/profile_config.h index c265d3c93a..e7c97094f0 100644 --- a/source/include/rocprofiler-sdk/profile_config.h +++ b/source/include/rocprofiler-sdk/profile_config.h @@ -42,7 +42,7 @@ ROCPROFILER_EXTERN_C_INIT * counters for an agent can be queried using * @ref rocprofiler_iterate_agent_supported_counters. * - * @param [in] agent Agent identifier + * @param [in] agent_id Agent identifier * @param [in] counters_list List of GPU counters * @param [in] counters_count Size of counters list * @param [out] config_id Identifier for GPU counters group @@ -52,7 +52,7 @@ ROCPROFILER_EXTERN_C_INIT * */ rocprofiler_status_t ROCPROFILER_API -rocprofiler_create_profile_config(rocprofiler_agent_t agent, +rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id, rocprofiler_counter_id_t* counters_list, size_t counters_count, rocprofiler_profile_config_id_t* config_id) diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 9139b30ba8..7512bf83b3 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -31,10 +31,11 @@ #include "lib/common/logging.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" -#include "rocprofiler-sdk/marker/api_id.h" #include #include +#include +#include #include #include @@ -229,10 +230,17 @@ get_client_ctx() void flush() { + LOG(INFO) << "flushing buffers..."; for(auto itr : get_buffers().as_array()) { - if(itr.handle > 0) ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush"); + if(itr.handle > 0) + { + LOG(INFO) << "flushing buffer " << itr.handle; + ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush"); + ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush"); + } } + LOG(INFO) << "Buffers flushed"; } void @@ -634,36 +642,38 @@ get_agent_profile(const rocprofiler_agent_t* agent) }, [agent, &profile](agent_counter_map_t& data_v) { auto counters_v = counter_vec_t{}; - ROCPROFILER_CALL( - rocprofiler_iterate_agent_supported_counters( - *agent, - [](rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data) { - auto* vec = static_cast(user_data); - for(size_t i = 0; i < num_counters; i++) - { - const char* name = nullptr; - size_t len = 0; + ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( + agent->id, + [](rocprofiler_agent_id_t, + rocprofiler_counter_id_t* counters, + size_t num_counters, + void* user_data) { + auto* vec = static_cast(user_data); + for(size_t i = 0; i < num_counters; i++) + { + const char* name = nullptr; + size_t len = 0; - ROCPROFILER_CALL( - rocprofiler_query_counter_name(counters[i], &name, &len), - "Could not query name"); + ROCPROFILER_CALL(rocprofiler_query_counter_name( + counters[i], &name, &len), + "Could not query name"); - if(name && len > 0) - { - if(tool::get_config().counters.count(name) > 0) - vec->emplace_back(counters[i]); - } - } - return ROCPROFILER_STATUS_SUCCESS; - }, - static_cast(&counters_v)), - "iterate agent supported counters"); + if(name && len > 0) + { + if(tool::get_config().counters.count(name) > 0) + vec->emplace_back(counters[i]); + } + } + return ROCPROFILER_STATUS_SUCCESS; + }, + static_cast(&counters_v)), + "iterate agent supported counters"); if(!counters_v.empty()) { auto profile_v = rocprofiler_profile_config_id_t{}; ROCPROFILER_CALL(rocprofiler_create_profile_config( - *agent, counters_v.data(), counters_v.size(), &profile_v), + agent->id, counters_v.data(), counters_v.size(), &profile_v), "Could not construct profile cfg"); profile = profile_v; } @@ -734,6 +744,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { client_finalizer = fini_func; + constexpr uint64_t buffer_size = 4096; + constexpr uint64_t buffer_watermark = 4096; + ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed"); auto code_obj_ctx = rocprofiler_context_id_t{}; @@ -794,8 +807,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().kernel_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), - 4096, - 2048, + buffer_size, + buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, @@ -814,8 +827,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().memory_copy_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), - 4096, - 2048, + buffer_size, + buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, nullptr, @@ -834,8 +847,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().hsa_api_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), - 4096, - 2048, + buffer_size, + buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, @@ -854,8 +867,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().hip_api_trace || tool::get_config().hip_compiler_api_trace) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), - 4096, - 2048, + buffer_size, + buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, tool_data, @@ -888,8 +901,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().counter_collection) { ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), - 4096, - 2048, + buffer_size, + buffer_watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, buffered_tracing_callback, nullptr, @@ -902,11 +915,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) "Could not setup buffered service"); } - ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed"); + for(auto itr : get_buffers().as_array()) + { + if(itr.handle > 0) + { + auto cb_thread = rocprofiler_callback_thread_t{}; - std::atexit([]() { - if(client_finalizer && client_identifier) client_finalizer(*client_identifier); - }); + LOG(INFO) << "creating dedicated callback thread for buffer " << itr.handle; + ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_thread), + "creating callback thread"); + + LOG(INFO) << "assigning buffer " << itr.handle << " to callback thread " + << cb_thread.handle; + ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr, cb_thread), + "assigning callback thread"); + } + } + + ROCPROFILER_CALL(rocprofiler_start_context(get_client_ctx()), "start context failed"); return 0; } diff --git a/source/lib/rocprofiler-sdk-tool/trace_buffer.hpp b/source/lib/rocprofiler-sdk-tool/trace_buffer.hpp deleted file mode 100644 index dfe9d9843f..0000000000 --- a/source/lib/rocprofiler-sdk-tool/trace_buffer.hpp +++ /dev/null @@ -1,318 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class TraceBufferBase -{ -public: - static void FlushAll() - { - std::lock_guard lock(mutex_); - - for(auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_) - trace_buffer->Flush(); - } - - static void Register(TraceBufferBase* elem) - { - std::lock_guard lock(mutex_); - - auto** prev_ptr = &head_; - while(*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_) - prev_ptr = &(*prev_ptr)->next_; - - elem->next_ = *prev_ptr; - *prev_ptr = elem; - } - - static void Unregister(TraceBufferBase* elem) - { - std::lock_guard lock(mutex_); - - auto** prev_ptr = &head_; - while(*prev_ptr != nullptr && *prev_ptr != elem) - prev_ptr = &(*prev_ptr)->next_; - - assert(*prev_ptr != nullptr && "elem is not in the list"); - *prev_ptr = elem->next_; - } - - TraceBufferBase(std::string name, int priority) - : name_(std::move(name)) - , priority_(priority) - , next_(nullptr) - {} - - TraceBufferBase(const TraceBufferBase&) = delete; - TraceBufferBase& operator=(const TraceBufferBase&) = delete; - - virtual ~TraceBufferBase() { Unregister(this); } - - virtual void Flush() = 0; - - std::string name() && { return std::move(name_); } - const std::string& name() const& { return name_; } - -private: - const std::string name_; - const int priority_; - TraceBufferBase* next_; - - static TraceBufferBase* head_; - static std::mutex mutex_; -}; - -enum TraceEntryState -{ - TRACE_ENTRY_INVALID = 0, - TRACE_ENTRY_INIT = 1, - TRACE_ENTRY_COMPLETE = 2 -}; - -template > -class TraceBuffer : protected TraceBufferBase -{ -public: - using callback_t = std::function; - - TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0) - : TraceBufferBase(std::move(name), priority) - , flush_callback_(std::move(flush_callback)) - , size_(size) - { - assert(size_ != 0 && "cannot create an empty trace buffer"); - - Entry* write_buffer = allocator_.allocate(size_); - assert(write_buffer != nullptr); - buffer_list_.push_back(write_buffer); - - read_index_ = 0; - write_index_ = {0, write_buffer}; - - AllocateFreeBuffer(); - - // Add this instance to the link list of all trace buffers in the process. - Register(this); - } - - ~TraceBuffer() override - { - // Flush the remaining records. After flushing, there should not be any records left in the - // trace buffer. - Flush(); - assert(read_index_ == write_index_.load().index); - - // Acquire both the writer and worker lock as we are accessing shared variables they - // protect. - std::unique_lock writer_lock(write_mutex_, std::defer_lock); - std::unique_lock worker_lock(worker_mutex_, std::defer_lock); - std::lock(writer_lock, worker_lock); - - // Deallocate the buffers. - allocator_.deallocate(write_index_.load().buffer, size_); - allocator_.deallocate(free_buffer_, size_); - - // Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional - // after waking up, and exits if it does not have a value. - if(worker_thread_) - { - std::thread worker_thread = std::move(worker_thread_.value()); - { - // Tell the worker thread loop to exit. - worker_thread_.reset(); - free_buffer_ = nullptr; - worker_cond_.notify_one(); - } - // Release the worker lock to allow the worker thread to exit. - worker_lock.unlock(); - worker_thread.join(); - } - } - - // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are - // monotonically increasing indices, with read_pointer % size always indexing inside the first - // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with - // the next invocation after changing its state to 'complete'. - void Flush() override - { - std::lock_guard lock(write_mutex_); - auto write_index = write_index_.load(std::memory_order_relaxed); - - for(auto it = buffer_list_.begin(); it != buffer_list_.end();) - { - auto end_of_buffer = read_index_ - read_index_ % size_ + size_; - - while(read_index_ < std::min(write_index.index, end_of_buffer)) - { - Entry* entry = &(*it)[read_index_ % size_]; - - // The entry is not yet complete, stop flushing here. - if(entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return; - - flush_callback_(entry); - entry->~Entry(); - - ++read_index_; - } - - // The buffer is still in use or the read pointer did not reach the end of the buffer. - if(*it == write_index.buffer || read_index_ != end_of_buffer) return; - - // All entries in the current buffer are now processed. Destroy the buffer and move onto - // the next buffer in the list. - allocator_.deallocate(*it, size_); - it = buffer_list_.erase(it); - } - } - - template - Entry& Emplace(Args... args) - { - return *new(GetEntry()) Entry(std::forward(args)...); - } - -private: - Entry* GetEntry() - { - auto current = write_index_.load(std::memory_order_relaxed); - - while(true) - { - // If the pointer is at the end of the current buffer, switch to the available free - // buffer and notify the worker thread to allocate a new buffer. - if(current.index != 0 && current.index % size_ == 0) - { - std::lock_guard lock(write_mutex_); - - // If the worker thread wasn't already started, start it now. This avoids starting a - // new thread when the trace buffer is created. - if(!worker_thread_) - { - std::promise ready; - auto future = ready.get_future(); - { - std::lock_guard worker_lock(worker_mutex_); - worker_thread_.emplace( - &TraceBuffer::WorkerThreadLoop, this, std::move(ready)); - } - future.wait(); - } - - // Re-check the pointer overflow under the writer lock, another thread could have - // beaten us to it and already bumped the write_index_. - current = write_index_.load(std::memory_order_relaxed); - if(current.index % size_ == 0) - { - std::unique_lock worker_lock(worker_mutex_); - - // Wait for the free buffer to become available. - worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; }); - - current.buffer = free_buffer_; - buffer_list_.push_back(current.buffer); - write_index_.store({current.index + 1, current.buffer}, - std::memory_order_relaxed); - - // Tell the worker thread to allocate a new free buffer. - free_buffer_ = nullptr; - worker_cond_.notify_one(); - - // We successfully allocated a new buffer, return the first element. - return ¤t.buffer[0]; - } - } - - if(write_index_.compare_exchange_weak( - current, {current.index + 1, current.buffer}, std::memory_order_relaxed)) - return ¤t.buffer[current.index % size_]; - } - } - - void AllocateFreeBuffer() - { - assert(free_buffer_ == nullptr); - - free_buffer_ = allocator_.allocate(size_); - assert(free_buffer_ != nullptr); - - for(size_t i = 0; i < size_; ++i) - free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed); - } - - void WorkerThreadLoop(std::promise ready) - { - std::unique_lock lock(worker_mutex_); - - // This worker thread is now ready to accept work. - ready.set_value(); - - while(true) - { - worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; }); - if(!worker_thread_) break; - AllocateFreeBuffer(); - worker_cond_.notify_one(); - } - } - - // The WriteIndex is used to store both the index and the buffer associated with that index (the - // buffer contains the trace buffer records at [index - index % size, index - index % size_t + - // size_ - 1]) in a single atomic variable. - struct WriteIndex - { - uint64_t index; - Entry* buffer; - }; - - const callback_t flush_callback_; - const uint64_t size_; - - uint64_t read_index_; // The index of the next record to flush. - std::atomic write_index_; // The index of the next record that could be written. - Entry* free_buffer_{nullptr}; // The next available free buffer. - - std::optional worker_thread_; - std::mutex worker_mutex_; - std::condition_variable worker_cond_; - - std::mutex write_mutex_; - std::list buffer_list_; - Allocator allocator_; -}; - -#define TRACE_BUFFER_INSTANTIATE() \ - TraceBufferBase* TraceBufferBase::head_ = nullptr; \ - std::mutex TraceBufferBase::mutex_; diff --git a/source/lib/rocprofiler-sdk/agent.cpp b/source/lib/rocprofiler-sdk/agent.cpp index f873cc9545..e8fb7b0f78 100644 --- a/source/lib/rocprofiler-sdk/agent.cpp +++ b/source/lib/rocprofiler-sdk/agent.cpp @@ -671,6 +671,16 @@ get_agents() return pointers; } +const rocprofiler_agent_t* +get_agent(rocprofiler_agent_id_t id) +{ + for(const auto& itr : get_agents()) + { + if(itr && itr->id.handle == id.handle) return itr; + } + return nullptr; +} + void construct_agent_cache(::HsaApiTable* table) { diff --git a/source/lib/rocprofiler-sdk/agent.hpp b/source/lib/rocprofiler-sdk/agent.hpp index 2c17ea6b0c..b01132ebfe 100644 --- a/source/lib/rocprofiler-sdk/agent.hpp +++ b/source/lib/rocprofiler-sdk/agent.hpp @@ -39,6 +39,9 @@ namespace agent std::vector get_agents(); +const rocprofiler_agent_t* +get_agent(rocprofiler_agent_id_t id); + void construct_agent_cache(::HsaApiTable* table); diff --git a/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 5d9348f643..255afe6c82 100644 --- a/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -96,6 +96,12 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c if(rocprofiler::registration::get_init_status() > -1) return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; + static auto unsupported = std::unordered_set{ + ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, + ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, + ROCPROFILER_BUFFER_TRACING_EXTERNAL_CORRELATION}; + if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; + auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id); if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; diff --git a/source/lib/rocprofiler-sdk/callback_tracing.cpp b/source/lib/rocprofiler-sdk/callback_tracing.cpp index dc979a338e..774325a31d 100644 --- a/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -92,6 +92,10 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t if(rocprofiler::registration::get_init_status() > -1) return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; + static auto unsupported = std::unordered_set{ + ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH}; + if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; + auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id); if(!ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; diff --git a/source/lib/rocprofiler-sdk/counters.cpp b/source/lib/rocprofiler-sdk/counters.cpp index bd234fdf66..adda2d8176 100644 --- a/source/lib/rocprofiler-sdk/counters.cpp +++ b/source/lib/rocprofiler-sdk/counters.cpp @@ -26,6 +26,7 @@ #include #include "lib/common/synchronized.hpp" +#include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/aql/helpers.hpp" #include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp" #include "lib/rocprofiler-sdk/counters/id_decode.hpp" @@ -43,7 +44,7 @@ extern "C" { * @param [out] size * @return ::rocprofiler_status_t */ -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** name, size_t* size) { const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getMetricIdMap()); @@ -71,11 +72,16 @@ rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** * @param [out] instance_count number of instances the counter has * @return rocprofiler_status_t */ -rocprofiler_status_t ROCPROFILER_API -rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, +rocprofiler_status_t +rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t agent_id, rocprofiler_counter_id_t counter_id, size_t* instance_count) { + const rocprofiler_agent_t* agent = rocprofiler::agent::get_agent(agent_id); + + if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND; + if(agent->type != ROCPROFILER_AGENT_TYPE_GPU) return ROCPROFILER_STATUS_ERROR; + const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getMetricIdMap()); const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle); if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND; @@ -93,19 +99,9 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, // For derived metrics, this can be more than one counter. In that case, // we return the maximum instance count among all underlying counters. auto req_counters = rocprofiler::counters::get_required_hardware_counters( - rocprofiler::counters::get_ast_map(), std::string(agent.name), *metric_ptr); + rocprofiler::counters::get_ast_map(), std::string(agent->name), *metric_ptr); if(!req_counters) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND; - // NOTE: to look up instance information, we require HSA be init'd. Reason - // for this is the call to get instance information is an HSA call. - const auto* maybe_agent = rocprofiler::common::get_val( - rocprofiler::hsa::get_queue_controller().get_supported_agents(), agent.id.handle); - if(!maybe_agent) - { - LOG(ERROR) << "HSA must be loaded to obtain instance information."; - return ROCPROFILER_STATUS_ERROR; - } - for(const auto& counter : *req_counters) { if(!counter.special().empty()) @@ -116,7 +112,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, try { - auto dims = rocprofiler::counters::getBlockDimensions(maybe_agent->name(), counter); + auto dims = rocprofiler::counters::getBlockDimensions(agent->name, counter); for(const auto& dim : dims) { *instance_count = std::max(static_cast(dim.size()), *instance_count); @@ -138,12 +134,15 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, * @param [out] counters_count * @return ::rocprofiler_status_t */ -rocprofiler_status_t ROCPROFILER_API -rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t agent, +rocprofiler_status_t +rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_id_t agent_id, rocprofiler_available_counters_cb_t cb, void* user_data) { - auto metrics = rocprofiler::counters::getMetricsForAgent(agent.name); + const auto* agent = rocprofiler::agent::get_agent(agent_id); + if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND; + + auto metrics = rocprofiler::counters::getMetricsForAgent(agent->name); std::vector ids; ids.reserve(metrics.size()); for(const auto& metric : metrics) @@ -151,7 +150,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t ids.push_back({.handle = metric.id()}); } - return cb(ids.data(), ids.size(), user_data); + return cb(agent_id, ids.data(), ids.size(), user_data); } /** @@ -161,7 +160,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t * @param [out] counter_id counter id associated with the record * @return ::rocprofiler_status_t */ -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id, rocprofiler_counter_id_t* counter_id) { @@ -170,7 +169,7 @@ rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id, return ROCPROFILER_STATUS_SUCCESS; } -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t id, rocprofiler_counter_dimension_id_t dim, size_t* pos) @@ -180,7 +179,7 @@ rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t i return ROCPROFILER_STATUS_SUCCESS; } -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_query_record_dimension_info(rocprofiler_counter_id_t, rocprofiler_counter_dimension_id_t dim, rocprofiler_record_dimension_info_t* info) diff --git a/source/lib/rocprofiler-sdk/counters/core.cpp b/source/lib/rocprofiler-sdk/counters/core.cpp index f493a07cfc..04c7aed9b6 100644 --- a/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/core.cpp @@ -183,7 +183,7 @@ queue_cb(const std::shared_ptr& info, if(prof_config->reqired_hw_counters.empty()) { auto& config = *prof_config; - auto agent_name = std::string(config.agent.name); + auto agent_name = std::string(config.agent->name); for(const auto& metric : config.metrics) { auto req_counters = @@ -273,7 +273,7 @@ completed_cb(const std::shared_ptr& info, auto decoded_pkt = EvaluateAST::read_pkt(prof_config->pkt_generator.get(), *pkt); EvaluateAST::read_special_counters( - prof_config->agent, prof_config->required_special_counters, decoded_pkt); + *prof_config->agent, prof_config->required_special_counters, decoded_pkt); prof_config->packets.wlock([&](auto& pkt_vector) { if(pkt) diff --git a/source/lib/rocprofiler-sdk/counters/core.hpp b/source/lib/rocprofiler-sdk/counters/core.hpp index 7c70087c5c..2ed3638dc2 100644 --- a/source/lib/rocprofiler-sdk/counters/core.hpp +++ b/source/lib/rocprofiler-sdk/counters/core.hpp @@ -45,7 +45,7 @@ namespace counters // This profile can be shared among many rocprof contexts. struct profile_config { - rocprofiler_agent_t agent{}; + const rocprofiler_agent_t* agent = nullptr; std::vector metrics{}; // HW counters that must be collected to compute the above // metrics (derived metrics are broken down into hw counters diff --git a/source/lib/rocprofiler-sdk/profile_config.cpp b/source/lib/rocprofiler-sdk/profile_config.cpp index 3d253e0f96..6dd69228ca 100644 --- a/source/lib/rocprofiler-sdk/profile_config.cpp +++ b/source/lib/rocprofiler-sdk/profile_config.cpp @@ -25,11 +25,13 @@ #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" +#include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/aql/helpers.hpp" #include "lib/rocprofiler-sdk/counters/core.hpp" #include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp" #include "lib/rocprofiler-sdk/counters/metrics.hpp" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" +#include "rocprofiler-sdk/fwd.h" extern "C" { /** @@ -41,12 +43,15 @@ extern "C" { * @param [out] config_id Identifier for GPU counters group * @return ::rocprofiler_status_t */ -rocprofiler_status_t ROCPROFILER_API -rocprofiler_create_profile_config(rocprofiler_agent_t agent, +rocprofiler_status_t +rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id, rocprofiler_counter_id_t* counters_list, size_t counters_count, rocprofiler_profile_config_id_t* config_id) { + const auto* agent = ::rocprofiler::agent::get_agent(agent_id); + if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND; + std::shared_ptr config = std::make_shared(); @@ -57,7 +62,7 @@ rocprofiler_create_profile_config(rocprofiler_agent_t agent, const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle); if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND; - if(!rocprofiler::counters::checkValidMetric(std::string(agent.name), *metric_ptr)) + if(!rocprofiler::counters::checkValidMetric(std::string(agent->name), *metric_ptr)) { return ROCPROFILER_STATUS_ERROR_METRIC_NOT_VALID_FOR_AGENT; } @@ -70,7 +75,7 @@ rocprofiler_create_profile_config(rocprofiler_agent_t agent, return ROCPROFILER_STATUS_SUCCESS; } -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_destroy_profile_config(rocprofiler_profile_config_id_t config_id) { rocprofiler::counters::destroy_counter_profile(config_id.handle); diff --git a/source/scripts/run-ci.py b/source/scripts/run-ci.py index 54111b096e..c966c72b71 100755 --- a/source/scripts/run-ci.py +++ b/source/scripts/run-ci.py @@ -100,7 +100,7 @@ def generate_custom(args, cmake_args, ctest_args): ".*/counters/parser/.*", ] if args.coverage == "samples": - codecov_exclude += [".*/lib/common/.*"] + codecov_exclude += [".*/lib/common/.*", ".*/lib/rocprofiler-sdk-tool/.*"] COVERAGE_EXCLUDE = ";".join(codecov_exclude) diff --git a/tests/rocprofv3/tracing/CMakeLists.txt b/tests/rocprofv3/tracing/CMakeLists.txt index afff7204d1..a5a95cd1ea 100644 --- a/tests/rocprofv3/tracing/CMakeLists.txt +++ b/tests/rocprofv3/tracing/CMakeLists.txt @@ -20,8 +20,16 @@ add_test( string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") -set(tracing-env "${PRELOAD_ENV}" - "HSA_TOOLS_LIB=$") +if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer") + set(LOG_LEVEL "warning") # info produces memory leak +else() + set(LOG_LEVEL "info") +endif() + +set(tracing-env + "${PRELOAD_ENV}" "ROCPROF_LOG_LEVEL=${LOG_LEVEL}" + "ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}" + "HSA_TOOLS_LIB=$") set_tests_properties(rocprofv3-test-trace-execute PROPERTIES LABELS "integration-tests" ENVIRONMENT "${tracing-env}") @@ -44,9 +52,9 @@ add_test( ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv) set(VALIDATION_FILES + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_hsa_api_trace.csv ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_kernel_trace.csv - ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv) set_tests_properties( @@ -58,8 +66,6 @@ set_tests_properties( DEPENDS rocprofv3-test-trace-execute FAIL_REGULAR_EXPRESSION - "AssertionError" - REQUIRED_FILES - "${VALIDATION_FILES}" + "AssertionError|HSA_API|HIP_API|MARKER_API|KERNEL_DISPATCH|CODE_OBJECT" ATTACHED_FILES_ON_FAIL "${VALIDATION_FILES}")