From 76717efa644fce5c4f25341dff6297ff2402ec37 Mon Sep 17 00:00:00 2001 From: vlaindic <139573562+vlaindic@users.noreply.github.com> Date: Thu, 31 Aug 2023 00:08:48 +0200 Subject: [PATCH] PC sampling examples: single user, multiple agents (#31) * PC sampling examples: single user, multiple agents The example assumes that a single user activates PC sampling service on multiple agents. Preferably, the user chooses stochastic sampling over host-trap sampling. * cmake formatting (cmake-format) (#33) Co-authored-by: vlaindic * source formatting (clang-format v11) (#32) Co-authored-by: vlaindic * pc sampling samples: anynomus namespace and string_view --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: vlaindic --- samples/pc_sampling/CMakeLists.txt | 6 + samples/pc_sampling/common.h | 7 +- .../single-user-multiple-agents.cpp | 199 ++++++++++++++++++ 3 files changed, 210 insertions(+), 2 deletions(-) create mode 100644 samples/pc_sampling/single-user-multiple-agents.cpp diff --git a/samples/pc_sampling/CMakeLists.txt b/samples/pc_sampling/CMakeLists.txt index 86ae370891..197b6b7989 100644 --- a/samples/pc_sampling/CMakeLists.txt +++ b/samples/pc_sampling/CMakeLists.txt @@ -14,3 +14,9 @@ target_sources(pc_sampling_single-user-host-trap-retry PRIVATE common.h single-user-host-trap-retries-service-instantiation.cpp) target_link_libraries(pc_sampling_single-user-host-trap-retry PRIVATE rocprofiler::rocprofiler-library) + +add_executable(pc_sampling_single-user-multiple-agents) +target_sources(pc_sampling_single-user-multiple-agents + PRIVATE common.h single-user-multiple-agents.cpp) +target_link_libraries(pc_sampling_single-user-multiple-agents + PRIVATE rocprofiler::rocprofiler-library) diff --git a/samples/pc_sampling/common.h b/samples/pc_sampling/common.h index 9880ca43ea..054a9b2041 100644 --- a/samples/pc_sampling/common.h +++ b/samples/pc_sampling/common.h @@ -7,9 +7,12 @@ #include #include #include 
+#include <cstddef>
+#include <string_view>
 
-constexpr size_t BUFFER_SIZE_BYTES = 4096;
-constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2);
+constexpr size_t           BUFFER_SIZE_BYTES = 4096;
+constexpr size_t           WATERMARK         = (BUFFER_SIZE_BYTES / 2);
+constexpr std::string_view MI200_NAME        = "gfx90a";
 
 #define ROCPROFILER_CALL(result, msg) \
 { \
diff --git a/samples/pc_sampling/single-user-multiple-agents.cpp b/samples/pc_sampling/single-user-multiple-agents.cpp
new file mode 100644
index 0000000000..62ae36ef43
--- /dev/null
+++ b/samples/pc_sampling/single-user-multiple-agents.cpp
@@ -0,0 +1,199 @@
+// Vladimir: The example that shows how a single user can use PC sampling
+// on multiple GPU agents.
+
+#include <rocprofiler/rocprofiler.h>
+#include <cassert>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <string_view>
+#include <vector>
+#include "common.h"
+
+namespace
+{
+// GPU agents supporting some kind of PC sampling
+std::vector<rocprofiler_agent_t>      gpu_agents;
+std::vector<rocprofiler_context_id_t> contexts;
+std::vector<rocprofiler_buffer_id_t>  buffer_ids;
+
+// Agent-query callback: appends every GPU agent that reports at least one PC sampling
+// configuration to the std::vector<rocprofiler_agent_t> passed through data.
+// Returns an error status when data is null or the vector is still empty afterwards.
+rocprofiler_status_t
+find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_t** agents,
+                                                size_t                num_agents,
+                                                void*                 data)
+{
+    // data is required
+    if(!data) return ROCPROFILER_STATUS_ERROR;
+
+    auto* _out_agents = static_cast<std::vector<rocprofiler_agent_t>*>(data);
+    // visit every agent; returning on the first match would hide the remaining GPUs
+    for(size_t i = 0; i < num_agents; i++)
+    {
+        printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n",
+               __FUNCTION__,
+               agents[i]->name,
+               agents[i]->id.handle,
+               agents[i]->type,
+               agents[i]->pc_sampling_configs.size);
+
+        if(agents[i]->type != ROCPROFILER_AGENT_TYPE_GPU) continue;
+
+        // Skip GPU agents not supporting PC sampling
+        // Vladimir: The assumption is that if a GPU agent does not support PC sampling,
+        // the size is 0.
+        if(agents[i]->pc_sampling_configs.size == 0) continue;
+
+        _out_agents->push_back(*agents[i]);
+    }
+
+    return !_out_agents->empty() ? ROCPROFILER_STATUS_SUCCESS : ROCPROFILER_STATUS_ERROR;
+}
+
+// Fills the file-scope gpu_agents with all GPU agents supporting some kind of PC sampling
+void
+find_all_gpu_agents_supporting_pc_sampling()
+{
+    // Hand out the file-scope vector; a local vector of the same name would shadow it
+    // and main() would never see the agents that were found.
+    ROCPROFILER_CALL(
+        rocprofiler_query_available_agents(&find_all_gpu_agents_supporting_pc_sampling_impl,
+                                           sizeof(rocprofiler_agent_t),
+                                           static_cast<void*>(&gpu_agents)),
+        "Failed to find GPU agents");
+}
+}  // namespace
+
+// Configures the PC sampling service of gpu_agent with its single advertised configuration
+// (asserted below), using the midpoint of that configuration's interval range.
+void
+configure_host_trap_sampling(rocprofiler_context_id_t context_id,
+                             rocprofiler_buffer_id_t  buffer_id,
+                             rocprofiler_agent_t      gpu_agent)
+{
+    // Vladimir: Does MI200 have only one configuration?
+    assert(gpu_agent.pc_sampling_configs.size == 1);
+
+    // Extract the configuration
+    auto host_trap_config = gpu_agent.pc_sampling_configs.data[0];
+
+    // The mean of min_interval and max_interval
+    auto interval = (host_trap_config.min_interval + host_trap_config.max_interval) / 2;
+
+    ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id,
+                                                               gpu_agent,
+                                                               host_trap_config.method,
+                                                               host_trap_config.unit,
+                                                               interval,
+                                                               buffer_id),
+                     "Cannot create host-trap PC sampling service");
+}
+
+// Returns the first entry of configs whose method is stochastic sampling.
+// Prints a message and terminates the process when there is no such entry.
+rocprofiler_pc_sampling_configuration_t
+extract_stochastic_config(rocprofiler_pc_sampling_config_array_t* configs)
+{
+    for(size_t i = 0; i < configs->size; i++)
+    {
+        if(configs->data[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
+        {
+            return configs->data[i];
+        }
+    }
+    printf("Improper use of the `extract_stochastic_config` function.\n");
+    exit(-1);
+}
+
+// Configures the PC sampling service of gpu_agent with its first stochastic configuration,
+// using the midpoint of that configuration's interval range.
+void
+configure_stochastic_sampling(rocprofiler_context_id_t context_id,
+                              rocprofiler_buffer_id_t  buffer_id,
+                              rocprofiler_agent_t      gpu_agent)
+{
+    // Find the first configuration that uses the stochastic method
+    rocprofiler_pc_sampling_configuration_t stochastic_config =
+        extract_stochastic_config(&gpu_agent.pc_sampling_configs);
+
+    // The mean of min_interval and max_interval
+    auto interval = (stochastic_config.min_interval + stochastic_config.max_interval) / 2;
+
+    ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id,
+                                                               gpu_agent,
+                                                               stochastic_config.method,
+                                                               stochastic_config.unit,
+                                                               interval,
+                                                               buffer_id),
+                     "Cannot create stochastic PC sampling service");
+}
+
+// Creates one context, one buffer and one PC sampling service per discovered GPU agent,
+// runs the HIP application, then stops every context and flushes/destroys every buffer.
+int
+main(int /*argc*/, char** /*argv*/)
+{
+    // Populates the file-scope gpu_agents vector
+    find_all_gpu_agents_supporting_pc_sampling();
+
+    if(gpu_agents.empty())
+    {
+        printf("No available gpu agents\n");
+        exit(-1);
+    }
+
+    // Vladimir: The relations I assumed:
+    // - a context per gpu agent
+    // - a buffer per context
+    // - a pc sampling service per buffer
+    // How about the following: Single context with multiple buffers and PC sampling services?
+    // When starting the context, does it start all PC sampling services at once?
+
+    for(const auto& gpu_agent : gpu_agents)
+    {
+        // creating a context
+        rocprofiler_context_id_t context_id;
+        ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n");
+        contexts.push_back(context_id);
+
+        // creating a buffer that will hold pc sampling information
+        rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD;
+        rocprofiler_buffer_id_t     buffer_id;
+        ROCPROFILER_CALL(rocprofiler_create_buffer(context_id,
+                                                   BUFFER_SIZE_BYTES,
+                                                   WATERMARK,
+                                                   drop_buffer_action,
+                                                   rocprofiler_pc_sampling_callback,
+                                                   nullptr,
+                                                   &buffer_id),
+                         "Cannot create pc sampling buffer");
+        buffer_ids.push_back(buffer_id);
+
+        // Agents named MI200_NAME get host-trap sampling; all others get stochastic sampling
+        if(gpu_agent.name == MI200_NAME)
+            configure_host_trap_sampling(context_id, buffer_id, gpu_agent);
+        else
+            configure_stochastic_sampling(context_id, buffer_id, gpu_agent);
+
+        // Starting the context that should trigger PC sampling
+        ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context");
+    }
+
+    // Running the application
+    run_HIP_app();
+
+    // contexts[i] and buffer_ids[i] were created together in the loop above
+    for(size_t i = 0; i < contexts.size(); i++)
+    {
+        // Stop the context that should stop PC sampling?
+        ROCPROFILER_CALL(rocprofiler_stop_context(contexts[i]), "Cannot stop PC sampling context");
+        // Explicit buffer flush, before destroying it
+        ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_ids[i]), "Cannot flush buffer");
+        // Destroying the buffer
+        ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_ids[i]), "Cannot destroy buffer");
+    }
+
+    return 0;
+}