From 1c6af2cb55bdd780a1e96d7ee997a320bce2e908 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 17 Oct 2023 00:39:41 -0500 Subject: [PATCH] Miscellaneous Updates (const-correctness, logic fixes, etc.) (#126) * Update lib/rocprofiler/hsa/hsa.cpp - fix logic for constructing callback_contexts and buffered_contexts arrays * Update include/rocprofiler/{agent,fwd,pc_sampling}.h - remove rocprofiler_pc_sampling_config_array_t due to const problems - update rocprofiler_agent_t to use arrays to const data - remove redundant rocprofiler_query_pc_sampling_agent_configurations - this implementation is quite literally looking up info in the agent struct that was passed * Update lib/rocprofiler/pc_sampling.cpp - remove rocprofiler_query_pc_sampling_agent_configurations * update lib/rocprofiler/agent.cpp - handle const fields - make mi200_pc_sampling_config variable static * Update lib/rocprofiler/tests/agent.cpp - tweak to pc_sampling_configs offset * Update samples/pc_sampling - Update sample to reflect minor tweaks to pc_sampling_configs in rocprofiler_agent_t * Update CI workflow - remove 'if: ${{ always() }}' - I suspect this is why the jobs do not cancel in progress correctly [ROCm/rocprofiler-sdk commit: d1518c65b282f673c0da9e25ba7a2ec7050c722d] --- .../workflows/continuous_integration.yml | 3 -- .../samples/pc_sampling/common.h | 4 +- ...ost-trap-retries-service-instantiation.cpp | 10 ++--- .../single-user-multiple-agents.cpp | 21 +++++----- .../source/include/rocprofiler/agent.h | 14 +++++-- .../source/include/rocprofiler/fwd.h | 9 ----- .../source/include/rocprofiler/pc_sampling.h | 13 ------- .../source/lib/rocprofiler/agent.cpp | 38 +++++++++++-------- .../source/lib/rocprofiler/hsa/hsa.cpp | 18 ++++----- .../source/lib/rocprofiler/pc_sampling.cpp | 9 ----- .../source/lib/rocprofiler/tests/agent.cpp | 3 +- 11 files changed, 59 insertions(+), 83 deletions(-) diff --git a/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml b/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml index 1bff7f4190..a8ff34be51 100644 --- a/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml +++ b/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml @@ -58,7 +58,6 @@ jobs: image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1 options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined - if: ${{ always() }} needs: get_latest_mainline_build_number steps: @@ -147,7 +146,6 @@ jobs: image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1 options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined - if: ${{ always() }} needs: get_latest_mainline_build_number steps: @@ -247,7 +245,6 @@ jobs: image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1 options: --privileged --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined - if: ${{ always() }} needs: get_latest_mainline_build_number steps: diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/common.h b/projects/rocprofiler-sdk/samples/pc_sampling/common.h index 41ac6f7766..d3032c6f93 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/common.h +++ b/projects/rocprofiler-sdk/samples/pc_sampling/common.h @@ -54,7 +54,7 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, _out_agent->name, _out_agent->id.handle, _out_agent->type, - _out_agent->pc_sampling_configs.size); + _out_agent->num_pc_sampling_configs); return ROCPROFILER_STATUS_SUCCESS; } else @@ -64,7 +64,7 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, agents[i]->name, agents[i]->id.handle, agents[i]->type, - agents[i]->pc_sampling_configs.size); + agents[i]->num_pc_sampling_configs); } } return ROCPROFILER_STATUS_ERROR; diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp index d656d44cd1..575511a55a 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp +++ b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp @@ -57,11 +57,11 @@ second_user() // After failure, the second user queries available configuration and observes the one chosen by // the first user. - size_t config_count = 10; - std::vector configs(config_count); - ROCPROFILER_CALL(rocprofiler_query_pc_sampling_agent_configurations( - *gpu_agent, configs.data(), &config_count), - "The second user cannot query available configurations"); + auto config_count = gpu_agent->num_pc_sampling_configs; + auto configs = std::vector{}; + configs.reserve(config_count); + for(size_t i = 0; i < config_count; ++i) + configs.emplace_back(gpu_agent->pc_sampling_configs[i]); // Only one configuration should be listed, and its parameters should match the parameters set // by the first user. Vladimir: Is it ok to use assertions? In the release mode, they might be diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp index c966d34eeb..f9dd7df8d5 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp +++ b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp @@ -34,7 +34,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen // Skip GPU agents not supporting PC sampling // Vladimir: The assumption is that if a GPU agent does not support PC sampling, // the size is 0. - if(agents[i]->pc_sampling_configs.size == 0) continue; + if(agents[i]->num_pc_sampling_configs == 0) continue; _out_agents->push_back(*agents[i]); @@ -43,7 +43,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen agents[i]->name, agents[i]->id.handle, agents[i]->type, - agents[i]->pc_sampling_configs.size); + agents[i]->num_pc_sampling_configs); return ROCPROFILER_STATUS_SUCCESS; } else @@ -53,7 +53,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen agents[i]->name, agents[i]->id.handle, agents[i]->type, - agents[i]->pc_sampling_configs.size); + agents[i]->num_pc_sampling_configs); } } @@ -78,10 +78,10 @@ configure_host_trap_sampling(rocprofiler_context_id_t context_id, rocprofiler_agent_t gpu_agent) { // Vladimir: Does MI200 have only one configuration? - assert(gpu_agent.pc_sampling_configs.size == 1); + assert(gpu_agent.num_pc_sampling_configs == 1); // Extract the configuration - auto host_trap_config = gpu_agent.pc_sampling_configs.data[0]; + auto host_trap_config = gpu_agent.pc_sampling_configs[0]; // The mean of min_interval and max_interval auto interval = (host_trap_config.min_interval + host_trap_config.max_interval) / 2; @@ -96,15 +96,16 @@ configure_host_trap_sampling(rocprofiler_context_id_t context_id, } rocprofiler_pc_sampling_configuration_t -extract_stochastic_config(rocprofiler_pc_sampling_config_array_t* configs) +extract_stochastic_config(const rocprofiler_pc_sampling_configuration_t* configs, + size_t num_configs) { // Iterate over an array of configurations and return the first one // with stochasting method. - for(size_t i = 0; i < configs->size; i++) + for(size_t i = 0; i < num_configs; i++) { - if(configs->data[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) + if(configs[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) { - return configs->data[i]; + return configs[i]; } } printf("Improper use of the `extract_stochastic_config` function."); @@ -118,7 +119,7 @@ configure_stochastic_sampling(rocprofiler_context_id_t context_id, { // Find the configuration matching stochastic sampling in cycles rocprofiler_pc_sampling_configuration_t stochastic_config = - extract_stochastic_config(&gpu_agent.pc_sampling_configs); + extract_stochastic_config(gpu_agent.pc_sampling_configs, gpu_agent.num_pc_sampling_configs); // The mean of min_interval and max_interval auto interval = (stochastic_config.min_interval + stochastic_config.max_interval) / 2; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h b/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h index fe28a7492a..43184f9cd8 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h @@ -172,14 +172,20 @@ typedef struct rocprofiler_agent_t ///< dimension of a work-group. rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension ///< of a grid. - rocprofiler_agent_mem_bank_t* mem_banks; - rocprofiler_agent_cache_t* caches; - rocprofiler_agent_io_link_t* io_links; + const rocprofiler_agent_mem_bank_t* mem_banks; + const rocprofiler_agent_cache_t* caches; + const rocprofiler_agent_io_link_t* io_links; const char* name; ///< Name of the agent. Will be identical to product name for CPU const char* vendor_name; ///< Vendor of agent (will be AMD) const char* product_name; ///< Marketing name const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc. - rocprofiler_pc_sampling_config_array_t pc_sampling_configs; + uint64_t num_pc_sampling_configs; ///< GPU only. Number of PC sampling modes available for this + ///< device type. Note: if another process is currently using + ///< PC sampling on this agent, this value will be zero so + ///< do not assume the number of PC sampling configurations + ///< based on the device type. + const rocprofiler_pc_sampling_configuration_t* + pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types. } rocprofiler_agent_t; /** diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h index 01cc0a39ab..94e0e40fdc 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h @@ -326,15 +326,6 @@ typedef struct uint64_t handle; } rocprofiler_profile_config_id_t; -/** - * @brief Array of PC Sampling Configurations - */ -typedef struct rocprofiler_pc_sampling_config_array_s -{ - rocprofiler_pc_sampling_configuration_t* data; - size_t size; -} rocprofiler_pc_sampling_config_array_t; - /** * @brief Tracing record * diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/pc_sampling.h b/projects/rocprofiler-sdk/source/include/rocprofiler/pc_sampling.h index bbf1c30d97..f322cf4b74 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/pc_sampling.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/pc_sampling.h @@ -63,19 +63,6 @@ struct rocprofiler_pc_sampling_configuration_s uint64_t flags; }; -/** - * @brief Query PC Sampling Configuration. - * - * @param [in] agent - * @param [out] config - * @param [out] config_count - * @return ::rocprofiler_status_t - */ -rocprofiler_status_t ROCPROFILER_API -rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent, - rocprofiler_pc_sampling_configuration_t* config, - size_t* config_count) ROCPROFILER_NONNULL(2, 3); - /** @} */ ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp index ba9f478994..6a69c6cf45 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp @@ -211,15 +211,20 @@ template void read_property(const MapT& data, const std::string& label, Tp& value) { + using mutable_type = std::remove_const_t; + if constexpr(std::is_enum::value) { - using value_type = std::underlying_type_t; + using value_type = std::underlying_type_t; // never expect this to be true but it does guard against infinite recursion static_assert(!std::is_enum::value, "Expected non-enum type"); auto value_v = static_cast(value); read_property(data, label, value_v); - value = static_cast(value_v); + if constexpr(std::is_const::value) + const_cast(value) = static_cast(value_v); + else + value = static_cast(value_v); } else { @@ -258,7 +263,10 @@ read_property(const MapT& data, const std::string& label, Tp& value) max_value)}; } - value = static_cast(local_value); + if constexpr(std::is_const::value) + const_cast(value) = static_cast(local_value); + else + value = static_cast(local_value); } } @@ -280,7 +288,7 @@ read_topology() using pc_sampling_config_vec_t = std::vector; - auto mi200_pc_sampling_config = pc_sampling_config_vec_t{ + static auto mi200_pc_sampling_config = pc_sampling_config_vec_t{ rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP, ROCPROFILER_PC_SAMPLING_UNIT_TIME, 1UL, @@ -412,12 +420,16 @@ read_topology() drmClose(drm_fd); } - constexpr auto gfx90a_version = compute_version(9, 0, 10); - - if(agent_info.gfx_target_version >= gfx90a_version) + // TODO(jomadsen): make contingent on whether this process acquired the PC sampling + // device lock { - agent_info.pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{ - mi200_pc_sampling_config.data(), mi200_pc_sampling_config.size()}; + constexpr auto gfx90a_version = compute_version(9, 0, 10); + + if(agent_info.gfx_target_version >= gfx90a_version) + { + agent_info.pc_sampling_configs = mi200_pc_sampling_config.data(); + agent_info.num_pc_sampling_configs = mi200_pc_sampling_config.size(); + } } } else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU) @@ -467,16 +479,10 @@ read_topology() for(uint32_t i = 0; i < agent_info.mem_banks_count; ++i) { - using heap_type_t = HSA_HEAPTYPE; - using underlying_heap_type_t = std::underlying_type_t; - auto subproperties = read_map(node_path / "mem_banks" / std::to_string(i) / "properties"); - auto _heap_type = underlying_heap_type_t{}; - read_property(subproperties, "heap_type", _heap_type); - agent_info.mem_banks[i].heap_type = static_cast(_heap_type); - + read_property(subproperties, "heap_type", agent_info.mem_banks[i].heap_type); read_property( subproperties, "size_in_bytes", agent_info.mem_banks[i].size_in_bytes); read_property(subproperties, "flags", agent_info.mem_banks[i].flags.MemoryProperty); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/hsa.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/hsa.cpp index d076b404e2..11494d3753 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/hsa.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/hsa.cpp @@ -198,22 +198,18 @@ hsa_api_impl::functor(Args&&... args) if(itr->callback_tracer) { // if the given domain + op is not enabled, skip this context - if(!itr->callback_tracer->domains(info_type::callback_domain_idx, - info_type::operation_idx)) - continue; - - callback_contexts.emplace_back( - callback_context_data{itr, rocprofiler_callback_tracing_record_t{}}); + if(itr->callback_tracer->domains(info_type::callback_domain_idx, + info_type::operation_idx)) + callback_contexts.emplace_back( + callback_context_data{itr, rocprofiler_callback_tracing_record_t{}}); } if(itr->buffered_tracer) { // if the given domain + op is not enabled, skip this context - if(!itr->buffered_tracer->domains(info_type::buffered_domain_idx, - info_type::operation_idx)) - continue; - - buffered_contexts.emplace_back(buffered_context_data{itr}); + if(itr->buffered_tracer->domains(info_type::buffered_domain_idx, + info_type::operation_idx)) + buffered_contexts.emplace_back(buffered_context_data{itr}); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/pc_sampling.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/pc_sampling.cpp index fdd0619beb..4d600954ee 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/pc_sampling.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/pc_sampling.cpp @@ -48,13 +48,4 @@ rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t conte consume_args(context_id, agent, method, unit, interval, buffer_id); return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } - -rocprofiler_status_t -rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent, - rocprofiler_pc_sampling_configuration_t* config, - size_t* config_count) -{ - consume_args(agent, config, config_count); - return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; -} } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp index 71ff2c2c93..e75647efb5 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp @@ -97,7 +97,8 @@ TEST(rocprofiler_lib, agent_abi) EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_name), 256) << msg; EXPECT_EQ(offsetof(rocprofiler_agent_t, product_name), 264) << msg; EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg; - EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 280) << msg; + EXPECT_EQ(offsetof(rocprofiler_agent_t, num_pc_sampling_configs), 280) << msg; + EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 288) << msg; // Add test for offset of new field above this. Do NOT change any existing values! // If a new field is added, increase this value by the size of the new field(s)