Miscellaneous Updates (const-correctness, logic fixes, etc.) (#126)
* Update lib/rocprofiler/hsa/hsa.cpp
- fix logic for constructing callback_contexts and buffered_contexts arrays
* Update include/rocprofiler/{agent,fwd,pc_sampling}.h
- remove rocprofiler_pc_sampling_config_array_t due to const problems
- update rocprofiler_agent_t to use arrays to const data
- remove redundant rocprofiler_query_pc_sampling_agent_configurations
- this implementation is quite literally looking up info in the agent struct that was passed
* Update lib/rocprofiler/pc_sampling.cpp
- remove rocprofiler_query_pc_sampling_agent_configurations
* update lib/rocprofiler/agent.cpp
- handle const fields
- make mi200_pc_sampling_config variable static
* Update lib/rocprofiler/tests/agent.cpp
- tweak to pc_sampling_configs offset
* Update samples/pc_sampling
- Update sample to reflect minor tweaks to pc_sampling_configs in rocprofiler_agent_t
* Update CI workflow
- remove 'if: ${{ always() }}'
- I suspect this is why the jobs do not cancel in progress correctly
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
a7a971a247
Коммит
d1518c65b2
@@ -58,7 +58,6 @@ jobs:
|
||||
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
|
||||
options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
|
||||
|
||||
if: ${{ always() }}
|
||||
needs: get_latest_mainline_build_number
|
||||
|
||||
steps:
|
||||
@@ -147,7 +146,6 @@ jobs:
|
||||
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
|
||||
options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
|
||||
|
||||
if: ${{ always() }}
|
||||
needs: get_latest_mainline_build_number
|
||||
|
||||
steps:
|
||||
@@ -247,7 +245,6 @@ jobs:
|
||||
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
|
||||
options: --privileged --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
|
||||
|
||||
if: ${{ always() }}
|
||||
needs: get_latest_mainline_build_number
|
||||
|
||||
steps:
|
||||
|
||||
@@ -54,7 +54,7 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents,
|
||||
_out_agent->name,
|
||||
_out_agent->id.handle,
|
||||
_out_agent->type,
|
||||
_out_agent->pc_sampling_configs.size);
|
||||
_out_agent->num_pc_sampling_configs);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
else
|
||||
@@ -64,7 +64,7 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents,
|
||||
agents[i]->name,
|
||||
agents[i]->id.handle,
|
||||
agents[i]->type,
|
||||
agents[i]->pc_sampling_configs.size);
|
||||
agents[i]->num_pc_sampling_configs);
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
|
||||
@@ -57,11 +57,11 @@ second_user()
|
||||
|
||||
// After failure, the second user queries available configuration and observes the one chosen by
|
||||
// the first user.
|
||||
size_t config_count = 10;
|
||||
std::vector<rocprofiler_pc_sampling_configuration_t> configs(config_count);
|
||||
ROCPROFILER_CALL(rocprofiler_query_pc_sampling_agent_configurations(
|
||||
*gpu_agent, configs.data(), &config_count),
|
||||
"The second user cannot query available configurations");
|
||||
auto config_count = gpu_agent->num_pc_sampling_configs;
|
||||
auto configs = std::vector<rocprofiler_pc_sampling_configuration_t>{};
|
||||
configs.reserve(config_count);
|
||||
for(size_t i = 0; i < config_count; ++i)
|
||||
configs.emplace_back(gpu_agent->pc_sampling_configs[i]);
|
||||
|
||||
// Only one configuration should be listed, and its parameters should match the parameters set
|
||||
// by the first user. Vladimir: Is it ok to use assertions? In the release mode, they might be
|
||||
|
||||
@@ -34,7 +34,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
|
||||
// Skip GPU agents not supporting PC sampling
|
||||
// Vladimir: The assumption is that if a GPU agent does not support PC sampling,
|
||||
// the size is 0.
|
||||
if(agents[i]->pc_sampling_configs.size == 0) continue;
|
||||
if(agents[i]->num_pc_sampling_configs == 0) continue;
|
||||
|
||||
_out_agents->push_back(*agents[i]);
|
||||
|
||||
@@ -43,7 +43,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
|
||||
agents[i]->name,
|
||||
agents[i]->id.handle,
|
||||
agents[i]->type,
|
||||
agents[i]->pc_sampling_configs.size);
|
||||
agents[i]->num_pc_sampling_configs);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
else
|
||||
@@ -53,7 +53,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
|
||||
agents[i]->name,
|
||||
agents[i]->id.handle,
|
||||
agents[i]->type,
|
||||
agents[i]->pc_sampling_configs.size);
|
||||
agents[i]->num_pc_sampling_configs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,10 +78,10 @@ configure_host_trap_sampling(rocprofiler_context_id_t context_id,
|
||||
rocprofiler_agent_t gpu_agent)
|
||||
{
|
||||
// Vladimir: Does MI200 have only one configuration?
|
||||
assert(gpu_agent.pc_sampling_configs.size == 1);
|
||||
assert(gpu_agent.num_pc_sampling_configs == 1);
|
||||
|
||||
// Extract the configuration
|
||||
auto host_trap_config = gpu_agent.pc_sampling_configs.data[0];
|
||||
auto host_trap_config = gpu_agent.pc_sampling_configs[0];
|
||||
|
||||
// The mean of min_interval and max_interval
|
||||
auto interval = (host_trap_config.min_interval + host_trap_config.max_interval) / 2;
|
||||
@@ -96,15 +96,16 @@ configure_host_trap_sampling(rocprofiler_context_id_t context_id,
|
||||
}
|
||||
|
||||
rocprofiler_pc_sampling_configuration_t
|
||||
extract_stochastic_config(rocprofiler_pc_sampling_config_array_t* configs)
|
||||
extract_stochastic_config(const rocprofiler_pc_sampling_configuration_t* configs,
|
||||
size_t num_configs)
|
||||
{
|
||||
// Iterate over an array of configurations and return the first one
|
||||
// with stochasting method.
|
||||
for(size_t i = 0; i < configs->size; i++)
|
||||
for(size_t i = 0; i < num_configs; i++)
|
||||
{
|
||||
if(configs->data[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
if(configs[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
{
|
||||
return configs->data[i];
|
||||
return configs[i];
|
||||
}
|
||||
}
|
||||
printf("Improper use of the `extract_stochastic_config` function.");
|
||||
@@ -118,7 +119,7 @@ configure_stochastic_sampling(rocprofiler_context_id_t context_id,
|
||||
{
|
||||
// Find the configuration matching stochastic sampling in cycles
|
||||
rocprofiler_pc_sampling_configuration_t stochastic_config =
|
||||
extract_stochastic_config(&gpu_agent.pc_sampling_configs);
|
||||
extract_stochastic_config(gpu_agent.pc_sampling_configs, gpu_agent.num_pc_sampling_configs);
|
||||
|
||||
// The mean of min_interval and max_interval
|
||||
auto interval = (stochastic_config.min_interval + stochastic_config.max_interval) / 2;
|
||||
|
||||
@@ -172,14 +172,20 @@ typedef struct rocprofiler_agent_t
|
||||
///< dimension of a work-group.
|
||||
rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension
|
||||
///< of a grid.
|
||||
rocprofiler_agent_mem_bank_t* mem_banks;
|
||||
rocprofiler_agent_cache_t* caches;
|
||||
rocprofiler_agent_io_link_t* io_links;
|
||||
const rocprofiler_agent_mem_bank_t* mem_banks;
|
||||
const rocprofiler_agent_cache_t* caches;
|
||||
const rocprofiler_agent_io_link_t* io_links;
|
||||
const char* name; ///< Name of the agent. Will be identical to product name for CPU
|
||||
const char* vendor_name; ///< Vendor of agent (will be AMD)
|
||||
const char* product_name; ///< Marketing name
|
||||
const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc.
|
||||
rocprofiler_pc_sampling_config_array_t pc_sampling_configs;
|
||||
uint64_t num_pc_sampling_configs; ///< GPU only. Number of PC sampling modes available for this
|
||||
///< device type. Note: if another process is currently using
|
||||
///< PC sampling on this agent, this value will be zero so
|
||||
///< do not assume the number of PC sampling configurations
|
||||
///< based on the device type.
|
||||
const rocprofiler_pc_sampling_configuration_t*
|
||||
pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types.
|
||||
} rocprofiler_agent_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -326,15 +326,6 @@ typedef struct
|
||||
uint64_t handle;
|
||||
} rocprofiler_profile_config_id_t;
|
||||
|
||||
/**
|
||||
* @brief Array of PC Sampling Configurations
|
||||
*/
|
||||
typedef struct rocprofiler_pc_sampling_config_array_s
|
||||
{
|
||||
rocprofiler_pc_sampling_configuration_t* data;
|
||||
size_t size;
|
||||
} rocprofiler_pc_sampling_config_array_t;
|
||||
|
||||
/**
|
||||
* @brief Tracing record
|
||||
*
|
||||
|
||||
@@ -63,19 +63,6 @@ struct rocprofiler_pc_sampling_configuration_s
|
||||
uint64_t flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Query PC Sampling Configuration.
|
||||
*
|
||||
* @param [in] agent
|
||||
* @param [out] config
|
||||
* @param [out] config_count
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent,
|
||||
rocprofiler_pc_sampling_configuration_t* config,
|
||||
size_t* config_count) ROCPROFILER_NONNULL(2, 3);
|
||||
|
||||
/** @} */
|
||||
|
||||
ROCPROFILER_EXTERN_C_FINI
|
||||
|
||||
@@ -211,15 +211,20 @@ template <typename MapT, typename Tp>
|
||||
void
|
||||
read_property(const MapT& data, const std::string& label, Tp& value)
|
||||
{
|
||||
using mutable_type = std::remove_const_t<Tp>;
|
||||
|
||||
if constexpr(std::is_enum<Tp>::value)
|
||||
{
|
||||
using value_type = std::underlying_type_t<Tp>;
|
||||
using value_type = std::underlying_type_t<mutable_type>;
|
||||
// never expect this to be true but it does guard against infinite recursion
|
||||
static_assert(!std::is_enum<value_type>::value, "Expected non-enum type");
|
||||
|
||||
auto value_v = static_cast<value_type>(value);
|
||||
read_property(data, label, value_v);
|
||||
value = static_cast<Tp>(value_v);
|
||||
if constexpr(std::is_const<Tp>::value)
|
||||
const_cast<mutable_type&>(value) = static_cast<mutable_type>(value_v);
|
||||
else
|
||||
value = static_cast<Tp>(value_v);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -258,7 +263,10 @@ read_property(const MapT& data, const std::string& label, Tp& value)
|
||||
max_value)};
|
||||
}
|
||||
|
||||
value = static_cast<Tp>(local_value);
|
||||
if constexpr(std::is_const<Tp>::value)
|
||||
const_cast<mutable_type&>(value) = static_cast<mutable_type>(local_value);
|
||||
else
|
||||
value = static_cast<Tp>(local_value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,7 +288,7 @@ read_topology()
|
||||
|
||||
using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;
|
||||
|
||||
auto mi200_pc_sampling_config = pc_sampling_config_vec_t{
|
||||
static auto mi200_pc_sampling_config = pc_sampling_config_vec_t{
|
||||
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
|
||||
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
|
||||
1UL,
|
||||
@@ -412,12 +420,16 @@ read_topology()
|
||||
drmClose(drm_fd);
|
||||
}
|
||||
|
||||
constexpr auto gfx90a_version = compute_version(9, 0, 10);
|
||||
|
||||
if(agent_info.gfx_target_version >= gfx90a_version)
|
||||
// TODO(jomadsen): make contingent on whether this process acquired the PC sampling
|
||||
// device lock
|
||||
{
|
||||
agent_info.pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{
|
||||
mi200_pc_sampling_config.data(), mi200_pc_sampling_config.size()};
|
||||
constexpr auto gfx90a_version = compute_version(9, 0, 10);
|
||||
|
||||
if(agent_info.gfx_target_version >= gfx90a_version)
|
||||
{
|
||||
agent_info.pc_sampling_configs = mi200_pc_sampling_config.data();
|
||||
agent_info.num_pc_sampling_configs = mi200_pc_sampling_config.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
@@ -467,16 +479,10 @@ read_topology()
|
||||
|
||||
for(uint32_t i = 0; i < agent_info.mem_banks_count; ++i)
|
||||
{
|
||||
using heap_type_t = HSA_HEAPTYPE;
|
||||
using underlying_heap_type_t = std::underlying_type_t<heap_type_t>;
|
||||
|
||||
auto subproperties =
|
||||
read_map(node_path / "mem_banks" / std::to_string(i) / "properties");
|
||||
|
||||
auto _heap_type = underlying_heap_type_t{};
|
||||
read_property(subproperties, "heap_type", _heap_type);
|
||||
agent_info.mem_banks[i].heap_type = static_cast<heap_type_t>(_heap_type);
|
||||
|
||||
read_property(subproperties, "heap_type", agent_info.mem_banks[i].heap_type);
|
||||
read_property(
|
||||
subproperties, "size_in_bytes", agent_info.mem_banks[i].size_in_bytes);
|
||||
read_property(subproperties, "flags", agent_info.mem_banks[i].flags.MemoryProperty);
|
||||
|
||||
@@ -198,22 +198,18 @@ hsa_api_impl<Idx>::functor(Args&&... args)
|
||||
if(itr->callback_tracer)
|
||||
{
|
||||
// if the given domain + op is not enabled, skip this context
|
||||
if(!itr->callback_tracer->domains(info_type::callback_domain_idx,
|
||||
info_type::operation_idx))
|
||||
continue;
|
||||
|
||||
callback_contexts.emplace_back(
|
||||
callback_context_data{itr, rocprofiler_callback_tracing_record_t{}});
|
||||
if(itr->callback_tracer->domains(info_type::callback_domain_idx,
|
||||
info_type::operation_idx))
|
||||
callback_contexts.emplace_back(
|
||||
callback_context_data{itr, rocprofiler_callback_tracing_record_t{}});
|
||||
}
|
||||
|
||||
if(itr->buffered_tracer)
|
||||
{
|
||||
// if the given domain + op is not enabled, skip this context
|
||||
if(!itr->buffered_tracer->domains(info_type::buffered_domain_idx,
|
||||
info_type::operation_idx))
|
||||
continue;
|
||||
|
||||
buffered_contexts.emplace_back(buffered_context_data{itr});
|
||||
if(itr->buffered_tracer->domains(info_type::buffered_domain_idx,
|
||||
info_type::operation_idx))
|
||||
buffered_contexts.emplace_back(buffered_context_data{itr});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,13 +48,4 @@ rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t conte
|
||||
consume_args(context_id, agent, method, unit, interval, buffer_id);
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent,
|
||||
rocprofiler_pc_sampling_configuration_t* config,
|
||||
size_t* config_count)
|
||||
{
|
||||
consume_args(agent, config, config_count);
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,7 +97,8 @@ TEST(rocprofiler_lib, agent_abi)
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_name), 256) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, product_name), 264) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 280) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_pc_sampling_configs), 280) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 288) << msg;
|
||||
// Add test for offset of new field above this. Do NOT change any existing values!
|
||||
|
||||
// If a new field is added, increase this value by the size of the new field(s)
|
||||
|
||||
Ссылка в новой задаче
Block a user