Miscellaneous Updates (const-correctness, logic fixes, etc.) (#126)

* Update lib/rocprofiler/hsa/hsa.cpp

- fix logic for constructing callback_contexts and buffered_contexts arrays

* Update include/rocprofiler/{agent,fwd,pc_sampling}.h

- remove rocprofiler_pc_sampling_config_array_t due to const problems
- update rocprofiler_agent_t to use arrays to const data
- remove redundant rocprofiler_query_pc_sampling_agent_configurations
  - this implementation is quite literally looking up info in the agent struct that was passed

* Update lib/rocprofiler/pc_sampling.cpp

- remove rocprofiler_query_pc_sampling_agent_configurations

* update lib/rocprofiler/agent.cpp

- handle const fields
- make mi200_pc_sampling_config variable static

* Update lib/rocprofiler/tests/agent.cpp

- tweak to pc_sampling_configs offset

* Update samples/pc_sampling

- Update sample to reflect minor tweaks to pc_sampling_configs in rocprofiler_agent_t

* Update CI workflow

- remove 'if: ${{ always() }}'
  - I suspect this is why the jobs do not cancel in progress correctly
Этот коммит содержится в:
Jonathan R. Madsen
2023-10-17 00:39:41 -05:00
коммит произвёл GitHub
родитель a7a971a247
Коммит d1518c65b2
11 изменённых файлов: 59 добавлений и 83 удалений
-3
Просмотреть файл
@@ -58,7 +58,6 @@ jobs:
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
if: ${{ always() }}
needs: get_latest_mainline_build_number
steps:
@@ -147,7 +146,6 @@ jobs:
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
if: ${{ always() }}
needs: get_latest_mainline_build_number
steps:
@@ -247,7 +245,6 @@ jobs:
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
options: --privileged --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
if: ${{ always() }}
needs: get_latest_mainline_build_number
steps:
+2 -2
Просмотреть файл
@@ -54,7 +54,7 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents,
_out_agent->name,
_out_agent->id.handle,
_out_agent->type,
_out_agent->pc_sampling_configs.size);
_out_agent->num_pc_sampling_configs);
return ROCPROFILER_STATUS_SUCCESS;
}
else
@@ -64,7 +64,7 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents,
agents[i]->name,
agents[i]->id.handle,
agents[i]->type,
agents[i]->pc_sampling_configs.size);
agents[i]->num_pc_sampling_configs);
}
}
return ROCPROFILER_STATUS_ERROR;
+5 -5
Просмотреть файл
@@ -57,11 +57,11 @@ second_user()
// After failure, the second user queries available configuration and observes the one chosen by
// the first user.
size_t config_count = 10;
std::vector<rocprofiler_pc_sampling_configuration_t> configs(config_count);
ROCPROFILER_CALL(rocprofiler_query_pc_sampling_agent_configurations(
*gpu_agent, configs.data(), &config_count),
"The second user cannot query available configurations");
auto config_count = gpu_agent->num_pc_sampling_configs;
auto configs = std::vector<rocprofiler_pc_sampling_configuration_t>{};
configs.reserve(config_count);
for(size_t i = 0; i < config_count; ++i)
configs.emplace_back(gpu_agent->pc_sampling_configs[i]);
// Only one configuration should be listed, and its parameters should match the parameters set
// by the first user. Vladimir: Is it ok to use assertions? In the release mode, they might be
+11 -10
Просмотреть файл
@@ -34,7 +34,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
// Skip GPU agents not supporting PC sampling
// Vladimir: The assumption is that if a GPU agent does not support PC sampling,
// the size is 0.
if(agents[i]->pc_sampling_configs.size == 0) continue;
if(agents[i]->num_pc_sampling_configs == 0) continue;
_out_agents->push_back(*agents[i]);
@@ -43,7 +43,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
agents[i]->name,
agents[i]->id.handle,
agents[i]->type,
agents[i]->pc_sampling_configs.size);
agents[i]->num_pc_sampling_configs);
return ROCPROFILER_STATUS_SUCCESS;
}
else
@@ -53,7 +53,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
agents[i]->name,
agents[i]->id.handle,
agents[i]->type,
agents[i]->pc_sampling_configs.size);
agents[i]->num_pc_sampling_configs);
}
}
@@ -78,10 +78,10 @@ configure_host_trap_sampling(rocprofiler_context_id_t context_id,
rocprofiler_agent_t gpu_agent)
{
// Vladimir: Does MI200 have only one configuration?
assert(gpu_agent.pc_sampling_configs.size == 1);
assert(gpu_agent.num_pc_sampling_configs == 1);
// Extract the configuration
auto host_trap_config = gpu_agent.pc_sampling_configs.data[0];
auto host_trap_config = gpu_agent.pc_sampling_configs[0];
// The mean of min_interval and max_interval
auto interval = (host_trap_config.min_interval + host_trap_config.max_interval) / 2;
@@ -96,15 +96,16 @@ configure_host_trap_sampling(rocprofiler_context_id_t context_id,
}
rocprofiler_pc_sampling_configuration_t
extract_stochastic_config(rocprofiler_pc_sampling_config_array_t* configs)
extract_stochastic_config(const rocprofiler_pc_sampling_configuration_t* configs,
size_t num_configs)
{
// Iterate over an array of configurations and return the first one
// with stochasting method.
for(size_t i = 0; i < configs->size; i++)
for(size_t i = 0; i < num_configs; i++)
{
if(configs->data[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
if(configs[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
{
return configs->data[i];
return configs[i];
}
}
printf("Improper use of the `extract_stochastic_config` function.");
@@ -118,7 +119,7 @@ configure_stochastic_sampling(rocprofiler_context_id_t context_id,
{
// Find the configuration matching stochastic sampling in cycles
rocprofiler_pc_sampling_configuration_t stochastic_config =
extract_stochastic_config(&gpu_agent.pc_sampling_configs);
extract_stochastic_config(gpu_agent.pc_sampling_configs, gpu_agent.num_pc_sampling_configs);
// The mean of min_interval and max_interval
auto interval = (stochastic_config.min_interval + stochastic_config.max_interval) / 2;
+10 -4
Просмотреть файл
@@ -172,14 +172,20 @@ typedef struct rocprofiler_agent_t
///< dimension of a work-group.
rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension
///< of a grid.
rocprofiler_agent_mem_bank_t* mem_banks;
rocprofiler_agent_cache_t* caches;
rocprofiler_agent_io_link_t* io_links;
const rocprofiler_agent_mem_bank_t* mem_banks;
const rocprofiler_agent_cache_t* caches;
const rocprofiler_agent_io_link_t* io_links;
const char* name; ///< Name of the agent. Will be identical to product name for CPU
const char* vendor_name; ///< Vendor of agent (will be AMD)
const char* product_name; ///< Marketing name
const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc.
rocprofiler_pc_sampling_config_array_t pc_sampling_configs;
uint64_t num_pc_sampling_configs; ///< GPU only. Number of PC sampling modes available for this
///< device type. Note: if another process is currently using
///< PC sampling on this agent, this value will be zero so
///< do not assume the number of PC sampling configurations
///< based on the device type.
const rocprofiler_pc_sampling_configuration_t*
pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types.
} rocprofiler_agent_t;
/**
-9
Просмотреть файл
@@ -326,15 +326,6 @@ typedef struct
uint64_t handle;
} rocprofiler_profile_config_id_t;
/**
* @brief Array of PC Sampling Configurations
*/
typedef struct rocprofiler_pc_sampling_config_array_s
{
rocprofiler_pc_sampling_configuration_t* data;
size_t size;
} rocprofiler_pc_sampling_config_array_t;
/**
* @brief Tracing record
*
-13
Просмотреть файл
@@ -63,19 +63,6 @@ struct rocprofiler_pc_sampling_configuration_s
uint64_t flags;
};
/**
* @brief Query PC Sampling Configuration.
*
* @param [in] agent
* @param [out] config
* @param [out] config_count
* @return ::rocprofiler_status_t
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent,
rocprofiler_pc_sampling_configuration_t* config,
size_t* config_count) ROCPROFILER_NONNULL(2, 3);
/** @} */
ROCPROFILER_EXTERN_C_FINI
+22 -16
Просмотреть файл
@@ -211,15 +211,20 @@ template <typename MapT, typename Tp>
void
read_property(const MapT& data, const std::string& label, Tp& value)
{
using mutable_type = std::remove_const_t<Tp>;
if constexpr(std::is_enum<Tp>::value)
{
using value_type = std::underlying_type_t<Tp>;
using value_type = std::underlying_type_t<mutable_type>;
// never expect this to be true but it does guard against infinite recursion
static_assert(!std::is_enum<value_type>::value, "Expected non-enum type");
auto value_v = static_cast<value_type>(value);
read_property(data, label, value_v);
value = static_cast<Tp>(value_v);
if constexpr(std::is_const<Tp>::value)
const_cast<mutable_type&>(value) = static_cast<mutable_type>(value_v);
else
value = static_cast<Tp>(value_v);
}
else
{
@@ -258,7 +263,10 @@ read_property(const MapT& data, const std::string& label, Tp& value)
max_value)};
}
value = static_cast<Tp>(local_value);
if constexpr(std::is_const<Tp>::value)
const_cast<mutable_type&>(value) = static_cast<mutable_type>(local_value);
else
value = static_cast<Tp>(local_value);
}
}
@@ -280,7 +288,7 @@ read_topology()
using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;
auto mi200_pc_sampling_config = pc_sampling_config_vec_t{
static auto mi200_pc_sampling_config = pc_sampling_config_vec_t{
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
1UL,
@@ -412,12 +420,16 @@ read_topology()
drmClose(drm_fd);
}
constexpr auto gfx90a_version = compute_version(9, 0, 10);
if(agent_info.gfx_target_version >= gfx90a_version)
// TODO(jomadsen): make contingent on whether this process acquired the PC sampling
// device lock
{
agent_info.pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{
mi200_pc_sampling_config.data(), mi200_pc_sampling_config.size()};
constexpr auto gfx90a_version = compute_version(9, 0, 10);
if(agent_info.gfx_target_version >= gfx90a_version)
{
agent_info.pc_sampling_configs = mi200_pc_sampling_config.data();
agent_info.num_pc_sampling_configs = mi200_pc_sampling_config.size();
}
}
}
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
@@ -467,16 +479,10 @@ read_topology()
for(uint32_t i = 0; i < agent_info.mem_banks_count; ++i)
{
using heap_type_t = HSA_HEAPTYPE;
using underlying_heap_type_t = std::underlying_type_t<heap_type_t>;
auto subproperties =
read_map(node_path / "mem_banks" / std::to_string(i) / "properties");
auto _heap_type = underlying_heap_type_t{};
read_property(subproperties, "heap_type", _heap_type);
agent_info.mem_banks[i].heap_type = static_cast<heap_type_t>(_heap_type);
read_property(subproperties, "heap_type", agent_info.mem_banks[i].heap_type);
read_property(
subproperties, "size_in_bytes", agent_info.mem_banks[i].size_in_bytes);
read_property(subproperties, "flags", agent_info.mem_banks[i].flags.MemoryProperty);
+7 -11
Просмотреть файл
@@ -198,22 +198,18 @@ hsa_api_impl<Idx>::functor(Args&&... args)
if(itr->callback_tracer)
{
// if the given domain + op is not enabled, skip this context
if(!itr->callback_tracer->domains(info_type::callback_domain_idx,
info_type::operation_idx))
continue;
callback_contexts.emplace_back(
callback_context_data{itr, rocprofiler_callback_tracing_record_t{}});
if(itr->callback_tracer->domains(info_type::callback_domain_idx,
info_type::operation_idx))
callback_contexts.emplace_back(
callback_context_data{itr, rocprofiler_callback_tracing_record_t{}});
}
if(itr->buffered_tracer)
{
// if the given domain + op is not enabled, skip this context
if(!itr->buffered_tracer->domains(info_type::buffered_domain_idx,
info_type::operation_idx))
continue;
buffered_contexts.emplace_back(buffered_context_data{itr});
if(itr->buffered_tracer->domains(info_type::buffered_domain_idx,
info_type::operation_idx))
buffered_contexts.emplace_back(buffered_context_data{itr});
}
}
-9
Просмотреть файл
@@ -48,13 +48,4 @@ rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t conte
consume_args(context_id, agent, method, unit, interval, buffer_id);
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
rocprofiler_status_t
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent,
rocprofiler_pc_sampling_configuration_t* config,
size_t* config_count)
{
consume_args(agent, config, config_count);
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
}
+2 -1
Просмотреть файл
@@ -97,7 +97,8 @@ TEST(rocprofiler_lib, agent_abi)
EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_name), 256) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, product_name), 264) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 280) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_pc_sampling_configs), 280) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 288) << msg;
// Add test for offset of new field above this. Do NOT change any existing values!
// If a new field is added, increase this value by the size of the new field(s)