Accumulation metrics support and update counter collection API to aqlprofile_v2 (#915)

* Updating to v3 API

* General fixes

* Extending dimension bits to 54

* Disabling agent profiling tests

* Fixed unit test

* Adding accumulate metric support for parsing counters (#609)

* Adding accumulate metric support for parsing counters

* Adding metric flag

* Updating tests

* source formatting (clang-format v11) (#610)

Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com>

* source formatting (clang-format v11) (#614)

Co-authored-by: jrmadsen <6001865+jrmadsen@users.noreply.github.com>

* Adding evaluate ast test

* source formatting (clang-format v11) (#633)

Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com>

* Update scanner generated file

* Adding flags to events for aqlprofile

* Fix Mi200 failing test

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com>
Co-authored-by: jrmadsen <6001865+jrmadsen@users.noreply.github.com>

* Revert "Extending dimension bits to 54"

This reverts commit 3cd6628452484044a93e129f27974f996a0e4c08.

* Removing CU dimension

* Fixing merge conflicts

* Revert "Disabling agent profiling tests"

This reverts commit 7e01518ed8c51fbb0c3b2575e1e0b8f9ddfa8237.

* Fixing merge conflicts

* Fix parser tests

* Adding accumulate metric documentation

* Update counter_collection_services.md

* Update index.md

* fix nested expression use

* Update source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Doc update

---------

Co-authored-by: Benjamin Welton <ben@amd.com>
Co-authored-by: Manjunath P Jakaraddi <manjunath180397@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Manjunath-Jakaraddi <21177428+Manjunath-Jakaraddi@users.noreply.github.com>
Co-authored-by: jrmadsen <6001865+jrmadsen@users.noreply.github.com>
Co-authored-by: Manjunath-Jakaraddi <manjunath.jakaraddi@amd.com>

[ROCm/rocprofiler-sdk commit: a78753d392]
This commit is contained in:
Giovanni Lenzi Baraldi
2024-07-01 21:56:41 -03:00
gecommit door GitHub
bovenliggende 55a6d2fd1b
commit ebad2abe3c
31 gewijzigde bestanden met toevoegingen van 779 en 472 verwijderingen
@@ -15,6 +15,7 @@ subtrees:
- file: buffered_services
- file: pc_sampling
- file: intercept_table
- file: counter_collection_services
- file: _doxygen/html/index
- file: samples
- file: rocprofv3
@@ -0,0 +1,14 @@
# Derived Metrics
## Accumulate metric
### Expression
expr=accumulate(<basic_level_counter>, <resolution>)
### Description
- The accumulate metric is used to sum the values of a basic level counter over a specified number of cycles. By setting the resolution parameter, you can control the frequency of the summing operation:
- HIGH_RES: Sums up the basic counter every clock cycle. Captures the value every single cycle for higher accuracy, suitable for fine-grained analysis.
- LOW_RES: Sums up the basic counter every four clock cycles. Reduces the data points and provides less detailed summing, useful for reducing data volume.
- NONE: Does nothing and is equivalent to collecting basic_level_counter. Outputs the value of the basic counter without any summing operation.
### Usage (derived_counters.xml)
<metric name="MeanOccupancyPerCU" expr=accumulate(SQ_LEVEL_WAVES,HIGH_RES)/reduce(GRBM_GUI_ACTIVE,max)/CU_NUM descr="Mean occupancy per compute unit."></metric>
- MeanOccupancyPerCU: This metric calculates the mean occupancy per compute unit. It uses the accumulate function with HIGH_RES to sum the SQ_LEVEL_WAVES counter at every clock cycle. This sum is then divided by the maximum GRBM_GUI_ACTIVE value (obtained with reduce(GRBM_GUI_ACTIVE,max)) and by the number of compute units (CU_NUM) to derive the mean occupancy.
@@ -795,6 +795,8 @@ construct_agent_cache(::HsaApiTable* table)
"{}",
fmt::join(rocp_hsa_agent_node_ids.begin(), rocp_hsa_agent_node_ids.end(), ", "));
get_agent_caches().clear();
get_agent_mapping().clear();
get_agent_mapping().reserve(get_agent_mapping().size() + rocp_agents.size());
auto hsa_agent_node_map = std::unordered_map<uint32_t, hsa_agent_t>{};
@@ -66,9 +66,9 @@ get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& e
rocprofiler_status_t
set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id)
aqlprofile_agent_handle_t agent,
aqlprofile_pmc_event_t event,
size_t sample_id)
{
auto callback =
[](int, int sid, int, int coordinate, const char*, void* userdata) -> hsa_status_t {
@@ -82,8 +82,8 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
return HSA_STATUS_SUCCESS;
};
if(hsa_ven_amd_aqlprofile_iterate_event_coord(
agent, event, sample_id, callback, static_cast<void*>(&id)) != HSA_STATUS_SUCCESS)
if(aqlprofile_iterate_event_coord(agent, event, sample_id, callback, static_cast<void*>(&id)) !=
HSA_STATUS_SUCCESS)
{
return ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD;
}
@@ -57,9 +57,9 @@ get_dim_info(rocprofiler_agent_id_t agent,
// Set dimension ids into id for sample
rocprofiler_status_t
set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id);
aqlprofile_agent_handle_t agent,
aqlprofile_pmc_event_t event,
size_t sample_id);
rocprofiler_status_t
set_profiler_active_on_queue(const AmdExtTable& api,
@@ -66,14 +66,15 @@ CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t
for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index)
{
_metrics.back().instances.push_back(
{static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id),
block_index,
event_id});
{.block_index = block_index,
.event_id = event_id,
.flags = aqlprofile_pmc_event_flags_t{x.flags()},
.block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id)});
_metrics.back().events.push_back(
{.block_index = block_index,
.event_id = event_id,
.flags = aqlprofile_pmc_event_flags_t{0},
.flags = aqlprofile_pmc_event_flags_t{x.flags()},
.block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id)});
bool validate_event_result;
@@ -86,114 +87,45 @@ CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t
&validate_event_result) != HSA_STATUS_SUCCESS);
ROCP_FATAL_IF(!validate_event_result)
<< "Invalid Metric: " << block_index << " " << event_id;
_event_to_metric[std::make_tuple(
static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id),
block_index,
event_id)] = x;
_event_to_metric[_metrics.back().events.back()] = x;
}
}
_events = get_all_events();
}
std::unique_ptr<hsa::CounterAQLPacket>
CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
CounterPacketConstruct::construct_packet(const CoreApiTable& coreapi, const AmdExtTable& ext)
{
auto pkt_ptr = std::make_unique<hsa::CounterAQLPacket>(ext.hsa_amd_memory_pool_free_fn);
auto& pkt = *pkt_ptr;
if(_events.empty())
{
ROCP_TRACE << "No events for pkt";
return pkt_ptr;
}
pkt.empty = false;
const auto* agent_cache =
const auto* agent =
rocprofiler::agent::get_agent_cache(CHECK_NOTNULL(rocprofiler::agent::get_agent(_agent)));
if(!agent_cache)
{
ROCP_FATAL << "No agent cache for agent id: " << _agent.handle;
}
pkt.profile = hsa_ven_amd_aqlprofile_profile_t{
agent_cache->get_hsa_agent(),
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM?
_events.data(),
static_cast<uint32_t>(_events.size()),
nullptr,
0u,
hsa_ven_amd_aqlprofile_descriptor_t{.ptr = nullptr, .size = 0},
hsa_ven_amd_aqlprofile_descriptor_t{.ptr = nullptr, .size = 0}};
auto& profile = pkt.profile;
if(!agent) ROCP_FATAL << "No agent cache for agent id: " << _agent.handle;
hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
ext.hsa_amd_agent_memory_pool_get_info_fn(agent_cache->get_hsa_agent(),
agent_cache->kernarg_pool(),
ext.hsa_amd_agent_memory_pool_get_info_fn(agent->get_hsa_agent(),
agent->kernarg_pool(),
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
static_cast<void*>(&_access));
// Memory is accessable by both the GPU and CPU, unlock the command buffer for
// sharing.
if(_access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)
{
throw std::runtime_error(
fmt::format("Agent {} does not allow memory pool access for counter collection",
agent_cache->get_hsa_agent().handle));
}
CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, nullptr), "could not generate packet sizes");
hsa::CounterAQLPacket::CounterMemoryPool pool;
if(profile.command_buffer.size == 0 || profile.output_buffer.size == 0)
{
throw std::runtime_error(
fmt::format("No command or output buffer size set. CMD_BUF={} PROFILE_BUF={}",
profile.command_buffer.size,
profile.output_buffer.size));
}
if(_access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) pool.bIgnoreKernArg = true;
// Allocate buffers and check the results
auto alloc_and_check = [&](auto& pool, auto** mem_loc, auto size) -> bool {
bool malloced = false;
size_t page_aligned = getPageAligned(size);
if(ext.hsa_amd_memory_pool_allocate_fn(
pool, page_aligned, 0, static_cast<void**>(mem_loc)) != HSA_STATUS_SUCCESS)
{
*mem_loc = malloc(page_aligned);
malloced = true;
}
else
{
CHECK(*mem_loc);
hsa_agent_t agent = agent_cache->get_hsa_agent();
// Memory is accessable by both the GPU and CPU, unlock the command buffer for
// sharing.
LOG_IF(FATAL,
ext.hsa_amd_agents_allow_access_fn(1, &agent, nullptr, *mem_loc) !=
HSA_STATUS_SUCCESS)
<< "Error: Allowing access to Command Buffer";
}
return malloced;
};
pool.allocate_fn = ext.hsa_amd_memory_pool_allocate_fn;
pool.allow_access_fn = ext.hsa_amd_agents_allow_access_fn;
pool.free_fn = ext.hsa_amd_memory_pool_free_fn;
pool.api_copy_fn = coreapi.hsa_memory_copy_fn;
pool.fill_fn = ext.hsa_amd_memory_fill_fn;
// Build command and output buffers
pkt.command_buf_mallocd = alloc_and_check(
agent_cache->cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size);
pkt.output_buffer_malloced = alloc_and_check(
agent_cache->kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size);
memset(profile.output_buffer.ptr, 0x0, profile.output_buffer.size);
pool.gpu_agent = agent->get_hsa_agent();
pool.cpu_pool_ = agent->cpu_pool();
pool.kernarg_pool_ = agent->kernarg_pool();
CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, &pkt.start), "failed to create start packet");
CHECK_HSA(hsa_ven_amd_aqlprofile_stop(&profile, &pkt.stop), "failed to create stop packet");
CHECK_HSA(hsa_ven_amd_aqlprofile_read(&profile, &pkt.read), "failed to create read packet");
pkt.start.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
pkt.stop.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
pkt.read.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
ROCP_TRACE << fmt::format("Following Packets Generated (output_buffer={}, output_size={}). "
"Start Pkt: {}, Read Pkt: {}, Stop Pkt: {}",
profile.output_buffer.ptr,
profile.output_buffer.size,
pkt.start,
pkt.read,
pkt.stop);
return pkt_ptr;
const auto* aql_agent = rocprofiler::agent::get_aql_agent(agent->get_rocp_agent()->id);
if(aql_agent == nullptr) throw std::runtime_error("Could not get AQL agent!");
if(_events.empty()) ROCP_TRACE << "No events for pkt";
return std::make_unique<hsa::CounterAQLPacket>(*aql_agent, pool, _events);
}
ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache& agent,
@@ -255,10 +187,10 @@ ThreadTraceAQLPacketFactory::construct_unload_marker_packet(uint64_t id)
return std::make_unique<hsa::CodeobjMarkerAQLPacket>(tracepool, id, 0, 0, false, true);
}
std::vector<hsa_ven_amd_aqlprofile_event_t>
std::vector<aqlprofile_pmc_event_t>
CounterPacketConstruct::get_all_events() const
{
std::vector<hsa_ven_amd_aqlprofile_event_t> ret;
std::vector<aqlprofile_pmc_event_t> ret;
for(const auto& metric : _metrics)
{
ret.insert(ret.end(), metric.instances.begin(), metric.instances.end());
@@ -267,11 +199,9 @@ CounterPacketConstruct::get_all_events() const
}
const counters::Metric*
CounterPacketConstruct::event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const
CounterPacketConstruct::event_to_metric(const aqlprofile_pmc_event_t& event) const
{
if(const auto* ptr = rocprofiler::common::get_val(
_event_to_metric,
std::make_tuple(event.block_name, event.block_index, event.counter_id)))
if(const auto* ptr = rocprofiler::common::get_val(_event_to_metric, event))
{
return ptr;
}
@@ -38,6 +38,24 @@
#include "lib/rocprofiler-sdk/thread_trace/att_core.hpp"
#include "rocprofiler-sdk/fwd.h"
/// Field-wise equality for aqlprofile PMC events.
///
/// Two events compare equal only when their block name, block index, event id
/// and raw flag bits all match.
inline bool
operator==(aqlprofile_pmc_event_t lhs, aqlprofile_pmc_event_t rhs)
{
    const bool same_block =
        (lhs.block_name == rhs.block_name) && (lhs.block_index == rhs.block_index);
    const bool same_event = (lhs.event_id == rhs.event_id) && (lhs.flags.raw == rhs.flags.raw);
    return same_block && same_event;
}
/// Lexicographic ordering for aqlprofile PMC events.
///
/// Fields are compared in the order block name, block index, event id and
/// finally the raw flag bits; the first field that differs decides the result.
inline bool
operator<(aqlprofile_pmc_event_t lhs, aqlprofile_pmc_event_t rhs)
{
    if(lhs.block_name < rhs.block_name) return true;
    if(rhs.block_name < lhs.block_name) return false;

    if(lhs.block_index < rhs.block_index) return true;
    if(rhs.block_index < lhs.block_index) return false;

    if(lhs.event_id < rhs.event_id) return true;
    if(rhs.event_id < lhs.event_id) return false;

    // Every preceding field is equal, so the raw flag bits break the tie.
    return lhs.flags.raw < rhs.flags.raw;
}
namespace rocprofiler
{
namespace aql
@@ -55,11 +73,12 @@ class CounterPacketConstruct
public:
CounterPacketConstruct(rocprofiler_agent_id_t agent,
const std::vector<counters::Metric>& metrics);
std::unique_ptr<hsa::CounterAQLPacket> construct_packet(const AmdExtTable&);
std::unique_ptr<hsa::CounterAQLPacket> construct_packet(const CoreApiTable&,
const AmdExtTable&);
const counters::Metric* event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const;
std::vector<hsa_ven_amd_aqlprofile_event_t> get_all_events() const;
const std::vector<aqlprofile_pmc_event_t>& get_counter_events(const counters::Metric&) const;
const counters::Metric* event_to_metric(const aqlprofile_pmc_event_t& event) const;
std::vector<aqlprofile_pmc_event_t> get_all_events() const;
const std::vector<aqlprofile_pmc_event_t>& get_counter_events(const counters::Metric&) const;
rocprofiler_agent_id_t agent() const { return _agent; }
@@ -73,16 +92,15 @@ private:
protected:
struct AQLProfileMetric
{
counters::Metric metric;
std::vector<hsa_ven_amd_aqlprofile_event_t> instances;
std::vector<aqlprofile_pmc_event_t> events;
counters::Metric metric;
std::vector<aqlprofile_pmc_event_t> instances;
std::vector<aqlprofile_pmc_event_t> events;
};
rocprofiler_agent_id_t _agent;
std::vector<AQLProfileMetric> _metrics;
std::vector<hsa_ven_amd_aqlprofile_event_t> _events;
std::map<std::tuple<hsa_ven_amd_aqlprofile_block_name_t, uint32_t, uint32_t>, counters::Metric>
_event_to_metric;
rocprofiler_agent_id_t _agent;
std::vector<AQLProfileMetric> _metrics;
std::vector<aqlprofile_pmc_event_t> _events;
std::map<aqlprofile_pmc_event_t, counters::Metric> _event_to_metric;
};
class ThreadTraceAQLPacketFactory
@@ -39,6 +39,38 @@ using namespace rocprofiler::counters::test_constants;
namespace rocprofiler
{
/// Returns the AMD extension table used by these tests.
///
/// The table is built once, on first call, with the memory-pool entry points
/// assigned below; every other entry is left value-initialized. The same
/// instance is returned by reference on every call.
AmdExtTable&
get_ext_table()
{
    static AmdExtTable ext_table = [] {
        AmdExtTable ext{};
        ext.hsa_amd_memory_pool_get_info_fn       = hsa_amd_memory_pool_get_info;
        ext.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools;
        ext.hsa_amd_memory_pool_allocate_fn       = hsa_amd_memory_pool_allocate;
        ext.hsa_amd_memory_pool_free_fn           = hsa_amd_memory_pool_free;
        ext.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info;
        ext.hsa_amd_agents_allow_access_fn        = hsa_amd_agents_allow_access;
        ext.hsa_amd_memory_fill_fn                = hsa_amd_memory_fill;
        return ext;
    }();
    return ext_table;
}
/// Returns the core HSA API table used by these tests.
///
/// The table is built once, on first call, with the entry points assigned
/// below; every other entry is left value-initialized. The same instance is
/// returned by reference on every call.
CoreApiTable&
get_api_table()
{
    static auto _v = []() {
        auto val                       = CoreApiTable{};
        val.hsa_iterate_agents_fn      = hsa_iterate_agents;
        val.hsa_agent_get_info_fn      = hsa_agent_get_info;
        val.hsa_queue_create_fn        = hsa_queue_create;
        val.hsa_queue_destroy_fn       = hsa_queue_destroy;
        val.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed;
        // CounterPacketConstruct::construct_packet() stores hsa_memory_copy_fn
        // as the counter memory pool's copy callback, so it must not be left null.
        val.hsa_memory_copy_fn = hsa_memory_copy;
        return val;
    }();
    return _v;
}
auto
findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set<std::string>& metrics)
{
@@ -122,7 +154,9 @@ TEST(aql_profile, packet_generation_single)
{
auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"});
CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics);
auto test_pkt = pkt.construct_packet(get_ext_table());
auto test_pkt =
pkt.construct_packet(rocprofiler::get_api_table(), rocprofiler::get_ext_table());
EXPECT_TRUE(test_pkt);
}
@@ -141,13 +175,15 @@ TEST(aql_profile, packet_generation_multi)
auto metrics =
rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES", "TA_FLAT_READ_WAVEFRONTS"});
CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics);
auto test_pkt = pkt.construct_packet(get_ext_table());
auto test_pkt =
pkt.construct_packet(rocprofiler::get_api_table(), rocprofiler::get_ext_table());
EXPECT_TRUE(test_pkt);
}
hsa_shut_down();
}
/*
class TestAqlPacket : public rocprofiler::hsa::CounterAQLPacket
{
public:
@@ -183,3 +219,4 @@ TEST(aql_profile, test_aql_packet)
// Why is this valid?
TestAqlPacket test_pkt2(false);
}
*/
@@ -100,12 +100,14 @@ construct_aql_pkt(std::shared_ptr<profile_config>& profile)
}
auto pkts = profile->pkt_generator->construct_packet(
CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table(),
CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table());
pkts->start.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->start.completion_signal.handle = 0;
pkts->stop.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->read.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->packets.start_packet.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->packets.stop_packet.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->packets.read_packet.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC);
pkts->packets.start_packet.completion_signal.handle = 0;
return pkts;
}
@@ -303,8 +305,9 @@ read_agent_ctx(const context::context* ctx,
agent->get_rocp_agent()->simd_arrays_per_engine);
// Submit the read packet to the queue
submitPacket(
callback_data.table, agent->profile_queue(), (void*) &callback_data.packet->read);
submitPacket(callback_data.table,
agent->profile_queue(),
(void*) &callback_data.packet->packets.read_packet);
// Submit a barrier packet. This is needed to flush hardware caches. Without this
// the read packet may not have the correct data.
@@ -452,10 +455,11 @@ start_agent_ctx(const context::context* ctx)
continue;
}
callback_data.packet->start.completion_signal = callback_data.start_signal;
callback_data.packet->packets.start_packet.completion_signal = callback_data.start_signal;
callback_data.table.hsa_signal_store_relaxed_fn(callback_data.start_signal, 1);
submitPacket(
callback_data.table, agent->profile_queue(), (void*) &callback_data.packet->start);
submitPacket(callback_data.table,
agent->profile_queue(),
(void*) &callback_data.packet->packets.start_packet);
// Wait for startup to finish before continuing
callback_data.table.hsa_signal_wait_relaxed_fn(callback_data.start_signal,
@@ -511,8 +515,9 @@ stop_agent_ctx(const context::context* ctx)
if(!callback_data.profile->reqired_hw_counters.empty())
{
// Remove when AQL is updated to not require stop to be called first
submitPacket(
callback_data.table, agent->profile_queue(), (void*) &callback_data.packet->stop);
submitPacket(callback_data.table,
agent->profile_queue(),
(void*) &callback_data.packet->packets.stop_packet);
}
// Wait for the stop packet to complete
@@ -129,11 +129,11 @@ counter_callback_info::get_packet(std::unique_ptr<rocprofiler::hsa::AQLPacket>&
{
// If we do not have a packet in the cache, create one.
ret_pkt = profile->pkt_generator->construct_packet(
CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table(),
CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table());
}
ret_pkt->before_krn_pkt.clear();
ret_pkt->after_krn_pkt.clear();
ret_pkt->clear();
packet_return_map.wlock([&](auto& data) { data.emplace(ret_pkt.get(), profile); });
return ROCPROFILER_STATUS_SUCCESS;
@@ -72,7 +72,7 @@ queue_cb(const context::context* ctx,
// Packet generated when no instrumentation is performed. May contain serialization
// packets/barrier packets (and can be empty).
auto no_instrumentation = [&]() {
auto ret_pkt = std::make_unique<rocprofiler::hsa::CounterAQLPacket>(nullptr);
auto ret_pkt = std::make_unique<rocprofiler::hsa::EmptyAQLPacket>();
// If we have a counter collection context but it is not enabled, we still might need
// to add barrier packets to transition from serialized -> unserialized execution. This
// transition is coordinated by the serializer.
@@ -147,13 +147,10 @@ queue_cb(const context::context* ctx,
return ret_pkt;
}
ret_pkt->before_krn_pkt.push_back(ret_pkt->start);
ret_pkt->after_krn_pkt.push_back(ret_pkt->read);
ret_pkt->after_krn_pkt.push_back(ret_pkt->stop);
ret_pkt->populate_before();
ret_pkt->populate_after();
for(auto& aql_pkt : ret_pkt->after_krn_pkt)
{
aql_pkt.completion_signal.handle = 0;
}
return ret_pkt;
}
@@ -207,11 +207,15 @@ EvaluateAST::EvaluateAST(rocprofiler_counter_id_t out_id,
, _reduce_dimension_set(ast.reduce_dimension_set)
, _out_id(out_id)
{
if(_type == NodeType::REFERENCE_NODE)
if(_type == NodeType::REFERENCE_NODE || _type == NodeType::ACCUMULATE_NODE)
{
try
{
_metric = metrics.at(std::get<std::string>(ast.value));
if(_type == NodeType::ACCUMULATE_NODE)
{
_metric.setflags(static_cast<int>(ast.accumulate_op));
}
} catch(std::exception& e)
{
throw std::runtime_error(
@@ -277,6 +281,7 @@ EvaluateAST::set_dimensions()
_dimension_types = first.size() > second.size() ? first : second;
}
break;
case ACCUMULATE_NODE:
case REFERENCE_NODE:
{
_dimension_types = get_dim_types(_metric);
@@ -377,6 +382,11 @@ EvaluateAST::validate_raw_ast(const std::unordered_map<std::string, Metric>& met
// Dimensionindex values should be within limits for this metric and GPU.
}
break;
case ACCUMULATE_NODE:
{
// Future todo only to be applied on sq metric
}
break;
}
} catch(std::exception& e)
{
@@ -466,39 +476,36 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket
{
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>* data;
const aql::CounterPacketConstruct* pkt_gen;
hsa_agent_t agent;
aqlprofile_agent_handle_t agent;
};
auto agent = CHECK_NOTNULL(rocprofiler::agent::get_agent_cache(
CHECK_NOTNULL(rocprofiler::agent::get_agent(pkt_gen->agent()))))
->get_hsa_agent();
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(pkt_gen->agent()));
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>> ret;
if(pkt.empty) return ret;
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = agent};
;
hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(
&pkt.profile,
[](hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* data) {
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = aql_agent};
hsa_status_t status = aqlprofile_pmc_iterate_data(
pkt.handle,
[](aqlprofile_pmc_event_t event, uint64_t counter_id, uint64_t counter_value, void* data) {
CHECK(data);
auto& it = *static_cast<it_data*>(data);
if(info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) return HSA_STATUS_SUCCESS;
const auto* metric = it.pkt_gen->event_to_metric(info_data->pmc_data.event);
auto& it = *static_cast<it_data*>(data);
const auto* metric = it.pkt_gen->event_to_metric(event);
if(!metric) return HSA_STATUS_SUCCESS;
auto& vec = it.data->emplace(metric->id(), std::vector<rocprofiler_record_counter_t>{})
.first->second;
auto& next_rec = vec.emplace_back();
set_counter_in_rec(next_rec.id, {.handle = metric->id()});
// Actual dimension info needs to be used here in the future
auto aql_status = aql::set_dim_id_from_sample(
next_rec.id, it.agent, info_data->pmc_data.event, info_data->sample_id);
auto aql_status = aql::set_dim_id_from_sample(next_rec.id, it.agent, event, counter_id);
CHECK_EQ(aql_status, ROCPROFILER_STATUS_SUCCESS)
<< rocprofiler_get_status_string(aql_status);
// set_dim_in_rec(next_rec.id, ROCPROFILER_DIMENSION_NONE, vec.size() - 1);
// Note: in the near future we need to use hw_counter here instead
next_rec.counter_value = info_data->pmc_data.result;
next_rec.counter_value = counter_value;
return HSA_STATUS_SUCCESS;
},
&aql_data);
@@ -522,6 +529,7 @@ EvaluateAST::expand_derived(std::unordered_map<std::string, EvaluateAST>& asts)
_expanded = true;
for(auto& child : _children)
{
if(child._type == NodeType::ACCUMULATE_NODE) continue;
if(auto* ptr = rocprofiler::common::get_val(asts, child.metric().name()))
{
ptr->expand_derived(asts);
@@ -629,6 +637,8 @@ EvaluateAST::evaluate(
.dispatch_id = a.dispatch_id,
.user_data = {.value = 0}};
});
case ACCUMULATE_NODE:
// todo update how to read the hybrid metric
case REFERENCE_NODE:
{
auto* result = rocprofiler::common::get_val(results_map, _metric.id());
@@ -48,8 +48,7 @@ enum DimensionTypes
DIMENSION_SHADER_ENGINE = 1 << 2,
DIMENSION_AGENT = 1 << 3,
DIMENSION_PMC_CHANNEL = 1 << 4,
DIMENSION_CU = 1 << 5,
DIMENSION_LAST = 1 << 6,
DIMENSION_LAST = 1 << 5,
};
enum ReduceOperation
@@ -43,7 +43,7 @@ dimension_map()
{ROCPROFILER_DIMENSION_SHADER_ENGINE, std::string_view("DIMENSION_SHADER_ENGINE")},
{ROCPROFILER_DIMENSION_AGENT, std::string_view("DIMENSION_AGENT")},
{ROCPROFILER_DIMENSION_SHADER_ARRAY, std::string_view("DIMENSION_SHADER_ARRAY")},
{ROCPROFILER_DIMENSION_CU, std::string_view("DIMENSION_CU")},
{ROCPROFILER_DIMENSION_WGP, std::string_view("DIMENSION_WGP")},
{ROCPROFILER_DIMENSION_INSTANCE, std::string_view("DIMENSION_INSTANCE")},
});
return *_v;
@@ -67,7 +67,7 @@ aqlprofile_id_to_rocprof_instance()
{"AID", ROCPROFILER_DIMENSION_AID},
{"SE", ROCPROFILER_DIMENSION_SHADER_ENGINE},
{"SA", ROCPROFILER_DIMENSION_SHADER_ARRAY},
{"CU", ROCPROFILER_DIMENSION_CU},
{"WGP", ROCPROFILER_DIMENSION_WGP},
{"INSTANCE", ROCPROFILER_DIMENSION_INSTANCE},
};
@@ -45,7 +45,7 @@ enum rocprofiler_profile_counter_instance_types
ROCPROFILER_DIMENSION_SHADER_ENGINE, ///< SE dimension of result
ROCPROFILER_DIMENSION_AGENT, ///< Agent dimension
ROCPROFILER_DIMENSION_SHADER_ARRAY, ///< Number of shader arrays
ROCPROFILER_DIMENSION_CU, ///< Number of compute units
ROCPROFILER_DIMENSION_WGP, ///< Number of workgroup processors
ROCPROFILER_DIMENSION_INSTANCE, ///< Number of instances
ROCPROFILER_DIMENSION_LAST
};
@@ -272,7 +272,7 @@ checkValidMetric(const std::string& agent, const Metric& metric)
bool
operator<(Metric const& lhs, Metric const& rhs)
{
return lhs.id() < rhs.id();
return std::tie(lhs.id_, lhs.flags_) < std::tie(rhs.id_, rhs.flags_);
}
bool
@@ -286,7 +286,8 @@ operator==(Metric const& lhs, Metric const& rhs)
x.expression_,
x.special_,
x.id_,
x.empty_);
x.empty_,
x.flags_);
};
return get_tie(lhs) == get_tie(rhs);
}
@@ -64,8 +64,11 @@ public:
const std::string& expression() const { return expression_; }
const std::string& special() const { return special_; }
uint64_t id() const { return id_; }
uint32_t flags() const { return flags_; }
bool empty() const { return empty_; }
void setflags(uint32_t flags) { this->flags_ = flags; }
friend bool operator<(Metric const& lhs, Metric const& rhs);
friend bool operator==(Metric const& lhs, Metric const& rhs);
@@ -78,6 +81,7 @@ private:
std::string special_ = {};
int64_t id_ = -1;
bool empty_ = false;
uint32_t flags_ = 0;
};
using MetricMap = std::unordered_map<std::string, std::vector<Metric>>;
@@ -133,13 +133,14 @@ enum yysymbol_kind_t
YYSYMBOL_NAME = 20, /* NAME */
YYSYMBOL_REDUCE = 21, /* REDUCE */
YYSYMBOL_SELECT = 22, /* SELECT */
YYSYMBOL_LOWER_THAN_ELSE = 23, /* LOWER_THAN_ELSE */
YYSYMBOL_ELSE = 24, /* ELSE */
YYSYMBOL_YYACCEPT = 25, /* $accept */
YYSYMBOL_top = 26, /* top */
YYSYMBOL_exp = 27, /* exp */
YYSYMBOL_reduce_dim_args = 28, /* reduce_dim_args */
YYSYMBOL_select_dim_args = 29 /* select_dim_args */
YYSYMBOL_ACCUMULATE = 23, /* ACCUMULATE */
YYSYMBOL_LOWER_THAN_ELSE = 24, /* LOWER_THAN_ELSE */
YYSYMBOL_ELSE = 25, /* ELSE */
YYSYMBOL_YYACCEPT = 26, /* $accept */
YYSYMBOL_top = 27, /* top */
YYSYMBOL_exp = 28, /* exp */
YYSYMBOL_reduce_dim_args = 29, /* reduce_dim_args */
YYSYMBOL_select_dim_args = 30 /* select_dim_args */
};
typedef enum yysymbol_kind_t yysymbol_kind_t;
@@ -451,21 +452,21 @@ union yyalloc
#endif /* !YYCOPY_NEEDED */
/* YYFINAL -- State number of the termination state. */
#define YYFINAL 11
#define YYFINAL 13
/* YYLAST -- Last index in YYTABLE. */
#define YYLAST 54
#define YYLAST 60
/* YYNTOKENS -- Number of terminals. */
#define YYNTOKENS 25
#define YYNTOKENS 26
/* YYNNTS -- Number of nonterminals. */
#define YYNNTS 5
/* YYNRULES -- Number of rules. */
#define YYNRULES 16
#define YYNRULES 17
/* YYNSTATES -- Number of states. */
#define YYNSTATES 44
#define YYNSTATES 50
/* YYMAXUTOK -- Last valid token kind. */
#define YYMAXUTOK 278
#define YYMAXUTOK 279
/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM
as returned by yylex, with out-of-bounds checking. */
@@ -476,22 +477,22 @@ union yyalloc
/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
as returned by yylex. */
static const yytype_int8 yytranslate[] = {
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 15, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24};
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 15, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25};
#if YYDEBUG
/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
static const yytype_int8 yyrline[] =
{0, 57, 57, 60, 61, 62, 63, 64, 65, 66, 69, 73, 77, 84, 87, 94, 97};
{0, 58, 58, 61, 62, 63, 64, 65, 66, 67, 70, 75, 79, 83, 90, 93, 100, 103};
#endif
/** Accessing symbol of state STATE. */
@@ -528,6 +529,7 @@ static const char* const yytname[] = {"\"end of file\"",
"NAME",
"REDUCE",
"SELECT",
"ACCUMULATE",
"LOWER_THAN_ELSE",
"ELSE",
"$accept",
@@ -544,7 +546,7 @@ yysymbol_name(yysymbol_kind_t yysymbol)
}
#endif
#define YYPACT_NINF (-3)
#define YYPACT_NINF (-10)
#define yypact_value_is_default(Yyn) ((Yyn) == YYPACT_NINF)
@@ -554,48 +556,50 @@ yysymbol_name(yysymbol_kind_t yysymbol)
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
STATE-NUM. */
static const yytype_int8 yypact[] = {11, 11, -3, -3, 1, 16, 7, 32, 18, 11, 11, -3, 11, 11, 11,
11, -3, -2, 13, 0, 0, -3, -3, 6, 28, 17, 20, -3, 30, 34,
31, 24, 27, 36, 33, 35, 37, -3, 24, 38, 20, -3, -3, -3};
static const yytype_int8 yypact[] = {2, 2, -10, -10, -7, -2, 3, 21, 38, 27, 2, 2, 14,
-10, 2, 2, 2, 2, -10, 0, 23, 18, 13, 13, -10, -10,
16, 28, 25, -9, 26, 37, -10, 39, 30, 36, -10, 29, 33,
42, 40, 41, 43, -10, 29, 44, 26, -10, -10, -10};
/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
Performed when YYTABLE does not specify something else to do. Zero
means the default is an error. */
static const yytype_int8 yydefact[] = {0, 0, 3, 9, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0,
0, 8, 0, 0, 4, 5, 6, 7, 0, 0, 0, 0, 10, 0, 0,
0, 0, 0, 0, 13, 0, 15, 12, 0, 0, 0, 14, 11, 16};
static const yytype_int8 yydefact[] = {0, 0, 3, 9, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0,
0, 8, 0, 0, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 11, 0,
0, 0, 10, 0, 0, 0, 14, 0, 16, 13, 0, 0, 0, 15, 12, 17};
/* YYPGOTO[NTERM-NUM]. */
static const yytype_int8 yypgoto[] = {-3, -3, -1, 14, 9};
static const yytype_int8 yypgoto[] = {-10, -10, -1, 11, 10};
/* YYDEFGOTO[NTERM-NUM]. */
static const yytype_int8 yydefgoto[] = {0, 6, 7, 35, 30};
static const yytype_int8 yydefgoto[] = {0, 7, 8, 41, 35};
/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
positive, shift that token. If negative, reduce the rule whose
number is the opposite. If YYTABLE_NINF, syntax error. */
static const yytype_int8 yytable[] = {8, 12, 13, 14, 15, 14, 15, 11, 17, 18, 9, 19, 20, 21,
22, 23, 12, 13, 14, 15, 1, 12, 13, 14, 15, 10, 25, 27,
16, 2, 24, 3, 4, 5, 28, 12, 13, 14, 15, 26, 29, 31,
32, 33, 34, 36, 37, 39, 42, 43, 38, 0, 41, 0, 40};
static const yytype_int8 yytable[] = {
9, 32, 10, 14, 15, 16, 17, 11, 33, 19, 20, 1, 12, 22, 23, 24, 25, 26, 16, 17, 2,
13, 3, 4, 5, 6, 14, 15, 16, 17, 14, 15, 16, 17, 21, 28, 29, 18, 38, 30, 27, 14,
15, 16, 17, 31, 34, 36, 39, 40, 37, 42, 43, 45, 48, 47, 49, 44, 0, 0, 46};
static const yytype_int8 yycheck[] = {1, 3, 4, 5, 6, 5, 6, 0, 9, 10, 9, 12, 13, 14,
15, 17, 3, 4, 5, 6, 9, 3, 4, 5, 6, 9, 20, 10,
10, 18, 17, 20, 21, 22, 17, 3, 4, 5, 6, 11, 20, 11,
8, 12, 20, 18, 10, 12, 10, 40, 17, -1, 38, -1, 17};
static const yytype_int8 yycheck[] = {1, 10, 9, 3, 4, 5, 6, 9, 17, 10, 11, 9, 9, 14, 15, 16,
17, 17, 5, 6, 18, 0, 20, 21, 22, 23, 3, 4, 5, 6, 3, 4,
5, 6, 20, 17, 20, 10, 8, 11, 17, 3, 4, 5, 6, 20, 20, 10,
12, 20, 11, 18, 10, 12, 10, 44, 46, 17, -1, -1, 17};
/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of
state STATE-NUM. */
static const yytype_int8 yystos[] = {0, 9, 18, 20, 21, 22, 26, 27, 27, 9, 9, 0, 3, 4, 5,
6, 10, 27, 27, 27, 27, 27, 27, 17, 17, 20, 11, 10, 17, 20,
29, 11, 8, 12, 20, 28, 18, 10, 17, 12, 17, 28, 10, 29};
static const yytype_int8 yystos[] = {0, 9, 18, 20, 21, 22, 23, 27, 28, 28, 9, 9, 9,
0, 3, 4, 5, 6, 10, 28, 28, 20, 28, 28, 28, 28,
17, 17, 17, 20, 11, 20, 10, 17, 20, 30, 10, 11, 8,
12, 20, 29, 18, 10, 17, 12, 17, 29, 10, 30};
/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. */
static const yytype_int8 yyr1[] =
{0, 25, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29};
{0, 26, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 30, 30};
/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */
static const yytype_int8 yyr2[] = {0, 2, 1, 1, 3, 3, 3, 3, 3, 1, 6, 10, 8, 1, 3, 3, 5};
static const yytype_int8 yyr2[] = {0, 2, 1, 1, 3, 3, 3, 3, 3, 1, 6, 6, 10, 8, 1, 3, 3, 5};
enum
{
@@ -1020,133 +1024,143 @@ yyreduce:
switch(yyn)
{
case 2: /* top: exp */
#line 57 "parser.y"
#line 58 "parser.y"
{
*result = (yyvsp[0].a);
}
#line 1119 "parser.cpp"
#line 1122 "parser.cpp"
break;
case 3: /* exp: NUMBER */
#line 60 "parser.y"
#line 61 "parser.y"
{
(yyval.a) = new RawAST(NUMBER_NODE, (yyvsp[0].d));
}
#line 1125 "parser.cpp"
#line 1128 "parser.cpp"
break;
case 4: /* exp: exp ADD exp */
#line 61 "parser.y"
#line 62 "parser.y"
{
(yyval.a) = new RawAST(ADDITION_NODE, {(yyvsp[-2].a), (yyvsp[0].a)});
}
#line 1131 "parser.cpp"
#line 1134 "parser.cpp"
break;
case 5: /* exp: exp SUB exp */
#line 62 "parser.y"
#line 63 "parser.y"
{
(yyval.a) = new RawAST(SUBTRACTION_NODE, {(yyvsp[-2].a), (yyvsp[0].a)});
}
#line 1137 "parser.cpp"
#line 1140 "parser.cpp"
break;
case 6: /* exp: exp MUL exp */
#line 63 "parser.y"
#line 64 "parser.y"
{
(yyval.a) = new RawAST(MULTIPLY_NODE, {(yyvsp[-2].a), (yyvsp[0].a)});
}
#line 1143 "parser.cpp"
#line 1146 "parser.cpp"
break;
case 7: /* exp: exp DIV exp */
#line 64 "parser.y"
#line 65 "parser.y"
{
(yyval.a) = new RawAST(DIVIDE_NODE, {(yyvsp[-2].a), (yyvsp[0].a)});
}
#line 1149 "parser.cpp"
#line 1152 "parser.cpp"
break;
case 8: /* exp: OP exp CP */
#line 65 "parser.y"
#line 66 "parser.y"
{
(yyval.a) = (yyvsp[-1].a);
}
#line 1155 "parser.cpp"
#line 1158 "parser.cpp"
break;
case 9: /* exp: NAME */
#line 66 "parser.y"
#line 67 "parser.y"
{
(yyval.a) = new RawAST(REFERENCE_NODE, (yyvsp[0].s));
free((yyvsp[0].s));
}
#line 1163 "parser.cpp"
#line 1166 "parser.cpp"
break;
case 10: /* exp: REDUCE OP exp CM NAME CP */
#line 69 "parser.y"
case 10: /* exp: ACCUMULATE OP NAME CM NAME CP */
#line 70 "parser.y"
{
(yyval.a) = new RawAST(ACCUMULATE_NODE, (yyvsp[-3].s), (yyvsp[-1].s));
free((yyvsp[-3].s));
free((yyvsp[-1].s));
}
#line 1176 "parser.cpp"
break;
case 11: /* exp: REDUCE OP exp CM NAME CP */
#line 75 "parser.y"
{
(yyval.a) = new RawAST(REDUCE_NODE, (yyvsp[-3].a), (yyvsp[-1].s), NULL);
free((yyvsp[-1].s));
}
#line 1172 "parser.cpp"
#line 1185 "parser.cpp"
break;
case 11: /* exp: REDUCE OP exp CM NAME CM O_SQ reduce_dim_args C_SQ CP */
#line 73 "parser.y"
case 12: /* exp: REDUCE OP exp CM NAME CM O_SQ reduce_dim_args C_SQ CP */
#line 79 "parser.y"
{
(yyval.a) = new RawAST(REDUCE_NODE, (yyvsp[-7].a), (yyvsp[-5].s), (yyvsp[-2].ll));
free((yyvsp[-5].s));
}
#line 1181 "parser.cpp"
#line 1194 "parser.cpp"
break;
case 12: /* exp: SELECT OP exp CM O_SQ select_dim_args C_SQ CP */
#line 77 "parser.y"
case 13: /* exp: SELECT OP exp CM O_SQ select_dim_args C_SQ CP */
#line 83 "parser.y"
{
(yyval.a) = new RawAST(SELECT_NODE, (yyvsp[-5].a), (yyvsp[-2].ll));
}
#line 1189 "parser.cpp"
#line 1202 "parser.cpp"
break;
case 13: /* reduce_dim_args: NAME */
#line 84 "parser.y"
case 14: /* reduce_dim_args: NAME */
#line 90 "parser.y"
{
(yyval.ll) = new LinkedList((yyvsp[0].s), NULL);
free((yyvsp[0].s));
}
#line 1197 "parser.cpp"
#line 1210 "parser.cpp"
break;
case 14: /* reduce_dim_args: NAME CM reduce_dim_args */
#line 87 "parser.y"
case 15: /* reduce_dim_args: NAME CM reduce_dim_args */
#line 93 "parser.y"
{
(yyval.ll) = new LinkedList((yyvsp[-2].s), (yyvsp[0].ll));
free((yyvsp[-2].s));
}
#line 1205 "parser.cpp"
#line 1218 "parser.cpp"
break;
case 15: /* select_dim_args: NAME EQUALS NUMBER */
#line 94 "parser.y"
case 16: /* select_dim_args: NAME EQUALS NUMBER */
#line 100 "parser.y"
{
(yyval.ll) = new LinkedList((yyvsp[-2].s), (yyvsp[0].d), NULL);
free((yyvsp[-2].s));
}
#line 1213 "parser.cpp"
#line 1226 "parser.cpp"
break;
case 16: /* select_dim_args: NAME EQUALS NUMBER CM select_dim_args */
#line 97 "parser.y"
case 17: /* select_dim_args: NAME EQUALS NUMBER CM select_dim_args */
#line 103 "parser.y"
{
(yyval.ll) = new LinkedList((yyvsp[-4].s), (yyvsp[-2].d), (yyvsp[0].ll));
free((yyvsp[-4].s));
}
#line 1221 "parser.cpp"
#line 1234 "parser.cpp"
break;
#line 1225 "parser.cpp"
#line 1238 "parser.cpp"
default: break;
}
@@ -1320,4 +1334,4 @@ yyreturnlab:
return yyresult;
}
#line 103 "parser.y"
#line 109 "parser.y"
@@ -35,8 +35,8 @@
especially those whose name start with YY_ or yy_. They are
private implementation details that can be changed or removed. */
#ifndef YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_COUNTERS_PARSER_PARSER_H_INCLUDED
#define YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_COUNTERS_PARSER_PARSER_H_INCLUDED
#ifndef YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_SDK_COUNTERS_PARSER_PARSER_H_INCLUDED
#define YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_SDK_COUNTERS_PARSER_PARSER_H_INCLUDED
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 1
@@ -81,8 +81,9 @@ enum yytokentype
NAME = 274, /* NAME */
REDUCE = 275, /* REDUCE */
SELECT = 276, /* SELECT */
LOWER_THAN_ELSE = 277, /* LOWER_THAN_ELSE */
ELSE = 278 /* ELSE */
ACCUMULATE = 277, /* ACCUMULATE */
LOWER_THAN_ELSE = 278, /* LOWER_THAN_ELSE */
ELSE = 279 /* ELSE */
};
typedef enum yytokentype yytoken_kind_t;
#endif
@@ -98,7 +99,7 @@ union YYSTYPE
int64_t d;
char* s;
# line 102 "parser.h"
# line 103 "parser.h"
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
@@ -110,4 +111,4 @@ extern YYSTYPE yylval;
int
yyparse(RawAST** result);
#endif /* !YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_COUNTERS_PARSER_PARSER_H_INCLUDED */
#endif /* !YY_YY_ROCPROFILER_SOURCE_LIB_ROCPROFILER_SDK_COUNTERS_PARSER_PARSER_H_INCLUDED */
@@ -39,6 +39,7 @@ void yyerror(rocprofiler::counters::RawAST**, const char *s) { ROCP_ERROR << s;
%token NUMBER RANGE /* set data type for numbers */
%token NAME /* set data type for variables and user-defined functions */
%token REDUCE SELECT /* set data type for special functions */
%token ACCUMULATE
%type <a> exp /* set data type for expressions */
%type <s> NAME
%type <d> NUMBER
@@ -64,6 +65,11 @@ exp: NUMBER { $$ = new RawAST(NUMBER_NODE, $1); }
| NAME { $$ = new RawAST(REFERENCE_NODE, $1);
free($1);
}
| ACCUMULATE OP NAME CM NAME CP {
$$ = new RawAST(ACCUMULATE_NODE, $3, $5);
free($3);
free($5);
}
| REDUCE OP exp CM NAME CP {
$$ = new RawAST(REDUCE_NODE, $3, $5, NULL);
free($5);
@@ -54,6 +54,14 @@ enum NodeType
SELECT_NODE,
SUBTRACTION_NODE,
CONSTANT_NODE,
ACCUMULATE_NODE
};
// Accumulation mode carried by a RawAST node. Nodes that are not
// accumulate expressions keep the default, NONE.
enum class ACCUMULATE_OP_TYPE
{
    NONE            = 0,  // no accumulation requested
    LOW_RESOLUTION  = 1,  // spelled "LOW_RES" in counter expressions
    HIGH_RESOLUTION = 2   // spelled "HIGH_RES" in counter expressions
};
struct LinkedList
@@ -75,8 +83,9 @@ struct LinkedList
struct RawAST
{
// Node type
NodeType type{NONE}; // Operation to perform on the counter set
std::string reduce_op{};
NodeType type{NONE}; // Operation to perform on the counter set
std::string reduce_op{};
ACCUMULATE_OP_TYPE accumulate_op{ACCUMULATE_OP_TYPE::NONE};
// Stores either the name or digit depending on whether this
// is a name or number
@@ -164,6 +173,20 @@ struct RawAST
}
}
// Accumulate operation constructor. v is stored as the node's string
// value and op is the textual accumulate mode: "NONE", "LOW_RES" or
// "HIGH_RES". Both pointers must be non-null (CHECK_NOTNULL) and t must
// be ACCUMULATE_NODE (CHECK_EQ). Any other op string makes the table
// lookup throw std::out_of_range.
RawAST(NodeType t, const char* v, const char* op)
: type(t)
, value(std::string{CHECK_NOTNULL(v)})
{
    CHECK_NOTNULL(op);
    // Fixed text -> enum table. It is const so the shared function-local
    // static can never be modified after its one-time initialization.
    static const std::unordered_map<std::string_view, ACCUMULATE_OP_TYPE> map = {
        {"NONE", ACCUMULATE_OP_TYPE::NONE},
        {"LOW_RES", ACCUMULATE_OP_TYPE::LOW_RESOLUTION},
        {"HIGH_RES", ACCUMULATE_OP_TYPE::HIGH_RESOLUTION},
    };
    // at() (rather than operator[]) so an unknown mode is reported instead
    // of silently inserting/returning a default entry.
    accumulate_op = map.at(static_cast<std::string_view>(op));
    CHECK_EQ(t, ACCUMULATE_NODE);
}
// Select operation constructor. Counter is the counter AST
// to use for the reduce op, refs is the reference set AST.
// dimensions contains the mapping for selecting dimensions
@@ -227,16 +250,26 @@ struct formatter<rocprofiler::counters::RawAST>
{rocprofiler::counters::MULTIPLY_NODE, "MULTIPLY_NODE"},
{rocprofiler::counters::NUMBER_NODE, "NUMBER_NODE"},
{rocprofiler::counters::RANGE_NODE, "RANGE_NODE"},
{rocprofiler::counters::ACCUMULATE_NODE, "ACCUMULATE_NODE"},
{rocprofiler::counters::REDUCE_NODE, "REDUCE_NODE"},
{rocprofiler::counters::REFERENCE_NODE, "REFERENCE_NODE"},
{rocprofiler::counters::SELECT_NODE, "SELECT_NODE"},
{rocprofiler::counters::SUBTRACTION_NODE, "SUBTRACTION_NODE"},
};
auto out = fmt::format_to(ctx.out(),
"{{\"Type\":\"{}\", \"REDUCE_OP\":\"{}\",",
NodeTypeToString.at(ast.type),
ast.reduce_op);
static std::unordered_map<rocprofiler::counters::ACCUMULATE_OP_TYPE, std::string_view>
AccumulateTypeToString = {
{rocprofiler::counters::ACCUMULATE_OP_TYPE::NONE, "NONE"},
{rocprofiler::counters::ACCUMULATE_OP_TYPE::HIGH_RESOLUTION, "HIGH_RES"},
{rocprofiler::counters::ACCUMULATE_OP_TYPE::LOW_RESOLUTION, "LOW_RES"},
};
auto out =
fmt::format_to(ctx.out(),
"{{\"Type\":\"{}\", \"REDUCE_OP\":\"{}\", \"ACCUMULATE_OP\":\"{}\",",
NodeTypeToString.at(ast.type),
ast.reduce_op,
AccumulateTypeToString.at(ast.accumulate_op));
if(const auto* string_val = std::get_if<std::string>(&ast.value))
{
@@ -388,8 +388,8 @@ yy_fatal_error(const char* msg);
(yy_hold_char) = *yy_cp; \
*yy_cp = '\0'; \
(yy_c_buf_p) = yy_cp;
#define YY_NUM_RULES 22
#define YY_END_OF_BUFFER 23
#define YY_NUM_RULES 23
#define YY_END_OF_BUFFER 24
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -397,19 +397,20 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
static const flex_int16_t yy_accept[48] = {
0, 0, 0, 23, 21, 20, 18, 6, 7, 3, 1, 9, 2, 21, 4, 14, 10, 8, 17, 11, 12, 17, 17, 5,
14, 19, 13, 14, 0, 17, 17, 17, 19, 13, 0, 0, 14, 17, 17, 0, 13, 17, 17, 17, 17, 15, 16, 0};
static const flex_int16_t yy_accept[58] = {
0, 0, 0, 24, 22, 21, 19, 6, 7, 3, 1, 9, 2, 22, 4, 14, 10, 8, 18, 11,
12, 18, 18, 18, 5, 14, 20, 13, 14, 0, 18, 18, 18, 18, 20, 13, 0, 0, 14, 18,
18, 18, 0, 13, 18, 18, 18, 18, 18, 18, 18, 15, 16, 18, 18, 18, 17, 0};
static const YY_CHAR yy_ec[256] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 4, 5,
6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 1, 1, 14, 1,
1, 1, 15, 15, 15, 15, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 17, 1, 18, 1, 15, 1, 15, 15, 19, 20,
15, 15, 15, 15, 15, 15, 15, 17, 1, 18, 1, 15, 1, 19, 15, 20, 21,
21, 15, 15, 15, 15, 15, 15, 22, 15, 15, 15, 15, 15, 23, 24, 25, 26, 15, 15, 15, 15,
15, 1, 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
22, 15, 15, 15, 15, 15, 15, 23, 24, 15, 15, 15, 15, 25, 26, 27, 28, 15, 15, 15, 15,
15, 1, 29, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -418,40 +419,40 @@ static const YY_CHAR yy_ec[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
static const YY_CHAR yy_meta[28] = {0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1,
1, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1};
static const YY_CHAR yy_meta[30] = {0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,
3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1};
static const flex_int16_t yy_base[50] = {0, 0, 0, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71,
57, 57, 18, 71, 71, 0, 71, 71, 46, 45, 71, 17, 0,
19, 31, 37, 0, 45, 42, 0, 38, 44, 51, 49, 32, 36,
43, 36, 26, 23, 16, 11, 0, 0, 71, 29, 59};
static const flex_int16_t yy_base[60] = {
0, 0, 0, 81, 82, 82, 82, 82, 82, 82, 82, 82, 82, 68, 68, 20, 82, 82, 0, 82,
82, 58, 55, 54, 82, 19, 0, 21, 28, 39, 0, 55, 53, 50, 0, 33, 45, 60, 59, 42,
41, 46, 55, 54, 41, 44, 43, 34, 39, 32, 33, 0, 0, 34, 20, 17, 0, 82, 31, 57};
static const flex_int16_t yy_def[50] = {0, 47, 1, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
47, 47, 47, 47, 47, 48, 47, 47, 48, 48, 47, 47, 49,
47, 47, 47, 48, 48, 48, 49, 47, 47, 47, 47, 48, 48,
47, 47, 48, 48, 48, 48, 48, 48, 0, 47, 47};
static const flex_int16_t yy_def[60] = {0, 57, 1, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
57, 57, 57, 58, 57, 57, 58, 58, 58, 57, 57, 59, 57, 57, 57,
58, 58, 58, 58, 59, 57, 57, 57, 57, 58, 58, 58, 57, 57, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 0, 57, 57};
static const flex_int16_t yy_nxt[99] = {
0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 18,
18, 18, 18, 21, 22, 18, 18, 23, 26, 24, 27, 33, 29, 28, 28, 34, 46, 45, 28, 28,
34, 26, 44, 27, 35, 43, 35, 28, 40, 36, 33, 39, 28, 39, 34, 40, 40, 42, 41, 34,
32, 36, 32, 36, 38, 37, 31, 30, 25, 24, 47, 3, 47, 47, 47, 47, 47, 47, 47, 47,
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47
static const flex_int16_t yy_nxt[112] = {
0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 21, 18,
18, 18, 18, 18, 22, 23, 18, 18, 24, 27, 25, 28, 35, 30, 29, 29, 36, 27, 56, 28, 29,
29, 36, 29, 35, 37, 55, 37, 36, 29, 38, 42, 54, 42, 36, 53, 43, 34, 52, 34, 51, 50,
49, 48, 47, 43, 43, 46, 45, 44, 38, 38, 41, 40, 39, 33, 32, 31, 26, 25, 57, 3, 57,
57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
};
57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57};
static const flex_int16_t yy_chk[99] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 15, 24, 15, 26, 48, 24, 15, 26, 44, 43, 24, 15,
26, 27, 42, 27, 28, 41, 28, 27, 40, 28, 33, 34, 27, 34, 33, 39, 34, 38, 37, 33,
49, 36, 49, 35, 31, 30, 22, 21, 14, 13, 3, 47, 47, 47, 47, 47, 47, 47, 47, 47,
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47
static const flex_int16_t yy_chk[112] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 15, 25, 15, 27, 58, 25, 15, 27, 28, 55, 28, 25,
15, 27, 28, 35, 29, 54, 29, 35, 28, 29, 36, 53, 36, 35, 50, 36, 59, 49, 59, 48, 47,
46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 33, 32, 31, 23, 22, 21, 14, 13, 3, 57, 57,
57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
};
57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57};
/* Table of booleans, true if rule could match eol. */
static const flex_int32_t yy_rule_can_match_eol[23] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
static const flex_int32_t yy_rule_can_match_eol[24] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
};
static yy_state_type yy_last_accepting_state;
@@ -476,9 +477,9 @@ char* yytext;
#include "raw_ast.hpp"
using namespace std;
#define YYDEBUG 1
#line 511 "scanner.cpp"
#line 518 "scanner.cpp"
/* float exponent */
#line 513 "scanner.cpp"
#line 520 "scanner.cpp"
#define INITIAL 0
@@ -713,7 +714,7 @@ YY_DECL
{
#line 15 "scanner.l"
#line 730 "scanner.cpp"
#line 737 "scanner.cpp"
while(/*CONSTCOND*/ 1) /* loops until end-of-file is reached */
{
@@ -740,11 +741,11 @@ YY_DECL
while(yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state)
{
yy_current_state = (int) yy_def[yy_current_state];
if(yy_current_state >= 48) yy_c = yy_meta[yy_c];
if(yy_current_state >= 58) yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
++yy_cp;
} while(yy_base[yy_current_state] != 71);
} while(yy_base[yy_current_state] != 82);
yy_find_action:
yy_act = yy_accept[yy_current_state];
@@ -870,40 +871,46 @@ YY_DECL
}
YY_BREAK
case 17: YY_RULE_SETUP
#line 37 "scanner.l"
#line 36 "scanner.l"
{
return ACCUMULATE;
}
YY_BREAK
case 18: YY_RULE_SETUP
#line 38 "scanner.l"
{
yylval.s = strdup(yytext);
return NAME;
}
YY_BREAK
case 18:
/* rule 18 can match eol */
case 19:
/* rule 19 can match eol */
YY_RULE_SETUP
#line 42 "scanner.l"
#line 43 "scanner.l"
{
return EOL;
}
YY_BREAK
case 19: YY_RULE_SETUP
#line 43 "scanner.l"
YY_BREAK
case 20: YY_RULE_SETUP
#line 44 "scanner.l"
{ /* ignore white space */
}
YY_BREAK
case 21: YY_RULE_SETUP
#line 45 "scanner.l"
{
throw std::runtime_error(fmt::format("Mystery character {}", *yytext));
{ /* ignore white space */
}
YY_BREAK
case 22: YY_RULE_SETUP
#line 46 "scanner.l"
{
throw std::runtime_error(fmt::format("Mystery character {}", *yytext));
}
YY_BREAK
case 23: YY_RULE_SETUP
#line 47 "scanner.l"
YY_FATAL_ERROR("flex scanner jammed");
YY_BREAK
#line 909 "scanner.cpp"
#line 921 "scanner.cpp"
case YY_STATE_EOF(INITIAL): yyterminate();
case YY_END_OF_BUFFER:
@@ -1187,7 +1194,7 @@ yy_get_previous_state(void)
while(yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state)
{
yy_current_state = (int) yy_def[yy_current_state];
if(yy_current_state >= 48) yy_c = yy_meta[yy_c];
if(yy_current_state >= 58) yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
}
@@ -1215,10 +1222,10 @@ yy_try_NUL_trans(yy_state_type yy_current_state)
while(yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state)
{
yy_current_state = (int) yy_def[yy_current_state];
if(yy_current_state >= 48) yy_c = yy_meta[yy_c];
if(yy_current_state >= 58) yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
yy_is_jam = (yy_current_state == 47);
yy_is_jam = (yy_current_state == 57);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -1864,4 +1871,4 @@ yyfree(void* ptr)
#define YYTABLES_NAME "yytables"
#line 46 "scanner.l"
#line 47 "scanner.l"
@@ -33,6 +33,7 @@ EXP ([Ee][-+]?[0-9]+)
"reduce" { return REDUCE; }
"select" { return SELECT; }
"accumulate" { return ACCUMULATE; }
[a-z_A-Z][a-z_A-Z0-9]* {
yylval.s = strdup(yytext);
@@ -33,36 +33,48 @@ TEST(parser, base_ops)
{
std::map<std::string, std::string> expressionToExpected = {
{"AB * BA",
"{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"BA\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"},
{"AB + BA",
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"BA\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"},
{"CD - ZX",
"{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"CD\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"ZX\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"CD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"ZX\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"},
{"NM / DB",
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"NM\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"DB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"NM\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"DB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"}};
for(auto [op, expected] : expressionToExpected)
@@ -81,51 +93,70 @@ TEST(parser, order_of_ops)
{
std::map<std::string, std::string> expressionToExpected = {
{"(AB + BA) / CD",
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", "
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"BA\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[],"
" \"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"CD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"},
"\"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"},
{"(AB / BA) - BN",
"{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", "
"{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"BA\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"AB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"BA\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"BN\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"},
"\"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"},
{"AD / (CD - ZX)",
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AD\", "
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"AD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"CD\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"SUBTRACTION_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"CD\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"ZX\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", "
"\"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", \"Value\":\"ZX\", \"Counter_Set\":[], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"},
"\"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"},
{"MN * (NM / DB)",
"{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"MN\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"NM\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"Value\":\"DB\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"{\"Type\":\"MULTIPLY_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"MN\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"NM\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},"
"{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"DB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}"}};
@@ -145,29 +176,37 @@ TEST(parser, reduction)
{
std::vector<std::tuple<std::string, std::string>> expressionToExpected = {
{"reduce(AB, SUM, [DIMENSION_XCC,DIMENSION_SHADER_ENGINE])",
"{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[\"3\",\"1\"], \"Select_Dimension_Set\":[]}"},
{"reduce(AB+CD, SUM, [DIMENSION_XCC,DIMENSION_SHADER_ENGINE])",
"{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", "
"{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Value\":\"CD\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
"\"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[\"3\",\"1\"], "
"\"Select_Dimension_Set\":[]}"},
{"reduce(AB,DIV, [DIMENSION_XCC,DIMENSION_SHADER_ENGINE])+reduce(DC,SUM, "
"[DIMENSION_XCC,DIMENSION_SHADER_ENGINE])",
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", "
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"DIV\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[\"3\",\"1\"], "
"\"Select_Dimension_Set\":[]},{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"SUM\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"DC\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"DC\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[\"3\",\"1\"], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"}};
@@ -188,25 +227,30 @@ TEST(parser, DISABLED_selection)
{
std::map<std::string, std::string> expressionToExpected = {
{"select(AB, [SE=1,XCC=0])+select(DC,[SE=2])",
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", "
"{\"Type\":\"ADDITION_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"\"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"XCC\", 0)\",\"(\"SE\", "
"1)\"]},{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"DC\", "
"1)\"]},{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"DC\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"SE\", 2)\"]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}"},
{"select(AB, [SE=2,XCC=1,WGP=3])",
"{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"WGP\", 3)\",\"(\"XCC\", "
"1)\",\"(\"SE\", 2)\"]}"},
{"select(AB, [XCC=0])",
"{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", \"Value\":\"AB\", "
"{\"Type\":\"SELECT_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
"\"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
"\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"AB\", "
"\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
"\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[\"(\"XCC\", 0)\"]}"}};
@@ -216,6 +260,7 @@ TEST(parser, DISABLED_selection)
auto* buf = yy_scan_string(op.c_str());
yyparse(&ast);
ASSERT_TRUE(ast);
auto exp = fmt::format("{}", *ast);
EXPECT_EQ(fmt::format("{}", *ast), expected);
yy_delete_buffer(buf);
delete ast;
@@ -241,6 +286,71 @@ TEST(parser, parse_derived_counters)
}
}
/**
 * Parses standalone accumulate() expressions and checks the formatted AST.
 *
 * Each case expects a single ACCUMULATE_NODE whose "ACCUMULATE_OP" equals the
 * second argument of accumulate() (NONE, HIGH_RES, LOW_RES) and whose "Value"
 * is the referenced counter name.
 */
TEST(parser, parse_accum_counter)
{
    // expression text -> expected fmt-formatted AST
    const std::map<std::string, std::string> expressionToExpected = {
        {"accumulate(SQ_WAVES,NONE)",
         "{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", \"Value\""
         ":\"SQ_WAVES\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
         "\"Select_Dimension_Set\":[]}"},
        {"accumulate(SQ_WAVES,HIGH_RES)",
         "{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"HIGH_RES\", "
         "\"Value"
         "\":\"SQ_WAVES\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
         "\"Select_Dimension_Set\":[]}"},
        {"accumulate(SQ_WAVES,LOW_RES)",
         "{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"LOW_RES\", "
         "\"Value\""
         ":\"SQ_WAVES\", \"Counter_Set\":[], \"Reduce_Dimension_Set\":[], "
         "\"Select_Dimension_Set\":[]}"}};

    // Bind by const reference: the entries are only read, so no per-iteration copies.
    for(const auto& [op, expected] : expressionToExpected)
    {
        RawAST* ast = nullptr;
        auto*   buf = yy_scan_string(op.c_str());
        yyparse(&ast);
        ASSERT_TRUE(ast) << "failed to parse: " << op;
        // Format the AST once and compare the stored string.
        const auto exp = fmt::format("{}", *ast);
        EXPECT_EQ(exp, expected);
        yy_delete_buffer(buf);
        delete ast;
    }
}
/**
 * Parses an expression that nests accumulate() inside reduce() and divides the
 * result twice, then checks the formatted AST.
 *
 * The expected string shows the two divisions nesting to the left: the outer
 * DIVIDE_NODE holds an inner DIVIDE_NODE (the two REDUCE_NODEs) and the CU_NUM
 * reference. The accumulate() call appears as an ACCUMULATE_NODE inside the
 * first REDUCE_NODE.
 */
TEST(parser, parse_nested_accum_counter)
{
    // expression text -> expected fmt-formatted AST
    const std::map<std::string, std::string> expressionToExpected = {
        {"reduce(accumulate(SQ_LEVEL_WAVES,HIGH_RES),sum)/reduce(GRBM_GUI_ACTIVE,max)/CU_NUM",
         "{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", "
         "\"Counter_Set\":[{\"Type\":\"DIVIDE_NODE\", \"REDUCE_OP\":\"\", "
         "\"ACCUMULATE_OP\":\"NONE\", \"Counter_Set\":[{\"Type\":\"REDUCE_NODE\", "
         "\"REDUCE_OP\":\"sum\", \"ACCUMULATE_OP\":\"NONE\", "
         "\"Counter_Set\":[{\"Type\":\"ACCUMULATE_NODE\", \"REDUCE_OP\":\"\", "
         "\"ACCUMULATE_OP\":\"HIGH_RES\", \"Value\":\"SQ_LEVEL_WAVES\", \"Counter_Set\":[], "
         "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
         "\"Select_Dimension_Set\":[]},{\"Type\":\"REDUCE_NODE\", \"REDUCE_OP\":\"max\", "
         "\"ACCUMULATE_OP\":\"NONE\", \"Counter_Set\":[{\"Type\":\"REFERENCE_NODE\", "
         "\"REDUCE_OP\":\"\", \"ACCUMULATE_OP\":\"NONE\", \"Value\":\"GRBM_GUI_ACTIVE\", "
         "\"Counter_Set\":[], \"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], "
         "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
         "\"Select_Dimension_Set\":[]},{\"Type\":\"REFERENCE_NODE\", \"REDUCE_OP\":\"\", "
         "\"ACCUMULATE_OP\":\"NONE\", \"Value\":\"CU_NUM\", \"Counter_Set\":[], "
         "\"Reduce_Dimension_Set\":[], \"Select_Dimension_Set\":[]}], \"Reduce_Dimension_Set\":[], "
         "\"Select_Dimension_Set\":[]}"}};

    // Bind by const reference: the entries are only read, so no per-iteration copies.
    for(const auto& [op, expected] : expressionToExpected)
    {
        RawAST* ast = nullptr;
        auto*   buf = yy_scan_string(op.c_str());
        yyparse(&ast);
        ASSERT_TRUE(ast) << "failed to parse: " << op;
        // Format the AST once and compare the stored string.
        const auto exp = fmt::format("{}", *ast);
        EXPECT_EQ(exp, expected);
        yy_delete_buffer(buf);
        delete ast;
    }
}
// TEST(parser, parse_complex_counters)
// {
// std::map<std::string, std::string> expressionToExpected = {
@@ -254,6 +254,7 @@ protected:
registration::set_init_status(-1);
context::push_client(1);
test_init();
// rocprofiler_debugger_block();
counters::agent_profile_hsa_registration();
std::string kernel_name = "null_kernel";
@@ -116,28 +116,14 @@ get_client_ctx()
return ctx;
}
/// Expectations passed to buffered_callback through its user-data pointer.
struct buf_check
{
    /// Number of values the callback should expect to see.
    size_t expected_size = 0;
    /// True for single-value constants; the callback then forces expected_size to 1.
    bool is_special = false;
    /// Constant value associated with a special metric (0.0 otherwise).
    double special_val = 0.0;
};
void
buffered_callback(rocprofiler_context_id_t,
rocprofiler_buffer_id_t,
rocprofiler_record_header_t** headers,
size_t num_headers,
void* user_data,
void* /* user_data */,
uint64_t)
{
buf_check& expected = *static_cast<buf_check*>(user_data);
if(expected.is_special)
{
// Special values are single value constants (from agent_t)
expected.expected_size = 1;
}
std::set<double> seen_data;
std::set<uint64_t> seen_dims;
for(size_t i = 0; i < num_headers; ++i)
@@ -456,33 +442,17 @@ TEST(core, check_callbacks)
ASSERT_TRUE(ret_pkt) << fmt::format("Expected a packet to be generated for - {}",
metric.name());
/**
* Fake some data for the counter
*/
size_t* fake_data = static_cast<size_t*>(ret_pkt->profile.output_buffer.ptr);
for(size_t i = 0; i < (ret_pkt->profile.output_buffer.size / sizeof(size_t)); i++)
{
fake_data[i] = i + 1;
}
/**
* Create the buffer and run test
*/
rocprofiler_buffer_id_t opt_buff_id = {.handle = 0};
buf_check check = {
.expected_size = ret_pkt->profile.output_buffer.size / sizeof(size_t),
.is_special = !metric.special().empty(),
.special_val = (metric.special().empty() ? 0.0
: double(counters::get_agent_property(
std::string_view(metric.name()),
*agent.get_rocp_agent())))};
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
500 * sizeof(size_t),
500 * sizeof(size_t),
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
buffered_callback,
&check,
nullptr,
&opt_buff_id),
"Could not create buffer");
cb_info->buffer = opt_buff_id;
@@ -526,6 +526,74 @@ TEST(evaluate_ast, evaluate_simple_counters)
}
}
/**
 * Exercises derived metrics whose expression is an accumulate() wrapper.
 *
 * Three base metrics (ids 0-2) and three derived metrics (ids 3-5) are
 * registered for "gfx9"; the derived ones use accumulate(<base>, NONE),
 * accumulate(<base>, LOW_RES) and accumulate(<base>, HIGH_RES). For every
 * derived metric the test checks that exactly one hardware counter is required,
 * that the first required counter reports the expected flags() value (0, 1, 2
 * respectively), and that evaluation returns as many records as the base
 * counter data contains.
 */
TEST(evaulate_ast, evaulate_hybrid_counters)
{
    using namespace rocprofiler::counters;

    // Produces a record id that has only the counter handle stored in it.
    auto get_base_rec_id = [](uint64_t counter_id) {
        rocprofiler_counter_instance_id_t base_id = 0;
        set_counter_in_rec(base_id, {.handle = counter_id});
        return base_id;
    };

    std::unordered_map<std::string, Metric> metrics = {
        {"VOORHEES", Metric("gfx9", "VOORHEES", "a", "a", "a", "", "", 0)},
        {"KRUEGER", Metric("gfx9", "KRUEGER", "a", "a", "a", "", "", 1)},
        {"MYERS", Metric("gfx9", "MYERS", "a", "a", "a", "", "", 2)},
        {"BATES", Metric("gfx9", "BATES", "a", "a", "a", "accumulate(VOORHEES,NONE)", "", 3)},
        {"KRAMER", Metric("gfx9", "KRAMER", "a", "a", "a", "accumulate(KRUEGER,LOW_RES)", "", 4)},
        {"TORRANCE",
         Metric("gfx9", "TORRANCE", "a", "a", "a", "accumulate(MYERS,HIGH_RES)", "", 5)}};

    // Synthetic records for the three base counters.
    std::unordered_map<std::string, std::vector<rocprofiler_record_counter_t>> base_counter_data = {
        {"VOORHEES", construct_test_data_dim(get_base_rec_id(0), {ROCPROFILER_DIMENSION_NONE}, 8)},
        {"KRUEGER", construct_test_data_dim(get_base_rec_id(1), {ROCPROFILER_DIMENSION_NONE}, 8)},
        {"MYERS", construct_test_data_dim(get_base_rec_id(2), {ROCPROFILER_DIMENSION_NONE}, 8)},
    };

    // Parse every metric (its expression, or its own name when it has none)
    // and store the resulting EvaluateAST under the "gfx9" architecture key.
    std::unordered_map<std::string, std::unordered_map<std::string, EvaluateAST>> asts;
    for(const auto& [metric_name, metric] : metrics)
    {
        RawAST* parsed   = nullptr;
        auto    scan_buf = yy_scan_string(metric.expression().empty() ? metric.name().c_str()
                                                                      : metric.expression().c_str());
        yyparse(&parsed);
        ASSERT_TRUE(parsed) << metric.expression() << " " << metric.name();

        auto& gfx9_asts = asts["gfx9"];
        gfx9_asts.emplace(metric_name,
                          EvaluateAST({.handle = metric.id()}, metrics, *parsed, "gfx9"));

        yy_delete_buffer(scan_buf);
        delete parsed;
    }

    // (derived metric, expected records, required counter count, expected flags)
    std::vector<
        std::tuple<std::string, std::vector<rocprofiler_record_counter_t>, int64_t, uint32_t>>
        derived_counters = {
            {"BATES", base_counter_data["VOORHEES"], 1, 0},
            {"KRAMER", base_counter_data["KRUEGER"], 1, 1},
            {"TORRANCE", base_counter_data["MYERS"], 1, 2},
        };

    // Re-key the base counter records by metric id for evaluate().
    std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>> base_counter_decode;
    for(const auto& [base_name, base_records] : base_counter_data)
    {
        base_counter_decode[metrics[base_name].id()] = base_records;
    }

    for(auto& [derived_name, expected_records, required_count, expected_flag] : derived_counters)
    {
        LOG(INFO) << derived_name;

        auto required_hw = rocprofiler::counters::get_required_hardware_counters(
            asts, "gfx9", metrics[derived_name]);
        ASSERT_TRUE(required_hw);
        ASSERT_EQ(required_hw->size(), required_count);
        ASSERT_EQ(required_hw->begin()->flags(), expected_flag);

        std::vector<std::unique_ptr<std::vector<rocprofiler_record_counter_t>>> cache;
        asts.at("gfx9").at(derived_name).expand_derived(asts.at("gfx9"));
        auto evaluated = asts.at("gfx9").at(derived_name).evaluate(base_counter_decode, cache);
        EXPECT_EQ(evaluated->size(), expected_records.size());
    }
}
namespace
{
void
@@ -109,7 +109,9 @@ get_ext_table()
val.hsa_amd_vmem_get_alloc_properties_from_handle_fn =
hsa_amd_vmem_get_alloc_properties_from_handle;
val.hsa_amd_agent_set_async_scratch_limit_fn = hsa_amd_agent_set_async_scratch_limit;
val.hsa_amd_queue_get_info_fn = hsa_amd_queue_get_info;
#if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x02
val.hsa_amd_queue_get_info_fn = hsa_amd_queue_get_info;
#endif
return val;
}();
return _v;
@@ -36,33 +36,89 @@ namespace rocprofiler
{
namespace hsa
{
CounterAQLPacket::~CounterAQLPacket()
/**
 * Allocation callback passed to aqlprofile_pmc_create_packets.
 *
 * @param ptr   Receives the allocation; set to nullptr for a zero-byte request.
 * @param size  Number of bytes requested.
 * @param flags Buffer description; host access is required and memory_hint
 *              selects which pool is used.
 * @param data  Opaque pointer to the owning CounterMemoryPool.
 * @return HSA_STATUS_SUCCESS on success (including size == 0), HSA_STATUS_ERROR
 *         when the pool is missing or not fully configured, otherwise the
 *         status of the failing HSA call.
 *
 * The buffer is zero-filled and made accessible to pool.gpu_agent before it is
 * returned. If either of those steps fails the buffer is released again and
 * *ptr is reset, so no memory is leaked on the error path.
 */
hsa_status_t
CounterAQLPacket::CounterMemoryPool::Alloc(void** ptr, size_t size, desc_t flags, void* data)
{
    // Nothing to allocate: report success with a null result.
    if(size == 0)
    {
        if(ptr != nullptr) *ptr = nullptr;
        return HSA_STATUS_SUCCESS;
    }
    if(!data) return HSA_STATUS_ERROR;
    auto& pool = *static_cast<CounterAQLPacket::CounterMemoryPool*>(data);

    // Every function pointer used below (and free_fn, needed for cleanup) must be set.
    if(!pool.allocate_fn || !pool.free_fn || !pool.allow_access_fn) return HSA_STATUS_ERROR;
    if(!flags.host_access || pool.kernarg_pool_.handle == 0 || !pool.fill_fn)
        return HSA_STATUS_ERROR;

    // Device-uncached requests come from the kernarg pool unless that is disabled;
    // everything else comes from the CPU pool. Exactly one allocation is made.
    hsa_status_t status;
    if(!pool.bIgnoreKernArg && flags.memory_hint == AQLPROFILE_MEMORY_HINT_DEVICE_UNCACHED)
        status = pool.allocate_fn(pool.kernarg_pool_, size, 0, ptr);
    else
        status = pool.allocate_fn(pool.cpu_pool_, size, 0, ptr);
    if(status != HSA_STATUS_SUCCESS) return status;

    // fill_fn takes a count of 32-bit words, hence the division.
    status = pool.fill_fn(*ptr, 0u, size / sizeof(uint32_t));
    if(status == HSA_STATUS_SUCCESS)
        status = pool.allow_access_fn(1, &pool.gpu_agent, nullptr, *ptr);

    if(status != HSA_STATUS_SUCCESS)
    {
        // Do not hand back (or leak) a buffer that could not be prepared.
        pool.free_fn(*ptr);
        *ptr = nullptr;
    }
    return status;
}
/**
 * Release callback passed to aqlprofile_pmc_create_packets.
 *
 * @param ptr  Buffer to release; nullptr is ignored.
 * @param data Opaque pointer to the owning CounterMemoryPool.
 *
 * A missing pool or free function still trips an assert in debug builds. In
 * builds where assert is compiled out the call returns without touching
 * anything instead of dereferencing a null pointer.
 */
void
CounterAQLPacket::CounterMemoryPool::Free(void* ptr, void* data)
{
    if(ptr == nullptr) return;

    assert(data);
    if(data == nullptr) return;
    auto& pool = *static_cast<CounterAQLPacket::CounterMemoryPool*>(data);

    assert(pool.free_fn);
    if(pool.free_fn == nullptr) return;
    pool.free_fn(ptr);
}
/**
 * Copy callback passed to aqlprofile_pmc_create_packets.
 *
 * @param dst  Destination buffer.
 * @param src  Source buffer.
 * @param size Number of bytes to copy; zero succeeds without copying.
 * @param data Opaque pointer to the owning CounterMemoryPool.
 * @return HSA_STATUS_SUCCESS for an empty copy, HSA_STATUS_ERROR when the pool
 *         or its copy function is missing, otherwise the result of api_copy_fn.
 */
hsa_status_t
CounterAQLPacket::CounterMemoryPool::Copy(void* dst, const void* src, size_t size, void* data)
{
    if(size == 0) return HSA_STATUS_SUCCESS;

    auto* self = reinterpret_cast<CounterAQLPacket::CounterMemoryPool*>(data);
    if(self == nullptr || self->api_copy_fn == nullptr) return HSA_STATUS_ERROR;

    return self->api_copy_fn(dst, src, size);
}
/**
 * Builds the start/stop/read PMC packets for the given events.
 *
 * @param agent  aqlprofile agent the packets are created for.
 * @param _pool  Memory pool description; copied into this object, and the
 *               address of that copy is passed to aqlprofile as callback data.
 * @param events Counter events to collect. When empty, nothing is created and
 *               the object stays marked as empty.
 *
 * Raises ROCP_FATAL if aqlprofile cannot create the packets.
 */
CounterAQLPacket::CounterAQLPacket(aqlprofile_agent_handle_t                  agent,
                                   CounterAQLPacket::CounterMemoryPool        _pool,
                                   const std::vector<aqlprofile_pmc_event_t>& events)
: pool(_pool)
{
    if(events.empty()) return;

    // Start from null packets before aqlprofile fills them in.
    packets.start_packet = null_amd_aql_pm4_packet;
    packets.stop_packet  = null_amd_aql_pm4_packet;
    packets.read_packet  = null_amd_aql_pm4_packet;

    aqlprofile_pmc_profile_t pmc_profile{};
    pmc_profile.agent       = agent;
    pmc_profile.events      = events.data();
    pmc_profile.event_count = static_cast<uint32_t>(events.size());

    const hsa_status_t create_status =
        aqlprofile_pmc_create_packets(&this->handle,
                                      &this->packets,
                                      pmc_profile,
                                      &CounterMemoryPool::Alloc,
                                      &CounterMemoryPool::Free,
                                      &CounterMemoryPool::Copy,
                                      static_cast<void*>(&pool));
    if(create_status != HSA_STATUS_SUCCESS) ROCP_FATAL << "Could not create PMC packets!";

    // All three packets are submitted as vendor-specific packets.
    const auto vendor_header    = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
    packets.start_packet.header = vendor_header;
    packets.stop_packet.header  = vendor_header;
    packets.read_packet.header  = vendor_header;

    empty = false;
}
hsa_status_t
@@ -68,21 +68,17 @@ public:
before_krn_pkt.clear();
after_krn_pkt.clear();
}
bool isEmpty() const { return empty; }
virtual void populate_before() = 0;
virtual void populate_after() = 0;
aqlprofile_handle_t pkt_handle = {.handle = 0};
aqlprofile_handle_t GetHandle() const { return handle; }
aqlprofile_handle_t handle = {.handle = 0};
bool empty = {true};
bool empty = {true};
hsa_ven_amd_aqlprofile_profile_t profile = {};
hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet;
hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet;
hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet;
common::container::small_vector<hsa_ext_amd_aql_pm4_packet_t, 3> before_krn_pkt = {};
common::container::small_vector<hsa_ext_amd_aql_pm4_packet_t, 2> after_krn_pkt = {};
bool isEmpty() const { return empty; }
};
class EmptyAQLPacket : public AQLPacket
@@ -100,22 +96,46 @@ class CounterAQLPacket : public AQLPacket
friend class rocprofiler::aql::CounterPacketConstruct;
using memory_pool_free_func_t = decltype(::hsa_amd_memory_pool_free)*;
public:
CounterAQLPacket(memory_pool_free_func_t func)
: free_func{func} {};
~CounterAQLPacket() override;
void populate_before() override { before_krn_pkt.push_back(start); };
void populate_after() override
/**
 * Memory pools and HSA entry points used by the aqlprofile allocation
 * callbacks (Alloc / Free / Copy). A pointer to an instance is passed to those
 * callbacks as their opaque `data` argument.
 */
struct CounterMemoryPool
{
    using desc_t = aqlprofile_buffer_desc_flags_t;

    // Agent that is granted access to every buffer returned by Alloc.
    hsa_agent_t gpu_agent;
    // Pool used by Alloc for all requests not served from kernarg_pool_.
    hsa_amd_memory_pool_t cpu_pool_;
    // Pool used by Alloc for device-uncached requests unless bIgnoreKernArg is set.
    hsa_amd_memory_pool_t kernarg_pool_;

    // HSA entry points invoked by the callbacks; Alloc and Copy return
    // HSA_STATUS_ERROR when one they need is null.
    decltype(hsa_amd_memory_pool_allocate)* allocate_fn;
    decltype(hsa_amd_agents_allow_access)*  allow_access_fn;
    decltype(hsa_amd_memory_pool_free)*     free_fn;
    decltype(hsa_amd_memory_fill)*          fill_fn;
    decltype(hsa_memory_copy)*              api_copy_fn;

    // When true, Alloc never allocates from kernarg_pool_.
    bool bIgnoreKernArg;

    static void         Free(void* ptr, void* data);
    static hsa_status_t Alloc(void** ptr, size_t size, desc_t flags, void* data);
    static hsa_status_t Copy(void* dst, const void* src, size_t size, void* data);
};
public:
CounterAQLPacket(aqlprofile_agent_handle_t agent,
CounterMemoryPool pool,
const std::vector<aqlprofile_pmc_event_t>& events);
~CounterAQLPacket() override { aqlprofile_pmc_delete_packets(this->handle); };
/// Queues the start packet for submission before the kernel; does nothing when
/// this packet set is empty.
void populate_before() override
{
    if(empty) return;
    before_krn_pkt.push_back(packets.start_packet);
};
/// Queues the read packet followed by the stop packet for submission after the
/// kernel; does nothing when this packet set is empty.
void populate_after() override
{
    if(!empty)
    {
        after_krn_pkt.push_back(packets.read_packet);
        after_krn_pkt.push_back(packets.stop_packet);
    }
};
aqlprofile_pmc_aql_packets_t packets{};
protected:
bool command_buf_mallocd = false;
bool output_buffer_malloced = false;
memory_pool_free_func_t free_func = nullptr;
CounterMemoryPool pool{};
};
struct TraceMemoryPool
@@ -95,10 +95,10 @@ TEST(thread_trace, resource_creation)
packet->populate_after();
size_t vendor_packet = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
ASSERT_TRUE(packet->start.header == vendor_packet);
ASSERT_TRUE(packet->stop.header == vendor_packet);
ASSERT_TRUE(packet->before_krn_pkt.size() > 0);
ASSERT_TRUE(packet->after_krn_pkt.size() > 0);
ASSERT_TRUE(packet->before_krn_pkt.at(0).header == vendor_packet);
ASSERT_TRUE(packet->after_krn_pkt.at(0).header == vendor_packet);
}
{