Add rocprofiler_load_counter_definition (#1193)

Adds rocprofiler_load_counter_definition. This function allows a counter definition file to be supplied to rocprofiler-sdk directly. Takes in a string containing the counter definition YAML, its size (in bytes), and a flag value to state whether this is an append operation or not.

---------

Co-authored-by: Benjamin Welton <ben@amd.com>
Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Co-authored-by: usrihari123 <srihari.u@amd.com>

[ROCm/rocprofiler-sdk commit: 7ddc72ad45]
Este commit está contenido en:
Benjamin Welton
2024-11-22 01:55:47 -08:00
cometido por GitHub
padre 5bea1772ea
commit 39db3e8a1d
Se han modificado 22 ficheros con 384 adiciones y 8 borrados
+1
Ver fichero
@@ -120,6 +120,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- Changed naming of "dispatch profiling service" to a more descriptive "dispatch counting service". To convert existing tool or user code to the new names, the following sed can be used: `-type f -exec sed -i -e 's/dispatch_profile_counting_service/dispatch_counting_service/g' -e 's/dispatch_profile.h/dispatch_counting_service.h/g' -e 's/rocprofiler_profile_counting_dispatch_callback_t/rocprofiler_dispatch_counting_service_callback_t/g' -e 's/rocprofiler_profile_counting_dispatch_data_t/rocprofiler_dispatch_counting_service_data_t/g' -e 's/rocprofiler_profile_counting_dispatch_record_t/rocprofiler_dispatch_counting_service_record_t/g' {} +`
- `FETCH_SIZE` metric on gfx94x now uses `TCC_BUBBLE` for 128B reads.
- PMC dispatch-based counter collection serialization is now per-device instead of being global across all devices.
- Added rocprofiler_load_counter_definition.
### Resolved issues
@@ -144,6 +144,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
choices=("fatal", "error", "warning", "info", "trace", "env"),
type=str.lower,
)
io_options.add_argument(
"-E",
"--extra-counters",
help="Path to YAML file containing extra counter definitions",
type=str,
required=False,
)
aggregate_tracing_options = parser.add_argument_group("Aggregate tracing options")
@@ -881,6 +888,11 @@ def run(app_args, args, **kwargs):
if args.kernel_iteration_range:
update_env("ROCPROF_KERNEL_FILTER_RANGE", ", ".join(args.kernel_iteration_range))
if args.extra_counters is not None:
with open(args.extra_counters, "r") as e_file:
e_file_contents = e_file.read()
update_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", e_file_contents, overwrite=True)
if args.pmc:
update_env("ROCPROF_COUNTER_COLLECTION", True, overwrite=True)
update_env(
@@ -126,6 +126,10 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt
- List metrics for counter collection.
- Kernel Dispatch Counter Collection
* - ``-E`` \| ``--extra-counters``
- Specifies the path to a YAML file containing extra counter definitions.
- Kernel Dispatch Counter Collection
* - ``-M`` \| ``--mangled-kernels``
- Overrides the default demangling of kernel names.
- Output control
@@ -733,6 +737,18 @@ To supply the counters via ``command-line`` options, use:
1. Please note that when more than one counter is specified, the counters must be separated by a space or a comma.
2. The job fails if the entire set of counters cannot be collected in a single pass.
Extra-counters
++++++++++++++++
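Counters with custom definitions can be supplied in a YAML file (for example, ``extra_counters.yaml``)
that is passed to ``rocprofv3`` with the ``-E`` / ``--extra-counters`` command-line option. The definitions in this file are appended to the built-in counter definitions.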
To supply the extra counters via ``command-line`` options, use:
.. code-block:: shell
rocprofv3 -E <path-to-extra_counters.yaml> --pmc <custom_metric> -- <app_relative_path>
Kernel profiling output
+++++++++++++++++++++++++
@@ -48,3 +48,4 @@ add_subdirectory(rccl)
add_subdirectory(cxx)
add_subdirectory(kfd)
add_subdirectory(amd_detail)
add_subdirectory(experimental)
@@ -0,0 +1,6 @@
# Headers of the experimental API that this directory installs.
set(ROCPROFILER_EXPERIMENTAL_HEADER_FILES counters.h)

# Ship the headers with the "development" component under
# <includedir>/rocprofiler-sdk/experimental.
install(
    FILES ${ROCPROFILER_EXPERIMENTAL_HEADER_FILES}
    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/experimental"
    COMPONENT development)
@@ -0,0 +1,50 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
ROCPROFILER_EXTERN_C_INIT
/**
 * @brief Supply a custom counter definition (YAML) to rocprofiler-sdk.
 *
 * This function must be called before counter iteration functions like
 * @ref rocprofiler_iterate_agent_supported_counters if custom counter definitions
 * are to be used. Once the counter definitions have been loaded they can no longer
 * be changed and this function returns an error.
 *
 * @param [in] yaml YAML string containing counter definitions. May only be null when
 * @p size is 0.
 * @param [in] size Size of the YAML string in bytes
 * @param [in] flags Flags to apply to the counter definition. With
 * ::ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION the YAML is appended to the system
 * provided counter definitions; with any other value a non-empty @p yaml is used in
 * place of them.
 * @return ::rocprofiler_status_t
 * @retval ROCPROFILER_STATUS_SUCCESS if counter definition applied
 * @retval ROCPROFILER_STATUS_ERROR if counter definition already loaded
 * @retval ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT if @p yaml is null while @p size is
 * non-zero
 */
rocprofiler_status_t
rocprofiler_load_counter_definition(const char* yaml,
size_t size,
rocprofiler_counter_flag_t flags) ROCPROFILER_API;
/** @} */
ROCPROFILER_EXTERN_C_FINI
@@ -428,7 +428,9 @@ typedef enum
typedef enum
{
ROCPROFILER_COUNTER_FLAG_NONE = 0,
ROCPROFILER_COUNTER_FLAG_ASYNC, ///< Do not wait for completion before returning.
ROCPROFILER_COUNTER_FLAG_ASYNC, ///< Do not wait for completion before returning.
ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION, ///< Append the counter definition to the system
///< provided counter definition file.
ROCPROFILER_COUNTER_FLAG_LAST,
} rocprofiler_counter_flag_t;
@@ -56,9 +56,9 @@ struct json_output
return (*archive)(std::forward<Args>(args)...);
}
decltype(auto) startNode() { return archive->startNode(); }
decltype(auto) finishNode() { return archive->finishNode(); }
decltype(auto) makeArray() { return archive->makeArray(); }
void startNode() { archive->startNode(); }
void finishNode() { archive->finishNode(); }
void makeArray() { archive->makeArray(); }
decltype(auto) setNextName(const char* name) { archive->setNextName(name); }
void start_process();
@@ -96,8 +96,9 @@ struct config : output_config
int mpi_size = get_mpi_size();
int mpi_rank = get_mpi_rank();
std::string kernel_filter_include = get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", ".*");
std::string kernel_filter_exclude = get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", "");
std::string kernel_filter_include = get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", ".*");
std::string kernel_filter_exclude = get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", "");
std::string extra_counters_contents = get_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", "");
std::unordered_set<uint32_t> kernel_filter_range = {};
std::set<std::string> counters = {};
+10
Ver fichero
@@ -49,6 +49,7 @@
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/callback_tracing.h>
#include <rocprofiler-sdk/experimental/counters.h>
#include <rocprofiler-sdk/external_correlation.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/internal_threading.h>
@@ -1597,6 +1598,15 @@ rocprofiler_configure(uint32_t version,
return compose_tmp_file_name(tool::get_config(), type);
};
if(!tool::get_config().extra_counters_contents.empty())
{
std::string contents(tool::get_config().extra_counters_contents);
size_t length = contents.size();
ROCPROFILER_CALL(rocprofiler_load_counter_definition(
contents.c_str(), length, ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION),
"Loading extra counters");
}
if(tool::get_config().list_metrics)
{
tool_metadata->init(tool::metadata::inprocess{});
@@ -20,6 +20,7 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler-sdk/experimental/counters.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/rocprofiler.h>
@@ -191,4 +192,15 @@ rocprofiler_iterate_counter_dimensions(rocprofiler_counter_id_t id,
return ROCPROFILER_STATUS_SUCCESS;
}
// Public entry point: packages the caller's YAML buffer into a CustomCounterDefinition and
// hands it to the counters subsystem.
//
// yaml  - buffer holding the counter definition text; may be null only when size is 0
// size  - number of bytes to copy from yaml
// flags - ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION requests append mode; every other
//         value leaves append mode off
//
// Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT for a null buffer with a non-zero size,
// otherwise whatever setCustomCounterDefinition reports.
rocprofiler_status_t
rocprofiler_load_counter_definition(const char* yaml, size_t size, rocprofiler_counter_flag_t flags)
{
    // A null buffer is only acceptable when no bytes are to be read from it.
    if(size != 0 && yaml == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;

    auto definition   = rocprofiler::counters::CustomCounterDefinition{};
    definition.data   = std::string(yaml, size);
    definition.append = (flags == ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION);
    definition.loaded = false;

    return rocprofiler::counters::setCustomCounterDefinition(definition);
}
}
@@ -26,11 +26,13 @@
#include "lib/common/filesystem.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "glog/logging.h"
#include "rocprofiler-sdk/fwd.h"
#include "yaml-cpp/exceptions.h"
#include "yaml-cpp/node/convert.h"
#include "yaml-cpp/node/detail/impl.h"
@@ -50,6 +52,13 @@ namespace counters
{
namespace
{
// Returns the single shared, lock-wrapped CustomCounterDefinition. The object is a
// function-local static, so it is constructed on the first call.
common::Synchronized<CustomCounterDefinition>&
getCustomCounterDefinition()
{
    static common::Synchronized<CustomCounterDefinition> instance = {};
    return instance;
}
uint64_t&
current_id()
{
@@ -104,12 +113,31 @@ MetricMap
loadYAML(const std::string& filename, bool load_constants = false, bool load_derived = false)
{
MetricMap ret;
ROCP_INFO << "Loading Counter Config: " << filename;
auto yaml = YAML::LoadFile(filename);
auto override = getCustomCounterDefinition().wlock([&](auto& data) {
data.loaded = true;
return data;
});
std::stringstream counter_data;
if(override.data.empty() || override.append)
{
ROCP_INFO << "Loading Counter Config: " << filename;
std::ifstream file(filename);
counter_data << file.rdbuf();
}
if(!override.data.empty())
{
ROCP_INFO << "Adding Override Config Data: " << override.data;
counter_data << override.data;
}
auto yaml = YAML::Load(counter_data.str());
for(auto it = yaml.begin(); it != yaml.end(); ++it)
{
auto counter_name = it->first.as<std::string>();
if(counter_name == "schema-version") continue;
auto counter_def = it->second;
auto def_iterator = counter_def["architectures"];
@@ -189,6 +217,18 @@ findViaEnvironment(const std::string& filename)
} // namespace
// Stores the YAML text and append mode of `def` in the shared custom counter definition.
// Only `data` and `append` are copied; the stored `loaded` marker is left untouched.
// Returns ROCPROFILER_STATUS_ERROR when the stored definition is already marked as loaded,
// ROCPROFILER_STATUS_SUCCESS otherwise.
rocprofiler_status_t
setCustomCounterDefinition(const CustomCounterDefinition& def)
{
    return getCustomCounterDefinition().wlock([&](auto& current) {
        if(!current.loaded)
        {
            current.data   = def.data;
            current.append = def.append;
            return ROCPROFILER_STATUS_SUCCESS;
        }
        // The definition has already been consumed, so it can no longer be overridden.
        return ROCPROFILER_STATUS_ERROR;
    });
}
MetricMap
getDerivedHardwareMetrics()
{
@@ -26,6 +26,7 @@
#include <string>
#include <unordered_map>
#include <vector>
#include "rocprofiler-sdk/fwd.h"
#include <fmt/core.h>
#include <fmt/ranges.h>
@@ -84,6 +85,13 @@ private:
uint32_t flags_ = 0;
};
// Custom counter definition handed to the counters subsystem.
struct CustomCounterDefinition
{
    // YAML text of the custom definition; empty when none was supplied.
    std::string data{};
    // True when the text is to be added to the default definitions instead of replacing them.
    bool append = false;
    // Set once the counter definitions have been loaded; later updates are rejected.
    bool loaded = false;
};
using MetricMap = std::unordered_map<std::string, std::vector<Metric>>;
using MetricIdMap = std::unordered_map<uint64_t, Metric>;
@@ -130,6 +138,12 @@ getPerfCountersIdMap();
**/
bool
checkValidMetric(const std::string& agent, const Metric& metric);
/**
* Set a custom counter definition
*/
rocprofiler_status_t
setCustomCounterDefinition(const CustomCounterDefinition& def);
} // namespace counters
} // namespace rocprofiler
@@ -34,6 +34,7 @@
#include "lib/rocprofiler-sdk/registration.hpp"
#include <rocprofiler-sdk/dispatch_counting_service.h>
#include <rocprofiler-sdk/experimental/counters.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
@@ -746,3 +747,65 @@ TEST(core, public_api_iterate_agents)
EXPECT_TRUE(from_api.empty());
}
}
// Checks that a counter definition loaded with ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION
// makes the new counter TEST_YAML_LOAD available on every supported agent.
TEST(core, check_load_counter_def_append)
{
// Derived counter supplied by the test. Its expression refers to GRBM_GUI_ACTIVE and CU_NUM,
// which are not defined here - presumably they come from the default definitions (confirm).
const std::string test_yaml = R"(
TEST_YAML_LOAD:
architectures:
gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9:
expression: reduce(GRBM_GUI_ACTIVE,max)*CU_NUM
description: 'Unit: cycles'
)";
ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
test_init();
registration::init_logging();
registration::set_init_status(-1);
context::push_client(1);
// Register the YAML text in append mode before any metrics are queried.
ROCPROFILER_CALL(
rocprofiler_load_counter_definition(
test_yaml.c_str(), test_yaml.size(), ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION),
"Could not load counter definition");
auto agents = hsa::get_queue_controller()->get_supported_agents();
for(const auto& [_, agent] : agents)
{
// Exactly one metric is expected for the appended counter name.
EXPECT_EQ(findDeviceMetrics(agent, {"TEST_YAML_LOAD"}).size(), 1);
}
}
// Checks that a counter definition loaded with ROCPROFILER_COUNTER_FLAG_NONE (no append)
// yields only the counters that the supplied YAML defines.
TEST(core, check_load_counter_def)
{
// Self-contained definition: one hardware counter (GRBM_GUI_ACTIVE) plus one derived counter
// (TEST_YAML_LOAD) whose expression uses it.
const std::string test_yaml = R"(
GRBM_GUI_ACTIVE:
architectures:
gfx942/gfx941/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx940/gfx908/gfx900/gfx90a/gfx9:
block: GRBM
event: 2
description: The GUI is Active
TEST_YAML_LOAD:
architectures:
gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9:
expression: reduce(GRBM_GUI_ACTIVE,max)
description: cycles
)";
ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
test_init();
registration::init_logging();
registration::set_init_status(-1);
context::push_client(1);
// Register the YAML text without the append flag before any metrics are queried.
ROCPROFILER_CALL(rocprofiler_load_counter_definition(
test_yaml.c_str(), test_yaml.size(), ROCPROFILER_COUNTER_FLAG_NONE),
"Could not load counter definition");
auto agents = hsa::get_queue_controller()->get_supported_agents();
for(const auto& [_, agent] : agents)
{
// MAX_WAVE_SIZE is not defined in test_yaml, so it should not be present: only the
// other two requested names are expected to be found.
EXPECT_EQ(
findDeviceMetrics(agent, {"TEST_YAML_LOAD", "GRBM_GUI_ACTIVE", "MAX_WAVE_SIZE"}).size(),
2);
}
}
@@ -1,3 +1,4 @@
schema-version: 1
ALUStalledByLDS:
architectures:
gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9:
@@ -8,3 +8,4 @@ add_subdirectory(input3)
add_subdirectory(list_metrics)
add_subdirectory(kernel_filtering)
add_subdirectory(range_filtering)
add_subdirectory(extra_counters)
@@ -0,0 +1,47 @@
#
# rocprofv3 tool test: counter collection with extra (user-supplied) counter definitions
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)

project(
    rocprofiler-tests-counter-collection
    LANGUAGES CXX
    VERSION 0.0.0)

find_package(rocprofiler-sdk REQUIRED)

# Hand the pytest configuration and the input files to the project helper; the tests
# below read these files from the current binary directory.
rocprofiler_configure_pytest_files(
    CONFIG pytest.ini
    COPY validate.py conftest.py input.txt extra_counters.yaml)

# Environment for the profiling run: any "LD_PRELOAD=" in the memcheck preload
# environment is rewritten to "ROCPROF_PRELOAD=".
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
               "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(cc-env-pmc1 "${PRELOAD_ENV}")

# pmc1 with extra counters: run rocprofv3 on vector-ops with the counters requested in
# input.txt and the extra definitions from extra_counters.yaml, writing CSV output.
add_test(
    NAME rocprofv3-test-counter-collection-txt-pmc1-extra-counters-execute
    COMMAND
        $<TARGET_FILE:rocprofiler-sdk::rocprofv3>
        -i "${CMAKE_CURRENT_BINARY_DIR}/input.txt"
        -E "${CMAKE_CURRENT_BINARY_DIR}/extra_counters.yaml"
        -T
        -d "${CMAKE_CURRENT_BINARY_DIR}/out_counter_collection_1_extra"
        -o pmc1
        --output-format csv
        --
        $<TARGET_FILE:vector-ops>)

set_tests_properties(
    rocprofv3-test-counter-collection-txt-pmc1-extra-counters-execute
    PROPERTIES
        TIMEOUT 45
        LABELS "integration-tests"
        ENVIRONMENT "${cc-env-pmc1}"
        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}")

# Validate the CSV produced by the run above with pytest.
add_test(
    NAME rocprofv3-test-counter-collection-pmc1-extra-counters-validate
    COMMAND
        ${Python3_EXECUTABLE}
        "${CMAKE_CURRENT_BINARY_DIR}/validate.py"
        --input
        "${CMAKE_CURRENT_BINARY_DIR}/out_counter_collection_1_extra/pmc_1/pmc1_counter_collection.csv"
    )

# DEPENDS makes the validation run after the execute test.
set_tests_properties(
    rocprofv3-test-counter-collection-pmc1-extra-counters-validate
    PROPERTIES
        TIMEOUT 45
        LABELS "integration-tests"
        DEPENDS "rocprofv3-test-counter-collection-txt-pmc1-extra-counters-execute"
        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}")
@@ -0,0 +1,31 @@
#!/usr/bin/env python3
import json
import pytest
import pandas as pd
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
def pytest_addoption(parser):
    """Register the ``--input`` (CSV) and ``--json-input`` (JSON) path options."""
    option_help = (
        ("--input", "Path to csv file."),
        ("--json-input", "Path to JSON file."),
    )
    for flag, description in option_help:
        parser.addoption(flag, action="store", help=description)
@pytest.fixture
def input_data(request):
    """Read the CSV file named by the ``--input`` option into a pandas DataFrame."""
    csv_path = request.config.getoption("--input")
    with open(csv_path, "r") as csv_file:
        frame = pd.read_csv(csv_file)
    return frame
@pytest.fixture
def json_data(request):
    """Parse the JSON file named by ``--json-input`` and wrap it via
    ``collapse_dict_list`` and ``dotdict``."""
    json_path = request.config.getoption("--json-input")
    with open(json_path, "r") as json_file:
        parsed = json.load(json_file)
    return dotdict(collapse_dict_list(parsed))
@@ -0,0 +1,5 @@
TEST_YAML_LOAD:
architectures:
gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9:
expression: reduce(GRBM_GUI_ACTIVE,max)*CU_NUM
description: 'Unit: cycles'
@@ -0,0 +1 @@
pmc: TEST_YAML_LOAD
@@ -0,0 +1,5 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
import sys
import pytest
import numpy as np
import pandas as pd
import re
kernel_list = sorted(
["addition_kernel", "subtract_kernel", "multiply_kernel", "divide_kernel"]
)
def unique(lst):
return list(set(lst))
def test_validate_counter_collection_pmc1_extra_counters(input_data: pd.DataFrame):
df = input_data
assert not df.empty
assert (df["Agent_Id"].astype(int).values > 0).all()
assert (df["Queue_Id"].astype(int).values > 0).all()
assert (df["Process_Id"].astype(int).values > 0).all()
assert len(df["Kernel_Name"]) > 0
counter_collection_pmc1_kernel_list = [
x
for x in sorted(df["Kernel_Name"].unique().tolist())
if not re.search(r"__amd_rocclr_.*", x)
]
assert kernel_list == counter_collection_pmc1_kernel_list
kernel_count = dict([[itr, 0] for itr in kernel_list])
assert len(kernel_count) == len(kernel_list)
for itr in df["Kernel_Name"]:
if re.search(r"__amd_rocclr_.*", itr):
continue
kernel_count[itr] += 1
kn_cnt = [itr for _, itr in kernel_count.items()]
assert min(kn_cnt) == max(kn_cnt) and len(unique(kn_cnt)) == 1
assert len(df["Counter_Value"]) > 0
assert df["Counter_Name"].str.contains("TEST_YAML_LOAD").all()
assert (df["Counter_Value"].astype(int).values > 0).all()
di_list = df["Dispatch_Id"].astype(int).values.tolist()
di_uniq = sorted(df["Dispatch_Id"].unique().tolist())
# make sure the dispatch ids are unique and ordered
di_expect = [idx + 1 for idx in range(len(di_list))]
assert di_expect == di_uniq
# Allow running this file directly: forward the command-line arguments to pytest and
# use its result as the process exit status.
if __name__ == "__main__":
    sys.exit(pytest.main(["-x", __file__, *sys.argv[1:]]))