SWDEV-499989: Conversion Script to change counter collection output format from v3 to v1 (#107)

* SWDEV-499989: Add script to convert rocprofv3 counter collection output format to that of v1

* Add logging and argparsing

* Drop counters that are duplicated across multiple pmc lines

* Adding test for conversion

* moving conversion script to test files

* copy conversion script from scripts folder

[ROCm/rocprofiler-sdk commit: c77596b703]
Tento commit je obsažen v:
Jakaraddi, Manjunath
2025-02-12 11:31:17 -08:00
odevzdal GitHub
rodič 3a26de9e53
revize 0608bbb4db
7 změnil soubory, kde provedl 314 přidání a 0 odebrání
+156
Zobrazit soubor
@@ -0,0 +1,156 @@
#!/usr/bin/env python3
import os
import pandas as pd
import argparse
import logging
def read_csv(file_path):
    """Load one CSV file into a pandas DataFrame.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.

    Raises:
        Exception: Whatever ``pandas.read_csv`` raised. The failure is logged
            and the original exception is re-raised unchanged.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as e:
        # Report at ERROR level: this script configures logging at WARNING
        # unless -v/-d is given, so an INFO record would never be displayed.
        logging.error(f"Error reading {file_path}: {e}")
        raise
def get_counter_collection_files(root_path):
    """Find counter-collection CSV files below *root_path*.

    The directory tree is walked with ``os.walk``. A file is selected when its
    name ends with ``counter_collection.csv`` and the path of the directory
    that contains it includes the substring ``pmc_``.

    Args:
        root_path: Directory at which the walk starts.

    Returns:
        A list of the matching file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _, entries in os.walk(root_path)
        if "pmc_" in current_dir
        for entry in entries
        if entry.endswith("counter_collection.csv")
    ]
def get_combined_df(args):
    """Read every requested counter-collection CSV into one DataFrame.

    Each entry of ``args.input`` may be a file, which is used as is, or a
    directory, which is searched with ``get_counter_collection_files``.
    Entries that are neither are skipped with a warning.

    Args:
        args: Parsed command-line namespace; only ``args.input`` (an iterable
            of paths) is read.

    Returns:
        A DataFrame holding the rows of all files, re-indexed from zero.

    Raises:
        ValueError: If no input file could be found.
    """
    files_list = []
    for input_path in args.input:
        if os.path.isfile(input_path):
            files_list.append(input_path)
        elif os.path.isdir(input_path):
            files_list.extend(get_counter_collection_files(input_path))
        else:
            # Surface typos in the command line instead of dropping them silently.
            logging.warning(
                f"Ignoring input that is neither a file nor a directory: {input_path}"
            )
    if not files_list:
        raise ValueError("Valid Input files not found")
    logging.info(f"Processing files: {files_list}")
    # Concatenate once: growing a DataFrame inside the loop copies all rows
    # gathered so far again for every additional file.
    return pd.concat([read_csv(path) for path in files_list], ignore_index=True)
def write_to_file(df, args):
    """Write *df* as CSV (without the index column) to ``args.output``.

    Args:
        df: DataFrame to serialise.
        args: Parsed command-line namespace; only ``args.output`` is read.

    The parent directory of the output file is created when the path has one
    and it does not exist yet.
    """
    logging.info(f"Saving output file to : {args.output}")
    parent_dir = os.path.dirname(args.output)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(args.output, index=False)
def main(args):
    """Pivot rocprofv3 counter-collection rows into one row per dispatch.

    The input files hold one row per (dispatch, counter). The output has one
    row per combination of the dispatch-describing columns listed in
    ``indexes`` and one column per distinct ``Counter_Name``, filled with the
    matching ``Counter_Value``.

    Args:
        args: Parsed command-line namespace providing ``loglevel``, ``input``
            and ``output``.

    Raises:
        ValueError: If no input file could be found (from ``get_combined_df``).
        KeyError: If the combined input lacks a column needed for the pivot.
    """
    logging.basicConfig(level=args.loglevel)
    input_df = get_combined_df(args)

    # Validate: columns outside the known rocprofv3 set are only reported.
    columns = [
        "Correlation_Id",
        "Dispatch_Id",
        "Agent_Id",
        "Queue_Id",
        "Process_Id",
        "Thread_Id",
        "Grid_Size",
        "Kernel_Id",
        "Kernel_Name",
        "Workgroup_Size",
        "LDS_Block_Size",
        "Scratch_Size",
        "VGPR_Count",
        "SGPR_Count",
        "Counter_Name",
        "Counter_Value",
        "Start_Timestamp",
        "End_Timestamp",
    ]
    for col in input_df.columns:
        if col not in columns:
            logging.debug(f"Unexpected column {col} found in rocprofv3 input file")

    # Convert: these columns identify one output row; every other known
    # column (ids, timestamps, ...) is left out of the result.
    indexes = [
        "Dispatch_Id",
        "Agent_Id",
        "Grid_Size",
        "Kernel_Name",
        "LDS_Block_Size",
        "Queue_Id",
        "SGPR_Count",
        "Scratch_Size",
        "VGPR_Count",
        "Workgroup_Size",
    ]

    # Fail early with a readable message; pandas would raise a KeyError for
    # the same inputs anyway, but without saying what the script expected.
    missing = [
        col
        for col in indexes + ["Counter_Name", "Counter_Value"]
        if col not in input_df.columns
    ]
    if missing:
        raise KeyError(f"Required columns missing from rocprofv3 input: {missing}")

    # Drop duplicate counters in multiple PMC lines
    unique_rows = input_df.drop_duplicates(
        subset=indexes + ["Counter_Name"], keep="first"
    )
    # After de-duplication each (index, counter) group holds a single row, so
    # the "sum" aggregation simply carries that value through.
    pivoted_data = unique_rows.pivot_table(
        index=indexes, columns="Counter_Name", values="Counter_Value", aggfunc="sum"
    ).reset_index()

    # Save
    write_to_file(pivoted_data, args)
def parse_args():
    """Parse the command-line options of the conversion script.

    Returns:
        An ``argparse.Namespace`` with ``input`` (list of paths, required),
        ``output`` (file name, required) and ``loglevel``, which is
        ``logging.DEBUG`` with ``-d``, ``logging.INFO`` with ``-v`` and
        ``logging.WARNING`` otherwise.
    """
    cli = argparse.ArgumentParser()
    # Registration order matters: -d and -v share ``dest="loglevel"`` and the
    # default of the first one registered is the one that applies.
    option_table = (
        (
            ("-i", "--input"),
            {
                "help": "Rocprofv3 Counter Collection input files and/or directories containing `*counter_collection.csv` files",
                "nargs": "+",
                "default": [],
                "required": True,
            },
        ),
        (
            ("-o", "--output"),
            {
                "help": "Rocprofv1 formatted output file name",
                "default": None,
                "type": str,
                "required": True,
            },
        ),
        (
            ("-d", "--debug"),
            {
                "help": "Debug Logs",
                "action": "store_const",
                "dest": "loglevel",
                "const": logging.DEBUG,
                "default": logging.WARNING,
            },
        ),
        (
            ("-v", "--verbose"),
            {
                "help": "Verbose Logs",
                "action": "store_const",
                "dest": "loglevel",
                "const": logging.INFO,
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the conversion.
    cli_args = parse_args()
    main(cli_args)
+1
Zobrazit soubor
@@ -68,6 +68,7 @@ add_subdirectory(thread-trace)
add_subdirectory(pc_sampling)
add_subdirectory(hip-graph-tracing)
add_subdirectory(counter-collection)
# tests for the counter collection format conversion script
add_subdirectory(conversion-script)
# rocdecode tests are only added when ROCPROFILER_BUILD_ROCDECODE_TESTS is true
if(ROCPROFILER_BUILD_ROCDECODE_TESTS)
add_subdirectory(rocdecode)
endif()
+57
Zobrazit soubor
@@ -0,0 +1,57 @@
#
#
#
# Integration tests for the counter collection format conversion script:
# run rocprofv3, convert its CSV output, then validate the converted file.
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-conversion-script
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
# copy to binary directory
rocprofiler_configure_pytest_files(COPY validate.py input.txt conftest.py
CONFIG pytest.ini)
# copy the conversion script next to the test files (COPYONLY: no variable
# substitution is performed on its contents)
# NOTE(review): the path is relative to CMAKE_SOURCE_DIR, so it presumably only
# resolves when configured from the repository top level - confirm
configure_file(${CMAKE_SOURCE_DIR}/source/scripts/convert-counters-collection-format.py
${CMAKE_CURRENT_BINARY_DIR}/convert-counters-collection-format.py COPYONLY)
# step 1: run rocprofv3 on vector-ops with input.txt as its input file and
# request CSV output under out_conversion_script
add_test(
NAME test-conversion-script-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
${CMAKE_CURRENT_BINARY_DIR}/input.txt -T -d
${CMAKE_CURRENT_BINARY_DIR}/out_conversion_script -o pmc1 --output-format csv --
$<TARGET_FILE:vector-ops>)
# reuse the memcheck preload setting, renamed from LD_PRELOAD to ROCPROF_PRELOAD
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(cc-env-pmc "${PRELOAD_ENV}")
set_tests_properties(
test-conversion-script-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT "${cc-env-pmc}"
FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}")
# step 2: convert everything found under out_conversion_script into converted.csv
# (the script is given as a relative path; no WORKING_DIRECTORY is set, so this
# relies on the test running in the directory the script was copied to)
add_test(
NAME test-conversion-script-convert
COMMAND
${Python3_EXECUTABLE} convert-counters-collection-format.py --input
${CMAKE_CURRENT_BINARY_DIR}/out_conversion_script --output
${CMAKE_CURRENT_BINARY_DIR}/out_conversion_script/converted.csv)
# ordered after the profiling run via DEPENDS
set_tests_properties(
test-conversion-script-convert
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
test-conversion-script-execute FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
# step 3: run validate.py on the converted CSV
add_test(NAME test-conversion-script-validate
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
${CMAKE_CURRENT_BINARY_DIR}/out_conversion_script/converted.csv)
# ordered after the conversion via DEPENDS
set_tests_properties(
test-conversion-script-validate
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
test-conversion-script-convert FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
+38
Zobrazit soubor
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import json
import pytest
import pandas as pd
def pytest_addoption(parser):
    """Register the ``--input`` command-line option on *parser*.

    The option stores the path of the CSV file that the ``input_data``
    fixture loads.
    """
    option_settings = {"action": "store", "help": "Path to csv file."}
    parser.addoption("--input", **option_settings)
@pytest.fixture
def input_data(request):
    """Return the CSV file named by the ``--input`` option as a DataFrame."""
    csv_path = request.config.getoption("--input")
    with open(csv_path, "r") as handle:
        frame = pd.read_csv(handle)
    return frame
+2
Zobrazit soubor
@@ -0,0 +1,2 @@
pmc: SQ_WAVES
pmc: GRBM_GUI_ACTIVE
+5
Zobrazit soubor
@@ -0,0 +1,5 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
+55
Zobrazit soubor
@@ -0,0 +1,55 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import sys
import pytest
import numpy as np
import pandas as pd
import re
# Kernel names, kept in sorted order. No check visible in this file reads
# this list.
kernel_list = sorted(
    f"{operation}_kernel"
    for operation in ("addition", "subtract", "multiply", "divide")
)

# Counter columns that the converted CSV must contain; they match the two
# ``pmc:`` lines of input.txt.
counters_list = [
    "SQ_WAVES",
    "GRBM_GUI_ACTIVE",
]
def test_validate_counter_collection_pmc1(input_data: pd.DataFrame):
    """Check the converted counter-collection CSV.

    The table must be non-empty, ``Agent_Id`` and ``Queue_Id`` must be
    positive integers, and every counter in ``counters_list`` must exist as a
    column whose values are all greater than zero.
    """
    frame = input_data
    assert not frame.empty

    for id_column in ("Agent_Id", "Queue_Id"):
        assert (frame[id_column].astype(int).values > 0).all()
    assert len(frame["Kernel_Name"]) > 0

    # First make sure every expected counter became a column ...
    available_columns = frame.columns.tolist()
    for counter in counters_list:
        assert counter in available_columns

    # ... then check the collected values themselves.
    for counter in counters_list:
        assert all(value > 0 for value in frame[counter].values)
if __name__ == "__main__":
    # Run this file through pytest, stop at the first failure (-x), forward
    # any extra command-line arguments and exit with pytest's status.
    sys.exit(pytest.main(["-x", __file__, *sys.argv[1:]]))