Adding GPU index as a parameter for ATT (#547)
* Adding GPU index as a parameter for ATT
* Tidy fix
* Using tokenize
* Update tests/rocprofv3/advanced-thread-trace/CMakeLists.txt
Co-authored-by: Indic, Vladimir <Vladimir.Indic@amd.com>
* Update tests/rocprofv3/advanced-thread-trace/CMakeLists.txt
* Adding error logging. Using idx instead of id.
---------
Co-authored-by: Giovanni <gbaraldi@amd.com>
Co-authored-by: Indic, Vladimir <Vladimir.Indic@amd.com>
[ROCm/rocprofiler-sdk commit: fd6f96ffb5]
This commit is contained in:
committed by
GitHub
parent
6b2a4fcfc2
commit
6a6b16be93
@@ -781,6 +781,13 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
type=str,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-gpu-index",
|
||||
help="Comma-separated list of GPU index(es) to enable thread trace. Default: All",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-perfcounters",
|
||||
help="(gfx9) List of performance counters, and optionally their SIMD mask.",
|
||||
@@ -1553,6 +1560,12 @@ def run(app_args, args, **kwargs):
|
||||
args.att_serialize_all,
|
||||
overwrite=True,
|
||||
)
|
||||
if args.att_gpu_index:
|
||||
update_env(
|
||||
"ROCPROF_ATT_PARAM_GPU_INDEX",
|
||||
args.att_gpu_index,
|
||||
overwrite=True,
|
||||
)
|
||||
if check_att_capability(args):
|
||||
update_env(
|
||||
"ROCPROF_ATT_LIBRARY_PATH",
|
||||
|
||||
@@ -102,6 +102,9 @@ The following table lists the parameters relevant to thread tracing:
|
||||
| | | | | Shorthand for att-perfcounter-ctrl and the att-perfcounters |
|
||||
| | | | | related to compute unit activity such as VALU, SALU, etc. |
|
||||
+--------------------------+---------+---------+-----------+--------------------------------------------------------------+
|
||||
| att-gpu-index | Integer | | | Comma-separated list of integers. If enabled, only the GPU |
|
||||
| | (List) | | | indexes in the list will be profiled by thread trace. |
|
||||
+--------------------------+---------+---------+-----------+--------------------------------------------------------------+
|
||||
|
||||
For AMD Instinct accelerators, enable perfmon streaming using:
|
||||
|
||||
|
||||
@@ -151,6 +151,7 @@ struct config : output_config
|
||||
std::string pc_sampling_unit = get_env("ROCPROF_PC_SAMPLING_UNIT", "none");
|
||||
std::string extra_counters_contents = get_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", "");
|
||||
std::string att_library_path = get_env("ROCPROF_ATT_LIBRARY_PATH", "");
|
||||
std::string att_gpu_index = get_env("ROCPROF_ATT_PARAM_GPU_INDEX", "");
|
||||
|
||||
std::unordered_set<size_t> kernel_filter_range = {};
|
||||
std::vector<std::set<std::string>> counters = {};
|
||||
|
||||
@@ -1941,10 +1941,28 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
ROCP_FATAL << "ATT Perf requires setting both perfcounter_ctrl and perfcounter list!";
|
||||
}
|
||||
|
||||
auto gpu_idx_set = std::set<uint64_t>{};
|
||||
|
||||
for(const auto& entry :
|
||||
rocprofiler::sdk::parse::tokenize(tool::get_config().att_gpu_index, ","))
|
||||
{
|
||||
try
|
||||
{
|
||||
gpu_idx_set.insert(std::stoi(entry));
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
ROCP_FATAL << "Invalid GPU Id string: " << entry << " - " << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
const auto selecting_by_gpuid = !gpu_idx_set.empty();
|
||||
|
||||
for(auto& [id, agent] : tool_metadata->agents_map)
|
||||
{
|
||||
if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
|
||||
if(selecting_by_gpuid && gpu_idx_set.erase(agent.gpu_index) == 0) continue;
|
||||
|
||||
auto agent_params = global_parameters;
|
||||
for(auto& counter : get_att_perfcounter_params(id, att_perf))
|
||||
agent_params.push_back(counter);
|
||||
@@ -1959,6 +1977,10 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
tool_data),
|
||||
"thread trace service configure");
|
||||
}
|
||||
|
||||
// Any GPU index still left in the set did not match a GPU agent in agents_map
|
||||
for(const auto& entry : gpu_idx_set)
|
||||
ROCP_ERROR << "Invalid GPU Device Index: " << entry;
|
||||
}
|
||||
|
||||
if(tool::get_config().counter_collection)
|
||||
|
||||
@@ -253,3 +253,31 @@ set_tests_properties(
|
||||
rocprofv3-test-att-plus-pc-sampling
|
||||
PROPERTIES TIMEOUT 90 LABELS "integration-tests;thread-trace;pc-sampling" DISABLED
|
||||
${ATT_PLUS_PCS_DISABLE})
|
||||
|
||||
# Trace two GPUs
|
||||
add_test(NAME rocprofv3-test-att-gpu-index-two-gpus
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input -o
|
||||
out --att-gpu-index 0,1 -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-gpu-index-two-gpus
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED})
|
||||
|
||||
# No machine has gpu index = 9999
|
||||
add_test(NAME rocprofv3-test-att-gpu-index-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input -o
|
||||
out --att-gpu-index 0,9999 -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
# The FAIL regex for this test is the ROCP_ERROR specific to invalid device index
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-gpu-index-will-fail
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DISABLED
|
||||
${IS_DISABLED}
|
||||
WILL_FAIL
|
||||
True
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"Invalid GPU Device Index")
|
||||
|
||||
Reference in a new issue
Block a user