MI355X Support - PC Sampling and updating counter_defs.yaml (#206)
* Update mi350/gfx950 counter_defs.yaml (#131)
* Update gfx950 counter_defs.yaml
* Update F8 MFMA for gfx950
* Update counter_defs.yaml
* Update counter_defs.yaml
* Add simd_util counter
* Add new rdc ops gfx950
* Update counter_defs.yaml
* New mi350 CPC counters
* Update counter_defs.yaml
* New mi350 spi counters
* Update new mi350 sq counter_defs.yaml
* Update TA counter_defs.yaml
* Update TD GFX950 counter_defs.yaml
* Update TCP gfx950 counter_defs.yaml
* Update new gfx950 tcc counter_defs.yaml
* Update TCP_PENDING_STALL_CYCLES counter_defs.yaml
* MI355X Host-Trap PC sampling Support (#130)
* Adding gfx12 to CU_NUM
* Add ELFABIVERSION_AMDGPU_HSA_V6
* add gfx950 to TEST_YAML_LOAD metric
* add gfx950 to append counters tests
* Updated CHANGELOG.md
---------
Co-authored-by: Kandula, Venkateshwar reddy <Venkateshwarreddy.Kandula@amd.com>
Co-authored-by: Indic, Vladimir <Vladimir.Indic@amd.com>
Co-authored-by: Bhardwaj, Gopesh <Gopesh.Bhardwaj@amd.com>
Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
[ROCm/rocprofiler-sdk commit: 09c7d44cc4]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
ac6e512e25
Коммит
aef4f2f4c5
@@ -168,6 +168,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
### Added
|
||||
|
||||
- Added support for rocJPEG API Tracing
|
||||
- Added MI350X/MI355X support
|
||||
- Added rocprofiler_create_counter to allow for adding custom derived counters at runtime.
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -757,7 +757,7 @@ TEST(core, check_load_counter_def_append)
|
||||
const std::string test_yaml = R"(
|
||||
TEST_YAML_LOAD:
|
||||
architectures:
|
||||
gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
gfx950/gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
expression: reduce(GRBM_GUI_ACTIVE,max)*CU_NUM
|
||||
description: 'Unit: cycles'
|
||||
)";
|
||||
@@ -784,13 +784,13 @@ TEST(core, check_load_counter_def)
|
||||
const std::string test_yaml = R"(
|
||||
GRBM_GUI_ACTIVE:
|
||||
architectures:
|
||||
gfx942/gfx941/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx940/gfx908/gfx900/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
gfx950/gfx942/gfx941/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx940/gfx908/gfx900/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
block: GRBM
|
||||
event: 2
|
||||
description: The GUI is Active
|
||||
TEST_YAML_LOAD:
|
||||
architectures:
|
||||
gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
gfx950/gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
expression: reduce(GRBM_GUI_ACTIVE,max)
|
||||
description: cycles
|
||||
)";
|
||||
|
||||
+1564
-313
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
+10
@@ -238,6 +238,16 @@ is_pc_sampling_supported(const rocprofiler_agent_t* agent)
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else if(agent_name.find("gfx95") == 0)
|
||||
{
|
||||
// As I am not sure if the PCS IOCTL is going to be bumped for gfx950,
|
||||
// I introduced a separate branch for it.
|
||||
// We expect PC sampling IOCTL to be at least 0.3 for gfx950.
|
||||
if(pcs_ioctl_version.major_version > 0 || pcs_ioctl_version.minor_version >= 3)
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else
|
||||
{
|
||||
// The agent does not support PC sampling.
|
||||
|
||||
+1
-1
@@ -1,5 +1,5 @@
|
||||
TEST_YAML_LOAD:
|
||||
architectures:
|
||||
gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
gfx950/gfx942/gfx10/gfx1010/gfx1030/gfx1031/gfx11/gfx1032/gfx1102/gfx906/gfx1100/gfx1101/gfx908/gfx90a/gfx9/gfx12/gfx1200/gfx1201:
|
||||
expression: reduce(GRBM_GUI_ACTIVE,max)*CU_NUM
|
||||
description: 'Unit: cycles'
|
||||
|
||||
+3
-1
@@ -39,7 +39,9 @@ def test_multi_agent_support(
|
||||
|
||||
mi2xx_mi3xx_agents_df = input_agent_info_csv[
|
||||
input_agent_info_csv["Name"].apply(
|
||||
lambda name: name == "gfx90a" or name.startswith("gfx94")
|
||||
lambda name: name == "gfx90a"
|
||||
or name.startswith("gfx94")
|
||||
or name.startswith("gfx95")
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user