Accum_vgpr support in Rocprofv3 (#70)
* output accumulate vgpr count
* fix logic for computing accum_vgpr
* add accum_vgpr to csv.
* add docs and rocprofv3 support for accumulation VGPRs
* CHANGELOG.md
---------
Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
Co-authored-by: Elwazir, Ammar <Ammar.Elwazir@amd.com>
[ROCm/rocprofiler-sdk commit: 6427fbafc2]
Tento commit je obsažen v:
odevzdal
GitHub
rodič
9874a65bea
revize
7fde16067f
@@ -157,6 +157,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
- Added usage documentation for MPI applications
|
||||
- SDK: `rocprofiler_agent_v0_t` support for agent UUIDs
|
||||
- SDK: `rocprofiler_agent_v0_t` support for agent visibility based on gpu isolation environment variables (`ROCR_VISIBLE_DEVICES`, etc.)
|
||||
- Accumulation VGPR support for rocprofv3.
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
@@ -917,11 +917,11 @@ To collect counters for the kernels matching the filters specified in the preced
|
||||
rocprofv3 -i input.yml -- <application_path>
|
||||
|
||||
$ cat pass_1/312_counter_collection.csv
|
||||
"Correlation_Id","Dispatch_Id","Agent_Id","Queue_Id","Process_Id","Thread_Id","Grid_Size","Kernel_Name","Workgroup_Size","LDS_Block_Size","Scratch_Size","VGPR_Count","SGPR_Count","Counter_Name","Counter_Value","Start_Timestamp","End_Timestamp"
|
||||
4,4,1,1,36499,36499,1048576,"divide_kernel(float*, float const*, float const*, int, int)",64,0,0,12,16,"SQ_WAVES",16384,2228955885095594,2228955885119754
|
||||
8,8,1,2,36499,36499,1048576,"divide_kernel(float*, float const*, float const*, int, int)",64,0,0,12,16,"SQ_WAVES",16384,2228955885095594,2228955885119754
|
||||
12,12,1,3,36499,36499,1048576,"divide_kernel(float*, float const*, float const*, int, int)",64,0,0,12,16,"SQ_WAVES",16384,2228955892986914,2228955893006114
|
||||
16,16,1,4,36499,36499,1048576,"divide_kernel(float*, float const*, float const*, int, int)",64,0,0,12,16,"SQ_WAVES",16384,2228955892986914,2228955893006114
|
||||
"Correlation_Id","Dispatch_Id","Agent_Id","Queue_Id","Process_Id","Thread_Id","Grid_Size","Kernel_Id","Kernel_Name","Workgroup_Size","LDS_Block_Size","Scratch_Size","VGPR_Count","Accum_VGPR_Count","SGPR_Count","Counter_Name","Counter_Value","Start_Timestamp","End_Timestamp"
|
||||
1,1,4,1,225049,225049,1048576,10,"void addition_kernel<float>(float*, float const*, float const*, int, int)",64,0,0,8,0,16,"SQ_WAVES",16384.000000,317095766765717,317095766775957
|
||||
2,2,4,1,225049,225049,1048576,13,"subtract_kernel(float*, float const*, float const*, int, int)",64,0,0,8,0,16,"SQ_WAVES",16384.000000,317095767013157,317095767022957
|
||||
3,3,4,1,225049,225049,1048576,11,"multiply_kernel(float*, float const*, float const*, int, int)",64,0,0,8,0,16,"SQ_WAVES",16384.000000,317095767176998,317095767186678
|
||||
4,4,4,1,225049,225049,1048576,12,"divide_kernel(float*, float const*, float const*, int, int)",64,0,0,12,4,16,"SQ_WAVES",16384.000000,317095767380718,317095767390878
|
||||
|
||||
|
||||
I/O control options
|
||||
@@ -1088,7 +1088,10 @@ The following table lists the various fields or the columns in the output CSV fi
|
||||
- Kernel's Scalar General Purpose Register (SGPR) count.
|
||||
|
||||
* - VGPR_Count
|
||||
- Kernel's Vector General Purpose Register (VGPR) count.
|
||||
- Kernel's Architected Vector General Purpose Register (VGPR) count.
|
||||
|
||||
* - Accum_VGPR_Count
|
||||
- Kernel's Accumulation Vector General Purpose Register (Accum_VGPR/AGPR) count.
|
||||
|
||||
Output formats
|
||||
----------------
|
||||
@@ -1313,7 +1316,8 @@ Here are the properties of the JSON output schema:
|
||||
- **`handle`** *(integer, required)*: Handle of the counter.
|
||||
- **`value`** *(number, required)*: Value of the counter.
|
||||
- **`thread_id`** *(integer, required)*: Thread ID.
|
||||
- **`arch_vgpr_count`** *(integer, required)*: Count of VGPRs.
|
||||
- **`arch_vgpr_count`** *(integer, required)*: Count of Architected VGPRs.
|
||||
- **`accum_vgpr_count`** *(integer, required)*: Count of Accumulation VGPRs.
|
||||
- **`sgpr_count`** *(integer, required)*: Count of SGPRs.
|
||||
- **`lds_block_size_v`** *(integer, required)*: Size of LDS block.
|
||||
- **``pc_sample_host_trap``** *(array)*: Host Trap PC Sampling records.
|
||||
|
||||
@@ -102,7 +102,7 @@ struct csv_encoder
|
||||
using api_csv_encoder = csv_encoder<7>;
|
||||
using agent_info_csv_encoder = csv_encoder<53>;
|
||||
using kernel_trace_csv_encoder = csv_encoder<18>;
|
||||
using counter_collection_csv_encoder = csv_encoder<18>;
|
||||
using counter_collection_csv_encoder = csv_encoder<19>;
|
||||
using memory_copy_csv_encoder = csv_encoder<7>;
|
||||
using memory_allocation_csv_encoder = csv_encoder<8>;
|
||||
using marker_csv_encoder = csv_encoder<7>;
|
||||
|
||||
@@ -573,6 +573,7 @@ generate_csv(const output_config& cfg,
|
||||
"LDS_Block_Size",
|
||||
"Scratch_Size",
|
||||
"VGPR_Count",
|
||||
"Accum_VGPR_Count",
|
||||
"SGPR_Count",
|
||||
"Counter_Name",
|
||||
"Counter_Value",
|
||||
@@ -621,6 +622,7 @@ generate_csv(const output_config& cfg,
|
||||
lds_block_size_v,
|
||||
record.dispatch_data.dispatch_info.private_segment_size,
|
||||
kernel_info->arch_vgpr_count,
|
||||
kernel_info->accum_vgpr_count,
|
||||
kernel_info->sgpr_count,
|
||||
counter_id_to_name.at(counter_id),
|
||||
counter_value,
|
||||
|
||||
+5
-4
@@ -244,10 +244,11 @@ accum_vgpr_count(std::string_view name, kernel_descriptor_t kernel_code)
|
||||
if(name == "gfx908")
|
||||
return arch_vgpr_count(name, kernel_code);
|
||||
else if(name == "gfx90a" || name.find("gfx94") == 0)
|
||||
return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1,
|
||||
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) +
|
||||
1) *
|
||||
(8 - arch_vgpr_count(name, kernel_code));
|
||||
return ((AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1,
|
||||
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) +
|
||||
1) *
|
||||
8) -
|
||||
arch_vgpr_count(name, kernel_code);
|
||||
|
||||
bool emplaced = false;
|
||||
{
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele