Accum_vgpr support in Rocprofv3 (#70)
* Output the accumulation VGPR count
* Fix the logic for computing accum_vgpr
* Add accum_vgpr to the CSV output
* Accumulation VGPR docs and support for rocprofv3
* CHANGELOG.md

---------

Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
Co-authored-by: Elwazir, Ammar <Ammar.Elwazir@amd.com>
This commit is contained in:
committed by
GitHub
parent
075d36eb82
commit
6427fbafc2
@@ -244,10 +244,11 @@ accum_vgpr_count(std::string_view name, kernel_descriptor_t kernel_code)
|
||||
if(name == "gfx908")
|
||||
return arch_vgpr_count(name, kernel_code);
|
||||
else if(name == "gfx90a" || name.find("gfx94") == 0)
|
||||
return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1,
|
||||
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) +
|
||||
1) *
|
||||
(8 - arch_vgpr_count(name, kernel_code));
|
||||
return ((AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1,
|
||||
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) +
|
||||
1) *
|
||||
8) -
|
||||
arch_vgpr_count(name, kernel_code);
|
||||
|
||||
bool emplaced = false;
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user