diff --git a/projects/rocr-runtime/include/hsakmttypes.h b/projects/rocr-runtime/include/hsakmttypes.h index fe3d0b220e..656e85d707 100644 --- a/projects/rocr-runtime/include/hsakmttypes.h +++ b/projects/rocr-runtime/include/hsakmttypes.h @@ -876,6 +876,11 @@ typedef struct _HSA_UUID HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ, 0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8); +// HSA_UUID that identifies the GPU TextureCache (TCA) block +// {333e393f-e147-4f49-a6d1-60914c7086b0} +HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCA, +0x333e393f, 0xe147, 0x4f49, 0xa6, 0xd1,0x60, 0x91, 0x4c, 0x70, 0x86, 0xb0); + // GUID that identifies the GPU Memory Controller (MC) block // {13900B57-4956-4D98-81D0-68521937F59C} HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC, diff --git a/projects/rocr-runtime/src/perfctr.c b/projects/rocr-runtime/src/perfctr.c index ce991dd228..dd82e2114c 100644 --- a/projects/rocr-runtime/src/perfctr.c +++ b/projects/rocr-runtime/src/perfctr.c @@ -180,6 +180,9 @@ static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid) case PERFCOUNTER_BLOCKID__SQ: *uuid = HSA_PROFILEBLOCK_AMD_SQ; break; + case PERFCOUNTER_BLOCKID__TCA: + *uuid = HSA_PROFILEBLOCK_AMD_TCA; + break; case PERFCOUNTER_BLOCKID__IOMMUV2: *uuid = HSA_PROFILEBLOCK_AMD_IOMMUV2; break; diff --git a/projects/rocr-runtime/src/pmc_table.c b/projects/rocr-runtime/src/pmc_table.c index 1e184cb4cb..92c363bfa1 100644 --- a/projects/rocr-runtime/src/pmc_table.c +++ b/projects/rocr-runtime/src/pmc_table.c @@ -49,6 +49,11 @@ static uint32_t gfx7_sq_counter_ids[] = { 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250 }; +static uint32_t gfx7_tca_counter_ids[] = { +1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, +23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38 +}; + /* Unused counters - 166, 292 - 297 */ static uint32_t gfx8_sq_counter_ids[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, @@ -70,6 +75,11 @@ static uint32_t gfx8_sq_counter_ids[] = { 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 298 }; +static uint32_t gfx8_tca_counter_ids[] = { +1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, +23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34 +}; + /* Polaris 10/11 have the same SQ cpunter IDs but different from other gfx8's. */ /* Unused counters - 167 and 275 are *_DUMMY_LAST */ static uint32_t gfx8_pl_sq_counter_ids[] = { @@ -135,6 +145,17 @@ static struct perf_counter_block carrizo_blocks[PERFCOUNTER_BLOCKID__MAX] = { .counter_size_in_bits = 64, .counter_mask = BITMASK(64) }, + [PERFCOUNTER_BLOCKID__TCA] = { + /* PMC0: PERF_SEL~PERF_SEL3, PMC1: PERF_SEL~PERF_SEL3, PMC2: PERF_SEL + * PMC3: PERF_SEL. So 10 PERF_SELs in total + */ + .num_of_slots = 10, + .num_of_counters = sizeof(gfx8_tca_counter_ids) / + sizeof(*gfx8_tca_counter_ids), + .counter_ids = gfx8_tca_counter_ids, + .counter_size_in_bits = 64, + .counter_mask = BITMASK(64) + }, }; static struct perf_counter_block fiji_blocks[PERFCOUNTER_BLOCKID__MAX] = { @@ -146,6 +167,14 @@ static struct perf_counter_block fiji_blocks[PERFCOUNTER_BLOCKID__MAX] = { .counter_size_in_bits = 64, .counter_mask = BITMASK(64) }, + [PERFCOUNTER_BLOCKID__TCA] = { + .num_of_slots = 10, /* same as CZ */ + .num_of_counters = sizeof(gfx8_tca_counter_ids) / + sizeof(*gfx8_tca_counter_ids), + .counter_ids = gfx8_tca_counter_ids, + .counter_size_in_bits = 64, + .counter_mask = BITMASK(64) + }, }; static struct perf_counter_block hawaii_blocks[PERFCOUNTER_BLOCKID__MAX] = { @@ -157,6 +186,14 @@ static struct perf_counter_block hawaii_blocks[PERFCOUNTER_BLOCKID__MAX] = { .counter_size_in_bits = 64, .counter_mask = BITMASK(64) }, + [PERFCOUNTER_BLOCKID__TCA] = { + .num_of_slots = 10, /* same as CZ */ + .num_of_counters = sizeof(gfx7_tca_counter_ids) / + sizeof(*gfx7_tca_counter_ids), + .counter_ids = gfx7_tca_counter_ids, + .counter_size_in_bits = 64, + .counter_mask = 
BITMASK(64) + }, }; static struct perf_counter_block polaris_blocks[PERFCOUNTER_BLOCKID__MAX] = { @@ -168,6 +205,14 @@ static struct perf_counter_block polaris_blocks[PERFCOUNTER_BLOCKID__MAX] = { .counter_size_in_bits = 64, .counter_mask = BITMASK(64) }, + [PERFCOUNTER_BLOCKID__TCA] = { + .num_of_slots = 10, /* same as CZ */ + .num_of_counters = sizeof(gfx8_tca_counter_ids) / + sizeof(*gfx8_tca_counter_ids), + .counter_ids = gfx8_tca_counter_ids, + .counter_size_in_bits = 64, + .counter_mask = BITMASK(64) + }, }; static struct perf_counter_block vega_blocks[PERFCOUNTER_BLOCKID__MAX] = { @@ -179,6 +224,15 @@ static struct perf_counter_block vega_blocks[PERFCOUNTER_BLOCKID__MAX] = { .counter_size_in_bits = 64, .counter_mask = BITMASK(64) }, + [PERFCOUNTER_BLOCKID__TCA] = { + .num_of_slots = 10, /* same as Fiji */ + /* Greenland has the same TCA counter IDs as Fiji */ + .num_of_counters = sizeof(gfx8_tca_counter_ids) / + sizeof(*gfx8_tca_counter_ids), + .counter_ids = gfx8_tca_counter_ids, + .counter_size_in_bits = 64, + .counter_mask = BITMASK(64) + }, }; /* Current APUs only have one IOMMU. If NUMA is introduced to APUs, we'll need diff --git a/projects/rocr-runtime/src/pmc_table.h b/projects/rocr-runtime/src/pmc_table.h index 2aa1002bdb..c6d16ad47e 100644 --- a/projects/rocr-runtime/src/pmc_table.h +++ b/projects/rocr-runtime/src/pmc_table.h @@ -31,6 +31,7 @@ enum perf_block_id { PERFCOUNTER_BLOCKID__FIRST = 0, PERFCOUNTER_BLOCKID__SQ = PERFCOUNTER_BLOCKID__FIRST, + PERFCOUNTER_BLOCKID__TCA, PERFCOUNTER_BLOCKID__IOMMUV2, PERFCOUNTER_BLOCKID__MAX };