diff --git a/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h b/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h index 9cea346493..58626b60c5 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h +++ b/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h @@ -184,7 +184,1287 @@ static const std::unordered_map", - "TCP stalls TA data interface. Now Windowed."}}}}; + "TCP stalls TA data interface. Now Windowed."}, + {"SQ_WAVES_LT_32", + "SQ", + "9", + "", + "Count number of waves sent <32 active threads sent to SQs. " + "This value represents the number of waves that an each individual SIMD has enqueued during " + "the collection timeframe (for dispatch profiling this is the timeframe of kernel execution, " + "for agent profiling it is the timeframe between start_context and read counter data) with " + "less than 32 threads. A sum of all SQ_WAVES_LT_32 values will give the total number of " + "waves with 32 threads enqueued during the collection timeframe by the application. " + "Returns one value per-SE (aggregates of SIMD values). " + "Useful for checking for wavefront occupancy."}, + {"TCC_ALL_TC_OP_WB_WRITEBACK", + "TCC", + "73", + "", + "Number of writebacks due to all TC_OP writeback requests."}, + {"GRBM_UTCL2_BUSY", + "GRBM", + "34", + "", + "The Unified Translation Cache Level-2 (UTCL2) block is busy."}, + {"SPI_RA_SGPR_SIMD_FULL_CSN", + "SPI", + "115", + "", + "Sum of SIMD where SGPR can't take csn wave when !fits. Source is RA0"}, + {"TCP_TCC_NC_ATOMIC_REQ", + "TCP", + "77", + "", + "Total atomic requests with NC mtype from this TCP to all TCCs"}, + {"SQC_DCACHE_HITS", + "SQ", + "291", + "", + "Number of cache hits. (per-SQ, per-Bank, nondeterministic)"}, + {"CPC_CPC_TCIU_IDLE", + "CPC", + "29", + "", + "CPC TCIU interface Idle."}, + {"SPI_CSN_WAVE", + "SPI", + "52", + "", + "Number of waves. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, " + "DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; " + "DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"}, + {"SQ_INST_CYCLES_VMEM_WR", + "SQ", + "77", + "", + "The number of cycles needed to send addr and cmd data for VMEM write instructions. " + "This value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis " + "with units in quad-cycles(4 cycles)."}, + {"TCC_EA0_WR_UNCACHED_32B", + "TCC", + "29", + "", + "Number of 32-byte write/atomic going over the TC_EA_wrreq interface due to uncached " + "traffic. Note that CC mtypes can produce uncached requests, and those are included " + "in this. A 64-byte request will be counted as 2"}, + {"TA_ADDR_STALLED_BY_TD_CYCLES", + "TA", + "55", + "", + "Number of cycles addr path stalled by TD. Perf_Windowing not supported for this " + "counter."}, + {"TCP_TOTAL_WRITEBACK_INVALIDATES", + "TCP", + "45", + "", + "Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ " + "TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ " + "TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed."}, + {"TCC_PROBE_ALL", + "TCC", + "10", + "", + "Number of external probe requests with with EA_TCC_preq_all== 1. Not windowable."}, + {"TCC_CC_REQ", + "TCC", + "7", + "", + "The number of coherently cached requests. This is measured at the tag block."}, + {"SPI_RA_REQ_NO_ALLOC_CSN", + "SPI", + "85", + "", + "Arb cycles with CSn req and no CSn alloc. Source is RA0"}, + {"CPC_ME1_DC0_SPI_BUSY", + "CPC", + "33", + "", + "CPC Me1 Processor Busy."}, + {"SQ_WAVES_RESTORED", + "SQ", + "159", + "", + "Count number of context-restored waves sent to SQs. This value represents the number " + "of waves whos current register state has been restored from a register bank during " + "the collection timeframe (for dispatch profiling this is the timeframe of kernel " + "execution, for agent profiling it is the timeframe between start_context and read " + "counter data). Context saving/restoring is a slow operation and should be limited. " + "High values can also indicate that stalling may be taking place (waiting for free " + "register space). Returns one value per-SE (aggregates of SIMD values)."}, + {"CPF_CPF_TCIU_IDLE", + "CPF", + "27", + "", + "CPF TCIU interface Idle."}, + {"TCP_TCC_ATOMIC_WITH_RET_REQ", + "TCP", + "71", + "", + "Total atomic with return requests from TCP to all TCCs"}, + {"SQC_DCACHE_REQ_READ_8", + "SQ", + "326", + "", + "Number of constant cache 8 dw read requests. (per-SQ)"}, + {"TCC_STREAMING_REQ", + "TCC", + "4", + "", + "Number of streaming requests. This is measured at the tag block."}, + {"SQ_INSTS_SMEM_NORM", + "SQ", + "161", + "", + "Number of SMEM instructions issued normalized to match the level of memory accessed " + "(i.e. scratch, global, etc). This normalized value is designed to give a hint of " + "high cost memory actions being used. The formula used to calculate this value is " + "the following (INST_COUNT *2 for load/store; INST_COUNT*2 atomic; INST_COUNT*2 " + "memtime; INST_COUNT*4 wb/inv). This value is returned per-SE (aggregate of values " + "in SIMDs in the SE)."}, + {"SQC_ICACHE_MISSES", + "SQ", + "272", + "", + "Number of cache misses, includes uncached requests. (per-SQ, per-Bank, " + "nondeterministic)"}, + {"SQ_WAVES_LT_64", + "SQ", + "7", + "", + "Count number of waves with <64 active threads sent to SQs. This value represents " + "the number of waves that an each individual SIMD has enqueued during the collection " + "timeframe (for dispatch profiling this is the timeframe of kernel execution, for " + "agent profiling it is the timeframe between start_context and read counter data) " + "with less than 64 threads. A sum of all SQ_WAVES_LT_64 values will give the total " + "number of waves with 64 threads enqueued during the collection timeframe by the " + "application. Returns one value per-SE (aggregates of SIMD values). Useful for " + "checking for wavefront occupancy."}, + {"TCP_TCC_NC_WRITE_REQ", + "TCP", + "76", + "", + "Total write requests with NC mtype from this TCP to all TCCs"}, + {"SQ_LDS_ATOMIC_RETURN", + "SQ", + "98", + "", + "The number of atomic return cycles in LDS (local data store). This value is returned " + "on a per-SE (aggregate of values in SIMDs in the SE) basis."}, + {"CPF_CMP_UTCL1_STALL_ON_TRANSLATION", + "CPF", + "20", + "", + "One of the Compute UTCL1s is stalled waiting on translation, XNACK or PENDING " + "response."}, + {"TCC_CYCLE", + "TCC", + "1", + "", + "Number of cycles. Not windowable."}, + {"TCP_GATE_EN2", + "TCP", + "1", + "", + "TCP core clocks are turned on. Not Windowed."}, + {"TCC_WRITEBACK", + "TCC", + "22", + "", + "Number of lines written back to main memory. This includes writebacks of dirty lines " + "and uncached write/atomic requests."}, + {"SPI_CSN_WINDOW_VALID", + "SPI", + "47", + "", + "Clock count enabled by perfcounter_start event. Requires " + "SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is CS1; " + "DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; default, " + "source is CS0;"}, + {"SQ_ACCUM_PREV_HIRES", + "SQ", + "158", + "", + "This is a hardware register that can be used for accumulating values for other " + "counters. This is useful in expressions where you want to integrate over time. " + "This counter is primarily for use with derived counters supplied by rocprof."}, + {"TCP_TCC_UC_WRITE_REQ", + "TCP", + "79", + "", + "Total write requests with UC mtype from this TCP to all TCCs"}, + {"TCP_UTCL1_TRANSLATION_MISS", + "TCP", + "48", + "", + "Total utcl1 translation misses"}, + {"GRBM_TA_BUSY", + "GRBM", + "13", + "", + "Any of the Texture Pipes (TA) are busy in the shader engine(s)."}, + {"TCC_EA0_ATOMIC_LEVEL", + "TCC", + "37", + "", + "The sum of the number of EA atomics in flight. This is primarily meant for measure " + "average EA atomic latency. Average atomic latency = " + "TCC_PERF_SEL_EA_WRREQ_ATOMIC_LEVEL/TCC_PERF_SEL_EA_WRREQ_ATOMIC."}, + {"SQ_IFETCH_LEVEL", + "SQ", + "89", + "", + "Number of inflight instruction fetch requests from the cache. This is a value " + "returned per-sharder engine. Best used with accumlate() functions as part of a " + "derived counter."}, + {"TCC_RW_REQ", + "TCC", + "8", + "", + "The number of RW requests. This is measured at the tag block."}, + {"TCP_TOTAL_WRITE", + "TCP", + "32", + "", + "Total number of local write pixels/buffers from TA. Equals " + "TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE+ TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE"}, + {"TCP_TOTAL_ACCESSES", + "TCP", + "29", + "", + "Total number of pixels/buffers from TA. Equals " + "TCP_PERF_SEL_TOTAL_READ+TCP_PERF_SEL_TOTAL_NONREAD"}, + {"SQC_DCACHE_REQ_READ_16", + "SQ", + "327", + "", + "Number of constant cache 16 dw read requests. (per-SQ)"}, + {"SQ_WAIT_ANY", + "SQ", + "58", + "", + "Number of wave-cycles spent waiting for anything (per-simd, nondeterministic). " + "Units in quad-cycles(4 cycles)"}, + {"SQ_CYCLES", + "SQ", + "2", + "", + "Clock cycles. Value is returned per-SIMD."}, + {"GRBM_SPI_BUSY", + "GRBM", + "11", + "", + "Any of the Shader Pipe Interpolators (SPI) are busy in the shader engine(s)."}, + {"SQ_INSTS_MFMA", + "SQ", + "27", + "", + "Total number of MFMA (Matrix-Fused-Multiply-Add) instructions issued. This value is " + "returned per-SE (aggregate of values in SIMDs in the SE). See AMD ISAs for more " + "information on MFMA instructions."}, + {"GRBM_CP_BUSY", + "GRBM", + "3", + "", + "Any of the Command Processor (CPG/CPC/CPF) blocks are busy."}, + {"SQ_ACCUM_PREV", + "SQ", + "1", + "", + "This is a hardware register that can be used for accumulating values for other " + "counters. This is useful in expressions where you want to integrate over time. " + "Only accumulates once every 4 cycles. This counter is primarily for use with " + "derived counters supplied by rocprof."}, + {"TCP_TCC_CC_WRITE_REQ", + "TCP", + "82", + "", + "Total write requests with CC mtype from this TCP to all TCCs"}, + {"SPI_CSN_NUM_THREADGROUPS", + "SPI", + "49", + "", + "Number of threadgroups launched. Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select " + "source, DEBUG_PIPE_SEL = 1, source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; " + "DEBUG_PIPE_SEL = 3, source is CS3; default, source is CS0;"}, + {"TCP_TCC_NC_READ_REQ", + "TCP", + "75", + "", + "Total read requests with NC mtype from this TCP to all TCCs"}, + {"TCP_TD_TCP_STALL_CYCLES", + "TCP", + "7", + "", + "TD stalls TCP"}, + {"SQ_INSTS_SENDMSG", + "SQ", + "40", + "", + "Total number of Sendmsg (typically an interrupt to the CPU host) instructions " + "issued. This value is returned per-SE (aggregate of values in SIMDs in the SE). " + "See AMD ISAs for more information on Sendmsg instructions."}, + {"TA_ADDR_STALLED_BY_TC_CYCLES", + "TA", + "54", + "", + "Number of cycles addr path stalled by TC. Perf_Windowing not supported for this " + "counter."}, + {"TA_BUFFER_WRITE_WAVEFRONTS", + "TA", + "46", + "", + "Number of buffer write wavefronts processed by TA."}, + {"SQ_ACTIVE_INST_EXP_GDS", + "SQ", + "74", + "", + "Number of cycles each wave spends working on EXPORT or GDS instructions. This value " + "represents the number of cycles each wave spends executing instructions " + "synchronizing workgroups across the device (global data sync). High values " + "indicates large amounts of time spent waiting on communication between CUs. This " + "value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis with " + "units in quad-cycles(4 cycles). See AMD ISAs for more information on GDS " + "instructions."}, + {"TCC_EA0_WRREQ_DRAM_CREDIT_STALL", + "TCC", + "33", + "", + "Number of cycles a EA write request was stalled because the interface was out of " + "DRAM credits."}, + {"TCC_WRITE", + "TCC", + "13", + "", + "Number of write requests."}, + {"SPI_RA_VGPR_SIMD_FULL_CSN", + "SPI", + "109", + "", + "Sum of SIMD where VGPR can't take csn wave when !fits. Source is RA0"}, + {"TCP_TCC_UC_READ_REQ", + "TCP", + "78", + "", + "Total read requests with UC mtype from this TCP to all TCCs"}, + {"TCC_EA0_RDREQ_DRAM", + "TCC", + "43", + "", + "Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC)."}, + {"TCC_EA0_WRREQ_IO_CREDIT_STALL", + "TCC", + "31", + "", + "Number of cycles a EA write request was stalled because the interface was out of IO " + "credits."}, + {"TCC_TOO_MANY_EA_WRREQS_STALL", + "TCC", + "34", + "", + "Number of cycles the TCC could not send a EA write request because it already " + "reached its maximum number of pending EA write requests."}, + {"TCP_TOTAL_READ", + "TCP", + "30", + "", + "Total number of read pixels/buffers from TA. Equals " + "TCP_PERF_SEL_TOTAL_HIT_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_LRU_READ + " + "TCP_PERF_SEL_TOTAL_MISS_EVICT_READ"}, + {"SQ_INSTS_VMEM", + "SQ", + "30", + "", + "The number of VMEM (GPU Memory) instructions issued. The value is returned per-SE " + "(aggregate of values in SIMDs in the SE)."}, + {"SPI_RA_WAVE_SIMD_FULL_CSN", + "SPI", + "103", + "", + "Sum of SIMD where WAVE can't take csn wave when !fits. Source is RA0"}, + {"SQ_INSTS_VSKIPPED", + "SQ", + "41", + "", + "The number of vector instructions skipped. This can occur when the S_SETVSKIP bit " + "is enabled on certain instructions. Often this is used as an alturnative to " + "branching (a compiler may replace a branch with setting this bit to skip the " + "operation, typically as a performance optimization). The value is returned per-SE " + "(aggregate of values in SIMDs in the SE)."}, + {"SQ_ITEMS", + "SQ", + "14", + "", + "Number of valid items per wave. This value is returned on a per-SE (aggregate of " + "values in SIMDs in the SE) basis."}, + {"SQ_LEVEL_WAVES", + "SQ", + "5", + "", + "Track the number of waves. Set ACCUM_PREV for the next counter to use this. This " + "value is returned on a per-SIMD basis."}, + {"SQC_TC_DATA_WRITE_REQ", + "SQ", + "265", + "", + "Number of data write requests to the TC (No-Masking, nondeterministic)"}, + {"SQ_INST_LEVEL_LDS", + "SQ", + "44", + "", + "Number of in-flight LDS instructions. This value represents the number of " + "instructions each wave spends executing instructions accessing the local data store " + "(data shared between SIMDs on the same CU). Set next counter to ACCUM_PREV and " + "divide by INSTS_LDS for average latency. Includes FLAT instructions. This value is " + "returned on a per-SE (aggregate of values in SIMDs in the SE) basis."}, + {"TCP_TOTAL_CACHE_ACCESSES", + "TCP", + "60", + "", + "Count of total cache line (tag) accesses (includes hits and misses)."}, + {"TA_BUFFER_WAVEFRONTS", + "TA", + "44", + "", + "Number of buffer wavefronts processed by TA."}, + {"SQ_WAVES_EQ_64", + "SQ", + "6", + "", + "Count number of waves with exactly 64 active threads sent to SQs. This value " + "represents the number of waves that an each individual SIMD has enqueued during " + "the collection timeframe (for dispatch profiling this is the timeframe of kernel " + "execution, for agent profiling it is the timeframe between start_context and read " + "counter data) with exactly 64 threads. A sum of all SQ_WAVES_EQ_64 values will " + "give the total number of waves with 64 threads enqueued during the collection " + "timeframe by the application. Returns one value per-SE (aggregates of SIMD values). " + "Useful for checking for wavefront occupancy."}, + {"TCP_WRITE_TAGCONFLICT_STALL_CYCLES", + "TCP", + "12", + "", + "Tagram conflict stall on a write"}, + {"SQC_TC_INST_REQ", + "SQ", + "263", + "", + "Number of insruction requests to the TC (No-Masking, nondeterministic)"}, + {"SQC_TC_DATA_ATOMIC_REQ", + "SQ", + "266", + "", + "Number of data atomic requests to the TC (No-Masking, nondeterministic)"}, + {"TCC_EA0_RDREQ_IO_CREDIT_STALL", + "TCC", + "41", + "", + "Number of cycles there was a stall because the read request interface was out of IO " + "credits. Stalls occur regardless of whether a read needed to be performed or not."}, + {"TCP_TCC_CC_ATOMIC_REQ", + "TCP", + "83", + "", + "Total atomic requests with CC mtype from this TCP to all TCCs"}, + {"TCP_TCC_UC_ATOMIC_REQ", + "TCP", + "80", + "", + "Total atomic requests with UC mtype from this TCP to all TCCs"}, + {"SQC_ICACHE_MISSES_DUPLICATE", + "SQ", + "273", + "", + "Number of misses that were duplicates (access to a non-resident, miss pending CL). " + "(per-SQ, per-Bank, nondeterministic)"}, + {"TCC_EA0_RDREQ_LEVEL", + "TCC", + "44", + "", + "The sum of the number of TCC/EA read requests in flight. This is primarily meant " + "for measure average EA read latency. Average read latency = " + "TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ."}, + {"TA_BUFFER_TOTAL_CYCLES", + "TA", + "49", + "", + "Number of buffer cycles issued to TC."}, + {"SQ_WAIT_INST_ANY", + "SQ", + "61", + "", + "Number of wave-cycles spent waiting for any instruction issue. Units in " + "quad-cycles(4 cycles)."}, + {"SQ_WAVE_CYCLES", + "SQ", + "47", + "", + "The cycles spent executing waves in the CUs. This value is reported per-SE " + "(aggregates of SIMD values) and is nondeterministic. Units are in quad-cycles (4 " + "cycles). Useful for determining how much time is spent executing wave code vs " + "overhead/waiting. Low cycle count relative to actual number of cycles processed by " + "the CU can indicate that the CU is stalling or is overloaded."}, + {"TCC_EA0_WRREQ_DRAM", + "TCC", + "103", + "", + "Number of TCC/EA write requests (either 32-byte of 64-byte) destined for DRAM (MC)."}, + {"TCP_TCR_TCP_STALL_CYCLES", + "TCP", + "8", + "", + "TCR stalls TCP_TCR_req interface"}, + {"TCP_TCC_RW_READ_REQ", + "TCP", + "85", + "", + "Total write requests with RW mtype from this TCP to all TCCs"}, + {"SQ_INST_LEVEL_VMEM", + "SQ", + "42", + "", + "Number of in-flight VMEM instructions. Set next counter to ACCUM_PREV and divide by " + "INSTS_VMEM for average latency. Includes FLAT instructions. This value is returned " + "on a per-SE (aggregate of values in SIMDs in the SE) basis."}, + {"TCP_TCC_CC_READ_REQ", + "TCP", + "81", + "", + "Total write requests with CC mtype from this TCP to all TCCs"}, + {"SQ_ACTIVE_INST_VMEM", + "SQ", + "70", + "", + "Number of cycles each wave spends working on a VMEM instructions. This value " + "represents the number of cycles each wave spends executing vector memory " + "instructions. High values indicates a large amount of time spent executing vector " + "memory operations. This value is returned on a per-SE (aggregate of values in " + "SIMDs in the SE) basis with units in quad-cycles(4 cycles)."}, + {"SQ_IFETCH", + "SQ", + "88", + "", + "Number of instruction fetch requests from L1I (instruction) cache. This is a value " + "returned per-SIMD."}, + {"TCP_TCC_READ_REQ", + "TCP", + "69", + "", + "Total read requests from TCP to all TCCs"}, + {"SQC_DCACHE_REQ", + "SQ", + "290", + "", + "Number of requests (post-bank-serialization). (per-SQ, per-Bank)"}, + {"CPC_CPC_STAT_STALL", + "CPC", + "27", + "", + "CPC Stalled."}, + {"TCP_GATE_EN1", + "TCP", + "0", + "", + "TCP interface clocks are turned on. Not Windowed."}, + {"TCP_PENDING_STALL_CYCLES", + "TCP", + "22", + "", + "Stall due to data pending from L2"}, + {"SQC_DCACHE_MISSES_DUPLICATE", + "SQ", + "293", + "", + "Number of misses that were duplicates (access to a non-resident, miss pending CL). " + "(per-SQ, per-Bank, nondeterministic)"}, + {"CPF_CPF_STAT_IDLE", + "CPF", + "24", + "", + "CPF Idle."}, + {"TCP_VOLATILE", + "TCP", + "28", + "", + "Total number of L1 volatile pixels/buffers from TA"}, + {"CPC_CPC_TCIU_BUSY", + "CPC", + "28", + "", + "CPC TCIU interface Busy."}, + {"SQC_DCACHE_REQ_READ_2", + "SQ", + "324", + "", + "Number of constant cache 2 dw read requests. (per-SQ)"}, + {"CPC_CPC_STAT_BUSY", + "CPC", + "25", + "", + "CPC Busy."}, + {"TCP_TCP_LATENCY", + "TCP", + "65", + "", + "Total TCP wave latency (from first clock of wave entering to first clock of wave " + "leaving), divide by TA_TCP_STATE_READ to avg wave latency"}, + {"TCP_UTCL1_TRANSLATION_HIT", + "TCP", + "49", + "", + "Total utcl1 translation hits"}, + {"SQ_INST_LEVEL_SMEM", + "SQ", + "43", + "", + "Number of in-flight SMEM instructions (*2 load/store; *2 atomic; *2 memtime; *4 " + "wb/inv). Set next counter to ACCUM_PREV and divide by INSTS_SMEM for average " + "latency per smem request. Falls slightly short of total request latency because " + "some fetches are divided into two requests that may finish at different times and " + "this counter collects the average latency of the two. This value is returned on a " + "per-SE (aggregate of values in SIMDs in the SE) basis."}, + {"SPI_SWC_CSC_WR", + "SPI", + "189", + "", + "Number of clocks to write CSC waves to SGPRs (need to multiply this value by 4) " + "Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, " + "source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is " + "CS3; default, source is CS0;"}, + {"TCC_READ", + "TCC", + "12", + "", + "Number of read requests. Compressed reads are included in this, but metadata reads " + "are not included."}, + {"TD_LOAD_WAVEFRONT", + "TD", + "25", + "", + "Count the wavefronts with opcode = load, include atomics and store."}, + {"GRBM_EA_BUSY", + "GRBM", + "35", + "", + "The Efficiency Arbiter (EA) block is busy."}, + {"SPI_RA_WVLIM_STALL_CSN", + "SPI", + "133", + "", + "Number of clocks csn is stalled due to WAVE LIMIT."}, + {"SPI_RA_BAR_CU_FULL_CSN", + "SPI", + "123", + "", + "Sum of CU where BARRIER can't take csn wave when !fits. Source is RA0"}, + {"TD_TD_BUSY", + "TD", + "1", + "", + "TD is processing or waiting for data. Perf_Windowing not supported for this " + "counter."}, + {"SQC_ICACHE_REQ", + "SQ", + "270", + "", + "Number of requests. (per-SQ, per-Bank)"}, + {"TCC_ATOMIC", + "TCC", + "14", + "", + "Number of atomic requests of all types."}, + {"TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES", + "TCP", + "13", + "", + "Tagram conflict stall on an atomic"}, + {"CPF_CPF_STAT_BUSY", + "CPF", + "23", + "", + "CPF Busy."}, + {"TCC_EA0_WRREQ_LEVEL", + "TCC", + "35", + "", + "The sum of the number of EA write requests in flight. This is primarily meant for " + "measure average EA write latency. Average write latency = " + "TCC_PERF_SEL_EA_WRREQ_LEVEL/TCC_PERF_SEL_EA_WRREQ."}, + {"SPI_RA_RES_STALL_CSN", + "SPI", + "91", + "", + "Arb cycles with CSn req and no CSn fits. Source is RA0"}, + {"TCC_BUSY", + "TCC", + "2", + "", + "Number of cycles we have a request pending. Not windowable."}, + {"SQC_DCACHE_INPUT_VALID_READYB", + "SQ", + "260", + "", + "Input stalled by SQC (per-SQ, nondeterministic, unwindowed)"}, + {"SQ_WAVES_SAVED", + "SQ", + "160", + "", + "Count number of context-saved waves sent to SQs. This value represents the number " + "of waves whos current register state has been saved to a register bank during the " + "collection timeframe (for dispatch profiling this is the timeframe of kernel " + "execution, for agent profiling it is the timeframe between start_context and read " + "counter data) . Context saving/restoring is a slow operation and should be limited. " + "High values can also indicate that stalling may be taking place (waiting for free " + "register space). Returns one value per-SE (aggregates of SIMD values)."}, + {"SQ_ACTIVE_INST_LDS", + "SQ", + "71", + "", + "Number of cycles each wave spends working on LDS instructions. This value represents " + "the number of cycles each wave spends executing instructions accessing the local " + "data store (data shared between SIMDs on the same CU). High values indicates a " + "large amount of reading/writing to this shared memory space. This value is returned " + "on a per-SE (aggregate of values in SIMDs in the SE) basis with units in " + "quad-cycles(4 cycles). See AMD ISAs for more information on LDS instructions."}, + {"TCC_EA0_RD_UNCACHED_32B", + "TCC", + "40", + "", + "Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be " + "counted as 2"}, + {"TCP_TCC_RW_WRITE_REQ", + "TCP", + "86", + "", + "Total write requests with RW mtype from this TCP to all TCCs"}, + {"TCC_EA0_RDREQ_DRAM_CREDIT_STALL", + "TCC", + "43", + "", + "Number of cycles there was a stall because the read request interface was out of " + "DRAM credits. Stalls occur regardless of whether a read needed to be performed or " + "not."}, + {"SPI_VWC_CSC_WR", + "SPI", + "195", + "", + "Number of clocks to write CSC waves to VGPRs (need to multiply this value by 4) " + "Requires SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, " + "source is CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is " + "CS3; default, source is CS0;"}, + {"SQC_DCACHE_REQ_READ_4", + "SQ", + "325", + "", + "Number of constant cache 4 dw read requests. (per-SQ)"}, + {"SQ_WAVES_LT_16", + "SQ", + "10", + "", + "Count number of waves sent <16 active threads sent to SQs. (per-simd, emulated, " + "global). This value represents the number of waves that an each individual SIMD " + "has enqueued during the collection timeframe (for dispatch profiling this is the " + "timeframe of kernel execution, for agent profiling it is the timeframe between " + "start_context and read counter data) with less than 16 threads. A sum of all " + "SQ_WAVES_LT_16 values will give the total number of waves with 16 threads enqueued " + "during the collection timeframe by the application. Returns one value per-SE " + "(aggregates of SIMD values). Useful for checking for wavefront occupancy."}, + {"SQC_DCACHE_ATOMIC", + "SQ", + "298", + "", + "Number of atomic requests. (per-SQ, per-Bank)"}, + {"TCC_EA0_RDREQ_GMI_CREDIT_STALL", + "TCC", + "42", + "", + "Number of cycles there was a stall because the read request interface was out of " + "GMI credits. Stalls occur regardless of whether a read needed to be performed or " + "not."}, + {"SPI_RA_REQ_NO_ALLOC", + "SPI", + "79", + "", + "Arb cycles with requests but no allocation. Source is RA0"}, + {"SQC_DCACHE_MISSES", + "SQ", + "292", + "", + "Number of cache misses, includes uncached requests. (per-SQ, per-Bank, " + "nondeterministic)"}, + {"TCC_NC_REQ", + "TCC", + "5", + "", + "The number of noncoherently cached requests. This is measured at the tag block."}, + {"SQ_ACTIVE_INST_FLAT", + "SQ", + "76", + "", + "Number of cycles each wave spends working on FLAT instructions. This value " + "represents the number of cycles each wave spends executing instructions accessing " + "flat scratch memory locations. High values indicates a large amount of " + "reading/writing to scratch memory on the device. This value is returned on a " + "per-SE (aggregate of values in SIMDs in the SE) basis with units in quad-cycles(4 " + "cycles). See AMD ISAs for more information on FLAT instructions."}, + {"SQC_TC_DATA_READ_REQ", + "SQ", + "264", + "", + "Number of data read requests to the TC (No-Masking, nondeterministic)"}, + {"TCP_TCC_READ_REQ_LATENCY", + "TCP", + "66", + "", + "Total TCP->TCC request latency for reads and atomics with return. Not Windowed."}, + {"CPC_UTCL1_STALL_ON_TRANSLATION", + "CPC", + "24", + "", + "One of the UTCL1s is stalled waiting on translation, XNACK or PENDING response."}, + {"TCC_PROBE", + "TCC", + "9", + "", + "Number of probe requests. Not windowable."}, + {"TA_BUFFER_ATOMIC_WAVEFRONTS", + "TA", + "47", + "", + "Number of buffer atomic wavefronts processed by TA."}, + {"TA_DATA_STALLED_BY_TC_CYCLES", + "TA", + "56", + "", + "Number of cycles data path stalled by TC. Perf_Windowing not supported for this " + "counter."}, + {"TA_FLAT_WAVEFRONTS", + "TA", + "100", + "", + "Number of flat opcode wavfronts processed by the TA."}, + {"TA_TOTAL_WAVEFRONTS", + "TA", + "32", + "", + "Total number of wavefronts processed by TA."}, + {"CPC_CPC_STAT_IDLE", + "CPC", + "26", + "", + "CPC Idle."}, + {"CPC_CPC_UTCL2IU_STALL", + "CPC", + "32", + "", + "CPC UTCL2 interface Stalled waiting on Free, Tags or Translation."}, + {"TCC_NORMAL_WRITEBACK", + "TCC", + "68", + "", + "Number of writebacks due to requests that are not writeback requests."}, + {"SQ_INST_CYCLES_SMEM", + "SQ", + "84", + "", + "The number of cycles needed to execute scalar memory reads (SMEM). This value is " + "returned on a per-SE (aggregate of values in SIMDs in the SE) basis with units in " + "quad-cycles(4 cycles)."}, + {"SQ_BUSY_CYCLES", + "SQ", + "3", + "", + "Number of clock cycles there are active waves in a shader engine (as reported by " + "the distributed sequencer). This value does not denote the number of active waves, " + "only the clock cycle in which any wave is present in a SE. This value is returned " + "on a per-shader engine basis in clock cycles."}, + {"SQ_LDS_UNALIGNED_STALL", + "SQ", + "96", + "", + "Number of cycles LDS (local data store) is stalled processing flat unaligned " + "load/store ops. This value is returned on a per-SE (aggregate of values in SIMDs " + "in the SE) basis."}, + {"TA_BUFFER_READ_WAVEFRONTS", + "TA", + "45", + "", + "Number of buffer read wavefronts processed by TA."}, + {"TA_FLAT_ATOMIC_WAVEFRONTS", + "TA", + "103", + "", + "Number of flat opcode atomics processed by the TA."}, + {"GRBM_TC_BUSY", + "GRBM", + "28", + "", + "Any of the Texture Cache Blocks (TCP/TCI/TCA/TCC) are busy."}, + {"GRBM_CPC_BUSY", + "GRBM", + "30", + "", + "The Command Processor Compute (CPC) is busy."}, + {"TCP_UTCL1_PERMISSION_MISS", + "TCP", + "50", + "", + "Total utcl1 permission misses"}, + {"SPI_RA_BULKY_CU_FULL_CSN", + "SPI", + "125", + "", + "Sum of CU where BULKY can't take csn wave when !fits. Source is RA0"}, + {"TCP_TA_TCP_STATE_READ", + "TCP", + "27", + "", + "Number of state reads"}, + {"TCP_TCC_WRITE_REQ", + "TCP", + "70", + "", + "Total write requests from TCP to all TCCs"}, + {"TCP_TCC_RW_ATOMIC_REQ", + "TCP", + "87", + "", + "Total atomic requests with RW mtype from this TCP to all TCCs"}, + {"SQ_ACTIVE_INST_MISC", + "SQ", + "75", + "", + "Number of cycles each wave spends working on a BRANCH or SENDMSG instructions. This " + "value represents the number of cycles each wave spends executing instructions " + "performing control flow branching and message sending. This value is returned on a " + "per-SE (aggregate of values in SIMDs in the SE) basis with units in quad-cycles(4 " + "cycles). See AMD ISAs for more information on BRANCH and SENDMSG instructions."}, + {"SQ_ACTIVE_INST_SCA", + "SQ", + "73", + "", + "Number of cycles each wave spends working on a SALU or SMEM instructions. This " + "value represents the number of cycles each wave spends executing scalar ALU or " + "scalar memory instructions. On MI200/300 platforms, there is a single ALU per CU. " + "High values indicates a large amount of time spent executing scalar instructions. " + "This value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis " + "with units in quad-cycles(4 cycles). See AMD ISAs for more information on SALU " + "and SMEM instructions."}, + {"TD_COALESCABLE_WAVEFRONT", + "TD", + "32", + "", + "Count wavefronts that TA finds coalescable."}, + {"SPI_RA_TMP_STALL_CSN", + "SPI", + "97", + "", + "Cycles where csn wants to req but does not fit in temp space."}, + {"SQ_BUSY_CU_CYCLES", + "SQ", + "13", + "", + "Number of quad-cycles each CU is busy. Can be used to calculate the percentage of " + "time each CU is busy. This value is returned on a per-SE (aggregate of values in " + "SIMDs in the SE) basis with units in quad-cycles(4 cycles)."}, + {"TCA_BUSY", + "TCA", + "2", + "", + "Number of cycles we have a request pending. Not windowable."}, + {"TCP_TOTAL_ATOMIC_WITH_RET", + "TCP", + "38", + "", + "Total number of atomic with return pixels/buffers from TA"}, + {"SQC_ICACHE_HITS", + "SQ", + "271", + "", + "Number of cache hits. (per-SQ, per-Bank, nondeterministic)"}, + {"TCA_CYCLE", + "TCA", + "1", + "", + "Number of cycles. Not windowable."}, + {"SPI_CSN_BUSY", + "SPI", + "48", + "", + "Number of clocks with outstanding waves (SPI or SH). Requires " + "SPI_DEBUG_CNTL.DEBUG_PIPE_SEL to select source, DEBUG_PIPE_SEL = 1, source is " + "CS1; DEBUG_PIPE_SEL = 2, source is CS2; DEBUG_PIPE_SEL = 3, source is CS3; " + "default, source is CS0;"}, + {"TCC_UC_REQ", + "TCC", + "6", + "", + "The number of uncached requests. This is measured at the tag block."}, + {"SQC_DCACHE_REQ_READ_1", + "SQ", + "323", + "", + "Number of constant cache 1 dw read requests. (per-SQ)"}, + {"SQ_ACTIVE_INST_ANY", + "SQ", + "69", + "", + "Number of cycles each wave spends working on any type of instruction. Useful in " + "determining percentage of time spend executing wave workloads (see WaveExec). This " + "value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis with " + "units in quad-cycles(4 cycles)."}, + {"TCP_TCC_ATOMIC_WITHOUT_RET_REQ", + "TCP", + "72", + "", + "Total atomic without return requests from TCP to all TCCs"}, + {"TCP_TCC_WRITE_REQ_LATENCY", + "TCP", + "67", + "", + "Total TCP->TCC request latency for writes and atomics without return. Not Windowed."}, + {"TCP_READ_TAGCONFLICT_STALL_CYCLES", + "TCP", + "11", + "", + "Tagram conflict stall on a read"}, + {"TCC_EA0_WRREQ_STALL", + "TCC", + "30", + "", + "Number of cycles a write request was stalled."}, + {"SQC_TC_REQ", + "SQ", + "262", + "", + "Total number of TC requests that were issued by instruction and constant caches. " + "(No-Masking, nondeterministic)"}, + {"CPF_CPF_STAT_STALL", + "CPF", + "25", + "", + "CPF Stalled."}, + {"TCC_ALL_TC_OP_INV_EVICT", + "TCC", + "80", + "", + "Number of evictions due to all TC_OP invalidate requests."}, + {"SQ_LDS_ADDR_CONFLICT", + "SQ", + "95", + "", + "Number of cycles LDS (local data store) is stalled by address conflicts. This value " + "is returned on a per-SE (aggregate of values in SIMDs in the SE) basis."}, + {"SQ_LDS_IDX_ACTIVE", + "SQ", + "99", + "", + "Number of cycles LDS (local data store) is used for indexed (non-direct," + "non-interpolation) operations. This value is returned on a per-SE (aggregate of " + "values in SIMDs in the SE) basis."}, + {"SQ_INST_CYCLES_VMEM_RD", + "SQ", + "78", + "", + "The number of cycles needed to send addr and cmd data for VMEM read instructions. " + "This value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis " + "with units in quad-cycles(4 cycles)."}, + {"CPC_CPC_UTCL2IU_BUSY", + "CPC", + "30", + "", + "CPC UTCL2 interface Busy."}, + {"TCP_UTCL1_REQUEST", + "TCP", + "47", + "", + "Total CLIENT_UTCL1 NORMAL requests"}, + {"CPF_CPF_TCIU_STALL", + "CPF", + "28", + "", + "CPF TCIU interface Stalled waiting on Free, Tags."}, + {"SQ_INSTS_BRANCH", + "SQ", + "39", + "", + "Total number of BRANCH instructions issued. This value is returned per-SE " + "(aggregate of values in SIMDs in the SE). This value SHOULD NOT be used in " + "combination with SQ_ACTIVE_INST_MISC to calculate latency. SQ_ACTIVE_INST_MISC " + "includes both BRANCH and SENDMSG instructions while this is only BRANCH."}, + {"SPI_RA_LDS_CU_FULL_CSN", + "SPI", + "120", + "", + "Sum of CU where LDS can't take csn wave when !fits. Source is RA0"}, + {"TD_ATOMIC_WAVEFRONT", + "TD", + "26", + "", + "Count the wavefronts with opcode = atomic."}, + {"SQ_INSTS_EXP_GDS", + "SQ", + "38", + "", + "Total number of EXPORT or GDS (global wave state) instructions issued. When used in " + "combination with SQ_ACTIVE_INST_EXP_GDS (cycle count for executing instructions) " + "the average latency of EXPORT/GDS instruction execution can be calculated " + "(SQ_ACTIVE_INST_EXP_GDS / SQ_INSTS_EXP_GDS). This value is returned per-SE " + "(aggregate of values in SIMDs in the SE)."}, + {"SQC_TC_STALL", + "SQ", + "267", + "", + "Valid request stalled TC request interface (no-credits). (No-Masking, " + "nondeterministic, unwindowed)"}, + {"CPF_CPF_TCIU_BUSY", + "CPF", + "26", + "", + "CPF TCIU interface Busy."}, + {"TCC_EA0_WRREQ_GMI_CREDIT_STALL", + "TCC", + "32", + "", + "Number of cycles a EA write request was stalled because the interface was out of " + "GMI credits."}, + {"GRBM_CPF_BUSY", + "GRBM", + "31", + "", + "The Command Processor Fetchers (CPF) is busy."}, + {"SQ_WAVES_LT_48", + "SQ", + "8", + "", + "Count number of waves with <48 active threads sent to SQs. This value represents " + "the number of waves that an each individual SIMD has enqueued during the collection " + "timeframe (for dispatch profiling this is the timeframe of kernel execution, for " + "agent profiling it is the timeframe between start_context and read counter data) " + "with less than 48 threads. A sum of all SQ_WAVES_LT_48 values will give the total " + "number of waves with 48 threads enqueued during the collection timeframe by the " + "application. Returns one value per-SE (aggregates of SIMD values). Useful for " + "checking for wavefront occupancy."}, + {"TCC_EA0_ATOMIC", + "TCC", + "36", + "", + "Number of transactions going over the TC_EA_wrreq interface that are actually " + "atomic requests."}, + {"TD_TC_STALL", + "TD", + "15", + "", + "TD is stalled waiting for TC data."}, + {"SPI_RA_TGLIM_CU_FULL_CSN", + "SPI", + "127", + "", + "Cycles where csn wants to req but all CU are at tg_limit"}, + {"TA_BUFFER_COALESCED_WRITE_CYCLES", + "TA", + "53", + "", + "Number of buffer coalesced write cycles issued to TC."}, + {"TCP_TOTAL_ATOMIC_WITHOUT_RET", + "TCP", + "39", + "", + "Total number of atomic without return pixels/buffers from TA"}, + {"CPC_ME1_BUSY_FOR_PACKET_DECODE", + "CPC", + "13", + "", + "Me1 busy for packet decode."}, + {"SQ_INSTS", + "SQ", + "25", + "", + "Total number of instructions issued. When used in combination with " + "SQ_ACTIVE_INST_ANY (cycle count for executing instructions) the average latency of " + "instruction execution can be calculated (SQ_ACTIVE_INST_ANY / SQ_INSTS). This " + "value is returned per-SE (aggregate of values in SIMDs in the SE)."}, + {"TCC_NORMAL_EVICT", + "TCC", + "74", + "", + "Number of evictions due to requests that are not invalidate or probe requests."}, + {"CPC_CPC_UTCL2IU_IDLE", + "CPC", + "31", + "", + "CPC UTCL2 interface Idle."}, + {"TCC_REQ", + "TCC", + "3", + "", + "Number of requests of all types. This is measured at the tag block. This may be " + "more than the number of requests arriving at the TCC, but it is a good indication " + "of the total amount of work that needs to be performed."}, + {"TCC_TAG_STALL", + "TCC", + "45", + "", + "Number of cycles the normal request pipeline in the tag was stalled for any reason. " + "Normally, stalls of this nature are measured exactly from one point the pipeline, " + "but that is not the case for this counter. Probes can stall the pipeline at a " + "variety of places, and there is no single point that can reasonably measure the " + "total stalls accurately."}, + {"TD_STORE_WAVEFRONT", + "TD", + "27", + "", + "Count the wavefronts with opcode = store."}, + {"TA_BUFFER_COALESCED_READ_CYCLES", + "TA", + "52", + "", + "Number of buffer coalesced read cycles issued to TC."}, + {"SQ_LDS_MEM_VIOLATIONS", + "SQ", + "97", + "", + "Number of threads that have a memory violation in the LDS (local data store). This " + "value is returned on a per-SE (aggregate of values in SIMDs in the SE) basis."}, + {"TCC_CLIENT184_REQ", + "TCC", + "312", + "", + "Number of cycles client184 sent a request to this TCC."}, + {"TCC_CLIENT185_REQ", + "TCC", + "313", + "", + "Number of cycles client185 sent a request to this TCC."}, + {"TCC_CLIENT186_REQ", + "TCC", + "314", + "", + "Number of cycles client186 sent a request to this TCC."}, + {"TCC_CLIENT187_REQ", + "TCC", + "315", + "", + "Number of cycles client187 sent a request to this TCC."}, + {"TCC_CLIENT188_REQ", + "TCC", + "316", + "", + "Number of cycles client188 sent a request to this TCC."}, + {"TCC_CLIENT189_REQ", + "TCC", + "317", + "", + "Number of cycles client189 sent a request to this TCC."}, + {"TCC_CLIENT190_REQ", + "TCC", + "318", + "", + "Number of cycles client190 sent a request to this TCC."}, + {"TCC_CLIENT191_REQ", + "TCC", + "319", + "", + "Number of cycles client191 sent a request to this TCC."}}}}; static const std::unordered_map>> derived_gfx908 = {{"gfx908", @@ -431,4 +1711,533 @@ static const std::unordered_mapTCC request latency for reads and atomics with return. Not Windowed. Sum over " + "TCP instances."}, + {"TCC_EA0_RDREQ_GMI_CREDIT_STALL_sum", + "", + "", + "reduce(TCC_EA0_RDREQ_GMI_CREDIT_STALL,sum)", + "Number of cycles there was a stall because the read request interface was out of GMI " + "credits. Stalls occur regardless of whether a read needed to be performed or not. Sum " + "over TCC instances."}, + {"TCC_TAG_STALL_sum", + "", + "", + "reduce(TCC_TAG_STALL,sum)", + "Total number of cycles the normal request pipeline in the tag is stalled for any reason."}, + {"TCP_TA_TCP_STATE_READ_sum", + "", + "", + "reduce(TCP_TA_TCP_STATE_READ,sum)", + "Number of state reads Sum over TCP instances."}, + {"TCC_ATOMIC_sum", + "", + "", + "reduce(TCC_ATOMIC,sum)", + "Number of atomic requests of all types. Sum over TCC instances."}, + {"TCP_TCC_READ_REQ_sum", + "", + "", + "reduce(TCP_TCC_READ_REQ,sum)", + "Total read requests from TCP to all TCCs Sum over TCP instances."}, + {"TCP_TCC_NC_ATOMIC_REQ_sum", + "", + "", + "reduce(TCP_TCC_NC_ATOMIC_REQ,sum)", + "Total atomic requests with NC mtype from this TCP to all TCCs Sum over TCP instances."}, + {"TCC_READ_sum", + "", + "", + "reduce(TCC_READ,sum)", + "Number of read requests. Compressed reads are included in this, but metadata reads are " + "not included. Sum over TCC instances."}, + {"TA_BUFFER_TOTAL_CYCLES_sum", + "", + "", + "reduce(TA_BUFFER_TOTAL_CYCLES,sum)", + "Number of buffer cycles issued to TC. Sum over TA instances."}, + {"TCP_TOTAL_WRITEBACK_INVALIDATES_sum", + "", + "", + "reduce(TCP_TOTAL_WRITEBACK_INVALIDATES,sum)", + "Total number of cache invalidates. Equals TCP_PERF_SEL_TOTAL_WBINVL1+ " + "TCP_PERF_SEL_TOTAL_WBINVL1_VOL+ TCP_PERF_SEL_CP_TCP_INVALIDATE+ " + "TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL. Not Windowed. Sum over TCP instances."}, + {"TCC_EA0_ATOMIC_sum", + "", + "", + "reduce(TCC_EA0_ATOMIC,sum)", + "Number of transactions going over the TC_EA_wrreq interface that are actually atomic " + "requests. Sum over TCC instances."}, + {"TCC_STREAMING_REQ_sum", + "", + "", + "reduce(TCC_STREAMING_REQ,sum)", + "Number of streaming requests. This is measured at the tag block. Sum over TCC instances."}, + {"TA_ADDR_STALLED_BY_TC_CYCLES_sum", + "", + "", + "reduce(TA_ADDR_STALLED_BY_TC_CYCLES,sum)", + "Number of cycles addr path stalled by TC. Perf_Windowing not supported for this counter. " + "Sum over TA instances."}, + {"TCC_EA0_WRREQ_DRAM_sum", + "", + "", + "reduce(TCC_EA0_WRREQ_DRAM,sum)", + "Number of TCC/EA write requests (either 32-byte of 64-byte) destined for DRAM (MC). Sum " + "over TCC instances."}, + {"TCC_EA0_RDREQ_DRAM_sum", + "", + "", + "reduce(TCC_EA0_RDREQ_DRAM,sum)", + "Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC). Sum " + "over TCC instances."}, + {"TD_ATOMIC_WAVEFRONT_sum", + "", + "", + "reduce(TD_ATOMIC_WAVEFRONT,sum)", + "Count the wavefronts with opcode = atomic. Sum over TD instances."}, + {"TCC_EA0_RDREQ_DRAM_CREDIT_STALL_sum", + "", + "", + "reduce(TCC_EA0_RDREQ_DRAM_CREDIT_STALL,sum)", + "Number of cycles there was a stall because the read request interface was out of DRAM " + "credits. Stalls occur regardless of whether a read needed to be performed or not. Sum " + "over TCC instances."}, + {"TCC_ALL_TC_OP_INV_EVICT_sum", + "", + "", + "reduce(TCC_ALL_TC_OP_INV_EVICT,sum)", + "Number of evictions due to all TC_OP invalidate requests. Sum over TCC instances."}, + {"TCP_TD_TCP_STALL_CYCLES_sum", + "", + "", + "reduce(TCP_TD_TCP_STALL_CYCLES,sum)", + "TD stalls TCP. Sum over TCP instances."}, + {"TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum", + "", + "", + "reduce(TCP_WRITE_TAGCONFLICT_STALL_CYCLES,sum)", + "Tagram conflict stall on a write. Sum over TCP instances."}, + {"TCC_EA0_WRREQ_STALL_sum", + "", + "", + "reduce(TCC_EA0_WRREQ_STALL,sum)", + "Number of cycles a write request was stalled. Sum over TCC instances."}, + {"TCP_TCC_UC_ATOMIC_REQ_sum", + "", + "", + "reduce(TCP_TCC_UC_ATOMIC_REQ,sum)", + "Total atomic requests with UC mtype from this TCP to all TCCs Sum over TCP instances."}, + {"TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum", + "", + "", + "reduce(TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES,sum)", + "Tagram conflict stall on an atomic. Sum over TCP instances."}, + {"TCP_GATE_EN1_sum", + "", + "", + "reduce(TCP_GATE_EN1,sum)", + "TCP interface clocks are turned on. Not Windowed. Sum over TCP instances."}, + {"TD_TC_STALL_sum", + "", + "", + "reduce(TD_TC_STALL,sum)", + "TD is stalled waiting for TC data. Sum over TD instances."}, + {"TCC_ALL_TC_OP_WB_WRITEBACK_sum", + "", + "", + "reduce(TCC_ALL_TC_OP_WB_WRITEBACK,sum)", + "Number of writebacks due to all TC_OP writeback requests. Sum over TCC instances."}, + {"TCC_UC_REQ_sum", + "", + "", + "reduce(TCC_UC_REQ,sum)", + "The number of uncached requests. This is measured at the tag block. Sum over TCC " + "instances."}, + {"TCC_EA0_RD_UNCACHED_32B_sum", + "", + "", + "reduce(TCC_EA0_RD_UNCACHED_32B,sum)", + "Number of 32-byte TCC/EA read due to uncached traffic. A 64-byte request will be counted " + "as 2 Sum over TCC instances."}, + {"TCP_UTCL1_PERMISSION_MISS_sum", + "", + "", + "reduce(TCP_UTCL1_PERMISSION_MISS,sum)", + "Total utcl1 permission misses Sum over TCP instances."}, + {"TCC_TOO_MANY_EA_WRREQS_STALL_sum", + "", + "", + "reduce(TCC_TOO_MANY_EA_WRREQS_STALL,sum)", + "Number of cycles the TCC could not send a EA write request because it already reached its " + "maximum number of pending EA write requests. Sum over TCC instances."}, + {"TCP_TCC_WRITE_REQ_LATENCY_sum", + "", + "", + "reduce(TCP_TCC_WRITE_REQ_LATENCY,sum)", + "Total TCP->TCC request latency for writes and atomics without return. Not Windowed. Sum " + "over TCP instances."}, + {"TCC_REQ_sum", + "", + "", + "reduce(TCC_REQ,sum)", + "Number of requests of all types. This is measured at the tag block. This may be more " + "than the number of requests arriving at the TCC, but it is a good indication of the " + "total amount of work that needs to be performed. Sum over TCC instances."}, + {"TCP_TCR_TCP_STALL_CYCLES_sum", + "", + "", + "reduce(TCP_TCR_TCP_STALL_CYCLES,sum)", + "TCR stalls TCP_TCR_req interface. Sum over TCP instances."}, + {"TCP_UTCL1_REQUEST_sum", + "", + "", + "reduce(TCP_UTCL1_REQUEST,sum)", + "Total CLIENT_UTCL1 NORMAL requests Sum over TCP instances."}, + {"TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum", + "", + "", + "reduce(TCP_TCC_ATOMIC_WITHOUT_RET_REQ,sum)", + "Total atomic without return requests from TCP to all TCCs Sum over TCP instances."}, + {"TA_ADDR_STALLED_BY_TD_CYCLES_sum", + "", + "", + "reduce(TA_ADDR_STALLED_BY_TD_CYCLES,sum)", + "Number of cycles addr path stalled by TD. Perf_Windowing not supported for this counter. " + "Sum over TA instances."}, + {"TCP_TCC_NC_WRITE_REQ_sum", + "", + "", + "reduce(TCP_TCC_NC_WRITE_REQ,sum)", + "Total write requests with NC mtype from this TCP to all TCCs Sum over TCP instances."}, + {"TCC_EA0_RDREQ_LEVEL_sum", + "", + "", + "reduce(TCC_EA0_RDREQ_LEVEL,sum)", + "The sum of the number of TCC/EA read requests in flight. This is primarily meant for " + "measure average EA read latency. Average read latency = " + "TCC_PERF_SEL_EA_RDREQ_LEVEL/TCC_PERF_SEL_EA_RDREQ. Sum over TCC instances."}, + {"TCC_RW_REQ_sum", + "", + "", + "reduce(TCC_RW_REQ,sum)", + "The number of RW requests. This is measured at the tag block. Sum over TCC instances."}, + {"TCC_EA0_WRREQ_IO_CREDIT_STALL_sum", + "", + "", + "reduce(TCC_EA0_WRREQ_IO_CREDIT_STALL,sum)", + "Number of cycles a EA write request was stalled because the interface was out of IO " + "credits. Sum over TCC instances."}, + {"TCP_READ_TAGCONFLICT_STALL_CYCLES_sum", + "", + "", + "reduce(TCP_READ_TAGCONFLICT_STALL_CYCLES,sum)", + "Tagram conflict stall on a read. Sum over TCP instances."}, + {"TCC_BUSY_sum", + "", + "", + "reduce(TCC_BUSY,sum)", + "Number of cycles we have a request pending. Not windowable. Sum over TCC instances."}, + {"TCC_BUSY_avr", + "", + "", + "reduce(TCC_BUSY,avr)", + "TCC_BUSY avr over all memory channels."}, + {"TCC_PROBE_sum", + "", + "", + "reduce(TCC_PROBE,sum)", + "Number of probe requests. Not windowable. Sum over TCC instances."}, + {"TCC_PROBE_ALL_sum", + "", + "", + "reduce(TCC_PROBE_ALL,sum)", + "Number of external probe requests with with EA_TCC_preq_all== 1. Not windowable. Sum " + "over TCC instances."}, + {"TA_BUFFER_READ_WAVEFRONTS_sum", + "", + "", + "reduce(TA_BUFFER_READ_WAVEFRONTS,sum)", + "Number of buffer read wavefronts processed by TA. Sum over TA instances."}, + {"TCP_TOTAL_READ_sum", + "", + "", + "reduce(TCP_TOTAL_READ,sum)", + "Total number of read pixels/buffers from TA. Equals TCP_PERF_SEL_TOTAL_HIT_LRU_READ + " + "TCP_PERF_SEL_TOTAL_MISS_LRU_READ + TCP_PERF_SEL_TOTAL_MISS_EVICT_READ. Sum over TCP " + "instances."}, + {"TCC_WRITE_sum", + "", + "", + "reduce(TCC_WRITE,sum)", + "Number of write requests. Sum over TCC instances."}, + {"TCC_EA0_RDREQ_IO_CREDIT_STALL_sum", + "", + "", + "reduce(TCC_EA0_RDREQ_IO_CREDIT_STALL,sum)", + "Number of cycles there was a stall because the read request interface was out of IO " + "credits. Stalls occur regardless of whether a read needed to be performed or not. Sum " + "over TCC instances."}, + {"TCC_EA0_WRREQ_GMI_CREDIT_STALL_sum", + "", + "", + "reduce(TCC_EA0_WRREQ_GMI_CREDIT_STALL,sum)", + "Number of cycles a EA write request was stalled because the interface was out of GMI " + "credits. Sum over TCC instances."}, + {"TCP_UTCL1_TRANSLATION_HIT_sum", + "", + "", + "reduce(TCP_UTCL1_TRANSLATION_HIT,sum)", + "Total utcl1 translation hits Sum over TCP instances."}, + {"TCC_CYCLE_sum", + "", + "", + "reduce(TCC_CYCLE,sum)", + "Number of cycles. Not windowable. Sum over TCC instances."}, + {"TCC_EA0_WRREQ_DRAM_CREDIT_STALL_sum", + "", + "", + "reduce(TCC_EA0_WRREQ_DRAM_CREDIT_STALL,sum)", + "Number of cycles a EA write request was stalled because the interface was out of DRAM " + "credits. Sum over TCC instances."}, + {"TCP_TCC_RW_ATOMIC_REQ_sum", + "", + "", + "reduce(TCP_TCC_RW_ATOMIC_REQ,sum)", + "Total atomic requests with RW mtype from this TCP to all TCCs. Sum over TCP instances."}, + {"TCP_TCC_RW_READ_REQ_sum", + "", + "", + "reduce(TCP_TCC_RW_READ_REQ,sum)", + "Total write requests with RW mtype from this TCP to all TCCs. Sum over TCP instances."}, + {"TCP_UTCL1_TRANSLATION_MISS_sum", + "", + "", + "reduce(TCP_UTCL1_TRANSLATION_MISS,sum)", + "Total utcl1 translation misses Sum over TCP instances."}, + {"TA_TOTAL_WAVEFRONTS_sum", + "", + "", + "reduce(TA_TOTAL_WAVEFRONTS,sum)", + "Total number of wavefronts processed by TA. Sum over TA instances."}, + {"TCP_TOTAL_ATOMIC_WITHOUT_RET_sum", + "", + "", + "reduce(TCP_TOTAL_ATOMIC_WITHOUT_RET,sum)", + "Total number of atomic without return pixels/buffers from TA Sum over TCP instances."}, + {"TCC_NORMAL_WRITEBACK_sum", + "", + "", + "reduce(TCC_NORMAL_WRITEBACK,sum)", + "Number of writebacks due to requests that are not writeback requests. Sum over TCC " + "instances."}}}}; diff --git a/source/share/rocprofiler-sdk/counter_defs.yaml b/source/share/rocprofiler-sdk/counter_defs.yaml index 82243e87c6..4072c46437 100644 --- a/source/share/rocprofiler-sdk/counter_defs.yaml +++ b/source/share/rocprofiler-sdk/counter_defs.yaml @@ -42,6 +42,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -54,6 +55,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -66,6 +68,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -78,6 +81,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -90,6 +94,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -102,6 +107,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -114,6 +120,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -126,6 +133,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -138,6 +146,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -150,6 +159,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -162,6 +172,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -310,6 +321,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -322,6 +334,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -334,6 +347,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -346,6 +360,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -358,6 +373,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -370,6 +386,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -382,6 +399,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1171,6 +1189,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1183,6 +1202,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1200,6 +1220,7 @@ rocprofiler-sdk: - gfx1031 - gfx1032 - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1217,6 +1238,7 @@ rocprofiler-sdk: - gfx1031 - gfx1032 - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1286,6 +1308,7 @@ rocprofiler-sdk: - gfx1031 - gfx1032 - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1303,6 +1326,7 @@ rocprofiler-sdk: - gfx1031 - gfx1032 - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1315,6 +1339,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1327,6 +1352,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1769,6 +1795,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1782,6 +1809,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1795,6 +1823,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1809,6 +1838,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1821,6 +1851,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1833,6 +1864,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1845,6 +1877,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1857,6 +1890,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1869,6 +1903,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1881,6 +1916,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1893,6 +1929,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1905,6 +1942,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1917,6 +1955,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1929,6 +1968,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1941,6 +1981,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1953,6 +1994,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1967,6 +2009,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -1993,6 +2036,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2509,6 +2553,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2532,6 +2577,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2544,6 +2590,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2556,6 +2603,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2568,6 +2616,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2580,6 +2629,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2592,6 +2642,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2604,6 +2655,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2616,6 +2668,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2628,6 +2681,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2640,6 +2694,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2663,6 +2718,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2686,6 +2742,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2698,6 +2755,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2710,6 +2768,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2773,6 +2832,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2785,6 +2845,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2797,6 +2858,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2809,6 +2871,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2821,6 +2884,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2833,6 +2897,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2859,6 +2924,7 @@ rocprofiler-sdk: - gfx1200 - gfx1201 - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -2875,6 +2941,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 185 + - architectures: + - gfx908 + block: SQ + event: 158 - architectures: - gfx940 - gfx941 @@ -2895,6 +2965,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 96 + - architectures: + - gfx908 + block: SQ + event: 69 - architectures: - gfx940 - gfx941 @@ -2917,6 +2991,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 101 + - architectures: + - gfx908 + block: SQ + event: 74 - architectures: - gfx940 - gfx941 @@ -2938,6 +3016,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 103 + - architectures: + - gfx908 + block: SQ + event: 76 - architectures: - gfx940 - gfx941 @@ -2959,6 +3041,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 98 + - architectures: + - gfx908 + block: SQ + event: 71 - architectures: - gfx940 - gfx941 @@ -2980,6 +3066,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 102 + - architectures: + - gfx908 + block: SQ + event: 75 - architectures: - gfx940 - gfx941 @@ -3002,6 +3092,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 100 + - architectures: + - gfx908 + block: SQ + event: 73 - architectures: - gfx940 - gfx941 @@ -3054,6 +3148,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 97 + - architectures: + - gfx908 + block: SQ + event: 70 - architectures: - gfx940 - gfx941 @@ -3071,6 +3169,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -3097,6 +3196,7 @@ rocprofiler-sdk: - gfx1200 - gfx1201 - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -3109,6 +3209,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -3123,6 +3224,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 115 + - architectures: + - gfx908 + block: SQ + event: 88 - architectures: - gfx940 - gfx941 @@ -3142,6 +3247,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 116 + - architectures: + - gfx908 + block: SQ + event: 89 - architectures: - gfx940 - gfx941 @@ -3160,6 +3269,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -3176,6 +3286,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 64 + - architectures: + - gfx908 + block: SQ + event: 39 - architectures: - gfx940 - gfx941 @@ -3196,6 +3310,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 63 + - architectures: + - gfx908 + block: SQ + event: 38 - architectures: - gfx940 - gfx941 @@ -3379,6 +3497,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 52 + - architectures: + - gfx908 + block: SQ + event: 27 - architectures: - gfx940 - gfx941 @@ -3445,6 +3567,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 65 + - architectures: + - gfx908 + block: SQ + event: 40 - architectures: - gfx940 - gfx941 @@ -3513,6 +3639,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 188 + - architectures: + - gfx908 + block: SQ + event: 161 - architectures: - gfx940 - gfx941 @@ -4039,6 +4169,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 55 + - architectures: + - gfx908 + block: SQ + event: 30 - architectures: - gfx940 - gfx941 @@ -4118,6 +4252,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 66 + - architectures: + - gfx908 + block: SQ + event: 41 - architectures: - gfx940 - gfx941 @@ -4243,6 +4381,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 111 + - architectures: + - gfx908 + block: SQ + event: 84 - architectures: - gfx940 - gfx941 @@ -4289,6 +4431,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 105 + - architectures: + - gfx908 + block: SQ + event: 78 - architectures: - gfx940 - gfx941 @@ -4308,6 +4454,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 104 + - architectures: + - gfx908 + block: SQ + event: 77 - architectures: - gfx940 - gfx941 @@ -4358,6 +4508,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 69 + - architectures: + - gfx908 + block: SQ + event: 44 - architectures: - gfx940 - gfx941 @@ -4392,6 +4546,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 68 + - architectures: + - gfx908 + block: SQ + event: 43 - architectures: - gfx940 - gfx941 @@ -4411,6 +4569,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 67 + - architectures: + - gfx908 + block: SQ + event: 42 - architectures: - gfx940 - gfx941 @@ -4428,6 +4590,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4443,6 +4606,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 122 + - architectures: + - gfx908 + block: SQ + event: 95 - architectures: - gfx940 - gfx941 @@ -4462,6 +4629,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 125 + - architectures: + - gfx908 + block: SQ + event: 98 - architectures: - gfx940 - gfx941 @@ -4510,6 +4681,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 126 + - architectures: + - gfx908 + block: SQ + event: 99 - architectures: - gfx940 - gfx941 @@ -4529,6 +4704,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 124 + - architectures: + - gfx908 + block: SQ + event: 97 - architectures: - gfx940 - gfx941 @@ -4548,6 +4727,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 123 + - architectures: + - gfx908 + block: SQ + event: 96 - architectures: - gfx940 - gfx941 @@ -4573,6 +4756,7 @@ rocprofiler-sdk: event: 7 - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4643,6 +4827,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 85 + - architectures: + - gfx908 + block: SQ + event: 58 - architectures: - gfx940 - gfx941 @@ -4682,6 +4870,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 88 + - architectures: + - gfx908 + block: SQ + event: 61 - architectures: - gfx940 - gfx941 @@ -4838,6 +5030,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4855,6 +5048,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4872,6 +5066,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4889,6 +5084,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4906,6 +5102,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -4924,6 +5121,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 186 + - architectures: + - gfx908 + block: SQ + event: 159 - architectures: - gfx940 - gfx941 @@ -4946,6 +5147,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 187 + - architectures: + - gfx908 + block: SQ + event: 160 - architectures: - gfx940 - gfx941 @@ -5004,6 +5209,10 @@ rocprofiler-sdk: - gfx90a block: SQ event: 74 + - architectures: + - gfx908 + block: SQ + event: 47 - architectures: - gfx940 - gfx941 @@ -5256,6 +5465,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 54 - architectures: @@ -5271,6 +5481,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5282,6 +5493,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 55 - architectures: @@ -5297,6 +5509,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5308,6 +5521,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 47 - architectures: @@ -5323,6 +5537,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5334,6 +5549,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 52 - architectures: @@ -5349,6 +5565,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5360,6 +5577,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 53 - architectures: @@ -5375,6 +5593,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5413,6 +5632,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 45 - architectures: @@ -5428,6 +5648,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5466,6 +5687,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 49 - architectures: @@ -5481,6 +5703,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5492,6 +5715,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 44 - architectures: @@ -5507,6 +5731,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5518,6 +5743,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 46 - architectures: @@ -5533,6 +5759,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5625,6 +5852,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 56 - architectures: @@ -5640,6 +5868,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5651,6 +5880,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 103 - architectures: @@ -5666,6 +5896,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5757,6 +5988,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 100 - architectures: @@ -5772,6 +6004,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5848,6 +6081,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5859,6 +6093,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TA event: 32 - architectures: @@ -5874,6 +6109,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5956,6 +6192,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -5979,6 +6216,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6002,6 +6240,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6017,6 +6256,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6028,6 +6268,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6043,6 +6284,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6054,6 +6296,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6069,6 +6312,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6080,6 +6324,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6092,6 +6337,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6103,6 +6349,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6114,6 +6361,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6129,6 +6377,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6140,6 +6389,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6152,6 +6402,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6162,6 +6413,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6177,6 +6429,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6192,6 +6445,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6203,6 +6457,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6250,6 +6505,10 @@ rocprofiler-sdk: description: Number of TCC/EA read requests (either 32-byte or 64-byte) destined for DRAM (MC). properties: [] definitions: + - architectures: + - gfx908 + block: TCC + event: 43 - architectures: - gfx940 - gfx941 @@ -6266,6 +6525,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6281,6 +6541,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6291,6 +6552,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6302,6 +6564,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6317,6 +6580,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6328,6 +6592,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6343,6 +6608,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6354,6 +6620,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6369,6 +6636,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6389,6 +6657,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6404,6 +6673,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6454,6 +6724,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6468,6 +6739,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6483,6 +6755,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6493,6 +6766,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6503,6 +6777,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6518,6 +6793,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6528,6 +6804,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6543,6 +6820,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6554,6 +6832,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6569,6 +6848,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6593,6 +6873,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6607,6 +6888,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6630,6 +6912,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -6646,6 +6929,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7172,6 +7456,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx90a - gfx940 - gfx941 @@ -7188,6 +7473,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7199,6 +7485,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7214,6 +7501,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7224,6 +7512,7 @@ rocprofiler-sdk: properties: [] definitions: - architectures: + - gfx908 - gfx90a - gfx940 - gfx941 @@ -7240,6 +7529,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7251,6 +7541,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7266,6 +7557,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7281,6 +7573,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7306,6 +7599,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7317,6 +7611,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7333,6 +7628,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7345,6 +7641,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7362,6 +7659,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7373,6 +7671,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7388,6 +7687,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7399,6 +7699,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7414,6 +7715,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7427,6 +7729,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7442,6 +7745,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7454,6 +7758,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7470,6 +7775,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7481,6 +7787,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7496,6 +7803,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7507,6 +7815,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7523,6 +7832,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7539,6 +7849,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7550,6 +7861,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7783,6 +8095,70 @@ rocprofiler-sdk: - gfx950 block: TCC event: 116 + - name: TCC_CLIENT184_REQ + description: 'Number of cycles client184 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 312 + - name: TCC_CLIENT185_REQ + description: 'Number of cycles client185 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 313 + - name: TCC_CLIENT186_REQ + description: 'Number of cycles client186 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 314 + - name: TCC_CLIENT187_REQ + description: 'Number of cycles client187 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 315 + - name: TCC_CLIENT188_REQ + description: 'Number of cycles client188 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 316 + - name: TCC_CLIENT189_REQ + description: 'Number of cycles client189 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 317 + - name: TCC_CLIENT190_REQ + description: 'Number of cycles client190 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 318 + - name: TCC_CLIENT191_REQ + description: 'Number of cycles client191 sent a request to this TCC.' + properties: [] + definitions: + - architectures: + - gfx908 + block: TCC + event: 319 - name: TCC_EA0_RDREQ_64B_sum description: Number of 64-byte TCC/EA read requests. Sum over TCP instances. properties: [] @@ -7954,6 +8330,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 13 - architectures: @@ -7969,6 +8346,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7980,6 +8358,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -7992,6 +8371,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8003,6 +8383,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8015,6 +8396,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8026,6 +8408,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 22 - architectures: @@ -8041,6 +8424,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8052,6 +8436,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 11 - architectures: @@ -8067,6 +8452,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8078,6 +8464,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 27 - architectures: @@ -8093,6 +8480,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8104,6 +8492,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 72 - architectures: @@ -8122,6 +8511,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8133,6 +8523,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 71 - architectures: @@ -8151,6 +8542,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8162,6 +8554,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 83 - architectures: @@ -8180,6 +8573,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8191,6 +8585,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 81 - architectures: @@ -8209,6 +8604,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8220,6 +8616,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 82 - architectures: @@ -8238,6 +8635,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8249,6 +8647,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 77 - architectures: @@ -8267,6 +8666,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8278,6 +8678,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 75 - architectures: @@ -8296,6 +8697,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8307,6 +8709,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 76 - architectures: @@ -8325,6 +8728,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8336,6 +8740,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 69 - architectures: @@ -8354,6 +8759,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 66 - architectures: @@ -8366,6 +8772,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx950 expression: reduce(TCP_TCC_READ_REQ_LATENCY,sum) - name: TCP_TCC_READ_REQ_sum @@ -8374,6 +8781,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8385,6 +8793,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 87 - architectures: @@ -8403,6 +8812,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8414,6 +8824,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 85 - architectures: @@ -8432,6 +8843,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8443,6 +8855,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 86 - architectures: @@ -8461,6 +8874,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8472,6 +8886,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 80 - architectures: @@ -8490,6 +8905,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8501,6 +8917,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 78 - architectures: @@ -8519,6 +8936,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8530,6 +8948,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 79 - architectures: @@ -8548,6 +8967,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8559,6 +8979,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 70 - architectures: @@ -8577,6 +8998,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 67 - architectures: @@ -8589,6 +9011,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx950 expression: reduce(TCP_TCC_WRITE_REQ_LATENCY,sum) - name: TCP_TCC_WRITE_REQ_sum @@ -8597,6 +9020,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8609,6 +9033,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 65 - architectures: @@ -8622,6 +9047,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx950 expression: reduce(TCP_TCP_LATENCY,sum) - name: TCP_TCP_TA_DATA_STALL_CYCLES @@ -8676,6 +9102,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8688,6 +9115,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8699,6 +9127,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8711,6 +9140,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8722,6 +9152,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 29 - architectures: @@ -8738,6 +9169,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8749,6 +9181,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 39 - architectures: @@ -8764,6 +9197,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8775,6 +9209,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 38 - architectures: @@ -8790,6 +9225,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8801,6 +9237,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8816,6 +9253,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8828,6 +9266,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 30 - architectures: @@ -8844,6 +9283,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8855,6 +9295,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 32 - architectures: @@ -8871,6 +9312,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 45 - architectures: @@ -8890,6 +9332,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8902,6 +9345,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8913,6 +9357,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 50 - architectures: @@ -8931,6 +9376,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8942,6 +9388,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 47 - architectures: @@ -8960,6 +9407,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -8971,6 +9419,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 49 - architectures: @@ -8989,6 +9438,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9000,6 +9450,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 48 - architectures: @@ -9018,6 +9469,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9029,6 +9481,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 28 - architectures: @@ -9044,6 +9497,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9055,6 +9509,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TCP event: 12 - architectures: @@ -9070,6 +9525,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9396,6 +9852,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TD event: 26 - architectures: @@ -9411,6 +9868,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9422,6 +9880,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TD event: 32 - architectures: @@ -9437,6 +9896,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9448,6 +9908,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TD event: 25 - architectures: @@ -9463,6 +9924,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9500,6 +9962,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TD event: 27 - architectures: @@ -9515,6 +9978,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9526,6 +9990,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 block: TD event: 15 - architectures: @@ -9541,6 +10006,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9552,6 +10018,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942 @@ -9564,6 +10031,7 @@ rocprofiler-sdk: definitions: - architectures: - gfx90a + - gfx908 - gfx940 - gfx941 - gfx942