From 9533c318bb183d371333296e3f9e08fe066db2b2 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 12 Sep 2023 15:27:07 -0400 Subject: [PATCH] libhsakmt: remove IOMMUv2 performance monitor support IOMMUv2 is removed from AMDGPU/KFD. Change-Id: Ib87f501c07d9de90e6b83b98f98daacd5913e98a Signed-off-by: James Zhu [ROCm/ROCR-Runtime commit: 274b5b51ca9905799fca9b4282c854dce3f3d082] --- projects/rocr-runtime/src/perfctr.c | 182 +------------------------- projects/rocr-runtime/src/pmc_table.c | 15 --- projects/rocr-runtime/src/pmc_table.h | 2 - 3 files changed, 2 insertions(+), 197 deletions(-) diff --git a/projects/rocr-runtime/src/perfctr.c b/projects/rocr-runtime/src/perfctr.c index 546dd2358b..8c78da3274 100644 --- a/projects/rocr-runtime/src/perfctr.c +++ b/projects/rocr-runtime/src/perfctr.c @@ -175,8 +175,6 @@ static void init_perf_shared_table(void) /* write the perf content */ shared_table->magic4cc = HSA_PERF_MAGIC4CC; - shared_table->iommu_slots_left = - pmc_table_get_max_concurrent(PERFCOUNTER_BLOCKID__IOMMUV2); sem_post(sem); } @@ -294,9 +292,6 @@ static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid) case PERFCOUNTER_BLOCKID__WD: *uuid = HSA_PROFILEBLOCK_AMD_WD; break; - case PERFCOUNTER_BLOCKID__IOMMUV2: - *uuid = HSA_PROFILEBLOCK_AMD_IOMMUV2; - break; default: /* If we reach this point, it's a bug */ rc = -1; @@ -321,144 +316,6 @@ static HSAuint32 get_block_concurrent_limit(uint32_t node_id, return 0; } -static HSAKMT_STATUS update_block_slots(enum perf_trace_action action, - uint32_t block_id, uint32_t num_slots) -{ - uint32_t *slots_left; - HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; - - if (shmem_fd <= 0) - return HSAKMT_STATUS_UNAVAILABLE; - if (sem == SEM_FAILED) - return HSAKMT_STATUS_UNAVAILABLE; - - sem_wait(sem); - - if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) - slots_left = &shared_table->iommu_slots_left; - else { - ret = HSAKMT_STATUS_UNAVAILABLE; - goto out; - } - - switch (action) { - case PERF_TRACE_ACTION__ACQUIRE: - if (*slots_left >= num_slots) - *slots_left -= num_slots; - else - ret = HSAKMT_STATUS_UNAVAILABLE; - break; - case PERF_TRACE_ACTION__RELEASE: - if ((*slots_left + num_slots) <= - pmc_table_get_max_concurrent(block_id)) - *slots_left += num_slots; - else - ret = HSAKMT_STATUS_ERROR; - break; - default: - ret = HSAKMT_STATUS_INVALID_PARAMETER; - break; - } - -out: - sem_post(sem); - - return ret; -} - -static unsigned int get_perf_event_type(enum perf_block_id block_id) -{ - FILE *file = NULL; - unsigned int type = 0; - - if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) { - /* Starting from kernel 4.12, amd_iommu_0 is used */ - file = fopen("/sys/bus/event_source/devices/amd_iommu_0/type", - "r"); - if (!file) - file = fopen(/* kernel 4.11 and older */ - "/sys/bus/event_source/devices/amd_iommu/type", - "r"); - } - - if (!file) - return 0; - - if (fscanf(file, "%d", &type) != 1) - type = 0; - fclose(file); - - return type; -} - -/* close_perf_event_fd - Close all FDs opened for this block. - * When RT acquires the trace access, RT has no ideas about each - * individual FD opened for this block. We should treat the whole - * block as one and close all of them. - */ -static void close_perf_event_fd(struct perf_trace_block *block) -{ - uint32_t i; - - if (!block || !block->perf_event_fd) - return; - - for (i = 0; i < block->num_counters; i++) - if (block->perf_event_fd[i] > 0) { - close(block->perf_event_fd[i]); - block->perf_event_fd[i] = 0; - } -} - -/* open_perf_event_fd - Open FDs required for this block. - * If one of them fails, we should close all FDs that have been - * opened because RT has no ideas about those FDs successfully - * opened and it won't send anything to close them. - */ -static HSAKMT_STATUS open_perf_event_fd(struct perf_trace_block *block) -{ - struct perf_event_attr attr; - uint32_t i; - HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; - - if (!block || !block->perf_event_fd) - return HSAKMT_STATUS_INVALID_HANDLE; - - if (getuid()) { - pr_err("Must be root to open perf_event.\n"); - return HSAKMT_STATUS_ERROR; - } - - memset(&attr, 0, sizeof(struct perf_event_attr)); - attr.type = get_perf_event_type(block->block_id); - if (!attr.type) - return HSAKMT_STATUS_ERROR; - - for (i = 0; i < block->num_counters; i++) { - attr.size = sizeof(struct perf_event_attr); - attr.config = block->counter_id[i]; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - attr.disabled = 1; - attr.inherit = 1; - - /* We are profiling system wide, not per cpu, so no threads, - * no groups -> pid=-1 and group_fd=-1. cpu = 0 - * flags=PERF_FLAG_FD_NO_GROUP - */ - block->perf_event_fd[i] = syscall(__NR_perf_event_open, &attr, - -1, 0, -1, PERF_FLAG_FD_NO_GROUP); - - if (block->perf_event_fd[i] < 0) { - ret = HSAKMT_STATUS_ERROR; - close_perf_event_fd(block); - break; - } - } - - return ret; -} - static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block, uint32_t cmd) { @@ -556,10 +413,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId, block_prop->Counters[i].CounterSizeInBits = block.counter_size_in_bits; block_prop->Counters[i].CounterMask = block.counter_mask; block_prop->Counters[i].Flags.ui32.Global = 1; - if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) - block_prop->Counters[i].Type = HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE; - else - block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE; + block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE; } block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters]; @@ -750,8 +604,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId, { struct perf_trace *trace; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; - uint32_t gpu_id, i; - int j; + uint32_t gpu_id; pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId); @@ -766,29 +619,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId, if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) return HSAKMT_STATUS_INVALID_NODE_UNIT; - for (i = 0; i < trace->num_blocks; i++) { - ret = update_block_slots(PERF_TRACE_ACTION__ACQUIRE, - trace->blocks[i].block_id, - trace->blocks[i].num_counters); - if (ret != HSAKMT_STATUS_SUCCESS) - goto out; - ret = open_perf_event_fd(&trace->blocks[i]); - if (ret != HSAKMT_STATUS_SUCCESS) { - i++; /* to release slots just reserved */ - goto out; - } - } - -out: - if (ret != HSAKMT_STATUS_SUCCESS) { - for (j = i-1; j >= 0; j--) { - update_block_slots(PERF_TRACE_ACTION__RELEASE, - trace->blocks[j].block_id, - trace->blocks[j].num_counters); - close_perf_event_fd(&trace->blocks[j]); - } - } - return ret; } @@ -796,7 +626,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId, HSATraceId TraceId) { struct perf_trace *trace; - uint32_t i; pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId); @@ -808,13 +637,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId, if (trace->magic4cc != HSA_PERF_MAGIC4CC) return HSAKMT_STATUS_INVALID_HANDLE; - for (i = 0; i < trace->num_blocks; i++) { - update_block_slots(PERF_TRACE_ACTION__RELEASE, - trace->blocks[i].block_id, - trace->blocks[i].num_counters); - close_perf_event_fd(&trace->blocks[i]); - } - return HSAKMT_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/src/pmc_table.c b/projects/rocr-runtime/src/pmc_table.c index 7b1f9c5eba..d2dbab224b 100644 --- a/projects/rocr-runtime/src/pmc_table.c +++ b/projects/rocr-runtime/src/pmc_table.c @@ -1963,16 +1963,6 @@ static struct perf_counter_block navi_blocks[PERFCOUNTER_BLOCKID__MAX] = { */ static struct perf_counter_block iommu_block; -uint32_t pmc_table_get_max_concurrent(int block_id) -{ - switch (block_id) { - case PERFCOUNTER_BLOCKID__IOMMUV2: - return iommu_block.num_of_slots; - default: - return 0; - } -} - static HSAKMT_STATUS alloc_pmc_blocks_iommu(void) { DIR *dir; @@ -2095,11 +2085,6 @@ HSAKMT_STATUS get_block_properties(uint32_t node_id, block_id < PERFCOUNTER_BLOCKID__FIRST) return HSAKMT_STATUS_INVALID_PARAMETER; - if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) { - *block = iommu_block; - return HSAKMT_STATUS_SUCCESS; - } - /* Major GFX Version */ switch (gfxv >> 16) { case 7: diff --git a/projects/rocr-runtime/src/pmc_table.h b/projects/rocr-runtime/src/pmc_table.h index 7e48cbe801..b734183a54 100644 --- a/projects/rocr-runtime/src/pmc_table.h +++ b/projects/rocr-runtime/src/pmc_table.h @@ -56,7 +56,6 @@ enum perf_block_id { PERFCOUNTER_BLOCKID__VGT, PERFCOUNTER_BLOCKID__WD, /* privileged */ - PERFCOUNTER_BLOCKID__IOMMUV2, PERFCOUNTER_BLOCKID__MAX }; @@ -70,7 +69,6 @@ struct perf_counter_block { HSAKMT_STATUS alloc_pmc_blocks(void); void free_pmc_blocks(void); -uint32_t pmc_table_get_max_concurrent(int block_id); HSAKMT_STATUS get_block_properties(uint32_t node_id, enum perf_block_id block_id,