libhsakmt: remove IOMMUv2 performance monitor support

IOMMUv2 is removed from AMDGPU/KFD.

Change-Id: Ib87f501c07d9de90e6b83b98f98daacd5913e98a
Signed-off-by: James Zhu <James.Zhu@amd.com>


[ROCm/ROCR-Runtime commit: 274b5b51ca]
Этот коммит содержится в:
James Zhu
2023-09-12 15:27:07 -04:00
родитель 351cbe9dc7
Коммит 9533c318bb
3 изменённых файлов: 2 добавлений и 197 удалений
+2 -180
Просмотреть файл
@@ -175,8 +175,6 @@ static void init_perf_shared_table(void)
/* write the perf content */
shared_table->magic4cc = HSA_PERF_MAGIC4CC;
shared_table->iommu_slots_left =
pmc_table_get_max_concurrent(PERFCOUNTER_BLOCKID__IOMMUV2);
sem_post(sem);
}
@@ -294,9 +292,6 @@ static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
case PERFCOUNTER_BLOCKID__WD:
*uuid = HSA_PROFILEBLOCK_AMD_WD;
break;
case PERFCOUNTER_BLOCKID__IOMMUV2:
*uuid = HSA_PROFILEBLOCK_AMD_IOMMUV2;
break;
default:
/* If we reach this point, it's a bug */
rc = -1;
@@ -321,144 +316,6 @@ static HSAuint32 get_block_concurrent_limit(uint32_t node_id,
return 0;
}
/* update_block_slots - Take slots from, or give slots back to, the
 * per-block slot counter kept in shared_table.
 *
 * @action:    PERF_TRACE_ACTION__ACQUIRE subtracts @num_slots from the
 *             counter; PERF_TRACE_ACTION__RELEASE adds them back.
 * @block_id:  only PERFCOUNTER_BLOCKID__IOMMUV2 has a counter here.
 * @num_slots: number of slots to move.
 *
 * The counter is only read and written between sem_wait() and sem_post().
 *
 * Return:
 *   HSAKMT_STATUS_SUCCESS           - counter updated.
 *   HSAKMT_STATUS_UNAVAILABLE       - shmem_fd/sem are not set up, the block
 *                                     has no counter, or fewer than
 *                                     @num_slots are left to acquire.
 *   HSAKMT_STATUS_ERROR             - releasing @num_slots would push the
 *                                     counter above the block's maximum.
 *   HSAKMT_STATUS_INVALID_PARAMETER - unknown @action.
 */
static HSAKMT_STATUS update_block_slots(enum perf_trace_action action,
					uint32_t block_id, uint32_t num_slots)
{
	uint32_t *slots_left;
	uint32_t max_slots;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	if (shmem_fd <= 0)
		return HSAKMT_STATUS_UNAVAILABLE;
	if (sem == SEM_FAILED)
		return HSAKMT_STATUS_UNAVAILABLE;

	sem_wait(sem);

	if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
		slots_left = &shared_table->iommu_slots_left;
	else {
		ret = HSAKMT_STATUS_UNAVAILABLE;
		goto out;
	}

	switch (action) {
	case PERF_TRACE_ACTION__ACQUIRE:
		if (*slots_left >= num_slots)
			*slots_left -= num_slots;
		else
			ret = HSAKMT_STATUS_UNAVAILABLE;
		break;
	case PERF_TRACE_ACTION__RELEASE:
		max_slots = pmc_table_get_max_concurrent(block_id);
		/* Compare by subtraction: "*slots_left + num_slots" is
		 * evaluated in uint32_t and could wrap around, letting an
		 * oversized release slip past the limit check.
		 */
		if (*slots_left <= max_slots &&
		    num_slots <= max_slots - *slots_left)
			*slots_left += num_slots;
		else
			ret = HSAKMT_STATUS_ERROR;
		break;
	default:
		ret = HSAKMT_STATUS_INVALID_PARAMETER;
		break;
	}

out:
	sem_post(sem);

	return ret;
}
/* get_perf_event_type - Read the perf event source type for a block.
 *
 * Only PERFCOUNTER_BLOCKID__IOMMUV2 is handled: the type number is read
 * from the amd_iommu_0 sysfs "type" file, falling back to the amd_iommu
 * path used by older kernels.
 *
 * Return: the type number, or 0 when the block is not handled, neither
 * file can be opened, or the file content cannot be parsed.
 */
static unsigned int get_perf_event_type(enum perf_block_id block_id)
{
	FILE *file = NULL;
	unsigned int type = 0;

	if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) {
		/* Starting from kernel 4.12, amd_iommu_0 is used */
		file = fopen("/sys/bus/event_source/devices/amd_iommu_0/type",
			     "r");
		if (!file)
			file = fopen(/* kernel 4.11 and older */
				"/sys/bus/event_source/devices/amd_iommu/type",
				"r");
	}

	if (!file)
		return 0;

	/* "type" is unsigned int, so the conversion must be %u, not %d */
	if (fscanf(file, "%u", &type) != 1)
		type = 0;

	fclose(file);

	return type;
}
/* close_perf_event_fd - Close every FD that is open for this block.
 * When RT acquires the trace access it has no idea about the individual
 * FDs opened for the block, so the block is handled as a whole: each
 * entry holding a positive FD is closed and reset to 0.
 * Does nothing when the block or its FD array is missing.
 */
static void close_perf_event_fd(struct perf_trace_block *block)
{
	uint32_t idx;

	if (!block)
		return;
	if (!block->perf_event_fd)
		return;

	for (idx = 0; idx < block->num_counters; idx++) {
		/* entries <= 0 do not hold an FD opened by us */
		if (block->perf_event_fd[idx] <= 0)
			continue;

		close(block->perf_event_fd[idx]);
		block->perf_event_fd[idx] = 0;
	}
}
/* open_perf_event_fd - Open one perf event FD per counter of this block.
 * If any open fails, all FDs already opened for the block are closed
 * again, because RT does not know which ones succeeded and will not ask
 * for them to be closed.
 *
 * Return: HSAKMT_STATUS_SUCCESS when every counter got an FD,
 * HSAKMT_STATUS_INVALID_HANDLE when the block or its FD array is missing,
 * HSAKMT_STATUS_ERROR when the caller is not root, the event type cannot
 * be determined, or perf_event_open fails for any counter.
 */
static HSAKMT_STATUS open_perf_event_fd(struct perf_trace_block *block)
{
	struct perf_event_attr attr;
	uint32_t idx;

	if (!block || !block->perf_event_fd)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (getuid() != 0) {
		pr_err("Must be root to open perf_event.\n");
		return HSAKMT_STATUS_ERROR;
	}

	memset(&attr, 0, sizeof(attr));
	attr.type = get_perf_event_type(block->block_id);
	if (attr.type == 0)
		return HSAKMT_STATUS_ERROR;

	for (idx = 0; idx < block->num_counters; idx++) {
		attr.size = sizeof(attr);
		attr.config = block->counter_id[idx];
		attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				   PERF_FORMAT_TOTAL_TIME_RUNNING;
		attr.disabled = 1;
		attr.inherit = 1;

		/* System-wide profiling, not per cpu: no threads and no
		 * groups, hence pid=-1, cpu=0, group_fd=-1 and
		 * flags=PERF_FLAG_FD_NO_GROUP.
		 */
		block->perf_event_fd[idx] = syscall(__NR_perf_event_open,
						    &attr, -1, 0, -1,
						    PERF_FLAG_FD_NO_GROUP);
		if (block->perf_event_fd[idx] >= 0)
			continue;

		/* roll back the FDs opened so far for this block */
		close_perf_event_fd(block);
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block,
uint32_t cmd)
{
@@ -556,10 +413,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
block_prop->Counters[i].CounterSizeInBits = block.counter_size_in_bits;
block_prop->Counters[i].CounterMask = block.counter_mask;
block_prop->Counters[i].Flags.ui32.Global = 1;
if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
block_prop->Counters[i].Type = HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE;
else
block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
}
block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
@@ -750,8 +604,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
{
struct perf_trace *trace;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
uint32_t gpu_id, i;
int j;
uint32_t gpu_id;
pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);
@@ -766,29 +619,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
for (i = 0; i < trace->num_blocks; i++) {
ret = update_block_slots(PERF_TRACE_ACTION__ACQUIRE,
trace->blocks[i].block_id,
trace->blocks[i].num_counters);
if (ret != HSAKMT_STATUS_SUCCESS)
goto out;
ret = open_perf_event_fd(&trace->blocks[i]);
if (ret != HSAKMT_STATUS_SUCCESS) {
i++; /* to release slots just reserved */
goto out;
}
}
out:
if (ret != HSAKMT_STATUS_SUCCESS) {
for (j = i-1; j >= 0; j--) {
update_block_slots(PERF_TRACE_ACTION__RELEASE,
trace->blocks[j].block_id,
trace->blocks[j].num_counters);
close_perf_event_fd(&trace->blocks[j]);
}
}
return ret;
}
@@ -796,7 +626,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId,
HSATraceId TraceId)
{
struct perf_trace *trace;
uint32_t i;
pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);
@@ -808,13 +637,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId,
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
return HSAKMT_STATUS_INVALID_HANDLE;
for (i = 0; i < trace->num_blocks; i++) {
update_block_slots(PERF_TRACE_ACTION__RELEASE,
trace->blocks[i].block_id,
trace->blocks[i].num_counters);
close_perf_event_fd(&trace->blocks[i]);
}
return HSAKMT_STATUS_SUCCESS;
}
-15
Просмотреть файл
@@ -1963,16 +1963,6 @@ static struct perf_counter_block navi_blocks[PERFCOUNTER_BLOCKID__MAX] = {
*/
static struct perf_counter_block iommu_block;
/* pmc_table_get_max_concurrent - Slot count of a block.
 * Returns iommu_block.num_of_slots for PERFCOUNTER_BLOCKID__IOMMUV2 and
 * 0 for every other block id.
 */
uint32_t pmc_table_get_max_concurrent(int block_id)
{
	if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
		return iommu_block.num_of_slots;

	return 0;
}
static HSAKMT_STATUS alloc_pmc_blocks_iommu(void)
{
DIR *dir;
@@ -2095,11 +2085,6 @@ HSAKMT_STATUS get_block_properties(uint32_t node_id,
block_id < PERFCOUNTER_BLOCKID__FIRST)
return HSAKMT_STATUS_INVALID_PARAMETER;
if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) {
*block = iommu_block;
return HSAKMT_STATUS_SUCCESS;
}
/* Major GFX Version */
switch (gfxv >> 16) {
case 7:
-2
Просмотреть файл
@@ -56,7 +56,6 @@ enum perf_block_id {
PERFCOUNTER_BLOCKID__VGT,
PERFCOUNTER_BLOCKID__WD,
/* privileged */
PERFCOUNTER_BLOCKID__IOMMUV2,
PERFCOUNTER_BLOCKID__MAX
};
@@ -70,7 +69,6 @@ struct perf_counter_block {
HSAKMT_STATUS alloc_pmc_blocks(void);
void free_pmc_blocks(void);
uint32_t pmc_table_get_max_concurrent(int block_id);
HSAKMT_STATUS get_block_properties(uint32_t node_id,
enum perf_block_id block_id,