Add IOMMU to performance counter table
Add IOMMUv2 to the blocks returned by hsaKmtPmcGetCounterProperties(). IOMMU information is read from sysfs.

Change-Id: I3a1c6f902f947913570a78700fc0ffc444e1dd72
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Αυτή η υποβολή περιλαμβάνεται σε:
+63
-40
@@ -56,6 +56,9 @@ HSAKMT_STATUS init_counter_props(unsigned int NumNodes)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
counter_props_count = NumNodes;
|
||||
|
||||
alloc_pmc_blocks();
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -73,18 +76,24 @@ void destroy_counter_props(void)
|
||||
}
|
||||
|
||||
free(counter_props);
|
||||
free_pmc_blocks();
|
||||
}
|
||||
|
||||
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
switch (block_id) {
|
||||
case PERFCOUNTER_BLOCKID__SQ:
|
||||
*uuid = HSA_PROFILEBLOCK_AMD_SQ;
|
||||
break;
|
||||
case PERFCOUNTER_BLOCKID__IOMMUV2:
|
||||
*uuid = HSA_PROFILEBLOCK_AMD_IOMMUV2;
|
||||
break;
|
||||
default:
|
||||
/* If we reach this point, it's a bug */
|
||||
rc = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
return rc;
|
||||
@@ -99,11 +108,12 @@ hsaKmtPmcGetCounterProperties(
|
||||
{
|
||||
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
|
||||
uint32_t gpu_id, i, block_id;
|
||||
uint16_t dev_id;
|
||||
uint32_t counter_props_size = 0;
|
||||
uint32_t total_counters = 0;
|
||||
uint32_t total_concurrent = 0;
|
||||
struct perf_counter_block block = {0};
|
||||
uint32_t total_blocks = 0;
|
||||
uint32_t entry;
|
||||
|
||||
if (counter_props == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
@@ -114,50 +124,63 @@ hsaKmtPmcGetCounterProperties(
|
||||
if (validate_nodeid(NodeId, &gpu_id) != 0)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
if (counter_props[NodeId] == NULL) {
|
||||
dev_id = get_device_id_by_node(NodeId);
|
||||
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
||||
rc = get_block_properties(dev_id, i, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS)
|
||||
return rc;
|
||||
total_concurrent += block.num_of_slots;
|
||||
total_counters += block.num_of_counters;
|
||||
if (counter_props[NodeId] != NULL) {
|
||||
*CounterProperties = counter_props[NodeId];
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
||||
rc = get_block_properties(NodeId, i, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS)
|
||||
return rc;
|
||||
total_concurrent += block.num_of_slots;
|
||||
total_counters += block.num_of_counters;
|
||||
/* If num_of_slots=0, this block doesn't exist */
|
||||
if (block.num_of_slots)
|
||||
total_blocks++;
|
||||
}
|
||||
|
||||
counter_props_size = sizeof(HsaCounterProperties) +
|
||||
sizeof(HsaCounterBlockProperties)*(total_blocks-1) +
|
||||
sizeof(HsaCounter)*(total_counters-1);
|
||||
|
||||
counter_props[NodeId] = malloc(counter_props_size);
|
||||
if (counter_props[NodeId] == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
counter_props[NodeId]->NumBlocks = total_blocks;
|
||||
counter_props[NodeId]->NumConcurrent = total_concurrent;
|
||||
|
||||
entry = 0;
|
||||
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
|
||||
rc = get_block_properties(NodeId, block_id, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS) {
|
||||
free(counter_props[NodeId]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
counter_props_size = sizeof(HsaCounterProperties) +
|
||||
sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
|
||||
sizeof(HsaCounter)*(total_counters-1);
|
||||
if (!block.num_of_slots) /* not a valid block */
|
||||
continue;
|
||||
|
||||
counter_props[NodeId] = malloc(counter_props_size);
|
||||
blockid2uuid(block_id,
|
||||
&counter_props[NodeId]->Blocks[entry].BlockId);
|
||||
counter_props[NodeId]->Blocks[entry].NumCounters =
|
||||
block.num_of_counters;
|
||||
counter_props[NodeId]->Blocks[entry].NumConcurrent =
|
||||
block.num_of_slots;
|
||||
|
||||
if (counter_props[NodeId] == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
counter_props[NodeId]->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
|
||||
counter_props[NodeId]->NumConcurrent = total_concurrent;
|
||||
|
||||
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
|
||||
{
|
||||
rc = get_block_properties(dev_id, block_id, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS) {
|
||||
free(counter_props[NodeId]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Filling the SQ block */
|
||||
blockid2uuid(block_id, &counter_props[NodeId]->Blocks[block_id].BlockId);
|
||||
counter_props[NodeId]->Blocks[block_id].NumCounters = block.num_of_counters;
|
||||
counter_props[NodeId]->Blocks[block_id].NumConcurrent = block.num_of_slots;
|
||||
|
||||
for (i = 0; i < block.num_of_counters; i++) {
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].BlockIndex = block_id;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
|
||||
}
|
||||
for (i = 0; i < block.num_of_counters; i++) {
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].BlockIndex = block_id;
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].CounterId = block.counter_ids[i];
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].CounterMask = block.counter_mask;
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].Flags.ui32.Global = 1;
|
||||
if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].Type = HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE;
|
||||
else
|
||||
counter_props[NodeId]->Blocks[entry].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
|
||||
}
|
||||
entry++;
|
||||
}
|
||||
|
||||
*CounterProperties = counter_props[NodeId];
|
||||
|
||||
+129
-4
@@ -23,6 +23,11 @@
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "libhsakmt.h"
|
||||
#include "pmc_table.h"
|
||||
|
||||
@@ -182,15 +187,135 @@ static struct perf_counter_block polaris_blocks[PERFCOUNTER_BLOCKID__MAX] = {
|
||||
},
|
||||
};
|
||||
|
||||
/* Current APUs only have one IOMMU. If NUMA is introduced to APUs, we'll need
|
||||
* to expand the struct here to an array.
|
||||
*/
|
||||
static struct perf_counter_block iommu_block;
|
||||
|
||||
static HSAKMT_STATUS
|
||||
alloc_pmc_blocks_iommu(void)
|
||||
{
|
||||
DIR *dir;
|
||||
struct dirent *dent;
|
||||
const char sysfs_amdiommu_event_path[] =
|
||||
"/sys/bus/event_source/devices/amd_iommu/events";
|
||||
/* Counter source in IOMMU's Counter Bank Addressing register is 8 bits,
|
||||
* so the biggest counter number/id possible is 0xff.
|
||||
*/
|
||||
const int max_counter_id = 0xff;
|
||||
char path[256];
|
||||
const int len = sizeof(path);
|
||||
FILE *file;
|
||||
int num;
|
||||
char counter_id[max_counter_id + 1];
|
||||
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
||||
uint32_t *ptr;
|
||||
struct perf_counter_block *block = &iommu_block;
|
||||
|
||||
memset(block, 0, sizeof(struct perf_counter_block));
|
||||
|
||||
dir = opendir(sysfs_amdiommu_event_path);
|
||||
if (!dir)
|
||||
goto out;
|
||||
|
||||
memset(counter_id, 0, max_counter_id + 1);
|
||||
while ((dent = readdir(dir))) {
|
||||
if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
|
||||
continue;
|
||||
if (snprintf(path, len, "%s/%s", sysfs_amdiommu_event_path,
|
||||
dent->d_name) >= len) {
|
||||
fprintf(stderr, "Increase path length.\n");
|
||||
ret = HSAKMT_STATUS_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
file = fopen(path, "r");
|
||||
if (!file) {
|
||||
ret = HSAKMT_STATUS_ERROR;
|
||||
goto out;
|
||||
}
|
||||
if (fscanf(file, "csource=0x%x", &num) != 1) {
|
||||
ret = HSAKMT_STATUS_ERROR;
|
||||
fclose(file);
|
||||
goto out;
|
||||
}
|
||||
if (num > max_counter_id)
|
||||
/* This should never happen. If it does, check IOMMU driver. */
|
||||
fprintf(stderr,
|
||||
"Error: max_counter_id %d is set too small.\n",
|
||||
max_counter_id);
|
||||
else {
|
||||
counter_id[num] = 1;
|
||||
++block->num_of_counters;
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
block->counter_ids = malloc(sizeof(uint32_t) * block->num_of_counters);
|
||||
if (!block->counter_ids) {
|
||||
ret = HSAKMT_STATUS_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
ptr = block->counter_ids;
|
||||
for (num = 0; num < (max_counter_id + 1); num++) {
|
||||
if (counter_id[num]) {
|
||||
ptr[0] = num;
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (snprintf(path, len, "%s/%d/%s",
|
||||
"/sys/devices/virtual/kfd/kfd/topology/nodes",
|
||||
0, /* IOMMU is in node 0. Change this if NUMA is introduced to APU. */
|
||||
"perf/iommu/max_concurrent") >= len) {
|
||||
fprintf(stderr, "Increase path length\n");
|
||||
ret = HSAKMT_STATUS_NO_MEMORY;
|
||||
goto out;
|
||||
};
|
||||
file = fopen(path, "r");
|
||||
if (!file) {
|
||||
ret = HSAKMT_STATUS_ERROR;
|
||||
goto out;
|
||||
}
|
||||
if (fscanf(file, "%d", &block->num_of_slots) != 1)
|
||||
ret = HSAKMT_STATUS_ERROR;
|
||||
fclose(file);
|
||||
|
||||
out:
|
||||
if (dir)
|
||||
closedir(dir);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Build the counter blocks that are discovered at run time.
 * The only such block here is the IOMMU one; its status is returned as-is.
 */
HSAKMT_STATUS alloc_pmc_blocks(void)
{
	HSAKMT_STATUS status = alloc_pmc_blocks_iommu();

	return status;
}
|
||||
|
||||
void free_pmc_blocks(void)
|
||||
{
|
||||
if (iommu_block.counter_ids)
|
||||
free(iommu_block.counter_ids);
|
||||
iommu_block.counter_ids = NULL;
|
||||
iommu_block.num_of_counters = 0;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
get_block_properties(uint16_t dev_id,
|
||||
enum perf_block_id block_id,
|
||||
struct perf_counter_block *block)
|
||||
get_block_properties(uint32_t node_id,
|
||||
enum perf_block_id block_id,
|
||||
struct perf_counter_block *block)
|
||||
{
|
||||
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
|
||||
if (block_id > PERFCOUNTER_BLOCKID__MAX || block_id < PERFCOUNTER_BLOCKID__FIRST)
|
||||
uint16_t dev_id = get_device_id_by_node(node_id);
|
||||
|
||||
if (block_id > PERFCOUNTER_BLOCKID__MAX ||
|
||||
block_id < PERFCOUNTER_BLOCKID__FIRST)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) {
|
||||
*block = iommu_block;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/* To avoid the long list, we read the 12 most significant digits of DID
|
||||
* to identify the GPU instead of listing the complete 16 bits. If one
|
||||
* day 12-bits is not good enough to distinguish the GPU, change the
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
/* Identifiers of the performance counter blocks known to the library. */
enum perf_block_id {
	/* Lower bound used when range-checking a block id. */
	PERFCOUNTER_BLOCKID__FIRST = 0,
	PERFCOUNTER_BLOCKID__SQ = PERFCOUNTER_BLOCKID__FIRST,
	PERFCOUNTER_BLOCKID__IOMMUV2,
	/* Number of block ids; used as a loop bound, not a real block. */
	PERFCOUNTER_BLOCKID__MAX
};
|
||||
|
||||
@@ -42,9 +43,12 @@ struct perf_counter_block {
|
||||
uint64_t counter_mask;
|
||||
};
|
||||
|
||||
HSAKMT_STATUS alloc_pmc_blocks(void);
|
||||
void free_pmc_blocks(void);
|
||||
|
||||
HSAKMT_STATUS
|
||||
get_block_properties(uint16_t dev_id,
|
||||
enum perf_block_id block_id,
|
||||
struct perf_counter_block *block);
|
||||
get_block_properties(uint32_t node_id,
|
||||
enum perf_block_id block_id,
|
||||
struct perf_counter_block *block);
|
||||
|
||||
#endif // PMC_TABLE_H
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user