Add IOMMU to performance counter table

Add IOMMUv2 to blocks returned by hsaKmtPmcGetCounterProperties(). IOMMU
information is read from sysfs.

Change-Id: I3a1c6f902f947913570a78700fc0ffc444e1dd72
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Αυτή η υποβολή περιλαμβάνεται σε:
Amber Lin
2017-02-03 12:07:57 -05:00
γονέας d4dbf562a9
υποβολή 9dadac6dc9
3 αρχεία άλλαξαν με 199 προσθήκες και 47 διαγραφές
+63 -40
Προβολή Αρχείου
@@ -56,6 +56,9 @@ HSAKMT_STATUS init_counter_props(unsigned int NumNodes)
return HSAKMT_STATUS_NO_MEMORY;
counter_props_count = NumNodes;
alloc_pmc_blocks();
return HSAKMT_STATUS_SUCCESS;
}
@@ -73,18 +76,24 @@ void destroy_counter_props(void)
}
free(counter_props);
free_pmc_blocks();
}
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
int rc = 0;
switch (block_id) {
case PERFCOUNTER_BLOCKID__SQ:
*uuid = HSA_PROFILEBLOCK_AMD_SQ;
break;
case PERFCOUNTER_BLOCKID__IOMMUV2:
*uuid = HSA_PROFILEBLOCK_AMD_IOMMUV2;
break;
default:
/* If we reach this point, it's a bug */
rc = -1;
break;
}
return rc;
@@ -99,11 +108,12 @@ hsaKmtPmcGetCounterProperties(
{
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
uint32_t gpu_id, i, block_id;
uint16_t dev_id;
uint32_t counter_props_size = 0;
uint32_t total_counters = 0;
uint32_t total_concurrent = 0;
struct perf_counter_block block = {0};
uint32_t total_blocks = 0;
uint32_t entry;
if (counter_props == NULL)
return HSAKMT_STATUS_NO_MEMORY;
@@ -114,50 +124,63 @@ hsaKmtPmcGetCounterProperties(
if (validate_nodeid(NodeId, &gpu_id) != 0)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
if (counter_props[NodeId] == NULL) {
dev_id = get_device_id_by_node(NodeId);
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
rc = get_block_properties(dev_id, i, &block);
if (rc != HSAKMT_STATUS_SUCCESS)
return rc;
total_concurrent += block.num_of_slots;
total_counters += block.num_of_counters;
if (counter_props[NodeId] != NULL) {
*CounterProperties = counter_props[NodeId];
return HSAKMT_STATUS_SUCCESS;
}
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
rc = get_block_properties(NodeId, i, &block);
if (rc != HSAKMT_STATUS_SUCCESS)
return rc;
total_concurrent += block.num_of_slots;
total_counters += block.num_of_counters;
/* If num_of_slots=0, this block doesn't exist */
if (block.num_of_slots)
total_blocks++;
}
counter_props_size = sizeof(HsaCounterProperties) +
sizeof(HsaCounterBlockProperties)*(total_blocks-1) +
sizeof(HsaCounter)*(total_counters-1);
counter_props[NodeId] = malloc(counter_props_size);
if (counter_props[NodeId] == NULL)
return HSAKMT_STATUS_NO_MEMORY;
counter_props[NodeId]->NumBlocks = total_blocks;
counter_props[NodeId]->NumConcurrent = total_concurrent;
entry = 0;
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
rc = get_block_properties(NodeId, block_id, &block);
if (rc != HSAKMT_STATUS_SUCCESS) {
free(counter_props[NodeId]);
return rc;
}
counter_props_size = sizeof(HsaCounterProperties) +
sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
sizeof(HsaCounter)*(total_counters-1);
if (!block.num_of_slots) /* not a valid block */
continue;
counter_props[NodeId] = malloc(counter_props_size);
blockid2uuid(block_id,
&counter_props[NodeId]->Blocks[entry].BlockId);
counter_props[NodeId]->Blocks[entry].NumCounters =
block.num_of_counters;
counter_props[NodeId]->Blocks[entry].NumConcurrent =
block.num_of_slots;
if (counter_props[NodeId] == NULL)
return HSAKMT_STATUS_NO_MEMORY;
counter_props[NodeId]->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
counter_props[NodeId]->NumConcurrent = total_concurrent;
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
{
rc = get_block_properties(dev_id, block_id, &block);
if (rc != HSAKMT_STATUS_SUCCESS) {
free(counter_props[NodeId]);
return rc;
}
/* Filling the SQ block */
blockid2uuid(block_id, &counter_props[NodeId]->Blocks[block_id].BlockId);
counter_props[NodeId]->Blocks[block_id].NumCounters = block.num_of_counters;
counter_props[NodeId]->Blocks[block_id].NumConcurrent = block.num_of_slots;
for (i = 0; i < block.num_of_counters; i++) {
counter_props[NodeId]->Blocks[block_id].Counters[i].BlockIndex = block_id;
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
counter_props[NodeId]->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
counter_props[NodeId]->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
}
for (i = 0; i < block.num_of_counters; i++) {
counter_props[NodeId]->Blocks[entry].Counters[i].BlockIndex = block_id;
counter_props[NodeId]->Blocks[entry].Counters[i].CounterId = block.counter_ids[i];
counter_props[NodeId]->Blocks[entry].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
counter_props[NodeId]->Blocks[entry].Counters[i].CounterMask = block.counter_mask;
counter_props[NodeId]->Blocks[entry].Counters[i].Flags.ui32.Global = 1;
if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2)
counter_props[NodeId]->Blocks[entry].Counters[i].Type = HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE;
else
counter_props[NodeId]->Blocks[entry].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
}
entry++;
}
*CounterProperties = counter_props[NodeId];
+129 -4
Προβολή Αρχείου
@@ -23,6 +23,11 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "libhsakmt.h"
#include "pmc_table.h"
@@ -182,15 +187,135 @@ static struct perf_counter_block polaris_blocks[PERFCOUNTER_BLOCKID__MAX] = {
},
};
/* Current APUs only have one IOMMU. If NUMA is introduced to APUs, we'll need
* to expand the struct here to an array.
*/
static struct perf_counter_block iommu_block;
/*
 * Discover the IOMMU performance counters exposed through sysfs and record
 * them in iommu_block.
 *
 * Every entry of the amd_iommu "events" directory is parsed with the format
 * "csource=0x<hex>". The distinct csource values become block->counter_ids
 * (stored in ascending order) and their number becomes block->num_of_counters.
 * The slot count is then read from the "perf/iommu/max_concurrent" file of
 * KFD topology node 0 into block->num_of_slots.
 *
 * Returns:
 *   HSAKMT_STATUS_SUCCESS   - block populated, or the events directory could
 *                             not be opened (iommu_block is left zeroed).
 *   HSAKMT_STATUS_ERROR     - a sysfs file could not be opened or parsed.
 *   HSAKMT_STATUS_NO_MEMORY - allocation failed or a path did not fit in the
 *                             local buffer.
 *
 * On any failure iommu_block is reset to all zeroes, so it never advertises
 * counters without a matching counter_ids array.
 */
static HSAKMT_STATUS
alloc_pmc_blocks_iommu(void)
{
	DIR *dir;
	struct dirent *dent;
	const char sysfs_amdiommu_event_path[] =
		"/sys/bus/event_source/devices/amd_iommu/events";
	/* Counter source in IOMMU's Counter Bank Addressing register is 8 bits,
	 * so the biggest counter number/id possible is 0xff. An enum constant
	 * keeps counter_id[] an ordinary fixed-size array instead of a VLA.
	 */
	enum { max_counter_id = 0xff };
	char path[256];
	const int len = sizeof(path);
	FILE *file;
	int written;
	unsigned int num;	/* %x requires an unsigned int destination */
	int slots;
	char counter_id[max_counter_id + 1];	/* counter_id[n] != 0: id n seen */
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	uint32_t *ptr;
	struct perf_counter_block *block = &iommu_block;

	memset(block, 0, sizeof(struct perf_counter_block));

	dir = opendir(sysfs_amdiommu_event_path);
	if (!dir)
		goto out;	/* directory not available: keep the block empty */

	memset(counter_id, 0, sizeof(counter_id));

	while ((dent = readdir(dir))) {
		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;

		written = snprintf(path, len, "%s/%s",
				   sysfs_amdiommu_event_path, dent->d_name);
		if (written < 0 || written >= len) {
			fprintf(stderr, "Increase path length.\n");
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto out;
		}

		file = fopen(path, "r");
		if (!file) {
			ret = HSAKMT_STATUS_ERROR;
			goto out;
		}
		if (fscanf(file, "csource=0x%x", &num) != 1) {
			ret = HSAKMT_STATUS_ERROR;
			fclose(file);
			goto out;
		}
		fclose(file);

		if (num > (unsigned int)max_counter_id) {
			/* This should never happen. If it does, check IOMMU driver. */
			fprintf(stderr,
				"Error: max_counter_id %d is set too small.\n",
				max_counter_id);
		} else if (!counter_id[num]) {
			/* Count each id once so num_of_counters always matches
			 * the number of entries written to counter_ids below.
			 */
			counter_id[num] = 1;
			++block->num_of_counters;
		}
	}

	/* Skip the allocation when nothing was found: malloc(0) is allowed to
	 * return NULL, which must not be reported as an out-of-memory error.
	 */
	if (block->num_of_counters) {
		block->counter_ids = malloc(sizeof(*block->counter_ids) *
					    block->num_of_counters);
		if (!block->counter_ids) {
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto out;
		}

		ptr = block->counter_ids;
		for (num = 0; num <= (unsigned int)max_counter_id; num++) {
			if (counter_id[num])
				*ptr++ = num;
		}
	}

	written = snprintf(path, len, "%s/%d/%s",
			   "/sys/devices/virtual/kfd/kfd/topology/nodes",
			   0, /* IOMMU is in node 0. Change this if NUMA is introduced to APU. */
			   "perf/iommu/max_concurrent");
	if (written < 0 || written >= len) {
		fprintf(stderr, "Increase path length\n");
		ret = HSAKMT_STATUS_NO_MEMORY;
		goto out;
	}

	file = fopen(path, "r");
	if (!file) {
		ret = HSAKMT_STATUS_ERROR;
		goto out;
	}
	/* Scan into a local int so the destination type matches %d exactly,
	 * and refuse a negative slot count.
	 */
	if (fscanf(file, "%d", &slots) != 1 || slots < 0)
		ret = HSAKMT_STATUS_ERROR;
	else
		block->num_of_slots = slots;
	fclose(file);

out:
	if (ret != HSAKMT_STATUS_SUCCESS) {
		/* Do not leave a half-populated block behind. */
		free(block->counter_ids);
		memset(block, 0, sizeof(struct perf_counter_block));
	}
	if (dir)
		closedir(dir);

	return ret;
}
/*
 * Build the counter blocks that are discovered at run time. The IOMMU block
 * is the only one handled here; its status is passed straight back.
 */
HSAKMT_STATUS alloc_pmc_blocks(void)
{
	HSAKMT_STATUS status = alloc_pmc_blocks_iommu();

	return status;
}
void free_pmc_blocks(void)
{
if (iommu_block.counter_ids)
free(iommu_block.counter_ids);
iommu_block.counter_ids = NULL;
iommu_block.num_of_counters = 0;
}
HSAKMT_STATUS
get_block_properties(uint16_t dev_id,
enum perf_block_id block_id,
struct perf_counter_block *block)
get_block_properties(uint32_t node_id,
enum perf_block_id block_id,
struct perf_counter_block *block)
{
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
if (block_id > PERFCOUNTER_BLOCKID__MAX || block_id < PERFCOUNTER_BLOCKID__FIRST)
uint16_t dev_id = get_device_id_by_node(node_id);
if (block_id > PERFCOUNTER_BLOCKID__MAX ||
block_id < PERFCOUNTER_BLOCKID__FIRST)
return HSAKMT_STATUS_INVALID_PARAMETER;
if (block_id == PERFCOUNTER_BLOCKID__IOMMUV2) {
*block = iommu_block;
return HSAKMT_STATUS_SUCCESS;
}
/* To avoid the long list, we read the 12 most significant digits of DID
* to identify the GPU instead of listing the complete 16 bits. If one
* day 12-bits is not good enough to distinguish the GPU, change the
+7 -3
Προβολή Αρχείου
@@ -31,6 +31,7 @@
/*
 * Identifiers of the performance counter blocks known to the library.
 * Values are consecutive starting at PERFCOUNTER_BLOCKID__FIRST, so callers
 * can iterate over [PERFCOUNTER_BLOCKID__FIRST, PERFCOUNTER_BLOCKID__MAX).
 */
enum perf_block_id {
/* Lowest valid block id. */
PERFCOUNTER_BLOCKID__FIRST = 0,
/* SQ block; shares the value of PERFCOUNTER_BLOCKID__FIRST. */
PERFCOUNTER_BLOCKID__SQ = PERFCOUNTER_BLOCKID__FIRST,
/* IOMMUv2 block. */
PERFCOUNTER_BLOCKID__IOMMUV2,
/* Number of block ids; not itself a block. Keep this entry last. */
PERFCOUNTER_BLOCKID__MAX
};
@@ -42,9 +43,12 @@ struct perf_counter_block {
uint64_t counter_mask;
};
/*
 * Allocate and populate the counter blocks that are discovered at run time.
 * Returns HSAKMT_STATUS_SUCCESS on success, another HSAKMT_STATUS on failure.
 */
HSAKMT_STATUS alloc_pmc_blocks(void);
/* Release whatever alloc_pmc_blocks() allocated. */
void free_pmc_blocks(void);
HSAKMT_STATUS
get_block_properties(uint16_t dev_id,
enum perf_block_id block_id,
struct perf_counter_block *block);
get_block_properties(uint32_t node_id,
enum perf_block_id block_id,
struct perf_counter_block *block);
#endif // PMC_TABLE_H