Add pmc module
Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
Tento commit je obsažen v:
+206
-16
@@ -25,8 +25,41 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "libhsakmt.h"
|
||||
#include "pmc_table.h"
|
||||
#include "linux/kfd_ioctl.h"
|
||||
|
||||
/*
 * Number of bits in one byte. Used in this file to convert a counter's
 * CounterSizeInBits into a byte count. CHAR_BIT is supplied by <limits.h>.
 */
#define BITS_PER_BYTE CHAR_BIT

/*
 * Signature value written to struct perf_trace::magic4cc and compared
 * against it before a trace handle is used. The four bytes, least
 * significant first, are the ASCII characters 'H', 'S', 'A', 'T'.
 */
#define HSA_PERF_MAGIC4CC 0x54415348
|
||||
|
||||
/*
 * Run state recorded in a struct perf_trace.
 * STOPPED is deliberately 0 so that it is the first/zero value.
 */
enum perf_trace_state {
	PERF_TRACE_STATE__STOPPED = 0,	/* trace is not running */
	PERF_TRACE_STATE__STARTED	/* trace has been started */
};
|
||||
|
||||
struct perf_trace {
|
||||
uint32_t magic4cc;
|
||||
uint32_t gpu_id;
|
||||
enum perf_trace_state state;
|
||||
};
|
||||
|
||||
/*
 * Per-node table of counter property blobs, indexed by NodeId in this file.
 * Every slot starts out NULL; a slot is assigned from malloc() the first
 * time the properties for that node are built, and the stored pointer is
 * what is returned through the CounterProperties out-parameter.
 */
static HsaCounterProperties *counter_props[MAX_NODES] = {NULL};
|
||||
|
||||
/*
 * Map an internal perf block id to its HSA_UUID.
 *
 * @block_id: block to translate; only PERFCOUNTER_BLOCKID__SQ is handled.
 * @uuid:     receives the UUID on success; left unmodified on failure.
 *
 * Returns 0 on success, or -1 when block_id has no known mapping.
 */
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
	int rc = 0;

	switch (block_id) {
	case PERFCOUNTER_BLOCKID__SQ:
		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
		break;
	default:
		/* If we reach this point, it's a bug */
		rc = -1;
		break;
	}

	return rc;
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcGetCounterProperties(
|
||||
@@ -34,9 +67,71 @@ hsaKmtPmcGetCounterProperties(
|
||||
HsaCounterProperties** CounterProperties //OUT
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
|
||||
uint32_t gpu_id, i, block_id;
|
||||
uint16_t dev_id;
|
||||
uint32_t counter_props_size = 0;
|
||||
uint32_t total_counters = 0;
|
||||
uint32_t total_concurrent = 0;
|
||||
struct perf_counter_block block = {0};
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (CounterProperties == NULL)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (validate_nodeid(NodeId, &gpu_id) != 0)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
|
||||
|
||||
if (counter_props[NodeId] == NULL) {
|
||||
dev_id = get_device_id_by_node(NodeId);
|
||||
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
||||
rc = get_block_properties(dev_id, i, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS)
|
||||
return rc;
|
||||
total_concurrent += block.num_of_slots;
|
||||
total_counters += block.num_of_counters;
|
||||
}
|
||||
|
||||
counter_props_size = sizeof(HsaCounterProperties) +
|
||||
sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
|
||||
sizeof(HsaCounter)*(total_counters-1);
|
||||
|
||||
counter_props[NodeId] = malloc(counter_props_size);
|
||||
|
||||
if (counter_props[NodeId] == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
counter_props[NodeId]->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
|
||||
counter_props[NodeId]->NumConcurrent = total_concurrent;
|
||||
|
||||
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
|
||||
{
|
||||
rc = get_block_properties(dev_id, block_id, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS) {
|
||||
free(counter_props[NodeId]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Filling the SQ block */
|
||||
blockid2uuid(block_id, &counter_props[NodeId]->Blocks[block_id].BlockId);
|
||||
counter_props[NodeId]->Blocks[block_id].NumCounters = block.num_of_counters;
|
||||
counter_props[NodeId]->Blocks[block_id].NumConcurrent = block.num_of_slots;
|
||||
|
||||
for (i = 0; i < block.num_of_counters; i++) {
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].BlockIndex = block_id;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
|
||||
counter_props[NodeId]->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*CounterProperties = counter_props[NodeId];
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -52,9 +147,42 @@ hsaKmtPmcRegisterTrace(
|
||||
HsaPmcTraceRoot* TraceRoot //OUT
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
uint32_t gpu_id, i;
|
||||
uint64_t min_buf_size = 0;
|
||||
uint32_t concurrent_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
|
||||
struct perf_trace *trace = NULL;
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (validate_nodeid(NodeId, &gpu_id) != 0)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
/* Calculating the minimum buffer size */
|
||||
for (i = 0; i < NumberOfCounters; i++) {
|
||||
if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
|
||||
concurrent_counters[Counters[i].BlockIndex]++;
|
||||
}
|
||||
|
||||
/* Verifying that the number of counters per block is not larger than the amount of slots */
|
||||
if (concurrent_counters[PERFCOUNTER_BLOCKID__SQ] > counter_props[NodeId]->Blocks[PERFCOUNTER_BLOCKID__SQ].NumConcurrent)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
trace = malloc(sizeof(trace));
|
||||
if (trace == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
trace->magic4cc = HSA_PERF_MAGIC4CC;
|
||||
trace->gpu_id = gpu_id;
|
||||
trace->state = PERF_TRACE_STATE__STOPPED;
|
||||
|
||||
TraceRoot->NumberOfPasses = 1;
|
||||
TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
|
||||
TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -68,9 +196,33 @@ hsaKmtPmcUnregisterTrace(
|
||||
HSATraceId TraceId //IN
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
uint32_t gpu_id;
|
||||
struct perf_trace *trace;
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (TraceId == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (validate_nodeid(NodeId, &gpu_id) != 0)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
if (trace->gpu_id != gpu_id)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
/* If the trace is in the running state, stop it */
|
||||
if (trace->state == PERF_TRACE_STATE__STARTED) {
|
||||
HSAKMT_STATUS status = hsaKmtPmcStopTrace(TraceId);
|
||||
if (status != HSAKMT_STATUS_SUCCESS)
|
||||
return status;
|
||||
}
|
||||
|
||||
free(trace);
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -86,9 +238,17 @@ hsaKmtPmcAcquireTraceAccess(
|
||||
HSATraceId TraceId //IN
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
struct perf_trace *trace;
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (TraceId == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -104,9 +264,17 @@ hsaKmtPmcReleaseTraceAccess(
|
||||
HSATraceId TraceId //IN
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
struct perf_trace *trace;
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (TraceId == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -122,9 +290,17 @@ hsaKmtPmcStartTrace(
|
||||
HSAuint64 TraceBufferSizeBytes //IN (page aligned)
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (TraceId == 0 || TraceBuffer == NULL || TraceBufferSizeBytes == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
trace->state = PERF_TRACE_STATE__STARTED;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -138,9 +314,15 @@ hsaKmtPmcQueryTrace(
|
||||
HSATraceId TraceId //IN
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (TraceId == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -154,7 +336,15 @@ hsaKmtPmcStopTrace(
|
||||
HSATraceId TraceId //IN
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
if (TraceId == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
trace->state = PERF_TRACE_STATE__STOPPED;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele