Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
This commit is contained in:
Evgeny Pinchuk
2014-07-29 13:59:07 +03:00
committato da Oded Gabbay
parent a4f446e8cd
commit 2c865d510f
+206 -16
Vedi File
@@ -25,8 +25,41 @@
#include <stdlib.h>
#include "libhsakmt.h"
#include "pmc_table.h"
#include "linux/kfd_ioctl.h"
/* Bits per byte (CHAR_BIT comes from <limits.h>); used to turn a counter width in bits into bytes. */
#define BITS_PER_BYTE CHAR_BIT
/*
 * Tag stored in every struct perf_trace and checked before a TraceId is
 * trusted as a pointer. Read least-significant byte first, the value
 * spells the ASCII characters "HSAT".
 */
#define HSA_PERF_MAGIC4CC 0x54415348
/* Run state recorded in a struct perf_trace. */
enum perf_trace_state {
	/* Set when the trace is registered and again when it is stopped. */
	PERF_TRACE_STATE__STOPPED = 0,
	/* Set when the trace is started. */
	PERF_TRACE_STATE__STARTED = 1
};
/*
 * Bookkeeping record behind an HSATraceId: the id handed to callers is the
 * address of one of these, converted to a 64-bit integer.
 */
struct perf_trace {
/* Always HSA_PERF_MAGIC4CC for a live trace; checked before the handle is used. */
uint32_t magic4cc;
/* GPU id resolved from the node the trace was registered on. */
uint32_t gpu_id;
/* Whether the trace is currently started or stopped. */
enum perf_trace_state state;
};
/*
 * Per-node cache of counter descriptions, indexed by NodeId. An entry stays
 * NULL until hsaKmtPmcGetCounterProperties() has built it.
 */
static HsaCounterProperties *counter_props[MAX_NODES] = {NULL};
/*
 * Translate an internal performance block id into the UUID that identifies
 * the block in the public API.
 *
 * block_id: internal block identifier.
 * uuid:     receives the UUID; left untouched when block_id is not mapped.
 *
 * Returns 0 on success, -1 when block_id has no UUID mapping (which
 * indicates a bug in the caller).
 */
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
	/* Only the SQ block is mapped. */
	if (block_id == PERFCOUNTER_BLOCKID__SQ) {
		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
		return 0;
	}

	/* If we reach this point, it's a bug */
	return -1;
}
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcGetCounterProperties(
@@ -34,9 +67,71 @@ hsaKmtPmcGetCounterProperties(
HsaCounterProperties** CounterProperties //OUT
)
{
	/*
	 * Hand back the performance-counter description of NodeId through
	 * *CounterProperties. The description is built on first use and cached
	 * in counter_props[NodeId]; later calls return the same pointer.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS on success,
	 * HSAKMT_STATUS_INVALID_PARAMETER when CounterProperties is NULL,
	 * HSAKMT_STATUS_INVALID_NODE_UNIT when NodeId is rejected or does not
	 * fit the cache, HSAKMT_STATUS_NO_MEMORY when the allocation fails, or
	 * whatever failure status get_block_properties() reports.
	 */
	CHECK_KFD_OPEN();

	HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
	HsaCounterProperties *props = NULL;
	uint32_t gpu_id, i, block_id;
	uint16_t dev_id;
	size_t counter_props_size = 0;
	uint32_t total_counters = 0;
	uint32_t total_concurrent = 0;
	struct perf_counter_block block = {0};

	if (CounterProperties == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &gpu_id) != 0)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* counter_props[] has MAX_NODES entries; never index past the end. */
	if (NodeId >= MAX_NODES)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (counter_props[NodeId] == NULL) {
		dev_id = get_device_id_by_node(NodeId);

		/* First pass: total up counters and slots to size the allocation. */
		for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
			rc = get_block_properties(dev_id, block_id, &block);
			if (rc != HSAKMT_STATUS_SUCCESS)
				return rc;
			total_concurrent += block.num_of_slots;
			total_counters += block.num_of_counters;
		}

		/*
		 * One block and one counter are subtracted, presumably because
		 * HsaCounterProperties already embeds one of each -- confirm
		 * against the type definitions. The counter term is skipped
		 * when there is at most one counter so that the unsigned
		 * subtraction cannot wrap around.
		 */
		counter_props_size = sizeof(HsaCounterProperties) +
			sizeof(HsaCounterBlockProperties) * (PERFCOUNTER_BLOCKID__MAX - 1);
		if (total_counters > 1)
			counter_props_size += sizeof(HsaCounter) * (total_counters - 1);

		/*
		 * Zero-initialise so that fields which are not explicitly
		 * filled below are not left indeterminate.
		 */
		props = calloc(1, counter_props_size);
		if (props == NULL)
			return HSAKMT_STATUS_NO_MEMORY;

		props->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
		props->NumConcurrent = total_concurrent;

		/*
		 * Second pass: fill in each block and its counters. The result
		 * is built in a local pointer and published only once complete,
		 * so a failure never leaves a freed pointer in the cache.
		 * NOTE(review): with more than one block, Blocks[n] may overlap
		 * the counters of Blocks[n-1] if both are trailing arrays --
		 * confirm the intended layout before adding blocks.
		 */
		for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
			rc = get_block_properties(dev_id, block_id, &block);
			if (rc != HSAKMT_STATUS_SUCCESS) {
				free(props);
				return rc;
			}

			/* Filling the SQ block */
			/* NOTE(review): the result is ignored; an unmapped id leaves BlockId zeroed. */
			blockid2uuid(block_id, &props->Blocks[block_id].BlockId);
			props->Blocks[block_id].NumCounters = block.num_of_counters;
			props->Blocks[block_id].NumConcurrent = block.num_of_slots;

			for (i = 0; i < block.num_of_counters; i++) {
				props->Blocks[block_id].Counters[i].BlockIndex = block_id;
				props->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
				props->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
				props->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
				props->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
				props->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
			}
		}

		counter_props[NodeId] = props;
	}

	*CounterProperties = counter_props[NodeId];

	return HSAKMT_STATUS_SUCCESS;
}
/**
@@ -52,9 +147,42 @@ hsaKmtPmcRegisterTrace(
HsaPmcTraceRoot* TraceRoot //OUT
)
{
	/*
	 * Register a trace over the given counters on NodeId and describe it
	 * in *TraceRoot (number of passes, minimum buffer size, trace id).
	 * The trace starts out in the stopped state.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS on success,
	 * HSAKMT_STATUS_INVALID_PARAMETER for NULL/empty arguments, a counter
	 * whose BlockIndex is out of range, more SQ counters than the block has
	 * slots, or when the counter properties of the node have not been
	 * built yet; HSAKMT_STATUS_INVALID_NODE_UNIT when NodeId is rejected;
	 * HSAKMT_STATUS_NO_MEMORY when the allocation fails.
	 */
	CHECK_KFD_OPEN();

	uint32_t gpu_id, i;
	uint64_t min_buf_size = 0;
	uint32_t concurrent_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
	struct perf_trace *trace = NULL;

	if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &gpu_id) != 0)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* counter_props[] has MAX_NODES entries; never index past the end. */
	if (NodeId >= MAX_NODES)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* Calculating the minimum buffer size */
	for (i = 0; i < NumberOfCounters; i++) {
		if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
			return HSAKMT_STATUS_INVALID_PARAMETER;
		min_buf_size += Counters[i].CounterSizeInBits / BITS_PER_BYTE;
		concurrent_counters[Counters[i].BlockIndex]++;
	}

	/*
	 * The slot limits live in the cached counter properties. The cache
	 * entry is only filled by hsaKmtPmcGetCounterProperties(), so refuse
	 * the request instead of dereferencing NULL when it is still empty.
	 */
	if (counter_props[NodeId] == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Verifying that the number of counters per block is not larger than the amount of slots */
	if (concurrent_counters[PERFCOUNTER_BLOCKID__SQ] >
	    counter_props[NodeId]->Blocks[PERFCOUNTER_BLOCKID__SQ].NumConcurrent)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Size of the pointed-to struct, not of the pointer itself. */
	trace = malloc(sizeof(*trace));
	if (trace == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	trace->magic4cc = HSA_PERF_MAGIC4CC;
	trace->gpu_id = gpu_id;
	trace->state = PERF_TRACE_STATE__STOPPED;

	TraceRoot->NumberOfPasses = 1;
	TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
	/* The trace id is the address of the bookkeeping record. */
	TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);

	return HSAKMT_STATUS_SUCCESS;
}
/**
@@ -68,9 +196,33 @@ hsaKmtPmcUnregisterTrace(
HSATraceId TraceId //IN
)
{
	/*
	 * Unregister the trace identified by TraceId on NodeId and release its
	 * bookkeeping record. A trace that is still started is stopped first.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS on success,
	 * HSAKMT_STATUS_INVALID_PARAMETER when TraceId is 0,
	 * HSAKMT_STATUS_INVALID_NODE_UNIT when NodeId is rejected or is not
	 * the node the trace was registered on,
	 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match, or
	 * the failure status of hsaKmtPmcStopTrace().
	 */
	CHECK_KFD_OPEN();

	uint32_t gpu_id;
	struct perf_trace *trace;

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &gpu_id) != 0)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (trace->gpu_id != gpu_id)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* If the trace is in the running state, stop it */
	if (trace->state == PERF_TRACE_STATE__STARTED) {
		HSAKMT_STATUS status = hsaKmtPmcStopTrace(TraceId);

		/* Keep the record alive when stopping failed. */
		if (status != HSAKMT_STATUS_SUCCESS)
			return status;
	}

	free(trace);

	return HSAKMT_STATUS_SUCCESS;
}
@@ -86,9 +238,17 @@ hsaKmtPmcAcquireTraceAccess(
HSATraceId TraceId //IN
)
{
	/*
	 * Validate TraceId for an access request. Nothing beyond the handle
	 * check is performed here.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS for a valid handle,
	 * HSAKMT_STATUS_INVALID_PARAMETER when TraceId is 0, or
	 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match.
	 */
	CHECK_KFD_OPEN();

	struct perf_trace *trace;

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	return HSAKMT_STATUS_SUCCESS;
}
@@ -104,9 +264,17 @@ hsaKmtPmcReleaseTraceAccess(
HSATraceId TraceId //IN
)
{
	/*
	 * Validate TraceId for an access release. Nothing beyond the handle
	 * check is performed here.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS for a valid handle,
	 * HSAKMT_STATUS_INVALID_PARAMETER when TraceId is 0, or
	 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match.
	 */
	CHECK_KFD_OPEN();

	struct perf_trace *trace;

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	return HSAKMT_STATUS_SUCCESS;
}
@@ -122,9 +290,17 @@ hsaKmtPmcStartTrace(
HSAuint64 TraceBufferSizeBytes //IN (page aligned)
)
{
	/*
	 * Mark the trace identified by TraceId as started. TraceBuffer and
	 * TraceBufferSizeBytes are only checked for being non-NULL / non-zero;
	 * they are not otherwise used here.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS on success,
	 * HSAKMT_STATUS_INVALID_PARAMETER when TraceId is 0, TraceBuffer is
	 * NULL or TraceBufferSizeBytes is 0, or
	 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match.
	 */
	CHECK_KFD_OPEN();

	/* Only a conversion; the pointer is not dereferenced before the checks below. */
	struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (TraceId == 0 || TraceBuffer == NULL || TraceBufferSizeBytes == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	trace->state = PERF_TRACE_STATE__STARTED;

	return HSAKMT_STATUS_SUCCESS;
}
@@ -138,9 +314,15 @@ hsaKmtPmcQueryTrace(
HSATraceId TraceId //IN
)
{
	/*
	 * Validate TraceId for a query. Nothing beyond the handle check is
	 * performed here.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS for a valid handle,
	 * HSAKMT_STATUS_INVALID_PARAMETER when TraceId is 0, or
	 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match.
	 */
	CHECK_KFD_OPEN();

	/* Only a conversion; the pointer is not dereferenced before the check below. */
	struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	return HSAKMT_STATUS_SUCCESS;
}
@@ -154,7 +336,15 @@ hsaKmtPmcStopTrace(
HSATraceId TraceId //IN
)
{
	/*
	 * Mark the trace identified by TraceId as stopped.
	 *
	 * Returns HSAKMT_STATUS_SUCCESS on success,
	 * HSAKMT_STATUS_INVALID_PARAMETER when TraceId is 0, or
	 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match.
	 */
	CHECK_KFD_OPEN();

	/* Only a conversion; the pointer is not dereferenced before the check below. */
	struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	trace->state = PERF_TRACE_STATE__STOPPED;

	return HSAKMT_STATUS_SUCCESS;
}