2014-07-29 11:16:00 +03:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2014 Advanced Micro Devices, Inc.
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use, copy,
|
|
|
|
|
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
|
|
|
* of the Software, and to permit persons to whom the Software is
|
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including
|
|
|
|
|
* the next paragraph) shall be included in all copies or substantial
|
|
|
|
|
* portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
|
* DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
2017-02-06 15:06:32 -05:00
|
|
|
#include <stdio.h>
|
2017-02-15 11:32:49 -05:00
|
|
|
#include <string.h>
|
|
|
|
|
#include <linux/perf_event.h>
|
|
|
|
|
#include <sys/syscall.h>
|
2014-07-29 11:16:00 +03:00
|
|
|
#include "libhsakmt.h"
|
2014-07-29 13:59:07 +03:00
|
|
|
#include "pmc_table.h"
|
2024-03-12 13:26:41 -07:00
|
|
|
#include "hsakmt/linux/kfd_ioctl.h"
|
2014-12-08 17:05:56 +02:00
|
|
|
#include <unistd.h>
|
2017-02-15 11:32:49 -05:00
|
|
|
#include <sys/ioctl.h>
|
|
|
|
|
#include <errno.h>
|
2017-07-04 11:51:17 -04:00
|
|
|
#include <sys/mman.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
#include <semaphore.h>
|
2026-01-30 09:42:25 +08:00
|
|
|
#include <assert.h>
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
/* Bits per byte; used to convert a counter width given in bits into bytes. */
#define BITS_PER_BYTE CHAR_BIT

/* Signature stored in perf_trace.magic4cc and checked by every API that
 * receives a trace ID (the ASCII bytes 'H','S','A','T', low byte first).
 */
#define HSA_PERF_MAGIC4CC 0x54415348
|
2014-07-29 13:59:07 +03:00
|
|
|
|
|
|
|
|
/* Run state recorded in struct perf_trace. */
enum perf_trace_state {
	/* Set when a trace is registered and again by hsaKmtPmcStopTrace(). */
	PERF_TRACE_STATE__STOPPED = 0,
	/* Set by hsaKmtPmcStartTrace() once all blocks were enabled. */
	PERF_TRACE_STATE__STARTED = 1
};
|
|
|
|
|
|
2017-02-06 15:06:32 -05:00
|
|
|
struct perf_trace_block {
|
|
|
|
|
enum perf_block_id block_id;
|
|
|
|
|
uint32_t num_counters;
|
|
|
|
|
uint64_t *counter_id;
|
2017-02-15 11:32:49 -05:00
|
|
|
int *perf_event_fd;
|
2017-02-06 15:06:32 -05:00
|
|
|
};
|
|
|
|
|
|
2014-07-29 13:59:07 +03:00
|
|
|
struct perf_trace {
|
2017-02-03 09:59:21 -05:00
|
|
|
uint32_t magic4cc;
|
2017-02-06 15:06:32 -05:00
|
|
|
uint32_t gpu_id;
|
2017-02-03 09:59:21 -05:00
|
|
|
enum perf_trace_state state;
|
2017-02-06 15:06:32 -05:00
|
|
|
uint32_t num_blocks;
|
2017-02-23 18:26:23 -05:00
|
|
|
void *buf;
|
|
|
|
|
uint64_t buf_size;
|
2017-02-06 15:06:32 -05:00
|
|
|
struct perf_trace_block blocks[0];
|
2014-07-29 13:59:07 +03:00
|
|
|
};
|
|
|
|
|
|
2017-02-15 11:32:49 -05:00
|
|
|
/* Record read from a perf event descriptor by query_trace(). The three
 * 64-bit words can be addressed by name or by index; only val is consumed
 * in this file.
 * NOTE(review): ena/run presumably are the perf "time enabled" and
 * "time running" values - confirm against the read_format in use.
 */
struct perf_counts_values {
	union {
		struct {
			uint64_t val, ena, run;
		};
		uint64_t values[3];
	};
};
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
struct hsa_kfd_perf_context
|
|
|
|
|
{
|
|
|
|
|
HsaCounterProperties **counter_props;
|
|
|
|
|
unsigned int counter_props_count;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Return the perf context attached to @ctx, creating a zero-initialized one
 * on first use.
 *
 * Returns NULL when @ctx is NULL (after asserting in debug builds) or when
 * the allocation fails; both cases are logged.
 */
struct hsa_kfd_perf_context *hsakmt_kfdcontext_get_perf_context(HsaKFDContext *ctx)
{
	assert(ctx);
	if (!ctx) {
		/* Reached only when assertions are compiled out. */
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	if (!ctx->perf_context) {
		ctx->perf_context = calloc(1, sizeof(struct hsa_kfd_perf_context));
		if (!ctx->perf_context)
			pr_err("Alloc memory failed for struct hsa_kfd_perf_context size %zu\n",
			       sizeof(struct hsa_kfd_perf_context));
	}

	return ctx->perf_context;
}
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-15 11:32:49 -05:00
|
|
|
/* Read exactly @n bytes from @fd into @buf, retrying short reads and reads
 * interrupted by a signal.
 *
 * Returns @n on success, the number of bytes obtained so far when end of
 * file is hit early, or -errno when read() fails for any reason other than
 * EINTR.
 */
static ssize_t readn(int fd, void *buf, size_t n)
{
	size_t remaining = n;

	while (remaining != 0) {
		ssize_t got = read(fd, buf, remaining);

		if (got == 0)		/* reach EOF */
			return (n - remaining);

		if (got < 0) {
			if (errno != EINTR)
				return -errno;
			continue;	/* read got interrupted */
		}

		remaining -= got;
		buf = VOID_PTR_ADD(buf, got);
	}

	return n;
}
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
/* Allocate the per-node counter property table of @ctx with @NumNodes empty
 * slots.
 *
 * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_NO_MEMORY when either the
 * perf context or the table cannot be obtained.
 */
HSAKMT_STATUS hsakmt_init_counter_props(HsaKFDContext *ctx, unsigned int NumNodes)
{
	struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);

	/* The getter returns NULL on allocation failure; do not dereference it. */
	if (!perf_ctx) {
		pr_warn("Profiling is not available.\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}

	perf_ctx->counter_props = calloc(NumNodes, sizeof(*perf_ctx->counter_props));
	if (!perf_ctx->counter_props) {
		pr_warn("Profiling is not available.\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}

	perf_ctx->counter_props_count = NumNodes;

	return HSAKMT_STATUS_SUCCESS;
}
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
/* Release every per-node counter property buffer of @ctx and the table
 * holding them. Safe to call when nothing was allocated and safe to call
 * more than once.
 */
void hsakmt_destroy_counter_props(HsaKFDContext *ctx)
{
	struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
	unsigned int i;

	if (!perf_ctx || !perf_ctx->counter_props)
		return;

	for (i = 0; i < perf_ctx->counter_props_count; i++) {
		free(perf_ctx->counter_props[i]);
		perf_ctx->counter_props[i] = NULL;
	}

	free(perf_ctx->counter_props);
	/* Clear the table pointer so later calls see "no table" instead of
	 * freed memory.
	 */
	perf_ctx->counter_props = NULL;
	perf_ctx->counter_props_count = 0;
}
|
2015-12-01 14:41:59 -05:00
|
|
|
|
2014-07-29 13:59:07 +03:00
|
|
|
/* Translate a perf block ID into the UUID that identifies the block in the
 * public API.
 *
 * Returns 0 and stores the UUID in *uuid for a known block. Returns -1 and
 * leaves *uuid untouched for any other value.
 */
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
	switch (block_id) {
	case PERFCOUNTER_BLOCKID__CB:
		*uuid = HSA_PROFILEBLOCK_AMD_CB;
		return 0;
	case PERFCOUNTER_BLOCKID__CPF:
		*uuid = HSA_PROFILEBLOCK_AMD_CPF;
		return 0;
	case PERFCOUNTER_BLOCKID__CPG:
		*uuid = HSA_PROFILEBLOCK_AMD_CPG;
		return 0;
	case PERFCOUNTER_BLOCKID__DB:
		*uuid = HSA_PROFILEBLOCK_AMD_DB;
		return 0;
	case PERFCOUNTER_BLOCKID__GDS:
		*uuid = HSA_PROFILEBLOCK_AMD_GDS;
		return 0;
	case PERFCOUNTER_BLOCKID__GRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBM;
		return 0;
	case PERFCOUNTER_BLOCKID__GRBMSE:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBMSE;
		return 0;
	case PERFCOUNTER_BLOCKID__IA:
		*uuid = HSA_PROFILEBLOCK_AMD_IA;
		return 0;
	case PERFCOUNTER_BLOCKID__MC:
		*uuid = HSA_PROFILEBLOCK_AMD_MC;
		return 0;
	case PERFCOUNTER_BLOCKID__PASC:
		*uuid = HSA_PROFILEBLOCK_AMD_PASC;
		return 0;
	case PERFCOUNTER_BLOCKID__PASU:
		*uuid = HSA_PROFILEBLOCK_AMD_PASU;
		return 0;
	case PERFCOUNTER_BLOCKID__SPI:
		*uuid = HSA_PROFILEBLOCK_AMD_SPI;
		return 0;
	case PERFCOUNTER_BLOCKID__SRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_SRBM;
		return 0;
	case PERFCOUNTER_BLOCKID__SQ:
		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
		return 0;
	case PERFCOUNTER_BLOCKID__SX:
		*uuid = HSA_PROFILEBLOCK_AMD_SX;
		return 0;
	case PERFCOUNTER_BLOCKID__TA:
		*uuid = HSA_PROFILEBLOCK_AMD_TA;
		return 0;
	case PERFCOUNTER_BLOCKID__TCA:
		*uuid = HSA_PROFILEBLOCK_AMD_TCA;
		return 0;
	case PERFCOUNTER_BLOCKID__TCC:
		*uuid = HSA_PROFILEBLOCK_AMD_TCC;
		return 0;
	case PERFCOUNTER_BLOCKID__TCP:
		*uuid = HSA_PROFILEBLOCK_AMD_TCP;
		return 0;
	case PERFCOUNTER_BLOCKID__TCS:
		*uuid = HSA_PROFILEBLOCK_AMD_TCS;
		return 0;
	case PERFCOUNTER_BLOCKID__TD:
		*uuid = HSA_PROFILEBLOCK_AMD_TD;
		return 0;
	case PERFCOUNTER_BLOCKID__VGT:
		*uuid = HSA_PROFILEBLOCK_AMD_VGT;
		return 0;
	case PERFCOUNTER_BLOCKID__WD:
		*uuid = HSA_PROFILEBLOCK_AMD_WD;
		return 0;
	default:
		/* If we reach this point, it's a bug */
		return -1;
	}
}
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
/* Look up how many counters of block @block_id may be active at once on
 * node @node_id.
 *
 * The lookup walks the packed, variable-size block entries stored in the
 * cached counter properties of the node. Returns the block's NumConcurrent,
 * or 0 when the properties are not available or no entry carries
 * @block_id; callers treat 0 as an invalid block.
 */
static HSAuint32 get_block_concurrent_limit(struct hsa_kfd_perf_context *perf_ctx,
					    uint32_t node_id,
					    HSAuint32 block_id)
{
	HsaCounterProperties *props;
	HsaCounterBlockProperties *block;
	uint32_t i;

	if (!perf_ctx || !perf_ctx->counter_props ||
	    node_id >= perf_ctx->counter_props_count)
		return 0;

	/* Stays NULL until the node's counter properties have been queried. */
	props = perf_ctx->counter_props[node_id];
	if (!props)
		return 0;

	block = &props->Blocks[0];
	/* Only NumBlocks entries were written; walking further would read past
	 * the end of the allocation.
	 */
	for (i = 0; i < props->NumBlocks; i++) {
		/* An entry without counters has no Counters[0] to inspect. */
		if (block->NumCounters &&
		    block->Counters[0].BlockIndex == block_id)
			return block->NumConcurrent;
		/* The next entry starts right after this entry's last counter. */
		block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
	}

	return 0;
}
|
|
|
|
|
|
2017-02-23 18:26:23 -05:00
|
|
|
static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block,
|
2017-04-20 08:25:00 -04:00
|
|
|
uint32_t cmd)
|
2017-02-23 18:26:23 -05:00
|
|
|
{
|
|
|
|
|
uint32_t i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < block->num_counters; i++) {
|
|
|
|
|
if (block->perf_event_fd[i] < 0)
|
|
|
|
|
return HSAKMT_STATUS_UNAVAILABLE;
|
|
|
|
|
if (ioctl(block->perf_event_fd[i], cmd, NULL))
|
|
|
|
|
return HSAKMT_STATUS_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return HSAKMT_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
|
|
|
|
|
{
|
|
|
|
|
struct perf_counts_values content;
|
|
|
|
|
|
|
|
|
|
if (fd < 0)
|
|
|
|
|
return HSAKMT_STATUS_ERROR;
|
2017-04-20 08:25:00 -04:00
|
|
|
if (readn(fd, &content, sizeof(content)) != sizeof(content))
|
2017-02-23 18:26:23 -05:00
|
|
|
return HSAKMT_STATUS_ERROR;
|
|
|
|
|
|
|
|
|
|
*buf = content.val;
|
|
|
|
|
return HSAKMT_STATUS_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
/* Return the counter properties of node @NodeId through *CounterProperties.
 *
 * The description is built on the first call for a node and cached in the
 * perf context of @ctx; later calls return the cached buffer. The buffer is
 * owned by the library (released by hsakmt_destroy_counter_props()) and
 * stores the block entries packed back to back, each followed directly by
 * its counters.
 *
 * Returns:
 *   HSAKMT_STATUS_NO_MEMORY          - profiling table missing or allocation failed
 *   HSAKMT_STATUS_INVALID_PARAMETER  - @CounterProperties is NULL
 *   HSAKMT_STATUS_INVALID_NODE_UNIT  - @NodeId failed validation
 *   any error of hsakmt_get_block_properties()
 *   HSAKMT_STATUS_SUCCESS
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterPropertiesCtx(HsaKFDContext *ctx,
							 HSAuint32 NodeId,
							 HsaCounterProperties **CounterProperties)
{
	HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
	uint32_t gpu_id, i, block_id;
	size_t counter_props_size;
	uint32_t total_counters = 0;
	uint32_t total_concurrent = 0;
	struct perf_counter_block block = {0};
	uint32_t total_blocks = 0;
	HsaCounterProperties *props;
	HsaCounterBlockProperties *block_prop;
	struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);

	/* The getter returns NULL when the context could not be allocated. */
	if (!perf_ctx || !perf_ctx->counter_props)
		return HSAKMT_STATUS_NO_MEMORY;

	if (!CounterProperties)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* Already built for this node: hand out the cached description. */
	if (perf_ctx->counter_props[NodeId]) {
		*CounterProperties = perf_ctx->counter_props[NodeId];
		return HSAKMT_STATUS_SUCCESS;
	}

	/* First pass: collect the totals needed to size the buffer. */
	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		rc = hsakmt_get_block_properties(ctx, NodeId, i, &block);
		if (rc != HSAKMT_STATUS_SUCCESS)
			return rc;
		total_concurrent += block.num_of_slots;
		total_counters += block.num_of_counters;
		/* If num_of_slots=0, this block doesn't exist */
		if (block.num_of_slots)
			total_blocks++;
	}

	/* HsaCounterProperties already contains one block entry and every
	 * block entry already contains one counter, hence the "- 1" and
	 * "- total_blocks". Done in size_t with guards so an empty node
	 * cannot wrap the subtraction.
	 */
	counter_props_size = sizeof(HsaCounterProperties);
	if (total_blocks > 1)
		counter_props_size += sizeof(HsaCounterBlockProperties) *
				      (total_blocks - 1);
	if (total_counters > total_blocks)
		counter_props_size += sizeof(HsaCounter) *
				      (total_counters - total_blocks);

	/* Zero-filled so that fields and flag bits not set below have a
	 * defined value when handed to the caller.
	 */
	props = calloc(1, counter_props_size);
	if (!props)
		return HSAKMT_STATUS_NO_MEMORY;

	props->NumBlocks = total_blocks;
	props->NumConcurrent = total_concurrent;

	/* Second pass: fill one packed entry per existing block. */
	block_prop = &props->Blocks[0];
	for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
		rc = hsakmt_get_block_properties(ctx, NodeId, block_id, &block);
		if (rc != HSAKMT_STATUS_SUCCESS) {
			free(props);
			return rc;
		}

		if (!block.num_of_slots) /* not a valid block */
			continue;

		blockid2uuid(block_id, &block_prop->BlockId);
		block_prop->NumCounters = block.num_of_counters;
		block_prop->NumConcurrent = block.num_of_slots;
		for (i = 0; i < block.num_of_counters; i++) {
			block_prop->Counters[i].BlockIndex = block_id;
			block_prop->Counters[i].CounterId = block.counter_ids[i];
			block_prop->Counters[i].CounterSizeInBits = block.counter_size_in_bits;
			block_prop->Counters[i].CounterMask = block.counter_mask;
			block_prop->Counters[i].Flags.ui32.Global = 1;
			block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
		}

		/* The next entry starts right after this entry's last counter. */
		block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
	}

	/* Publish to the cache only once the description is complete. */
	perf_ctx->counter_props[NodeId] = props;
	*CounterProperties = props;

	return HSAKMT_STATUS_SUCCESS;
}
|
|
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
/* Registers a set of (HW) counters to be used for tracing/profiling */
|
2026-01-30 09:42:25 +08:00
|
|
|
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTraceCtx(HsaKFDContext* ctx,
|
|
|
|
|
HSAuint32 NodeId,
|
2017-04-20 08:25:00 -04:00
|
|
|
HSAuint32 NumberOfCounters,
|
|
|
|
|
HsaCounter *Counters,
|
|
|
|
|
HsaPmcTraceRoot *TraceRoot)
|
2014-07-29 11:16:00 +03:00
|
|
|
{
|
2017-02-06 15:06:32 -05:00
|
|
|
uint32_t gpu_id, i, j;
|
2017-02-03 09:59:21 -05:00
|
|
|
uint64_t min_buf_size = 0;
|
|
|
|
|
struct perf_trace *trace = NULL;
|
2017-02-06 15:06:32 -05:00
|
|
|
uint32_t concurrent_limit;
|
|
|
|
|
const uint32_t MAX_COUNTERS = 512;
|
2025-01-06 13:15:30 -05:00
|
|
|
|
|
|
|
|
/* Declare performance counter ID 2D array as a contiguous block */
|
|
|
|
|
uint64_t *counter_id = malloc(
|
|
|
|
|
PERFCOUNTER_BLOCKID__MAX * MAX_COUNTERS * sizeof(uint64_t));
|
2017-02-06 15:06:32 -05:00
|
|
|
uint32_t num_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
|
|
|
|
|
uint32_t block, num_blocks = 0, total_counters = 0;
|
|
|
|
|
uint64_t *counter_id_ptr;
|
2017-02-15 11:32:49 -05:00
|
|
|
int *fd_ptr;
|
2026-01-30 09:42:25 +08:00
|
|
|
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("[%s] Number of counters %d\n", __func__, NumberOfCounters);
|
|
|
|
|
|
2025-01-06 13:15:30 -05:00
|
|
|
if (counter_id == NULL) {
|
|
|
|
|
pr_err("Failed to allocate memory for counter_id. Requested %zu bytes.\n",
|
|
|
|
|
PERFCOUNTER_BLOCKID__MAX * MAX_COUNTERS * sizeof(uint64_t));
|
2016-01-14 17:07:28 -05:00
|
|
|
return HSAKMT_STATUS_NO_MEMORY;
|
2025-01-06 13:15:30 -05:00
|
|
|
}
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
if (!perf_ctx->counter_props) {
|
2025-01-06 13:15:30 -05:00
|
|
|
pr_err("Profiling is not available, counter_props is NULL.\n");
|
|
|
|
|
goto no_memory_exit;
|
|
|
|
|
}
|
2016-01-14 17:07:28 -05:00
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
if (!Counters || !TraceRoot || NumberOfCounters == 0)
|
2025-01-06 13:15:30 -05:00
|
|
|
goto invalid_parameter_exit;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) {
|
2025-01-06 13:15:30 -05:00
|
|
|
free(counter_id);
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
2025-01-06 13:15:30 -05:00
|
|
|
}
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-06 15:06:32 -05:00
|
|
|
if (NumberOfCounters > MAX_COUNTERS) {
|
2025-01-06 13:15:30 -05:00
|
|
|
pr_err("MAX_COUNTERS is too small for %d.\n", NumberOfCounters);
|
|
|
|
|
goto no_memory_exit;
|
2017-02-06 15:06:32 -05:00
|
|
|
}
|
|
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
/* Calculating the minimum buffer size */
|
|
|
|
|
for (i = 0; i < NumberOfCounters; i++) {
|
|
|
|
|
if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
|
2025-01-06 13:15:30 -05:00
|
|
|
goto invalid_parameter_exit;
|
2017-02-06 15:06:32 -05:00
|
|
|
/* Only privileged counters need to register */
|
|
|
|
|
if (Counters[i].Type > HSA_PROFILE_TYPE_PRIVILEGED_STREAMING)
|
|
|
|
|
continue;
|
2017-02-03 09:59:21 -05:00
|
|
|
min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
|
2017-02-06 15:06:32 -05:00
|
|
|
/* j: the first blank entry in the block to record counter_id */
|
|
|
|
|
j = num_counters[Counters[i].BlockIndex];
|
2025-01-06 13:15:30 -05:00
|
|
|
/* Make sure counter_id stays within bounds */
|
|
|
|
|
if (j >= MAX_COUNTERS) {
|
|
|
|
|
pr_err("Counter ID exceeded MAX_COUNTERS for block %d.\n",
|
|
|
|
|
Counters[i].BlockIndex);
|
|
|
|
|
goto invalid_parameter_exit;
|
|
|
|
|
}
|
|
|
|
|
/* Initialize counter_id */
|
|
|
|
|
counter_id[Counters[i].BlockIndex * MAX_COUNTERS + j] = Counters[i].CounterId;
|
2017-02-06 15:06:32 -05:00
|
|
|
num_counters[Counters[i].BlockIndex]++;
|
|
|
|
|
total_counters++;
|
2017-02-03 09:59:21 -05:00
|
|
|
}
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-02-06 15:06:32 -05:00
|
|
|
/* Verify that the number of counters per block is not larger than the
|
|
|
|
|
* number of slots.
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
|
|
|
|
if (!num_counters[i])
|
|
|
|
|
continue;
|
2026-01-30 09:42:25 +08:00
|
|
|
concurrent_limit = get_block_concurrent_limit(perf_ctx, NodeId, i);
|
2017-02-06 15:06:32 -05:00
|
|
|
if (!concurrent_limit) {
|
2017-06-27 16:42:18 -04:00
|
|
|
pr_err("Invalid block ID: %d\n", i);
|
2025-01-06 13:15:30 -05:00
|
|
|
goto invalid_parameter_exit;
|
2017-02-06 15:06:32 -05:00
|
|
|
}
|
|
|
|
|
if (num_counters[i] > concurrent_limit) {
|
2017-06-27 16:42:18 -04:00
|
|
|
pr_err("Counters exceed the limit.\n");
|
2025-01-06 13:15:30 -05:00
|
|
|
goto invalid_parameter_exit;
|
2017-02-06 15:06:32 -05:00
|
|
|
}
|
|
|
|
|
num_blocks++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!num_blocks)
|
2025-01-06 13:15:30 -05:00
|
|
|
goto invalid_parameter_exit;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-06 15:06:32 -05:00
|
|
|
/* Now we have sorted blocks/counters information in
|
|
|
|
|
* num_counters[block_id] and counter_id[block_id][]. Allocate trace
|
|
|
|
|
* and record the information.
|
|
|
|
|
*/
|
|
|
|
|
trace = (struct perf_trace *)calloc(sizeof(struct perf_trace)
|
|
|
|
|
+ sizeof(struct perf_trace_block) * num_blocks
|
2017-02-15 11:32:49 -05:00
|
|
|
+ sizeof(uint64_t) * total_counters
|
|
|
|
|
+ sizeof(int) * total_counters,
|
2017-02-06 15:06:32 -05:00
|
|
|
1);
|
2025-01-06 13:15:30 -05:00
|
|
|
if (!trace) {
|
|
|
|
|
pr_err("Failed to allocate memory for trace. Requested %zu bytes.\n",
|
|
|
|
|
sizeof(struct perf_trace)
|
|
|
|
|
+ sizeof(struct perf_trace_block) * num_blocks
|
|
|
|
|
+ sizeof(uint64_t) * total_counters
|
|
|
|
|
+ sizeof(int) * total_counters);
|
|
|
|
|
goto no_memory_exit;
|
|
|
|
|
}
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-15 11:32:49 -05:00
|
|
|
/* Allocated area is partitioned as:
|
|
|
|
|
* +---------------------------------+ trace
|
|
|
|
|
* | perf_trace |
|
|
|
|
|
* |---------------------------------| trace->blocks[0]
|
|
|
|
|
* | perf_trace_block 0 |
|
|
|
|
|
* | .... |
|
|
|
|
|
* | perf_trace_block N-1 | trace->blocks[N-1]
|
|
|
|
|
* |---------------------------------| <-- counter_id_ptr starts here
|
|
|
|
|
* | block 0's counter IDs(uint64_t) |
|
|
|
|
|
* | ...... |
|
|
|
|
|
* | block N-1's counter IDs |
|
|
|
|
|
* |---------------------------------| <-- perf_event_fd starts here
|
|
|
|
|
* | block 0's perf_event_fds(int) |
|
|
|
|
|
* | ...... |
|
|
|
|
|
* | block N-1's perf_event_fds |
|
|
|
|
|
* +---------------------------------+
|
|
|
|
|
*/
|
2017-02-06 15:06:32 -05:00
|
|
|
block = 0;
|
|
|
|
|
counter_id_ptr = (uint64_t *)((char *)
|
|
|
|
|
trace + sizeof(struct perf_trace)
|
|
|
|
|
+ sizeof(struct perf_trace_block) * num_blocks);
|
2017-02-15 11:32:49 -05:00
|
|
|
fd_ptr = (int *)(counter_id_ptr + total_counters);
|
2017-02-06 15:06:32 -05:00
|
|
|
/* Fill in each block's information to the TraceId */
|
|
|
|
|
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
|
|
|
|
if (!num_counters[i]) /* not a block to trace */
|
|
|
|
|
continue;
|
|
|
|
|
/* Following perf_trace + perf_trace_block x N are those
|
|
|
|
|
* counter_id arrays. Assign the counter_id array belonging to
|
|
|
|
|
* this block.
|
|
|
|
|
*/
|
|
|
|
|
trace->blocks[block].counter_id = counter_id_ptr;
|
|
|
|
|
/* Fill in counter IDs to the counter_id array. */
|
|
|
|
|
for (j = 0; j < num_counters[i]; j++)
|
2025-01-06 13:15:30 -05:00
|
|
|
trace->blocks[block].counter_id[j] = counter_id[i * MAX_COUNTERS + j];
|
2017-02-15 11:32:49 -05:00
|
|
|
trace->blocks[block].perf_event_fd = fd_ptr;
|
2017-02-06 15:06:32 -05:00
|
|
|
/* how many counters to trace */
|
|
|
|
|
trace->blocks[block].num_counters = num_counters[i];
|
|
|
|
|
/* block index in "enum perf_block_id" */
|
|
|
|
|
trace->blocks[block].block_id = i;
|
|
|
|
|
block++; /* move to next */
|
|
|
|
|
counter_id_ptr += num_counters[i];
|
2017-02-15 11:32:49 -05:00
|
|
|
fd_ptr += num_counters[i];
|
2017-02-06 15:06:32 -05:00
|
|
|
}
|
|
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
trace->magic4cc = HSA_PERF_MAGIC4CC;
|
|
|
|
|
trace->gpu_id = gpu_id;
|
|
|
|
|
trace->state = PERF_TRACE_STATE__STOPPED;
|
2017-02-06 15:06:32 -05:00
|
|
|
trace->num_blocks = num_blocks;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
TraceRoot->NumberOfPasses = 1;
|
|
|
|
|
TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
|
|
|
|
|
TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2024-09-03 08:29:35 -04:00
|
|
|
free(trace);
|
2025-01-06 13:15:30 -05:00
|
|
|
free(counter_id);
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_SUCCESS;
|
2025-01-06 13:15:30 -05:00
|
|
|
|
|
|
|
|
no_memory_exit:
|
|
|
|
|
free(counter_id);
|
|
|
|
|
return HSAKMT_STATUS_NO_MEMORY;
|
|
|
|
|
|
|
|
|
|
invalid_parameter_exit:
|
|
|
|
|
free(counter_id);
|
|
|
|
|
return HSAKMT_STATUS_INVALID_PARAMETER;
|
2014-07-29 11:16:00 +03:00
|
|
|
}
|
|
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
/* Unregisters a set of (HW) counters used for tracing/profiling */
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTraceCtx(HsaKFDContext* ctx,
|
|
|
|
|
HSAuint32 NodeId,
|
2017-04-20 08:25:00 -04:00
|
|
|
HSATraceId TraceId)
|
2014-07-29 11:16:00 +03:00
|
|
|
{
|
2017-02-03 09:59:21 -05:00
|
|
|
uint32_t gpu_id;
|
|
|
|
|
struct perf_trace *trace;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);
|
|
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (TraceId == 0)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_PARAMETER;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_HANDLE;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (trace->gpu_id != gpu_id)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
/* If the trace is in the running state, stop it */
|
|
|
|
|
if (trace->state == PERF_TRACE_STATE__STARTED) {
|
|
|
|
|
HSAKMT_STATUS status = hsaKmtPmcStopTrace(TraceId);
|
2017-04-20 08:25:00 -04:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (status != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return status;
|
|
|
|
|
}
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
free(trace);
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_SUCCESS;
|
2014-07-29 11:16:00 +03:00
|
|
|
}
|
|
|
|
|
|
2026-01-30 09:42:25 +08:00
|
|
|
/* Validate a trace handle and node ID; performs no other work.
 *
 * Returns:
 *   HSAKMT_STATUS_INVALID_PARAMETER  - @TraceId is 0
 *   HSAKMT_STATUS_INVALID_HANDLE     - handle signature mismatch
 *   HSAKMT_STATUS_INVALID_NODE_UNIT  - @NodeId failed validation
 *   HSAKMT_STATUS_SUCCESS
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccessCtx(HsaKFDContext* ctx,
						       HSAuint32 NodeId,
						       HSATraceId TraceId)
{
	const struct perf_trace *handle;
	uint32_t node_gpu_id;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (hsakmt_validate_nodeid(ctx, NodeId, &node_gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	return HSAKMT_STATUS_SUCCESS;
}
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
/* Validate a trace handle; performs no other work. @NodeId is not examined.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when @TraceId is 0,
 * HSAKMT_STATUS_INVALID_HANDLE on a signature mismatch, otherwise
 * HSAKMT_STATUS_SUCCESS.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	const struct perf_trace *handle;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	return (handle->magic4cc == HSA_PERF_MAGIC4CC) ?
		HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}
|
|
|
|
|
|
|
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
/* Starts tracing operation on a previously established set of performance counters */
|
|
|
|
|
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStartTrace(HSATraceId TraceId,
|
|
|
|
|
void *TraceBuffer,
|
|
|
|
|
HSAuint64 TraceBufferSizeBytes)
|
2014-07-29 11:16:00 +03:00
|
|
|
{
|
2017-02-23 18:26:23 -05:00
|
|
|
struct perf_trace *trace =
|
|
|
|
|
(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
|
|
|
|
uint32_t i;
|
|
|
|
|
int32_t j;
|
|
|
|
|
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);
|
|
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
if (TraceId == 0 || !TraceBuffer || TraceBufferSizeBytes == 0)
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_INVALID_PARAMETER;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_HANDLE;
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-02-23 18:26:23 -05:00
|
|
|
for (i = 0; i < trace->num_blocks; i++) {
|
|
|
|
|
ret = perf_trace_ioctl(&trace->blocks[i],
|
|
|
|
|
PERF_EVENT_IOC_ENABLE);
|
|
|
|
|
if (ret != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (ret != HSAKMT_STATUS_SUCCESS) {
|
|
|
|
|
/* Disable enabled blocks before returning the failure. */
|
|
|
|
|
j = (int32_t)i;
|
|
|
|
|
while (--j >= 0)
|
|
|
|
|
perf_trace_ioctl(&trace->blocks[j],
|
|
|
|
|
PERF_EVENT_IOC_DISABLE);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
trace->state = PERF_TRACE_STATE__STARTED;
|
2017-02-23 18:26:23 -05:00
|
|
|
trace->buf = TraceBuffer;
|
|
|
|
|
trace->buf_size = TraceBufferSizeBytes;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_SUCCESS;
|
2014-07-29 11:16:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
/*Forces an update of all the counters that a previously started trace operation has registered */
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcQueryTrace(HSATraceId TraceId)
|
2014-07-29 11:16:00 +03:00
|
|
|
{
|
2017-02-23 18:26:23 -05:00
|
|
|
struct perf_trace *trace =
|
|
|
|
|
(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
|
|
|
|
uint32_t i, j;
|
|
|
|
|
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
|
|
|
|
uint64_t *buf;
|
|
|
|
|
uint64_t buf_filled = 0;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (TraceId == 0)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_PARAMETER;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_HANDLE;
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-02-23 18:26:23 -05:00
|
|
|
buf = (uint64_t *)trace->buf;
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("[%s] Trace buffer(%p): ", __func__, buf);
|
2017-02-23 18:26:23 -05:00
|
|
|
for (i = 0; i < trace->num_blocks; i++)
|
|
|
|
|
for (j = 0; j < trace->blocks[i].num_counters; j++) {
|
|
|
|
|
buf_filled += sizeof(uint64_t);
|
|
|
|
|
if (buf_filled > trace->buf_size)
|
|
|
|
|
return HSAKMT_STATUS_NO_MEMORY;
|
|
|
|
|
ret = query_trace(trace->blocks[i].perf_event_fd[j],
|
|
|
|
|
buf);
|
|
|
|
|
if (ret != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return ret;
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("%lu_", *buf);
|
2017-02-23 18:26:23 -05:00
|
|
|
buf++;
|
|
|
|
|
}
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("\n");
|
2017-02-23 18:26:23 -05:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
return HSAKMT_STATUS_SUCCESS;
|
2014-07-29 11:16:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2017-04-20 08:25:00 -04:00
|
|
|
/* Stops tracing operation on a previously established set of performance counters */
|
|
|
|
|
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId)
|
2014-07-29 11:16:00 +03:00
|
|
|
{
|
2017-02-23 18:26:23 -05:00
|
|
|
struct perf_trace *trace =
|
|
|
|
|
(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
|
|
|
|
uint32_t i;
|
|
|
|
|
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-11-21 11:47:25 -05:00
|
|
|
pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);
|
|
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (TraceId == 0)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_PARAMETER;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
|
|
|
|
return HSAKMT_STATUS_INVALID_HANDLE;
|
2014-07-29 13:59:07 +03:00
|
|
|
|
2017-02-23 18:26:23 -05:00
|
|
|
for (i = 0; i < trace->num_blocks; i++) {
|
|
|
|
|
ret = perf_trace_ioctl(&trace->blocks[i],
|
|
|
|
|
PERF_EVENT_IOC_DISABLE);
|
|
|
|
|
if (ret != HSAKMT_STATUS_SUCCESS)
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-03 09:59:21 -05:00
|
|
|
trace->state = PERF_TRACE_STATE__STOPPED;
|
2014-07-29 11:16:00 +03:00
|
|
|
|
2017-02-23 18:26:23 -05:00
|
|
|
return ret;
|
2014-07-29 11:16:00 +03:00
|
|
|
}
|
2026-01-30 09:42:25 +08:00
|
|
|
|
|
|
|
|
/* Context-less variant: forwards to hsaKmtPmcGetCounterPropertiesCtx()
 * using the global hsakmt_primary_kfd_ctx and returns its result.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
						      HsaCounterProperties **CounterProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtPmcGetCounterPropertiesCtx(&hsakmt_primary_kfd_ctx,
						  NodeId, CounterProperties);
	return status;
}
|
|
|
|
|
|
|
|
|
|
/* Context-less variant: forwards to hsaKmtPmcRegisterTraceCtx() using the
 * global hsakmt_primary_kfd_ctx and returns its result.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
					       HSAuint32 NumberOfCounters,
					       HsaCounter *Counters,
					       HsaPmcTraceRoot *TraceRoot)
{
	HSAKMT_STATUS status;

	status = hsaKmtPmcRegisterTraceCtx(&hsakmt_primary_kfd_ctx, NodeId,
					   NumberOfCounters, Counters, TraceRoot);
	return status;
}
|
|
|
|
|
|
|
|
|
|
/* Context-less variant: forwards to hsaKmtPmcUnregisterTraceCtx() using the
 * global hsakmt_primary_kfd_ctx and returns its result.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
						 HSATraceId TraceId)
{
	HSAKMT_STATUS status;

	status = hsaKmtPmcUnregisterTraceCtx(&hsakmt_primary_kfd_ctx,
					     NodeId, TraceId);
	return status;
}
|
|
|
|
|
|
|
|
|
|
/* Context-less variant: forwards to hsaKmtPmcAcquireTraceAccessCtx() using
 * the global hsakmt_primary_kfd_ctx and returns its result.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	HSAKMT_STATUS status;

	status = hsaKmtPmcAcquireTraceAccessCtx(&hsakmt_primary_kfd_ctx,
						NodeId, TraceId);
	return status;
}
|