Implement Start/Stop/Query Trace

StartTrace and StopTrace send ioctl requests to enable/disable performance
counters. QueryTrace reads the counter from the perf_event fd.

Change-Id: Ibf79675bc23fcf129371bfd100f8e262121bc684


[ROCm/ROCR-Runtime commit: e17c67f049]
Cette révision appartient à :
Amber Lin
2017-02-23 18:26:23 -05:00
Parent e7d887e83b
révision 492c9623eb
+98 -11
Voir le fichier
@@ -56,6 +56,8 @@ struct perf_trace {
uint32_t gpu_id;
enum perf_trace_state state;
uint32_t num_blocks;
void *buf;
uint64_t buf_size;
struct perf_trace_block blocks[0];
};
@@ -92,10 +94,14 @@ static ssize_t readn(int fd, void *buf, size_t n)
while (left) {
bytes = read(fd, buf, left);
if (bytes < 0 && errno == EINTR) /* read got interrupted */
continue;
if (bytes <= 0)
if (!bytes) /* reach EOF */
return (n - left);
if (bytes < 0 ) {
if (errno == EINTR) /* read got interrupted */
continue;
else
return -errno;
}
left -= bytes;
buf = VOID_PTR_ADD(buf, bytes);
}
@@ -144,11 +150,9 @@ HSAKMT_STATUS init_counter_props(unsigned int NumNodes)
return HSAKMT_STATUS_NO_MEMORY;
counter_props_count = NumNodes;
alloc_pmc_blocks();
init_lockf_perf_section();
return HSAKMT_STATUS_SUCCESS;
return init_lockf_perf_section();
}
void destroy_counter_props(void)
@@ -214,7 +218,13 @@ static HSAKMT_STATUS update_block_slots(enum perf_trace_action action,
if (lockf(amd_hsa_thunk_lock_fd, F_TLOCK, sizeof(lockf_tbl)) < 0)
return HSAKMT_STATUS_ERROR;
if (readn(amd_hsa_thunk_lock_fd, &lockf_tbl, sizeof(lockf_tbl)) < 0) {
if (lseek(amd_hsa_thunk_lock_fd, 0, SEEK_SET)) {
ret = HSAKMT_STATUS_ERROR;
goto out;
}
if (readn(amd_hsa_thunk_lock_fd, &lockf_tbl, sizeof(lockf_tbl))
!= sizeof(lockf_tbl)) {
ret = HSAKMT_STATUS_ERROR;
goto out;
}
@@ -344,6 +354,34 @@ static HSAKMT_STATUS open_perf_event_fd(struct perf_trace_block *block)
return ret;
}
static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block,
uint32_t cmd)
{
uint32_t i;
for (i = 0; i < block->num_counters; i++) {
if (block->perf_event_fd[i] < 0)
return HSAKMT_STATUS_UNAVAILABLE;
if (ioctl(block->perf_event_fd[i], cmd, NULL))
return HSAKMT_STATUS_ERROR;
}
return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
{
struct perf_counts_values content;
if (fd < 0)
return HSAKMT_STATUS_ERROR;
if(readn(fd, &content, sizeof(content)) != sizeof(content))
return HSAKMT_STATUS_ERROR;
*buf = content.val;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcGetCounterProperties(
@@ -706,7 +744,11 @@ hsaKmtPmcStartTrace(
HSAuint64 TraceBufferSizeBytes //IN (page aligned)
)
{
struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
struct perf_trace *trace =
(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
uint32_t i;
int32_t j;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
if (TraceId == 0 || TraceBuffer == NULL || TraceBufferSizeBytes == 0)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -714,7 +756,24 @@ hsaKmtPmcStartTrace(
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
return HSAKMT_STATUS_INVALID_HANDLE;
for (i = 0; i < trace->num_blocks; i++) {
ret = perf_trace_ioctl(&trace->blocks[i],
PERF_EVENT_IOC_ENABLE);
if (ret != HSAKMT_STATUS_SUCCESS)
break;
}
if (ret != HSAKMT_STATUS_SUCCESS) {
/* Disable enabled blocks before returning the failure. */
j = (int32_t)i;
while (--j >= 0)
perf_trace_ioctl(&trace->blocks[j],
PERF_EVENT_IOC_DISABLE);
return ret;
}
trace->state = PERF_TRACE_STATE__STARTED;
trace->buf = TraceBuffer;
trace->buf_size = TraceBufferSizeBytes;
return HSAKMT_STATUS_SUCCESS;
}
@@ -730,7 +789,12 @@ hsaKmtPmcQueryTrace(
HSATraceId TraceId //IN
)
{
struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
struct perf_trace *trace =
(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
uint32_t i, j;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
uint64_t *buf;
uint64_t buf_filled = 0;
if (TraceId == 0)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -738,6 +802,19 @@ hsaKmtPmcQueryTrace(
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
return HSAKMT_STATUS_INVALID_HANDLE;
buf = (uint64_t *)trace->buf;
for (i = 0; i < trace->num_blocks; i++)
for (j = 0; j < trace->blocks[i].num_counters; j++) {
buf_filled += sizeof(uint64_t);
if (buf_filled > trace->buf_size)
return HSAKMT_STATUS_NO_MEMORY;
ret = query_trace(trace->blocks[i].perf_event_fd[j],
buf);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
buf++;
}
return HSAKMT_STATUS_SUCCESS;
}
@@ -752,7 +829,10 @@ hsaKmtPmcStopTrace(
HSATraceId TraceId //IN
)
{
struct perf_trace *trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
struct perf_trace *trace =
(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
uint32_t i;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
if (TraceId == 0)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -760,7 +840,14 @@ hsaKmtPmcStopTrace(
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
return HSAKMT_STATUS_INVALID_HANDLE;
for (i = 0; i < trace->num_blocks; i++) {
ret = perf_trace_ioctl(&trace->blocks[i],
PERF_EVENT_IOC_DISABLE);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
}
trace->state = PERF_TRACE_STATE__STOPPED;
return HSAKMT_STATUS_SUCCESS;
return ret;
}