Add HSA dispatch packet field (`packet`) to rocprofiler_callback_data_t
Change-Id: I3cbe256130fa23dd24b094f27286d566ed339230
[ROCm/rocprofiler commit: b8fc3818de]
This commit is contained in:
@@ -67,7 +67,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#define ROCPROFILER_VERSION_MAJOR 1
|
||||
#define ROCPROFILER_VERSION_MINOR 0
|
||||
#define ROCPROFILER_VERSION_MINOR 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -237,7 +237,7 @@ typedef struct {
|
||||
uint32_t agent_index; // GPU index
|
||||
const hsa_queue_t* queue; // HSA queue
|
||||
uint64_t queue_index; // Index in the queue
|
||||
uint64_t kernel_object; // Kernel object handle
|
||||
const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet
|
||||
const char* kernel_name; // Kernel name
|
||||
const rocprofiler_dispatch_record_t* record; // Dispatch record
|
||||
} rocprofiler_callback_data_t;
|
||||
|
||||
@@ -101,7 +101,7 @@ class InterceptQueue {
|
||||
obj->agent_info_->dev_index,
|
||||
obj->queue_,
|
||||
user_que_idx,
|
||||
dispatch_packet->kernel_object,
|
||||
dispatch_packet,
|
||||
kernel_name,
|
||||
record};
|
||||
hsa_status_t status = dispatch_callback_(&data, callback_data_, &group);
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
# FETCH_SIZE, kilobytes
|
||||
# The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
|
||||
<metric name="FETCH_SIZE" expr=(TCC_MC_RDREQ_sum*32)/1024 ></metric>
|
||||
# WRITE_SIZE
|
||||
# WRITE_SIZE, kilobytes
|
||||
# The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
|
||||
<metric name="WRITE_SIZE" expr=(TCC_MC_WRREQ_sum*32)/1024 ></metric>
|
||||
</gfx8>
|
||||
@@ -41,7 +41,7 @@
|
||||
# FETCH_SIZE, kilobytes
|
||||
# The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
|
||||
<metric name="FETCH_SIZE" expr=(TCC_EA_RDREQ_32B_sum*32+(TCC_EA_RDREQ_sum-TCC_EA_RDREQ_32B_sum)*64)/1024 ></metric>
|
||||
# WRITE_SIZE
|
||||
# WRITE_SIZE, kilobytes
|
||||
# The total kilobytes written to the video memory. This is measured with all extra fetches and any cache or memory effects taken into account.
|
||||
<metric name="WRITE_SIZE" expr=((TCC_EA_WRREQ_sum-TCC_EA_WRREQ_64B_sum)*32+TCC_EA_WRREQ_64B_sum*64)/1024 ></metric>
|
||||
</gfx9>
|
||||
|
||||
Reference in a new issue
Block a user