adding major and minor library versions
[ROCm/rocprofiler commit: 6560364df1]
This commit is contained in:
@@ -66,10 +66,19 @@
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define ROCPROFILER_VERSION_MAJOR 1
|
||||
#define ROCPROFILER_VERSION_MINOR 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returning library version
|
||||
|
||||
// Returns the major component of the library version (ROCPROFILER_VERSION_MAJOR).
// Declared with an explicit (void) so that C callers get a real prototype.
uint32_t rocprofiler_version_major(void);
// Returns the minor component of the library version (ROCPROFILER_VERSION_MINOR).
uint32_t rocprofiler_version_minor(void);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returning the error string method
|
||||
|
||||
@@ -305,6 +314,7 @@ typedef union {
|
||||
|
||||
// Profiling info data
|
||||
typedef struct {
|
||||
uint32_t agent_idx;
|
||||
rocprofiler_info_kind_t kind; // info data kind
|
||||
union {
|
||||
struct {
|
||||
@@ -327,20 +337,20 @@ typedef struct {
|
||||
|
||||
// Return the info for a given info kind
|
||||
hsa_status_t rocprofiler_get_info(
|
||||
hsa_agent_t agent, // GFXIP handle
|
||||
const hsa_agent_t* agent, // [in] GFXIP handle
|
||||
rocprofiler_info_kind_t kind, // kind of iterated info
|
||||
void *data); // [in/out] returned data
|
||||
|
||||
// Iterate over the info for a given info kind, and invoke an application-defined callback on every iteration
|
||||
hsa_status_t rocprofiler_iterate_info(
|
||||
hsa_agent_t agent, // GFXIP handle
|
||||
const hsa_agent_t* agent, // [in] GFXIP handle
|
||||
rocprofiler_info_kind_t kind, // kind of iterated info
|
||||
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback
|
||||
void *data); // [in/out] data passed to callback
|
||||
|
||||
// Iterate over the info for a given info query, and invoke an application-defined callback on every iteration
|
||||
hsa_status_t rocprofiler_query_info(
|
||||
hsa_agent_t agent, // GFXIP handle
|
||||
const hsa_agent_t *agent, // [in] GFXIP handle
|
||||
rocprofiler_info_query_t query, // iterated info query
|
||||
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback
|
||||
void *data); // [in/out] data passed to callback
|
||||
|
||||
@@ -190,6 +190,7 @@ class Context {
|
||||
// Register input features to not duplicate by features referencing
|
||||
for (unsigned i = 0; i < info_count; ++i) {
|
||||
rocprofiler_feature_t* info = &info_array[i];
|
||||
if (!info->name) EXC_RAISING(HSA_STATUS_ERROR, "input feature name is NULL");
|
||||
info_map_[info->name] = info;
|
||||
}
|
||||
|
||||
|
||||
@@ -199,8 +199,10 @@ class MetricsDict {
|
||||
for (unsigned block_index = 0; block_index < query.instance_count; ++block_index) {
|
||||
std::ostringstream full_name;
|
||||
full_name << name << '[' << block_index << ']';
|
||||
std::ostringstream block_insance;
|
||||
block_insance << block_name << "[" << block_index << "]";
|
||||
std::ostringstream alias;
|
||||
alias << block_name << "[" << block_index << "]:" << event_str;
|
||||
alias << block_insance.str() << ":" << event_str;
|
||||
const counter_t counter = {full_name.str(), {block_id, block_index, event_id}};
|
||||
AddMetric(full_name.str(), alias.str(), counter);
|
||||
}
|
||||
@@ -211,8 +213,10 @@ class MetricsDict {
|
||||
}
|
||||
} else {
|
||||
xml::Expr* expr_obj = new xml::Expr(expr_str, new ExprCache(&cache_));
|
||||
#if 0
|
||||
std::cout << "# " << descr << std::endl;
|
||||
std::cout << name << "=" << expr_obj->String() << "\n" << std::endl;
|
||||
#endif
|
||||
counters_vec_t counters_vec;
|
||||
for (const std::string var : expr_obj->GetVars()) {
|
||||
auto it = cache_.find(var);
|
||||
|
||||
@@ -153,6 +153,10 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
|
||||
// HSA-runtime tool on-unload method
|
||||
PUBLIC_API void OnUnload() {
  // The whole unload step is delegated to rocprofiler::RestoreHsaApi().
  rocprofiler::RestoreHsaApi();
}
|
||||
|
||||
// Returns library version
|
||||
PUBLIC_API uint32_t rocprofiler_version_major() {
  // Reports the value of the ROCPROFILER_VERSION_MAJOR macro.
  const uint32_t major_version = ROCPROFILER_VERSION_MAJOR;
  return major_version;
}
|
||||
PUBLIC_API uint32_t rocprofiler_version_minor() {
  // Reports the value of the ROCPROFILER_VERSION_MINOR macro.
  const uint32_t minor_version = ROCPROFILER_VERSION_MINOR;
  return minor_version;
}
|
||||
|
||||
// Returns the last error message
|
||||
PUBLIC_API hsa_status_t rocprofiler_error_string(const char** str) {
|
||||
API_METHOD_PREFIX
|
||||
@@ -320,16 +324,17 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(
|
||||
|
||||
// Return the info for a given info kind
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_info(
|
||||
hsa_agent_t agent,
|
||||
const hsa_agent_t *agent,
|
||||
rocprofiler_info_kind_t kind,
|
||||
void *data)
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
if (agent == NULL) EXC_RAISING(HSA_STATUS_ERROR, "NULL agent");
|
||||
uint32_t* result_32bit_ptr = reinterpret_cast<uint32_t*>(data);
|
||||
|
||||
switch (kind) {
|
||||
case ROCPROFILER_INFO_KIND_METRIC_COUNT:
|
||||
*result_32bit_ptr = rocprofiler::GetMetrics(agent)->Size();
|
||||
*result_32bit_ptr = rocprofiler::GetMetrics(*agent)->Size();
|
||||
break;
|
||||
case ROCPROFILER_INFO_KIND_TRACE_COUNT:
|
||||
*result_32bit_ptr = 1;
|
||||
@@ -342,44 +347,63 @@ PUBLIC_API hsa_status_t rocprofiler_get_info(
|
||||
|
||||
// Iterate over the info for a given info kind, and invoke an application-defined callback on every iteration
|
||||
PUBLIC_API hsa_status_t rocprofiler_iterate_info(
|
||||
hsa_agent_t agent,
|
||||
const hsa_agent_t* agent,
|
||||
rocprofiler_info_kind_t kind,
|
||||
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data),
|
||||
void *data)
|
||||
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void* data),
|
||||
void* data)
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
|
||||
rocprofiler_info_data_t info{};
|
||||
info.kind = kind;
|
||||
uint32_t agent_idx = 0;
|
||||
uint32_t agent_max = 0;
|
||||
const rocprofiler::util::AgentInfo* agent_info = NULL;
|
||||
|
||||
switch (kind) {
|
||||
case ROCPROFILER_INFO_KIND_METRIC:
|
||||
{
|
||||
const rocprofiler::MetricsDict* dict = rocprofiler::GetMetrics(agent);
|
||||
rocprofiler::MetricsDict::const_iterator_t it = dict->Begin();
|
||||
rocprofiler::MetricsDict::const_iterator_t end = dict->End();
|
||||
while (it != end) {
|
||||
const rocprofiler::Metric* metric = it->second;
|
||||
std::string name = metric->GetName();
|
||||
const auto* expr = metric->GetExpr();
|
||||
std::string description = "Performance metric " + name + " " + ((expr == NULL) ? "basic" : "= " + expr->String());
|
||||
info.metric.name = strdup(name.c_str());
|
||||
info.metric.description = strdup(description.c_str());
|
||||
status = callback(info, data);
|
||||
if (agent != NULL) {
|
||||
agent_info = hsa_rsrc->GetAgentInfo(*agent);
|
||||
agent_idx = agent_info->dev_index;
|
||||
agent_max = agent_idx + 1;
|
||||
}
|
||||
|
||||
++it;
|
||||
while (hsa_rsrc->GetGpuAgentInfo(agent_idx, &agent_info)) {
|
||||
info.agent_idx = agent_idx;
|
||||
|
||||
switch (kind) {
|
||||
case ROCPROFILER_INFO_KIND_METRIC:
|
||||
{
|
||||
const rocprofiler::MetricsDict* dict = rocprofiler::GetMetrics(agent_info->dev_id);
|
||||
rocprofiler::MetricsDict::const_iterator_t it = dict->Begin();
|
||||
rocprofiler::MetricsDict::const_iterator_t end = dict->End();
|
||||
while (it != end) {
|
||||
const rocprofiler::Metric* metric = it->second;
|
||||
std::string name = metric->GetName();
|
||||
//std::string descr = metric->GetDescr();
|
||||
const auto* expr = metric->GetExpr();
|
||||
std::string description = "Performance metric " + name + " " + ((expr == NULL) ? "basic" : "= " + expr->String());
|
||||
info.metric.name = strdup(name.c_str());
|
||||
info.metric.description = strdup(description.c_str());
|
||||
status = callback(info, data);
|
||||
if (status != HSA_STATUS_SUCCESS) break;
|
||||
++it;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ROCPROFILER_INFO_KIND_TRACE:
|
||||
{
|
||||
info.trace.name = strdup("TT");
|
||||
info.trace.description = strdup("Thread Trace");
|
||||
info.trace.parameter_count = 5;
|
||||
status = callback(info, data);
|
||||
if (status != HSA_STATUS_SUCCESS) break;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "unknown info kind(" << kind << ")");
|
||||
}
|
||||
case ROCPROFILER_INFO_KIND_TRACE:
|
||||
{
|
||||
info.trace.name = strdup("TT");
|
||||
info.trace.description = strdup("Thread Trace");
|
||||
info.trace.parameter_count = 5;
|
||||
status = callback(info, data);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "unknown info kind(" << kind << ")");
|
||||
|
||||
++agent_idx;
|
||||
if (agent_idx == agent_max) break;
|
||||
}
|
||||
|
||||
if (status == HSA_STATUS_INFO_BREAK) status = HSA_STATUS_SUCCESS;
|
||||
@@ -390,7 +414,7 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_info(
|
||||
|
||||
// Iterate over the info for a given info query, and invoke an application-defined callback on every iteration
|
||||
PUBLIC_API hsa_status_t rocprofiler_query_info(
|
||||
hsa_agent_t agent,
|
||||
const hsa_agent_t *agent,
|
||||
rocprofiler_info_query_t query,
|
||||
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data),
|
||||
void *data)
|
||||
|
||||
@@ -91,6 +91,8 @@ class Logger {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (messaging) {
|
||||
message_[GetTid()] = "";
|
||||
} else if (streaming_) {
|
||||
Put("\n");
|
||||
}
|
||||
messaging_ = messaging;
|
||||
streaming_ = messaging;
|
||||
|
||||
@@ -30,6 +30,7 @@ target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_R
|
||||
target_link_libraries( ${EXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/*.xml ${PROJECT_BINARY_DIR}" )
|
||||
execute_process ( COMMAND sh -xc "mkdir -p ${PROJECT_BINARY_DIR}/RESULTS" )
|
||||
|
||||
## Build test library
|
||||
set ( TEST_LIB "tool" )
|
||||
|
||||
@@ -322,6 +322,11 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
return status;
|
||||
}
|
||||
|
||||
// Info-iteration callback: prints one metric entry to stdout in the form
// "gpu-agent<index>.<metric name> : <metric description>".
//   info - entry handed in by the iterator; agent_idx, metric.name and
//          metric.description are read
//   arg  - user data pointer passed through by the iterator, not used here
// Always returns HSA_STATUS_SUCCESS.
static hsa_status_t info_callback(const rocprofiler_info_data_t info, void * arg) {
  (void)arg;  // unused; silences unused-parameter warnings
  // agent_idx is a uint32_t, so it is printed with an unsigned conversion.
  printf(" gpu-agent%u.%s : %s\n", info.agent_idx, info.metric.name, info.metric.description);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Tool constructor
|
||||
CONSTRUCTOR_API void constructor()
|
||||
{
|
||||
@@ -337,6 +342,11 @@ CONSTRUCTOR_API void constructor()
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] =
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
|
||||
|
||||
if (getenv("ROCP_INFO") != NULL) {
|
||||
rocprofiler_iterate_info(NULL, ROCPROFILER_INFO_KIND_METRIC, info_callback, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
// Set output file
|
||||
result_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
if (result_prefix != NULL) {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
<metric name=SQ_CYCLES,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],CPC_ALWAYS_COUNT,CPC_ME1_STALL_WAIT_ON_RCIU_READ,GPU_BUSY,MEM_BUSY,SFETCH_INSTS,VALU_INSTS,FETCH_SIZE,VWRITE_INSTS ></metric>
|
||||
<metric name=SQ_CYCLES,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],CPC_ALWAYS_COUNT,CPC_ME1_STALL_WAIT_ON_RCIU_READ,GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts,WriteSize
|
||||
></metric>
|
||||
|
||||
<trace name=SQTT copy=true >
|
||||
<parameters
|
||||
|
||||
@@ -19,10 +19,8 @@
|
||||
<metric name=SQ_WAIT_INST_LDS block=SQ event=61 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
|
||||
<metric name=SQ_ACTIVE_INST_VALU block=SQ event=69 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic)"></metric>
|
||||
<metric name=SQ_INST_CYCLES_SALU block=SQ event=86 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
|
||||
|
||||
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=89 ></metric>
|
||||
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=90 ></metric>
|
||||
|
||||
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=89 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
|
||||
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=90 descr="Maximum number of thread-cycles VALU operations that could have been executed given the instruction mix (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd, emulated)"></metric>
|
||||
<metric name=SQ_LDS_BANK_CONFLICT block=SQ event=97 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
|
||||
|
||||
<metric name=TA_BUSY block=TA event=15 ></metric>
|
||||
@@ -83,10 +81,8 @@
|
||||
<metric name=SQ_WAIT_INST_LDS block=SQ event=63 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
|
||||
<metric name=SQ_ACTIVE_INST_VALU block=SQ event=71 descr="regspec 71? Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic)"></metric>
|
||||
<metric name=SQ_INST_CYCLES_SALU block=SQ event=84 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
|
||||
|
||||
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=85 ></metric>
|
||||
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=86 ></metric>
|
||||
|
||||
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=85 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
|
||||
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=86 descr="Maximum number of thread-cycles VALU operations that could have been executed given the instruction mix (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd, emulated)"></metric>
|
||||
<metric name=SQ_LDS_BANK_CONFLICT block=SQ event=93 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
|
||||
|
||||
<metric name=TA_BUSY block=TA event=15 ></metric>
|
||||
@@ -95,11 +91,16 @@
|
||||
|
||||
<metric name=TCC_CYCLE block=TCC event=1 ></metric>
|
||||
<metric name=TCC_REQ block=TCC event=3 ></metric>
|
||||
<metric name=TCC_HIT block=TCC event=17 ></metric>
|
||||
<metric name=TCC_MISS block=TCC event=19 ></metric>
|
||||
<metric name=TCC_WRITEBACK block=TCC event=22 ></metric>
|
||||
<metric name=TCC_EA_RDREQ block=TCC event=36 ></metric>
|
||||
<metric name=TCC_EA_RDREQ_32B block=TCC event=37 ></metric>
|
||||
<metric name=TCC_HIT block=TCC event=20 ></metric>
|
||||
<metric name=TCC_MISS block=TCC event=22 ></metric>
|
||||
<metric name=TCC_WRITEBACK block=TCC event=25 ></metric>
|
||||
<metric name=TCC_EA_WRREQ block=TCC event=29 ></metric>
|
||||
<metric name=TCC_EA_WRREQ_64B block=TCC event=30 ></metric>
|
||||
<metric name=TCC_EA_WRREQ_STALL block=TCC event=33 ></metric>
|
||||
<metric name=TCC_EA_RDREQ block=TCC event=41 ></metric>
|
||||
<metric name=TCC_EA_RDREQ_32B block=TCC event=42 ></metric>
|
||||
|
||||
<metric name=TCP_TA_DATA_STALL_CYCLES block=TCP event=6 descr="TCP stalls TA data interface. Now Windowed."></metric>
|
||||
|
||||
<metric name=CPC_ALWAYS_COUNT block=CPC event=0 ></metric>
|
||||
<metric name=CPC_ME1_STALL_WAIT_ON_RCIU_READ block=CPC event=8 ></metric>
|
||||
@@ -127,7 +128,7 @@
|
||||
# GPU_BUSY, percentage
|
||||
# The percentage of time GPU was busy.
|
||||
<metric
|
||||
name=GPU_BUSY
|
||||
name="GPUBusy"
|
||||
expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT
|
||||
descr="The percentage of time GPU was busy."
|
||||
></metric>
|
||||
@@ -169,7 +170,7 @@
|
||||
|
||||
# VWriteInsts The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory.
|
||||
<metric
|
||||
name=VWriteInsts
|
||||
name="VWriteInsts"
|
||||
expr=(SQ_INSTS_VMEM_WR-TA_FLAT_WRITE_WAVEFRONTS_sum)/SQ_WAVES
|
||||
descr="The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory."
|
||||
></metric>
|
||||
@@ -219,7 +220,7 @@
|
||||
# SALUBusy The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal).
|
||||
<metric
|
||||
name="SALUBusy"
|
||||
expr=100*SQ_INST_CYCLES_SALU*4/(NUM_SIMDS/NUM_SHADER_ENGINES)/GRBM_GUI_ACTIVE
|
||||
expr=100*SQ_INST_CYCLES_SALU*4/NUM_SIMDS/GRBM_GUI_ACTIVE
|
||||
descr="The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."
|
||||
></metric>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user