adding major and minor library versions

[ROCm/rocprofiler commit: 6560364df1]
This commit is contained in:
Evgeny
2018-02-07 09:41:17 -06:00
parent 4b7e5f51da
commit 6960dcd77e
9 changed files with 107 additions and 53 deletions
+13 -3
View File
@@ -66,10 +66,19 @@
#include <hsa_ven_amd_aqlprofile.h>
#include <stdint.h>
#define ROCPROFILER_VERSION_MAJOR 1
#define ROCPROFILER_VERSION_MINOR 0
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Returning library version
uint32_t rocprofiler_version_major();
uint32_t rocprofiler_version_minor();
////////////////////////////////////////////////////////////////////////////////
// Method returning the error string
@@ -305,6 +314,7 @@ typedef union {
// Profiling info data
typedef struct {
uint32_t agent_idx;
rocprofiler_info_kind_t kind; // info data kind
union {
struct {
@@ -327,20 +337,20 @@ typedef struct {
// Return the info for a given info kind
hsa_status_t rocprofiler_get_info(
hsa_agent_t agent, // GFXIP handle
const hsa_agent_t* agent, // [in] GFXIP handle
rocprofiler_info_kind_t kind, // kind of iterated info
void *data); // [in/out] returned data
// Iterate over the info for a given info kind, and invoke an application-defined callback on every iteration
hsa_status_t rocprofiler_iterate_info(
hsa_agent_t agent, // GFXIP handle
const hsa_agent_t* agent, // [in] GFXIP handle
rocprofiler_info_kind_t kind, // kind of iterated info
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback
void *data); // [in/out] data passed to callback
// Iterate over the info for a given info query, and invoke an application-defined callback on every iteration
hsa_status_t rocprofiler_query_info(
hsa_agent_t agent, // GFXIP handle
const hsa_agent_t *agent, // [in] GFXIP handle
rocprofiler_info_query_t query, // iterated info query
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback
void *data); // [in/out] data passed to callback
+1
View File
@@ -190,6 +190,7 @@ class Context {
// Register input features to not duplicate by features referencing
for (unsigned i = 0; i < info_count; ++i) {
rocprofiler_feature_t* info = &info_array[i];
if (!info->name) EXC_RAISING(HSA_STATUS_ERROR, "input feature name is NULL");
info_map_[info->name] = info;
}
+5 -1
View File
@@ -199,8 +199,10 @@ class MetricsDict {
for (unsigned block_index = 0; block_index < query.instance_count; ++block_index) {
std::ostringstream full_name;
full_name << name << '[' << block_index << ']';
std::ostringstream block_insance;
block_insance << block_name << "[" << block_index << "]";
std::ostringstream alias;
alias << block_name << "[" << block_index << "]:" << event_str;
alias << block_insance.str() << ":" << event_str;
const counter_t counter = {full_name.str(), {block_id, block_index, event_id}};
AddMetric(full_name.str(), alias.str(), counter);
}
@@ -211,8 +213,10 @@ class MetricsDict {
}
} else {
xml::Expr* expr_obj = new xml::Expr(expr_str, new ExprCache(&cache_));
#if 0
std::cout << "# " << descr << std::endl;
std::cout << name << "=" << expr_obj->String() << "\n" << std::endl;
#endif
counters_vec_t counters_vec;
for (const std::string var : expr_obj->GetVars()) {
auto it = cache_.find(var);
+56 -32
View File
@@ -153,6 +153,10 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
// HSA-runtime tool on-unload entry point; delegates to rocprofiler::RestoreHsaApi()
PUBLIC_API void OnUnload() {
  rocprofiler::RestoreHsaApi();
}
// Returns library version
// Reports the major component of the library version (ROCPROFILER_VERSION_MAJOR)
PUBLIC_API uint32_t rocprofiler_version_major() {
  const uint32_t major_version = ROCPROFILER_VERSION_MAJOR;
  return major_version;
}
// Reports the minor component of the library version (ROCPROFILER_VERSION_MINOR)
PUBLIC_API uint32_t rocprofiler_version_minor() {
  const uint32_t minor_version = ROCPROFILER_VERSION_MINOR;
  return minor_version;
}
// Returns the last error message
PUBLIC_API hsa_status_t rocprofiler_error_string(const char** str) {
API_METHOD_PREFIX
@@ -320,16 +324,17 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(
// Return the info for a given info kind
PUBLIC_API hsa_status_t rocprofiler_get_info(
hsa_agent_t agent,
const hsa_agent_t *agent,
rocprofiler_info_kind_t kind,
void *data)
{
API_METHOD_PREFIX
if (agent == NULL) EXC_RAISING(HSA_STATUS_ERROR, "NULL agent");
uint32_t* result_32bit_ptr = reinterpret_cast<uint32_t*>(data);
switch (kind) {
case ROCPROFILER_INFO_KIND_METRIC_COUNT:
*result_32bit_ptr = rocprofiler::GetMetrics(agent)->Size();
*result_32bit_ptr = rocprofiler::GetMetrics(*agent)->Size();
break;
case ROCPROFILER_INFO_KIND_TRACE_COUNT:
*result_32bit_ptr = 1;
@@ -342,44 +347,63 @@ PUBLIC_API hsa_status_t rocprofiler_get_info(
// Iterate over the info for a given info kind, and invoke an application-defined callback on every iteration
PUBLIC_API hsa_status_t rocprofiler_iterate_info(
hsa_agent_t agent,
const hsa_agent_t* agent,
rocprofiler_info_kind_t kind,
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data),
void *data)
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void* data),
void* data)
{
API_METHOD_PREFIX
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
rocprofiler_info_data_t info{};
info.kind = kind;
uint32_t agent_idx = 0;
uint32_t agent_max = 0;
const rocprofiler::util::AgentInfo* agent_info = NULL;
switch (kind) {
case ROCPROFILER_INFO_KIND_METRIC:
{
const rocprofiler::MetricsDict* dict = rocprofiler::GetMetrics(agent);
rocprofiler::MetricsDict::const_iterator_t it = dict->Begin();
rocprofiler::MetricsDict::const_iterator_t end = dict->End();
while (it != end) {
const rocprofiler::Metric* metric = it->second;
std::string name = metric->GetName();
const auto* expr = metric->GetExpr();
std::string description = "Performance metric " + name + " " + ((expr == NULL) ? "basic" : "= " + expr->String());
info.metric.name = strdup(name.c_str());
info.metric.description = strdup(description.c_str());
status = callback(info, data);
if (agent != NULL) {
agent_info = hsa_rsrc->GetAgentInfo(*agent);
agent_idx = agent_info->dev_index;
agent_max = agent_idx + 1;
}
++it;
while (hsa_rsrc->GetGpuAgentInfo(agent_idx, &agent_info)) {
info.agent_idx = agent_idx;
switch (kind) {
case ROCPROFILER_INFO_KIND_METRIC:
{
const rocprofiler::MetricsDict* dict = rocprofiler::GetMetrics(agent_info->dev_id);
rocprofiler::MetricsDict::const_iterator_t it = dict->Begin();
rocprofiler::MetricsDict::const_iterator_t end = dict->End();
while (it != end) {
const rocprofiler::Metric* metric = it->second;
std::string name = metric->GetName();
//std::string descr = metric->GetDescr();
const auto* expr = metric->GetExpr();
std::string description = "Performance metric " + name + " " + ((expr == NULL) ? "basic" : "= " + expr->String());
info.metric.name = strdup(name.c_str());
info.metric.description = strdup(description.c_str());
status = callback(info, data);
if (status != HSA_STATUS_SUCCESS) break;
++it;
}
break;
}
break;
case ROCPROFILER_INFO_KIND_TRACE:
{
info.trace.name = strdup("TT");
info.trace.description = strdup("Thread Trace");
info.trace.parameter_count = 5;
status = callback(info, data);
if (status != HSA_STATUS_SUCCESS) break;
break;
}
default:
EXC_RAISING(HSA_STATUS_ERROR, "unknown info kind(" << kind << ")");
}
case ROCPROFILER_INFO_KIND_TRACE:
{
info.trace.name = strdup("TT");
info.trace.description = strdup("Thread Trace");
info.trace.parameter_count = 5;
status = callback(info, data);
break;
}
default:
EXC_RAISING(HSA_STATUS_ERROR, "unknown info kind(" << kind << ")");
++agent_idx;
if (agent_idx == agent_max) break;
}
if (status == HSA_STATUS_INFO_BREAK) status = HSA_STATUS_SUCCESS;
@@ -390,7 +414,7 @@ PUBLIC_API hsa_status_t rocprofiler_iterate_info(
// Iterate over the info for a given info query, and invoke an application-defined callback on every iteration
PUBLIC_API hsa_status_t rocprofiler_query_info(
hsa_agent_t agent,
const hsa_agent_t *agent,
rocprofiler_info_query_t query,
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data),
void *data)
+2
View File
@@ -91,6 +91,8 @@ class Logger {
std::lock_guard<mutex_t> lck(mutex_);
if (messaging) {
message_[GetTid()] = "";
} else if (streaming_) {
Put("\n");
}
messaging_ = messaging;
streaming_ = messaging;
+1
View File
@@ -30,6 +30,7 @@ target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_R
target_link_libraries( ${EXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/*.xml ${PROJECT_BINARY_DIR}" )
execute_process ( COMMAND sh -xc "mkdir -p ${PROJECT_BINARY_DIR}/RESULTS" )
## Build test library
set ( TEST_LIB "tool" )
+10
View File
@@ -322,6 +322,11 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
return status;
}
// Info-iteration callback: prints one line per reported record to stdout in the
// form " gpu-agent<idx>.<name> : <description>".
//   info - info record passed by value; only agent_idx and the metric
//          name/description members are read, so the record is expected to be
//          of the metric kind
//   arg  - user data pointer handed through by the iterator; unused here
// Always returns HSA_STATUS_SUCCESS.
// NOTE(review): the name/description strings are not released here; confirm
// which side owns them.
static hsa_status_t info_callback(const rocprofiler_info_data_t info, void * arg) {
  (void)arg;  // no per-call user data is needed
  // agent_idx is an unsigned 32-bit field, so it is printed with %u rather than %d
  printf(" gpu-agent%u.%s : %s\n", static_cast<unsigned>(info.agent_idx), info.metric.name,
         info.metric.description);
  return HSA_STATUS_SUCCESS;
}
// Tool constructor
CONSTRUCTOR_API void constructor()
{
@@ -337,6 +342,11 @@ CONSTRUCTOR_API void constructor()
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
if (getenv("ROCP_INFO") != NULL) {
rocprofiler_iterate_info(NULL, ROCPROFILER_INFO_KIND_METRIC, info_callback, NULL);
return;
}
// Set output file
result_prefix = getenv("ROCP_OUTPUT_DIR");
if (result_prefix != NULL) {
+2 -1
View File
@@ -1,4 +1,5 @@
<metric name=SQ_CYCLES,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],CPC_ALWAYS_COUNT,CPC_ME1_STALL_WAIT_ON_RCIU_READ,GPU_BUSY,MEM_BUSY,SFETCH_INSTS,VALU_INSTS,FETCH_SIZE,VWRITE_INSTS ></metric>
<metric name=SQ_CYCLES,SQ_WAVES,SQ_INSTS_SMEM,SQ_INSTS_VALU,TA_FLAT_WRITE_WAVEFRONTS[0],TA_FLAT_WRITE_WAVEFRONTS[1],CPC_ALWAYS_COUNT,CPC_ME1_STALL_WAIT_ON_RCIU_READ,GPUBusy,VALUBusy,SALUBusy,MemUnitBusy,SFetchInsts,FetchSize,VWriteInsts,WriteSize
></metric>
<trace name=SQTT copy=true >
<parameters
+17 -16
View File
@@ -19,10 +19,8 @@
<metric name=SQ_WAIT_INST_LDS block=SQ event=61 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name=SQ_ACTIVE_INST_VALU block=SQ event=69 descr="Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic)"></metric>
<metric name=SQ_INST_CYCLES_SALU block=SQ event=86 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=89 ></metric>
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=90 ></metric>
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=89 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=90 descr="Maximum number of thread-cycles VALU operations that could have been executed given the instruction mix (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd, emulated)"></metric>
<metric name=SQ_LDS_BANK_CONFLICT block=SQ event=97 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metric name=TA_BUSY block=TA event=15 ></metric>
@@ -83,10 +81,8 @@
<metric name=SQ_WAIT_INST_LDS block=SQ event=63 descr="Number of wave-cycles spent waiting for LDS instruction issue. In units of 4 cycles. (per-simd, nondeterministic)"></metric>
<metric name=SQ_ACTIVE_INST_VALU block=SQ event=71 descr="regspec 71? Number of cycles the SQ instruction arbiter is working on a VALU instruction. (per-simd, nondeterministic)"></metric>
<metric name=SQ_INST_CYCLES_SALU block=SQ event=84 descr="Number of cycles needed to execute non-memory read scalar operations. (per-simd, emulated)"></metric>
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=85 ></metric>
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=86 ></metric>
<metric name=SQ_THREAD_CYCLES_VALU block=SQ event=85 descr="Number of thread-cycles used to execute VALU operations (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd)"></metric>
<metric name=SQ_THREAD_CYCLES_VALU_MAX block=SQ event=86 descr="Maximum number of thread-cycles VALU operations that could have been executed given the instruction mix (similar to INST_CYCLES_VALU but multiplied by # of active threads). (per-simd, emulated)"></metric>
<metric name=SQ_LDS_BANK_CONFLICT block=SQ event=93 descr="Number of cycles LDS is stalled by bank conflicts. (emulated)"></metric>
<metric name=TA_BUSY block=TA event=15 ></metric>
@@ -95,11 +91,16 @@
<metric name=TCC_CYCLE block=TCC event=1 ></metric>
<metric name=TCC_REQ block=TCC event=3 ></metric>
<metric name=TCC_HIT block=TCC event=17 ></metric>
<metric name=TCC_MISS block=TCC event=19 ></metric>
<metric name=TCC_WRITEBACK block=TCC event=22 ></metric>
<metric name=TCC_EA_RDREQ block=TCC event=36 ></metric>
<metric name=TCC_EA_RDREQ_32B block=TCC event=37 ></metric>
<metric name=TCC_HIT block=TCC event=20 ></metric>
<metric name=TCC_MISS block=TCC event=22 ></metric>
<metric name=TCC_WRITEBACK block=TCC event=25 ></metric>
<metric name=TCC_EA_WRREQ block=TCC event=29 ></metric>
<metric name=TCC_EA_WRREQ_64B block=TCC event=30 ></metric>
<metric name=TCC_EA_WRREQ_STALL block=TCC event=33 ></metric>
<metric name=TCC_EA_RDREQ block=TCC event=41 ></metric>
<metric name=TCC_EA_RDREQ_32B block=TCC event=42 ></metric>
<metric name=TCP_TA_DATA_STALL_CYCLES block=TCP event=6 descr="TCP stalls TA data interface. Now Windowed."></metric>
<metric name=CPC_ALWAYS_COUNT block=CPC event=0 ></metric>
<metric name=CPC_ME1_STALL_WAIT_ON_RCIU_READ block=CPC event=8 ></metric>
@@ -127,7 +128,7 @@
# GPUBusy, percentage
# The percentage of time GPU was busy.
<metric
name=GPU_BUSY
name="GPUBusy"
expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT
descr="The percentage of time GPU was busy."
></metric>
@@ -169,7 +170,7 @@
# VWriteInsts The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory.
<metric
name=VWriteInsts
name="VWriteInsts"
expr=(SQ_INSTS_VMEM_WR-TA_FLAT_WRITE_WAVEFRONTS_sum)/SQ_WAVES
descr="The average number of vector write instructions to the video memory executed per work-item (affected by flow control). Excludes FLAT instructions that write to video memory."
></metric>
@@ -219,7 +220,7 @@
# SALUBusy The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal).
<metric
name="SALUBusy"
expr=100*SQ_INST_CYCLES_SALU*4/(NUM_SIMDS/NUM_SHADER_ENGINES)/GRBM_GUI_ACTIVE
expr=100*SQ_INST_CYCLES_SALU*4/NUM_SIMDS/GRBM_GUI_ACTIVE
descr="The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."
></metric>