Change-Id: I48968966ffe164218ebff88d0e3a1268e96bf1dd
Этот коммит содержится в:
Evgeny
2017-06-23 17:54:27 -05:00
коммит произвёл Evgeny Shcherbakov
родитель c533229bc1
Коммит 4174f07fd1
120 изменённых файлов: 1300 добавлений и 918 удалений
+3 -3
Просмотреть файл
@@ -25,6 +25,6 @@ add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
#
# Style format
#
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
+10 -5
Просмотреть файл
@@ -6,9 +6,9 @@ Current library implementation supports only GFX9.
The library source tree:
- doc - Documentation, the API specification and the presentation
- inc - Public API
- hsa_ext_amd_aql_profile.h - AMD AQL profile library public API
- hsa_ven_amd_aqlprofile.h - AMD AQL profile library public API
- src - AMD AQL profile library sources
- aqlprofile - AMD AQL profile library
- core - the library sources
- commandwriter - PM4 command writer originated from 'hsa-runtime/tools'
- perfcounter - PM4 perfcounter manager originated from 'hsa-runtime/tools'
- threadtrace - PM4 threadtrace manager originated from 'hsa-runtime/tools'
@@ -19,7 +19,7 @@ The library source tree:
To build the library:
$ cd .../hsa-ext-aql-profile
$ cd .../hsa-amd-aqlprofile
$ mkdir build
$ cd build
$ cmake ..
@@ -27,8 +27,9 @@ $ make
To run the test:
$ cd .../hsa-ext-aql-profile/build
$ test/ctrl
$ cd .../hsa-amd-aqlprofile/build
$ export LD_LIBRARY_PATH=$PWD
$ ./test/ctrl
To enable PMC profiling:
@@ -37,3 +38,7 @@ $ export ROCR_ENABLE_PMC=1
To enable SQTT profiling:
$ export ROCR_ENABLE_SQTT=1
Or to use the script:
$ ./run.sh
Двоичный файл не отображается.
+9 -5
Просмотреть файл
@@ -20,10 +20,9 @@ if ( NOT DEFINED PROJ_DIR )
set ( ROOT_DIR ${PROJ_DIR}/.. )
endif ()
set ( API_DIR ${ROOT_DIR}/inc )
set ( HSA_RUNTIME_DIR ${PROJ_DIR}/../../.. )
set ( HSA_RUNTIME_OSC_DIR ${HSA_RUNTIME_DIR}/opensrc/hsa-runtime )
set ( CORE_UTIL_DIR ${HSA_RUNTIME_OSC_DIR}/core/util )
set ( HSA_RUNTIME_DIR ${PROJ_DIR}/../../hsa-runtime )
set ( API_DIR ${HSA_RUNTIME_DIR}/inc )
set ( CORE_UTIL_DIR ${HSA_RUNTIME_DIR}/core/util )
include_directories ( ${ROOT_DIR} )
@@ -63,4 +62,9 @@ add_subdirectory ( ${PROJ_DIR}/perfcounter "${PROJECT_BINARY_DIR}/perfcounter" )
# libraries that have been built in this regard
#
set ( TARGET_LIB "${TARGET_NAME}${ONLY64STR}" )
add_subdirectory ( ${PROJ_DIR}/${TARGET_NAME} "${PROJECT_BINARY_DIR}/${TARGET_NAME}" )
add_subdirectory ( ${PROJ_DIR}/core "${PROJECT_BINARY_DIR}/core" )
#
# Creating the library link
#
execute_process ( COMMAND sh -xc "/bin/ln -s core/lib${TARGET_LIB}.so libhsa-amd-${TARGET_LIB}.so.1" )
@@ -1,6 +1,9 @@
#include <string>
#include "aql_profile.h"
#include <string>
#include <map>
#include <vector>
#include "pm4_factory.h"
#include "cmdwriter.h" // commandwriter
#include "perf_counter.h" // perfcounter
@@ -9,6 +12,14 @@
#include "logger.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define DESTRUCTOR_API __attribute__((destructor))
// Logs 'msg' through ERR_LOGGING and returns 'err' from the enclosing function when
// 'cond' holds. 'msg' is intentionally not parenthesized so that callers may pass a
// '<<' chain. The body is wrapped in do { } while (0) so that the invocation together
// with its trailing semicolon is exactly one statement and stays valid inside
// unbraced if/else chains.
#define ERR_CHECK(cond, err, msg)                                                                  \
  do {                                                                                             \
    if (cond) {                                                                                    \
      ERR_LOGGING << msg;                                                                          \
      return err;                                                                                  \
    }                                                                                              \
  } while (0)
namespace aql_profile {
@@ -31,7 +42,7 @@ class CommandBufferMgr {
uint32_t align(const uint32_t& size) { return (size + align_mask) & ~align_mask; }
public:
CommandBufferMgr(const profile_t* profile)
explicit CommandBufferMgr(const profile_t* profile)
: buffer(profile->command_buffer), postfix_size(0), info(NULL) {
info = (info_t*)setPostfix(sizeof(info_t));
}
@@ -44,12 +55,16 @@ class CommandBufferMgr {
postfix_size = size;
buffer.size -= (delta < buffer.size) ? delta : buffer.size;
}
if (buffer.size == 0)
throw aql_profile_exc_msg("CommandBufferMgr::setPostfix(): buffer size set to zero");
return (buffer.size != 0) ? buffer.ptr + buffer.size : NULL;
}
// Records the size of the pre-commands partition in the buffer info block.
// Throws aql_profile_exc_msg when 'size' exceeds the current buffer size;
// otherwise stores the size and returns true.
bool setPreSize(const uint32_t& size) {
  if (size > buffer.size) {
    throw aql_profile_exc_msg("CommandBufferMgr::setPreSize(): size set out of the buffer");
  }
  info->precmds_size = size;
  return true;
}
@@ -62,6 +77,8 @@ class CommandBufferMgr {
info->postcmds_size = size - info->precmds_size;
suc = ((getPostOffset() + info->postcmds_size) <= buffer.size);
}
if (!suc)
throw aql_profile_exc_msg("CommandBufferMgr::checkTotalSize(): size set out of the buffer");
return suc;
}
@@ -80,19 +97,55 @@ class CommandBufferMgr {
}
};
// Builds the map from block id to the list of counter ids requested for that block,
// walking the profile events in their original order.
static inline pm4_profile::CountersMap CountersMapCreate(const profile_t* profile,
                                                         const Pm4Factory* pm4_factory) {
  pm4_profile::CountersMap result;
  const hsa_ven_amd_aqlprofile_event_t* const events_end = profile->events + profile->event_count;
  const hsa_ven_amd_aqlprofile_event_t* event = profile->events;
  while (event < events_end) {
    result[pm4_factory->getBlockId(event)].push_back(event->counter_id);
    ++event;
  }
  return result;
}
typedef std::vector<const event_t*> EventsVec;
static inline EventsVec EventsVecCreate(const profile_t* profile, const Pm4Factory* pm4_factory) {
pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory);
std::map<uint32_t, const event_t*> id_map;
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
p < profile->events + profile->event_count; ++p) {
id_map.insert(decltype(id_map)::value_type(pm4_factory->getBlockId(p), p));
}
// Iterate through the list of blocks/counters to generate correct order events vector
EventsVec eventsVec;
for (pm4_profile::CountersMap::const_iterator block_it = countersMap.begin();
block_it != countersMap.end(); ++block_it) {
const uint32_t block_id = block_it->first;
const pm4_profile::CountersVec& counters = block_it->second;
const uint32_t counter_count = counters.size();
for (uint32_t ind = 0; ind < counter_count; ++ind) {
eventsVec.push_back(id_map[block_id] + ind);
}
}
return eventsVec;
}
// Two events match when block name, block index and counter id are all equal.
static inline bool is_event_match(const event_t& event1, const event_t& event2) {
  if (!(event1.block_name == event2.block_name)) return false;
  if (!(event1.block_index == event2.block_index)) return false;
  return event1.counter_id == event2.counter_id;
}
hsa_status_t default_pmcdata_callback(hsa_ext_amd_aql_profile_info_type_t info_type,
hsa_ext_amd_aql_profile_info_data_t* info_data,
hsa_status_t default_pmcdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* callback_data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
hsa_ext_amd_aql_profile_info_data_t* passed_data =
reinterpret_cast<hsa_ext_amd_aql_profile_info_data_t*>(callback_data);
hsa_ven_amd_aqlprofile_info_data_t* passed_data =
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
if (info_type == HSA_EXT_AQL_PROFILE_INFO_PMC_DATA) {
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) {
if (is_event_match(info_data->pmc_data.event, passed_data->pmc_data.event)) {
if (passed_data->sample_id == UINT32_MAX) {
passed_data->pmc_data.result += info_data->pmc_data.result;
@@ -112,14 +165,14 @@ struct sqtt_ctrl_t {
uint32_t writePtr;
};
hsa_status_t default_sqttdata_callback(hsa_ext_amd_aql_profile_info_type_t info_type,
hsa_ext_amd_aql_profile_info_data_t* info_data,
hsa_status_t default_sqttdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* callback_data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
hsa_ext_amd_aql_profile_info_data_t* passed_data =
reinterpret_cast<hsa_ext_amd_aql_profile_info_data_t*>(callback_data);
hsa_ven_amd_aqlprofile_info_data_t* passed_data =
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
if (info_type == HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA) {
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
if (info_data->sample_id == passed_data->sample_id) {
passed_data->sqtt_data = info_data->sqtt_data;
status = HSA_STATUS_INFO_BREAK;
@@ -129,83 +182,98 @@ hsa_status_t default_sqttdata_callback(hsa_ext_amd_aql_profile_info_type_t info_
return status;
}
Pm4Factory::tables_t Pm4Factory::tables;
std::mutex Logger::mutex;
Logger* Logger::instance = NULL;
std::mutex Pm4Factory::mutex;
Pm4Factory::instances_t Pm4Factory::instances;
// Teardown hook: DESTRUCTOR_API expands to __attribute__((destructor)), so this function
// is registered to run automatically at unload/exit rather than being called explicitly.
DESTRUCTOR_API void destructor() {
// Deletes the Logger singleton, if one was created.
Logger::Destroy();
// NOTE(review): presumably releases the cached Pm4Factory instances; Pm4Factory::Destroy()
// is not visible here - confirm.
Pm4Factory::Destroy();
}
} // aql_profile
extern "C" {
// Check if event is valid for the specific GPU
PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_validate_event(
hsa_agent_t agent, const hsa_ext_amd_aql_profile_event_t* event, bool* result) {
// Returns, through 'str', the last message recorded by the logger for the calling thread.
// The returned pointer refers to storage owned by the logger; it is rewritten when the
// same thread logs again, so callers should copy the text if they need to keep it.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when 'str' is NULL.
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_error_string(const char** str) {
  if (str == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  *str = aql_profile::Logger::LastMessage().c_str();
  return HSA_STATUS_SUCCESS;
}
// Method to populate the provided AQL packet with profiling start commands
PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
const hsa_ext_amd_aql_profile_profile_t* profile, aql_profile::packet_t* aql_start_packet) {
aql_profile::Logger logger;
// Check if event is valid for the specific GPU.
// '*result' is set to true only when the PM4 factory created for 'agent' reports block
// info for 'event'. An event_exception is logged as info and reported as "not valid"
// with a success status; any other std::exception is logged as an error and yields
// HSA_STATUS_ERROR. NULL 'event' or 'result' yields HSA_STATUS_ERROR_INVALID_ARGUMENT.
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
    hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t* event, bool* result) {
  if (result == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  *result = false;
  if (event == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  hsa_status_t status = HSA_STATUS_SUCCESS;
  try {
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
    if (pm4_factory->getBlockInfo(event) != NULL) *result = true;
  } catch (const aql_profile::event_exception& e) {
    // An unknown event is an expected outcome of validation, not a failure.
    INFO_LOGGING << e.what();
  } catch (const std::exception& e) {
    ERR_LOGGING << e.what();
    status = HSA_STATUS_ERROR;
  }
  return status;
}
// Method to populate the provided AQL packet with profiling start commands
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start(
const hsa_ven_amd_aqlprofile_profile_t* profile, aql_profile::packet_t* aql_start_packet) {
try {
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
if (pm4_factory == NULL) return HSA_STATUS_ERROR;
pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter();
if (cmdWriter == NULL) return HSA_STATUS_ERROR;
pm4_profile::DefaultCmdBuf commands;
aql_profile::CommandBufferMgr cmdBufMgr(profile);
if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR;
if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_PMC) {
pm4_profile::Pmu* pmcMgr = pm4_factory->getPmcMgr();
if (pmcMgr == NULL) return HSA_STATUS_ERROR;
pm4_profile::CountersMap countersMap;
for (const hsa_ext_amd_aql_profile_event_t* p = profile->events;
p < profile->events + profile->event_count; ++p) {
countersMap[pm4_factory->getBlockId(p)].push_back(p->counter_id);
}
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr();
// Generate start commands
const pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory);
pmcMgr->begin(&commands, cmdWriter, countersMap);
cmdBufMgr.setPreSize(commands.Size());
// Generate stop commands
const uint32_t data_size =
pmcMgr->end(&commands, cmdWriter, countersMap, profile->output_buffer.ptr);
if (data_size == 0) return HSA_STATUS_ERROR;
ERR_CHECK(data_size == 0, HSA_STATUS_ERROR, "PMC mgr end(): data size set to zero");
assert(data_size <= profile->output_buffer.size);
if (data_size > profile->output_buffer.size) return HSA_STATUS_ERROR;
} else if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_SQTT) {
if (data_size > profile->output_buffer.size) {
ERR_LOGGING << "data size assertion failed, data_size(" << data_size << "), buffer size("
<< profile->output_buffer.size << ")";
return HSA_STATUS_ERROR;
}
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) {
pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr();
if (sqttMgr == NULL) return HSA_STATUS_ERROR;
pm4_profile::ThreadTraceConfig sqtt_config;
sqttMgr->InitThreadTraceConfig(&sqtt_config);
if (profile->parameters) {
for (const hsa_ext_amd_aql_profile_parameters_t* p = profile->parameters;
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
p < (profile->parameters + profile->parameter_count); ++p) {
switch (p->parameter_name) {
case HSA_EXT_AQL_PROFILE_PARAM_COMPUTE_UNIT_TARGET:
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
sqtt_config.threadTraceTargetCu = p->value;
break;
case HSA_EXT_AQL_PROFILE_PARAM_VM_ID_MASK:
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
sqtt_config.threadTraceVmIdMask = p->value;
break;
case HSA_EXT_AQL_PROFILE_PARAM_MASK:
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
sqtt_config.threadTraceMask = p->value;
break;
case HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK:
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
sqtt_config.threadTraceTokenMask = p->value;
break;
case HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK2:
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
sqtt_config.threadTraceTokenMask2 = p->value;
break;
default:
ERR_LOGGING(logger) << "Bad SQTT parameter name (" << p->parameter_name << ")";
return HSA_STATUS_ERROR;
ERR_LOGGING << "Bad SQTT parameter name (" << p->parameter_name << ")";
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
}
}
@@ -213,10 +281,9 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
sqttMgr->setSqttDataBuff((uint8_t*)profile->output_buffer.ptr, profile->output_buffer.size);
// Control buffer registering
const uint32_t status_size = sqttMgr->StatusSizeInfo();
void* status_ptr = cmdBufMgr.setPostfix(status_size);
if (status_ptr == NULL) return HSA_STATUS_ERROR;
// Control buffer registering
sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr);
// Generate start commands
@@ -224,10 +291,12 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
cmdBufMgr.setPreSize(commands.Size());
// Generate stop commands
sqttMgr->StopSession(&commands, cmdWriter);
} else
return HSA_STATUS_ERROR;
} else {
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
if (!cmdBufMgr.checkTotalSize(commands.Size())) return HSA_STATUS_ERROR;
cmdBufMgr.checkTotalSize(commands.Size());
const aql_profile::descriptor_t pre_descr = cmdBufMgr.getPreDescr();
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
@@ -236,7 +305,7 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
// Populate start aql packet
aql_profile::populateAql(pre_descr.ptr, pre_descr.size, cmdWriter, aql_start_packet);
} catch (std::exception& e) {
ERR_LOGGING(logger) << e.what();
ERR_LOGGING << e.what();
return HSA_STATUS_ERROR;
}
@@ -244,25 +313,18 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
}
// Method to populate the provided AQL packet with profiling stop commands
PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_stop(
const hsa_ext_amd_aql_profile_profile_t* profile, aql_profile::packet_t* aql_stop_packet) {
aql_profile::Logger logger;
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_stop(const hsa_ven_amd_aqlprofile_profile_t* profile,
aql_profile::packet_t* aql_stop_packet) {
try {
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
if (pm4_factory == NULL) return HSA_STATUS_ERROR;
pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter();
if (cmdWriter == NULL) return HSA_STATUS_ERROR;
aql_profile::CommandBufferMgr cmdBufMgr(profile);
if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR;
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
// Populate stop aql packet
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
aql_profile::populateAql(post_descr.ptr, post_descr.size, cmdWriter, aql_stop_packet);
} catch (std::exception& e) {
ERR_LOGGING(logger) << e.what();
ERR_LOGGING << e.what();
return HSA_STATUS_ERROR;
}
@@ -271,9 +333,7 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_stop(
// Legacy devices, converting of the profiling AQL packet to PM4 packet blob
PUBLIC_API hsa_status_t
hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
aql_profile::Logger logger;
hsa_ven_amd_aqlprofile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
try {
// Populate GFX8 pm4 packet blob
// Adding HSA barrier acquire packet
@@ -283,7 +343,7 @@ hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet,
// Adding HSA barrier release packet
data = aql_profile::legacyAqlRelease(aql_packet, data);
} catch (std::exception& e) {
ERR_LOGGING(logger) << e.what();
ERR_LOGGING << e.what();
return HSA_STATUS_ERROR;
}
@@ -292,34 +352,33 @@ hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet,
// Method for getting the profile info
PUBLIC_API hsa_status_t
hsa_ext_amd_aql_profile_get_info(const hsa_ext_amd_aql_profile_profile_t* profile,
hsa_ext_amd_aql_profile_info_type_t attribute, void* value) {
hsa_ven_amd_aqlprofile_get_info(const hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ven_amd_aqlprofile_info_type_t attribute, void* value) {
hsa_status_t status = HSA_STATUS_SUCCESS;
aql_profile::Logger logger;
try {
switch (attribute) {
case HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE:
case HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE:
*(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh
break;
case HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE:
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE:
*(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh
break;
case HSA_EXT_AQL_PROFILE_INFO_PMC_DATA:
reinterpret_cast<hsa_ext_amd_aql_profile_info_data_t*>(value)->pmc_data.result = 0;
status = hsa_ext_amd_aql_profile_iterate_data(profile,
aql_profile::default_pmcdata_callback, value);
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA:
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(value)->pmc_data.result = 0;
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::default_pmcdata_callback,
value);
break;
case HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA:
status = hsa_ext_amd_aql_profile_iterate_data(
profile, aql_profile::default_sqttdata_callback, value);
case HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA:
status = hsa_ven_amd_aqlprofile_iterate_data(profile,
aql_profile::default_sqttdata_callback, value);
break;
default:
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
ERR_LOGGING(logger) << "Invalid attribute (" << attribute << ")";
ERR_LOGGING << "Invalid attribute (" << attribute << ")";
}
} catch (std::exception& e) {
ERR_LOGGING(logger) << e.what();
ERR_LOGGING << e.what();
return HSA_STATUS_ERROR;
}
@@ -328,29 +387,26 @@ hsa_ext_amd_aql_profile_get_info(const hsa_ext_amd_aql_profile_profile_t* profil
// Method for iterating the events output data
PUBLIC_API hsa_status_t
hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* profile,
hsa_ext_amd_aql_profile_data_callback_t callback, void* data) {
hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
aql_profile::Logger logger;
try {
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
if (pm4_factory == NULL) return HSA_STATUS_ERROR;
if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_PMC) {
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
uint32_t info_size = 0;
void* info_data;
uint64_t* samples = (uint64_t*)profile->output_buffer.ptr;
const uint32_t sample_count = profile->output_buffer.size / sizeof(uint64_t);
uint32_t sample_index = 0;
pm4_profile::Pmu* pmcMgr = pm4_factory->getPmcMgr();
if (pmcMgr == NULL) return HSA_STATUS_ERROR;
pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr();
for (const hsa_ext_amd_aql_profile_event_t* p = profile->events;
p < (profile->events + profile->event_count); ++p) {
const pm4_profile::GpuBlockInfo* block_info = pm4_factory->getBlockInfo(p);
if (block_info == NULL) return HSA_STATUS_ERROR;
aql_profile::EventsVec eventsVec = EventsVecCreate(profile, pm4_factory);
for (aql_profile::EventsVec::const_iterator it = eventsVec.begin(); it != eventsVec.end();
++it) {
const hsa_ven_amd_aqlprofile_event_t* p = *it;
const pm4_profile::CntlMethod method = pm4_factory->getBlockInfo(p)->method;
// A perfcounter data sample per ShaderEngine
const uint32_t block_samples_count = (method == pm4_profile::CntlMethodBySe ||
@@ -359,46 +415,52 @@ hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* pr
: 1;
for (uint32_t i = 0; i < block_samples_count; ++i) {
assert(sample_index < sample_count);
if (sample_index >= sample_count) return HSA_STATUS_ERROR;
if (sample_index >= sample_count) {
ERR_LOGGING << "Bad sample index (" << sample_index << "/" << sample_count << ")";
return HSA_STATUS_ERROR;
}
hsa_ext_amd_aql_profile_info_data_t sample_info;
hsa_ven_amd_aqlprofile_info_data_t sample_info;
sample_info.sample_id = i;
sample_info.pmc_data.event = *p;
sample_info.pmc_data.result = samples[sample_index];
status = callback(HSA_EXT_AQL_PROFILE_INFO_PMC_DATA, &sample_info, data);
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
if (status == HSA_STATUS_INFO_BREAK) {
status = HSA_STATUS_SUCCESS;
break;
}
if (status != HSA_STATUS_SUCCESS) {
ERR_LOGGING(logger) << "PMC data callback error, sample_id(" << i << ") status("
<< status << ")";
ERR_LOGGING << "PMC data callback error, sample_id(" << i << ") status(" << status
<< ")";
break;
}
++sample_index;
}
}
} else if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_SQTT) {
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) {
pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr();
if (sqttMgr == NULL) return HSA_STATUS_ERROR;
aql_profile::CommandBufferMgr cmdBufMgr(profile);
if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR;
const uint32_t status_size = sqttMgr->StatusSizeInfo();
// Control buffer was allocated as the CmdBuffer postfix partition
const uint32_t status_size = sqttMgr->StatusSizeInfo();
void* status_ptr = cmdBufMgr.setPostfix(status_size);
if (status_ptr == NULL) return HSA_STATUS_ERROR;
// Control buffer registering
sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr);
// Validate SQTT status and normalize WRPTR
if (sqttMgr->Validate() == false) return HSA_STATUS_ERROR;
if (sqttMgr->Validate() == false) {
ERR_LOGGING << "SQTT data corrupted";
return HSA_STATUS_ERROR;
}
const uint32_t se_number = sqttMgr->getNumSe();
// Casting status pointer to SQTT control per ShaderEngine array
aql_profile::sqtt_ctrl_t* sqtt_ctrl = (aql_profile::sqtt_ctrl_t*)status_ptr;
assert(status_size == sizeof(aql_profile::sqtt_ctrl_t) * se_number);
if (status_size != sizeof(aql_profile::sqtt_ctrl_t) * se_number) {
const uint32_t status_size_exp = sizeof(aql_profile::sqtt_ctrl_t) * se_number;
assert(status_size == status_size_exp);
if (status_size != status_size_exp) {
ERR_LOGGING << "Bad SQTT controll data structure"
<< ", status_size(" << status_size << "), status_size_exp(" << status_size_exp
<< "), se_number(" << se_number << ")";
return HSA_STATUS_ERROR;
}
// SQTT output buffer and capacity per ShaderEngine
@@ -410,29 +472,29 @@ hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* pr
// written by hardware. The index is incremented by size of 32 bytes.
uint32_t sample_size = sqtt_ctrl[i].writePtr * TT_WRITE_PTR_BLK;
hsa_ext_amd_aql_profile_info_data_t sample_info;
hsa_ven_amd_aqlprofile_info_data_t sample_info;
sample_info.sample_id = i;
sample_info.sqtt_data.ptr = sample_ptr;
sample_info.sqtt_data.size = sample_size;
status = callback(HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA, &sample_info, data);
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA, &sample_info, data);
if (status == HSA_STATUS_INFO_BREAK) {
status = HSA_STATUS_SUCCESS;
break;
}
if (status != HSA_STATUS_SUCCESS) {
ERR_LOGGING(logger) << "SQTT data callback error, sample_id(" << i << ") status("
<< status << ")";
ERR_LOGGING << "SQTT data callback error, sample_id(" << i << ") status(" << status
<< ")";
break;
}
sample_ptr += sample_capacity;
}
} else {
ERR_LOGGING(logger) << "Bad profile type (" << profile->type << ")";
status = HSA_STATUS_ERROR;
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
} catch (std::exception& e) {
ERR_LOGGING(logger) << e.what();
ERR_LOGGING << e.what();
return HSA_STATUS_ERROR;
}
+40
Просмотреть файл
@@ -0,0 +1,40 @@
#ifndef _AQL_PROFILE_H_
#define _AQL_PROFILE_H_
#include <iostream>
#include <string>
#include "hsa_ven_amd_aqlprofile.h"
#include "aql_profile_exception.h"
namespace pm4_profile {
class CommandWriter;
}
namespace aql_profile {
// Short library-internal aliases for the public HSA API types.
using descriptor_t = hsa_ven_amd_aqlprofile_descriptor_t;
using profile_t = hsa_ven_amd_aqlprofile_profile_t;
using info_type_t = hsa_ven_amd_aqlprofile_info_type_t;
using data_callback_t = hsa_ven_amd_aqlprofile_data_callback_t;
using packet_t = hsa_ext_amd_aql_pm4_packet_t;
using event_t = hsa_ven_amd_aqlprofile_event_t;
// Populates 'aql_packet' for the command buffer given by 'cmd_buffer'/'cmd_size', using
// 'cmd_writer'. NOTE(review): the implementation is not visible here - confirm the exact
// packet contents against the definition.
void populateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_profile::CommandWriter* cmd_writer,
packet_t* aql_packet);
// Helpers for the legacy path that converts a profiling AQL packet into a PM4 blob at 'data'.
// Callers feed the returned pointer back in as 'data' for the next helper, so each
// presumably returns the position just past what it wrote - TODO confirm.
void* legacyAqlAcquire(const packet_t* aql_packet, void* data);
void* legacyAqlRelease(const packet_t* aql_packet, void* data);
void* legacyPm4(const packet_t* aql_packet, void* data);
// Exception whose message is the given text followed by the streamed description of the
// offending event (formatting is done by the aql_profile_exc_val base).
class event_exception : public aql_profile_exc_val<event_t> {
  typedef aql_profile_exc_val<event_t> base_t;

 public:
  event_exception(const std::string& m, const event_t& ev) : base_t(m, ev) {}
};
// Streams a readable description of an event: block name, block index and counter id.
// Declared 'inline' rather than 'static': this is a header, and a static definition
// would give every including translation unit its own private copy of the function.
inline std::ostream& operator<<(std::ostream& os, const event_t& ev) {
  os << "event( block(" << ev.block_name << "." << ev.block_index << "), Id(" << ev.counter_id
     << "))";
  return os;
}
} // namespace aql_profile
#endif // _AQL_PROFILE_H_
+34
Просмотреть файл
@@ -0,0 +1,34 @@
#ifndef _AQL_PROFILE_EXCEPTION_H_
#define _AQL_PROFILE_EXCEPTION_H_
#include <string.h>
#include <string>
#include <sstream>
namespace aql_profile {
// Exception carrying a plain text message.
class aql_profile_exc_msg : public std::exception {
 public:
  explicit aql_profile_exc_msg(const std::string& msg) : str(msg) {}
  // Returns the stored message; the pointer stays valid for the lifetime of this object.
  // 'noexcept' replaces the deprecated dynamic exception specification 'throw()';
  // 'override' makes the compiler verify the signature against std::exception::what().
  const char* what() const noexcept override { return str.c_str(); }

 protected:
  std::string str;
};
// Exception whose message is "<msg>(<val>)", where 'val' is formatted with operator<<.
// T must be streamable to std::ostream.
template <typename T> class aql_profile_exc_val : public std::exception {
 public:
  aql_profile_exc_val(const std::string& msg, const T& val) {
    std::ostringstream oss;
    oss << msg << "(" << val << ")";
    str = oss.str();
  }
  // Returns the formatted message; the pointer stays valid for the lifetime of this object.
  // 'noexcept' replaces the deprecated dynamic exception specification 'throw()';
  // 'override' makes the compiler verify the signature against std::exception::what().
  const char* what() const noexcept override { return str.c_str(); }

 protected:
  std::string str;
};
} // namespace aql_profile
#endif // _AQL_PROFILE_EXCEPTION_H_
@@ -11,7 +11,7 @@
namespace aql_profile {
// GFX8 block ID mapping table
uint32_t Gfx8Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
uint32_t Gfx8Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
pm4_profile::kHsaViCounterBlockIdCb0, pm4_profile::kHsaViCounterBlockIdCpf,
pm4_profile::kHsaViCounterBlockIdDb0, pm4_profile::kHsaViCounterBlockIdGrbm,
pm4_profile::kHsaViCounterBlockIdGrbmSe, pm4_profile::kHsaViCounterBlockIdPaSu,
@@ -30,11 +30,21 @@ uint32_t Gfx8Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
pm4_profile::kHsaViCounterBlockIdCpc};
pm4_profile::CommandWriter* Gfx8Factory::getCommandWriter() {
return new pm4_profile::gfx8::Gfx8CmdWriter(false, true);
auto p = new pm4_profile::gfx8::Gfx8CmdWriter(false, true);
if (p == NULL) throw aql_profile_exc_msg("CommandWriter allocation failed");
return p;
}
pm4_profile::Pmu* Gfx8Factory::getPmcMgr() { return new pm4_profile::Gfx8PerfCounter(); }
pm4_profile::PerfCounter* Gfx8Factory::getPmcMgr() {
auto p = new pm4_profile::Gfx8PerfCounter();
if (p == NULL) throw aql_profile_exc_msg("PerfCounter mgr allocation failed");
return p;
}
pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() { return new pm4_profile::Gfx8ThreadTrace(); }
pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() {
auto p = new pm4_profile::Gfx8ThreadTrace();
if (p == NULL) throw aql_profile_exc_msg("ThreadTrace mgr allocation failed");
return p;
}
} // aql_profile
@@ -11,7 +11,7 @@
namespace aql_profile {
// GFX9 block ID mapping table
uint32_t Gfx9Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
uint32_t Gfx9Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
pm4_profile::kHsaAiCounterBlockIdCb0,
kBadBlockId /*CPF*/,
pm4_profile::kHsaAiCounterBlockIdDb0,
@@ -42,14 +42,24 @@ uint32_t Gfx9Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
pm4_profile::kHsaAiCounterBlockIdTcs,
pm4_profile::kHsaAiCounterBlockIdWd,
kBadBlockId /*CPG*/,
kBadBlockId /*CPC*/};
pm4_profile::kHsaAiCounterBlockIdCpc};
pm4_profile::CommandWriter* Gfx9Factory::getCommandWriter() {
return new pm4_profile::gfx9::Gfx9CmdWriter(false, true);
auto p = new pm4_profile::gfx9::Gfx9CmdWriter(false, true);
if (p == NULL) throw aql_profile_exc_msg("CommandWriter allocation failed");
return p;
}
pm4_profile::Pmu* Gfx9Factory::getPmcMgr() { return new pm4_profile::Gfx9PerfCounter(); }
pm4_profile::PerfCounter* Gfx9Factory::getPmcMgr() {
auto p = new pm4_profile::Gfx9PerfCounter();
if (p == NULL) throw aql_profile_exc_msg("PerfCounter mgr allocation failed");
return p;
}
pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() { return new pm4_profile::Gfx9ThreadTrace(); }
pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() {
auto p = new pm4_profile::Gfx9ThreadTrace();
if (p == NULL) throw aql_profile_exc_msg("ThreadTrace mgr allocation failed");
return p;
}
} // aql_profile
@@ -1,8 +1,9 @@
#include <string.h>
#include <assert.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <assert.h>
#include <string.h>
#include "aql_profile.h"
#include "amd_aql_pm4_ib_packet.h"
+137
Просмотреть файл
@@ -0,0 +1,137 @@
#ifndef _LOGGER_H_
#define _LOGGER_H_
#include <time.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/file.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <exception>
#include <mutex>
#include <map>
namespace aql_profile {
// Process-wide logging singleton, used through the ERR_LOGGING / INFO_LOGGING macros.
//
// File output goes to /tmp/aql_profile_log.txt and is enabled only when the
// HSA_VEN_AMD_AQLPROFILE_LOG environment variable is set (its value is not used as the
// path). Independently of the file, text streamed after the 'begm' manipulator is
// accumulated per thread id and can be read back with LastMessage().
//
// Locking: the static 'mutex' is a non-recursive std::mutex, and Instance()/Destroy()
// hold it while the object is constructed/destroyed. The constructor and destructor
// therefore must not call any member that locks 'mutex' again (put(), resetStreaming());
// relocking a std::mutex from the owning thread is undefined behavior (typically a
// deadlock).
class Logger {
 public:
  // Streams one value. The first value after a reset starts a new log record with a
  // timestamp/pid/tid prefix; following values are appended to that record.
  template <typename T> Logger& operator<<(const T& m) {
    std::ostringstream oss;
    oss << m;
    if (!streaming)
      log(oss.str());
    else
      put(oss.str());
    streaming = true;
    return *this;
  }

  // Manipulator support: 'logger << &Logger::endl' invokes the member on this logger.
  typedef void (Logger::*manip_t)();
  Logger& operator<<(manip_t f) {
    (this->*f)();
    return *this;
  }

  // Starts capturing the streamed text into the per-thread last-message buffer.
  void begm() { messaging = true; }
  // Ends the current record so that the next streamed value starts a new one.
  void endl() { resetStreaming(); }

  // Returns the last captured message of the calling thread (empty if there is none).
  // The reference stays valid while the logger exists; its content changes when the
  // same thread logs again.
  static const std::string& LastMessage() {
    Logger& logger = Instance();
    std::lock_guard<std::mutex> lck(mutex);
    return logger.message[GetTid()];
  }

  // Returns the singleton, creating it on first use.
  static Logger& Instance() {
    std::lock_guard<std::mutex> lck(mutex);
    if (instance == NULL) instance = new Logger();
    return *instance;
  }

  // Destroys the singleton; a later Instance() call creates a fresh one.
  static void Destroy() {
    std::lock_guard<std::mutex> lck(mutex);
    delete instance;
    instance = NULL;
  }

 private:
  static uint32_t GetPid() { return syscall(__NR_getpid); }
  static uint32_t GetTid() { return syscall(__NR_gettid); }

  // Runs with 'mutex' held by Instance(). The flags are initialized directly instead of
  // calling resetStreaming(), which would lock 'mutex' a second time.
  Logger() : file(NULL), dirty(false), streaming(false), messaging(false) {
    const char* path = getenv("HSA_VEN_AMD_AQLPROFILE_LOG");
    if (path != NULL) {
      file = fopen("/tmp/aql_profile_log.txt", "a");
    }
  }

  // Runs with 'mutex' held by Destroy(). The terminating newline is written through
  // writeToFile() directly instead of put(), which would lock 'mutex' a second time.
  ~Logger() {
    if (file != NULL) {
      if (dirty) writeToFile("\n");
      fclose(file);
    }
  }

  // Clears the capture/streaming state; drops the calling thread's captured message
  // when a capture was in progress.
  void resetStreaming() {
    std::lock_guard<std::mutex> lck(mutex);
    if (messaging) {
      message[GetTid()] = "";
    }
    messaging = false;
    streaming = false;
  }

  // Appends text to the calling thread's captured message (while capturing) and to the
  // log file (when enabled).
  void put(const std::string& m) {
    std::lock_guard<std::mutex> lck(mutex);
    if (messaging) {
      message[GetTid()] += m;
    }
    if (file != NULL) {
      dirty = true;
      writeToFile(m);
    }
  }

  // Writes text to the log file under an flock() so that records from several processes
  // appending to the same file do not interleave. Requires file != NULL; takes no
  // in-process lock.
  void writeToFile(const std::string& m) {
    flock(fileno(file), LOCK_EX);
    fprintf(file, "%s", m.c_str());
    fflush(file);
    flock(fileno(file), LOCK_UN);
  }

  // Starts a new record: newline, "<timestamp pid tid> " prefix, then the text.
  void log(const std::string& m) {
    const time_t rawtime = time(NULL);
    tm tm_info;
    localtime_r(&rawtime, &tm_info);
    char tm_str[26];
    strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
    std::ostringstream oss;
    oss << "\n<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    put(oss.str());
  }

  FILE* file;      // log file, NULL when file logging is disabled
  bool dirty;      // true once anything was written to the file
  bool streaming;  // true while values are being appended to the current record
  bool messaging;  // true while streamed text is captured for LastMessage()

  static std::mutex mutex;
  static Logger* instance;
  std::map<uint32_t, std::string> message;  // last captured message per thread id
};
} // namespace aql_profile
// Logging entry points. Each macro ends the previous record, starts a new
// one tagged with the severity and the calling function's name, and then
// switches on message capture so that the text streamed after the macro is
// also kept as the calling thread's last message.
// The manipulators are passed as pointers to member functions, which requires
// the explicit '&' form.
#define ERR_LOGGING \
  (aql_profile::Logger::Instance() << &aql_profile::Logger::endl \
                                   << "Error: " << __FUNCTION__ \
                                   << "(): " << &aql_profile::Logger::begm)
#define INFO_LOGGING \
  (aql_profile::Logger::Instance() << &aql_profile::Logger::endl \
                                   << "Info: " << __FUNCTION__ \
                                   << "(): " << &aql_profile::Logger::begm)
#endif // _LOGGER_H_
+157
Просмотреть файл
@@ -0,0 +1,157 @@
#ifndef _PM4_FACTORY_H_
#define _PM4_FACTORY_H_
#include <string.h>
#include <assert.h>
#include <stdint.h>
#include <climits>
#include <map>
#include <mutex>
#include <string>
#include "aql_profile.h"
#include "gpu_block_info.h"
#include "aql_profile_exception.h"
// Forward declarations of the PM4 profiling types returned by the factories
// declared in this header, and of the per-GFXIP hardware block tables the
// factories pass to BlockMap::init().
namespace pm4_profile {
class CommandWriter;
class PerfCounter;
class ThreadTrace;
// GFX9 block descriptor table and the number of entries in it.
extern GpuBlockInfo Gfx9HwBlocks[];
extern const uint32_t Gfx9HwBlockCount;
// GFX8 block descriptor table and the number of entries in it.
extern GpuBlockInfo Gfx8HwBlocks[];
extern const uint32_t Gfx8HwBlockCount;
}
namespace aql_profile {
class BlockMap {
public:
typedef std::map<uint32_t, const pm4_profile::GpuBlockInfo*> map_t;
typedef map_t::const_iterator iter_t;
void init(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) {
if (block_map.size() == 0) fill(id_table, info_table, info_count);
}
const pm4_profile::GpuBlockInfo* get(const uint32_t& id) const {
iter_t it = block_map.find(id);
return (it != block_map.end()) ? it->second : NULL;
}
private:
void fill(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) {
map_t info_map;
for (uint32_t i = 0; i < info_count; ++i) {
const pm4_profile::GpuBlockInfo& entry = info_table[i];
info_map[entry.counterGroupId] = &entry;
}
for (uint32_t i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
iter_t it = info_map.find(id_table[i]);
if (it != info_map.end()) block_map[i] = it->second;
}
}
map_t block_map;
};
class Pm4Factory {
public:
enum { kBadBlockId = UINT_MAX };
static Pm4Factory* Create(const hsa_agent_t agent);
static Pm4Factory* Create(const profile_t* profile) { return Create(profile->agent); }
static void Destroy();
virtual pm4_profile::CommandWriter* getCommandWriter() = 0;
virtual pm4_profile::PerfCounter* getPmcMgr() = 0;
virtual pm4_profile::ThreadTrace* getSqttMgr() = 0;
const pm4_profile::GpuBlockInfo* getBlockInfo(const event_t* event) const {
const pm4_profile::GpuBlockInfo* info = block_map.get(event->block_name);
if (info == NULL) throw event_exception(std::string("Bad block, "), *event);
if (event->block_index >= info->maxInstanceCount)
throw event_exception(std::string("Bad block index, "), *event);
if (event->counter_id > info->maxEventId)
throw event_exception(std::string("Bad event ID, "), *event);
return info;
}
uint32_t getBlockId(const event_t* event) const {
return getBlockInfo(event)->counterGroupId + event->block_index;
}
protected:
explicit Pm4Factory(const BlockMap& map) : block_map(map) {}
virtual ~Pm4Factory() {}
private:
typedef std::map<std::string, Pm4Factory*> instances_t;
static std::mutex mutex;
static instances_t instances;
const BlockMap& block_map;
};
class Gfx8Factory : public Pm4Factory {
public:
Gfx8Factory() : Pm4Factory(block_map) {
block_map.init(block_id_table, pm4_profile::Gfx8HwBlocks, pm4_profile::Gfx8HwBlockCount);
}
pm4_profile::CommandWriter* getCommandWriter();
pm4_profile::PerfCounter* getPmcMgr();
pm4_profile::ThreadTrace* getSqttMgr();
private:
static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
BlockMap block_map;
};
class Gfx9Factory : public Pm4Factory {
public:
Gfx9Factory() : Pm4Factory(block_map) {
block_map.init(block_id_table, pm4_profile::Gfx9HwBlocks, pm4_profile::Gfx9HwBlockCount);
}
pm4_profile::CommandWriter* getCommandWriter();
pm4_profile::PerfCounter* getPmcMgr();
pm4_profile::ThreadTrace* getSqttMgr();
private:
static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
BlockMap block_map;
};
// Returns the factory for the GPU family of 'agent', creating and caching it
// (keyed by the agent name) on first use.
// Throws aql_profile_exc_msg when the agent name cannot be queried and
// aql_profile_exc_val<std::string> when the GFXIP is not supported.
inline Pm4Factory* Pm4Factory::Create(const hsa_agent_t agent) {
  std::lock_guard<std::mutex> lck(mutex);

  // Zero-initialized so the buffer is always a valid C string.
  char agent_name[64] = {0};
  if (hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name) != HSA_STATUS_SUCCESS) {
    throw aql_profile_exc_msg("Pm4Factory: querying the agent name failed");
  }

  instances_t::iterator it = instances.find(agent_name);
  if (it == instances.end()) {
    // 'it' is the end iterator here and must not be dereferenced: build the
    // factory first, then insert it to obtain a valid iterator.
    Pm4Factory* factory = NULL;
    if (strncmp(agent_name, "gfx801", 6) == 0) {
      throw aql_profile_exc_val<std::string>(std::string("GFX8 Carrizo is not supported "),
                                             agent_name);
    } else if (strncmp(agent_name, "gfx8", 4) == 0) {
      factory = new Gfx8Factory();
    } else if (strncmp(agent_name, "gfx9", 4) == 0) {
      factory = new Gfx9Factory();
    } else {
      throw aql_profile_exc_val<std::string>("Unsupported GFXIP", agent_name);
    }

    try {
      it = instances.insert(instances_t::value_type(agent_name, factory)).first;
    } catch (...) {
      // Do not leak the factory if the map insertion fails.
      delete factory;
      throw;
    }
  }
  return it->second;
}
inline void Pm4Factory::Destroy() {
std::lock_guard<std::mutex> lck(mutex);
for (auto it : instances) delete it.second;
instances.clear();
}
} // namespace aql_profile
#endif // _PM4_FACTORY_H_
@@ -1,7 +1,8 @@
#include <assert.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <assert.h>
#include "aql_profile.h"
#include "cmdwriter.h"
@@ -18,14 +18,6 @@ using namespace pm4_profile;
namespace pm4_profile {
static char errorString[][64] = {{"No error"},
{"unknow countergroup id"},
{"no countergroup id"},
{"invalid operation"},
{"counter is not available"},
{"countegroup error state"},
{"countegroup is not completed"}};
Gfx8PerfCounter::Gfx8PerfCounter() {
// Initialize the number of shader engines
num_se_ = 4;
@@ -33,8 +25,6 @@ Gfx8PerfCounter::Gfx8PerfCounter() {
}
void Gfx8PerfCounter::Init() {
error_code_ = 0;
// Initialize the value to use in resetting GRBM
regGRBM_GFX_INDEX grbm_gfx_index;
grbm_gfx_index.u32All = 0;
@@ -49,6 +39,10 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
// Reset Grbm to its default state - broadcast
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, reset_grbm_);
// Reset the counter list
regCP_PERFMON_CNTL cp_perfmon_cntl = {0};
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL__CI__VI, cp_perfmon_cntl.u32All);
// Iterate through the list of blocks to generate Pm4 commands to
// program corresponding perf counters of each block
for (CountersMap::const_iterator block_it = countersMap.begin(); block_it != countersMap.end();
@@ -60,7 +54,6 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
// Iterate through each enabled perf counter and building
// corresponding Pm4 commands to program the various control
// registers involved
for (uint32_t ind = 0; ind < counter_count; ++ind) {
const uint32_t counter_id = counters[ind];
@@ -89,9 +82,6 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
cp_perfcount_enable.u32All);
// Reset the counter list
regCP_PERFMON_CNTL cp_perfmon_cntl;
cp_perfmon_cntl.u32All = 0;
cp_perfmon_cntl.bits.PERFMON_STATE = 0;
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL__CI__VI, cp_perfmon_cntl.u32All);
// Start the counter list
@@ -148,16 +138,6 @@ uint32_t Gfx8PerfCounter::end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
return total_counter_num * sizeof(uint32_t);
}
int Gfx8PerfCounter::getLastError() { return error_code_; }
std::string Gfx8PerfCounter::getErrorString(int error) {
if ((error >= 0) && (error < kErrorCodeMax)) {
std::string err_string(errorString[error]);
return err_string;
}
return string("Error input code!");
}
uint32_t Gfx8PerfCounter::ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal,
uint32_t blkId, uint32_t blkCntrIdx) {
regGRBM_GFX_INDEX grbm_gfx_index;
@@ -11,7 +11,7 @@ class CommandWriter;
// This class implements the VI PMU. It is responsible for setting up
// CounterGroups to represent each VI hardware block which exposes performance
// counters.
class Gfx8PerfCounter : public pm4_profile::Pmu {
class Gfx8PerfCounter : public pm4_profile::PerfCounter {
public:
Gfx8PerfCounter();
@@ -19,10 +19,6 @@ class Gfx8PerfCounter : public pm4_profile::Pmu {
// for the blocks featured shader engines instancing
uint32_t getNumSe() { return num_se_; }
int getLastError();
std::string getErrorString(int error);
void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap);
uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap,
@@ -64,8 +60,6 @@ class Gfx8PerfCounter : public pm4_profile::Pmu {
uint32_t* reg_val);
private:
int error_code_;
// Indicates the number of Shader Engines Present
uint32_t num_se_;
@@ -56,7 +56,7 @@ GpuBlockInfo Gfx9HwBlocks[] = {
AI_COUNTER_NUM_PER_SPI, 0, 0, true, 0, 0, false, 0, 0},
// Counter block SQ
{"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
{"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 171,
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
{"AI_SQ_GS", kHsaAiCounterBlockIdSqGs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
@@ -251,10 +251,8 @@ GpuBlockInfo Gfx9HwBlocks[] = {
// Counter block CPC
// Temp commented for Vega10
/*
{"AI_CPC", kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 24,
AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0},
*/
{"AI_CPC", kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 34,
AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0},
// Counter block IOMMUV2
{"AI_IOMMUV2", kHsaAiCounterBlockIdIommuV2, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 25,
@@ -192,8 +192,7 @@ typedef enum HsaAiCounterBlockId {
// Temp commented out for Vega10
// kHsaAiCounterBlockIdCpg,
// Temp commented out for Vega10
// kHsaAiCounterBlockIdCpc,
kHsaAiCounterBlockIdCpc,
// Counters retrieved by KFD
kHsaAiCounterBlockIdIommuV2,
@@ -233,12 +232,9 @@ extern GpuCounterRegInfo AiVgtCounterRegAddr[];
extern GpuCounterRegInfo AiIaCounterRegAddr[];
extern GpuCounterRegInfo AiMcCounterRegAddr[];
extern GpuCounterRegInfo AiSrbmCounterRegAddr[];
// No Tcs Counter block on AI
// extern GpuCounterRegInfo AiTcsCounterRegAddr[];
extern GpuCounterRegInfo AiWdCounterRegAddr[];
extern GpuCounterRegInfo AiCpgCounterRegAddr[];
extern GpuCounterRegInfo AiCpcCounterRegAddr[];
extern GpuPrivCounterBlockId AiBlockIdSq;
extern GpuPrivCounterBlockId AiBlockIdMc;
@@ -19,14 +19,6 @@ using namespace pm4_profile::gfx9;
namespace pm4_profile {
static char errorString[][64] = {{"No error"},
{"unknow countergroup id"},
{"no countergroup id"},
{"invalid operation"},
{"counter is not available"},
{"countegroup error state"},
{"countegroup is not completed"}};
Gfx9PerfCounter::Gfx9PerfCounter() {
// Initialize the number of shader engines
num_se_ = 4;
@@ -34,8 +26,6 @@ Gfx9PerfCounter::Gfx9PerfCounter() {
}
void Gfx9PerfCounter::Init() {
error_code_ = 0;
// Initialize the value to use in resetting GRBM
regGRBM_GFX_INDEX grbm_gfx_index;
grbm_gfx_index.u32All = 0;
@@ -54,6 +44,10 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
// On Vega this is needed to collect Perf Cntrs
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmRLC_PERFMON_CLK_CNTL, 1);
// Reset the counter list
regCP_PERFMON_CNTL cp_perfmon_cntl = {0};
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL, cp_perfmon_cntl.u32All);
// Iterate through the list of blocks to generate Pm4 commands to
// program corresponding perf counters of each block
for (CountersMap::const_iterator block_it = countersMap.begin(); block_it != countersMap.end();
@@ -65,7 +59,6 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
// Iterate through each enabled perf counter and building
// corresponding Pm4 commands to program the various control
// registers involved
for (uint32_t ind = 0; ind < counter_count; ++ind) {
const uint32_t counter_id = counters[ind];
@@ -93,9 +86,6 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
cmdWriter->BuildWriteShRegPacket(cmdBuff, mmCOMPUTE_PERFCOUNT_ENABLE, cp_perfcount_enable.u32All);
// Reset the counter list
regCP_PERFMON_CNTL cp_perfmon_cntl;
cp_perfmon_cntl.u32All = 0;
cp_perfmon_cntl.bits.PERFMON_STATE = 0;
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL, cp_perfmon_cntl.u32All);
// Start the counter list
@@ -156,16 +146,6 @@ uint32_t Gfx9PerfCounter::end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
return total_counter_num * sizeof(uint32_t);
}
int Gfx9PerfCounter::getLastError() { return error_code_; }
std::string Gfx9PerfCounter::getErrorString(int error) {
if ((error >= 0) && (error < kErrorCodeMax)) {
std::string err_string(errorString[error]);
return err_string;
}
return string("Error input code!");
}
uint32_t Gfx9PerfCounter::ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal,
uint32_t blkId, uint32_t blkCntrIdx) {
regGRBM_GFX_INDEX grbm_gfx_index;
@@ -737,17 +717,15 @@ uint32_t Gfx9PerfCounter::BuildCounterSelRegister(uint32_t cntrIdx, uint32_t* re
*/
// Temp commented for Vega10
/*
case kHsaAiCounterBlockIdCpc: {
regCPC_PERFCOUNTER0_SELECT cpc_perf_counter_select;
cpc_perf_counter_select.u32All = 0;
cpc_perf_counter_select.bits.PERF_SEL = blkCntrIdx;
cpc_perf_counter_select.bits.CNTR_SEL0 = blkCntrIdx;
regVal[0] = cpc_perf_counter_select.u32All;
regAddr[0] = AiCpcCounterRegAddr[cntrIdx].counterSelRegAddr;
regIdx = 1;
break;
}
*/
/*
case kHsaAiCounterBlockIdMc: {
@@ -1316,7 +1294,6 @@ uint32_t Gfx9PerfCounter::BuildCounterReadRegisters(uint32_t reg_index, uint32_t
*/
// Temp commented for Vega10
/*
case kHsaAiCounterBlockIdCpc: {
reg_addr[reg_num] = mmGRBM_GFX_INDEX;
reg_val[reg_num] = reset_grbm_;
@@ -1331,7 +1308,6 @@ uint32_t Gfx9PerfCounter::BuildCounterReadRegisters(uint32_t reg_index, uint32_t
reg_num++;
break;
}
*/
// IommuV2, MC, kernel driver counters are retrieved via
// KFD implementation
@@ -11,23 +11,17 @@ class CommandWriter;
// This class implements the AI PMU. It is responsible for setting up
// CounterGroups to represent each AI hardware block which exposes performance
// counters.
class Gfx9PerfCounter : public pm4_profile::Pmu {
class Gfx9PerfCounter : public pm4_profile::PerfCounter {
public:
Gfx9PerfCounter();
// Returns number of shader engines per block
// for the blocks featured shader engines instancing
uint32_t getNumSe() { return num_se_; }
int getLastError();
std::string getErrorString(int error);
void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap);
uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap,
void* dataBuff);
uint32_t getNumSe() { return num_se_; }
private:
void Init();
@@ -64,8 +58,6 @@ class Gfx9PerfCounter : public pm4_profile::Pmu {
uint32_t* reg_val);
private:
int error_code_;
// Indicates the number of Shader Engines Present
uint32_t num_se_;
+35
Просмотреть файл
@@ -0,0 +1,35 @@
#ifndef _HSA_PERF_H_
#define _HSA_PERF_H_
#include <stdint.h>
#include <vector>
#include <map>
#include <string>
namespace pm4_profile {
class DefaultCmdBuf;
class CommandWriter;
// List of counter ids.
typedef std::vector<uint32_t> CountersVec;
// Counter lists keyed by a 32-bit id.
// NOTE(review): the key is presumably the hardware block id - confirm against callers.
typedef std::map<uint32_t, CountersVec> CountersMap;
// Abstract interface of a perf-counter manager: it emits the commands that
// start and stop counter collection into a command buffer.
class PerfCounter {
public:
virtual ~PerfCounter() {}
// Generate start profiling commands.
// The commands are built with cmdWriter into cmdBuff for the counters
// listed in countersMap.
virtual void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
const CountersMap& countersMap) = 0;
// Generate stop profiling commands.
// Return actual required data buffer size.
virtual uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
const CountersMap& countersMap, void* dataBuff) = 0;
// Returns the number of shader engines per block,
// for blocks that are instanced per shader engine.
virtual uint32_t getNumSe() = 0;
};
} // namespace pm4_profile
#endif // _HSA_PERF_H_
@@ -28,7 +28,6 @@ include_directories ( ${TEST_DIR}/${TEST_NAME} )
set ( LIB_NAME "${TEST_NAME}${ONLY64STR}" )
add_library ( ${LIB_NAME} STATIC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
target_link_libraries( ${LIB_NAME} c stdc++ )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
set ( TEST_LIBS ${LIB_NAME} )
#
@@ -37,7 +36,13 @@ set ( TEST_LIBS ${LIB_NAME} )
set ( SRC_LIST ${TEST_DIR}/ctrl/test.cpp )
set ( SRC_LIST ${SRC_LIST} ${TEST_DIR}/ctrl/test_pmgr.cpp )
set ( SRC_LIST ${SRC_LIST} ${TEST_DIR}/ctrl/test_hsa.cpp )
set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} ${TARGET_LIB} )
set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} )
set ( EXE_NAME "ctrl" )
add_executable ( ${EXE_NAME} ${SRC_LIST} )
target_link_libraries( ${EXE_NAME} ${LIB_LIST} c stdc++ dl pthread rt atomic )
#
# Copy the test files
#
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
@@ -30,14 +30,14 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#include "hsa.h"
#include "hsa_rsrc_factory.h"
#include "hsa_ext_amd_aql_profile.h"
#include "hsa_ven_amd_aqlprofile.h"
// Test AQL interface
class TestAql {
TestAql* const test_aql;
public:
TestAql(TestAql* t = 0) : test_aql(t) {}
explicit TestAql(TestAql* t = 0) : test_aql(t) {}
virtual ~TestAql() {}
TestAql* testAql() { return test_aql; }
@@ -6,7 +6,7 @@
if (!(cond)) { \
std::cout << "ASSERT FAILED(" << #cond << ") at \"" << __FILE__ << "\" line " << __LINE__ \
<< std::endl; \
abort(); \
exit(-1); \
} \
}
@@ -36,7 +36,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
class TestHSA : public TestAql {
public:
// Constructor
TestHSA(TestKernel* test) : test_(test), name_(test->Name()) {
explicit TestHSA(TestKernel* test) : test_(test), name_(test->Name()) {
total_time_taken_ = 0;
setup_time_taken_ = 0;
dispatch_time_taken_ = 0;
@@ -29,18 +29,17 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#define _TEST_PGEN_H_
#include "test_pmgr.h"
#include "hsa_ext_amd_aql_profile.h"
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
class TestPGen : public TestPMgr {
protected:
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
protected:
packet_t* PrePacket() { return reinterpret_cast<packet_t*>(&prePacket); }
packet_t* PostPacket() { return reinterpret_cast<packet_t*>(&postPacket); }
public:
TestPGen(TestAql* t) : TestPMgr(t) {}
explicit TestPGen(TestAql* t) : TestPMgr(t) {}
};
#endif // _TEST_PGEN_H_
@@ -31,11 +31,13 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#include "test_assert.h"
#include "test_pgen.h"
hsa_status_t TestPGenPMC_Callback(hsa_ext_amd_aql_profile_info_type_t info_type,
hsa_ext_amd_aql_profile_info_data_t* info_data,
#include <vector>
hsa_status_t TestPGenPMC_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* callback_data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> passed_data_t;
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
return status;
}
@@ -45,29 +47,30 @@ class TestPGenPMC : public TestPGen {
const static uint32_t buffer_alignment = 0x1000; // 4K
hsa_agent_t agent;
hsa_ext_amd_aql_profile_profile_t profile;
hsa_ext_amd_aql_profile_event_t events[2];
hsa_ven_amd_aqlprofile_profile_t profile;
hsa_ven_amd_aqlprofile_event_t* events;
bool buildPackets() { return true; }
bool dumpData() {
std::cout << "TestPGenPMC::dumpData :" << std::endl;
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> callback_data_t;
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
callback_data_t data;
hsa_ext_amd_aql_profile_iterate_data(&profile, TestPGenPMC_Callback, &data);
api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenPMC_Callback, &data);
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
std::cout << "> sample(" << dec << it->sample_id << ") block("
<< it->pmc_data.event.block_name << "_" << it->pmc_data.event.block_index
<< ") result(" << hex << it->pmc_data.result << ")" << std::endl;
std::cout << dec << "event( block(" << it->pmc_data.event.block_name << "_"
<< it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id
<< ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")"
<< std::endl;
}
return true;
}
public:
TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; }
explicit TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; }
bool initialize(int arg_cnt, char** arg_list) {
if (!TestPMgr::initialize(arg_cnt, arg_list)) return false;
@@ -85,31 +88,43 @@ class TestPGenPMC : public TestPGen {
// Instantiation of the profile object
// //////////////////////////////////////////////////////////////
// Set the event fields
events[0].block_name = HSA_EXT_AQL_PROFILE_BLOCK_SQ;
events[0].block_index = 0;
events[0].counter_id = 0x4; // SQ_SQ_PERF_SEL_WAVES
events[1].block_name = HSA_EXT_AQL_PROFILE_BLOCK_SQ;
events[1].block_index = 0;
events[1].counter_id = 0xe; // SQ_SQ_PERF_SEL_ITEMS
const hsa_ven_amd_aqlprofile_event_t events_arr[] = {
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 14 /*ITEMS*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 47 /*WAVE_READY*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 1 /*CYCLE*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 3 /*REQ*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 22 /*WRITEBACK*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS_COUNT*/},
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 8 /*ME1_STALL_WAIT_ON_RCIU_READ*/},
};
const size_t event_count = sizeof(events_arr) / sizeof(hsa_ven_amd_aqlprofile_event_t);
events = new hsa_ven_amd_aqlprofile_event_t[event_count];
memcpy(events, events_arr, sizeof(events_arr));
// Initialization the profile
memset(&profile, 0, sizeof(profile));
profile.agent = agent;
profile.type = HSA_EXT_AQL_PROFILE_EVENT_PMC;
profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
// set enabled events list
profile.events = events;
profile.event_count = 2;
profile.event_count = event_count;
// Profile buffers attributes
command_buffer_alignment = buffer_alignment;
status = hsa_ext_amd_aql_profile_get_info(
&profile, HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
status = api.hsa_ven_amd_aqlprofile_get_info(
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
if (status != HSA_STATUS_SUCCESS) {
const char* str = "";
api.hsa_ven_amd_aqlprofile_error_string(&str);
std::cout << "aqlprofile err: " << str << std::endl;
}
test_assert(status == HSA_STATUS_SUCCESS);
output_buffer_alignment = buffer_alignment;
status = hsa_ext_amd_aql_profile_get_info(&profile, HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE,
&output_buffer_size);
status = api.hsa_ven_amd_aqlprofile_get_info(
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE, &output_buffer_size);
test_assert(status == HSA_STATUS_SUCCESS);
// Application is allocating the command buffer
@@ -128,12 +143,17 @@ class TestPGenPMC : public TestPGen {
memset(profile.output_buffer.ptr, 0x77, output_buffer_size);
// Populating the AQL start packet
status = hsa_ext_amd_aql_profile_start(&profile, PrePacket());
status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket());
if (status != HSA_STATUS_SUCCESS) {
const char* str;
api.hsa_ven_amd_aqlprofile_error_string(&str);
std::cout << "aqlprofile err: " << str << std::endl;
}
test_assert(status == HSA_STATUS_SUCCESS);
if (status != HSA_STATUS_SUCCESS) return false;
// Populating the AQL stop packet
status = hsa_ext_amd_aql_profile_stop(&profile, PostPacket());
status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket());
test_assert(status == HSA_STATUS_SUCCESS);
return (status == HSA_STATUS_SUCCESS);
@@ -31,15 +31,16 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#include "test_assert.h"
#include "test_pgen.h"
hsa_status_t TestPGenSQTT_Callback(hsa_ext_amd_aql_profile_info_type_t info_type,
hsa_ext_amd_aql_profile_info_data_t* info_data,
hsa_status_t TestPGenSQTT_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* callback_data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> passed_data_t;
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
return status;
}
@@ -50,17 +51,17 @@ class TestPGenSQTT : public TestPGen {
const static uint32_t buffer_size = 0x2000000; // 32M
hsa_agent_t agent;
hsa_ext_amd_aql_profile_profile_t profile;
hsa_ven_amd_aqlprofile_profile_t profile;
bool buildPackets() { return true; }
bool dumpData() {
std::cout << "TestPGenSQTT::dumpData :" << std::endl;
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> callback_data_t;
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
callback_data_t data;
hsa_ext_amd_aql_profile_iterate_data(&profile, TestPGenSQTT_Callback, &data);
api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenSQTT_Callback, &data);
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
std::cout << "> sample(" << dec << it->sample_id << ") ptr(" << hex << it->sqtt_data.ptr
<< ") size(" << dec << it->sqtt_data.size << ")" << std::endl;
@@ -93,7 +94,7 @@ class TestPGenSQTT : public TestPGen {
}
public:
TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; }
explicit TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; }
bool initialize(int arg_cnt, char** arg_list) {
if (!TestPMgr::initialize(arg_cnt, arg_list)) return false;
@@ -116,7 +117,7 @@ class TestPGenSQTT : public TestPGen {
// Initialization the profile
memset(&profile, 0, sizeof(profile));
profile.agent = agent;
profile.type = HSA_EXT_AQL_PROFILE_EVENT_SQTT;
profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT;
// set parameters
// profile.parameters = &event;
@@ -124,8 +125,8 @@ class TestPGenSQTT : public TestPGen {
// Profile buffers attributes
command_buffer_alignment = buffer_alignment;
status = hsa_ext_amd_aql_profile_get_info(
&profile, HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
status = api.hsa_ven_amd_aqlprofile_get_info(
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
test_assert(status == HSA_STATUS_SUCCESS);
output_buffer_alignment = buffer_alignment;
@@ -146,12 +147,12 @@ class TestPGenSQTT : public TestPGen {
profile.output_buffer.size = output_buffer_size;
// Populating the AQL start packet
status = hsa_ext_amd_aql_profile_start(&profile, PrePacket());
status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket());
test_assert(status == HSA_STATUS_SUCCESS);
if (status != HSA_STATUS_SUCCESS) return false;
// Populating the AQL stop packet
status = hsa_ext_amd_aql_profile_stop(&profile, PostPacket());
status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket());
test_assert(status == HSA_STATUS_SUCCESS);
return (status == HSA_STATUS_SUCCESS);
@@ -61,7 +61,7 @@ bool TestPMgr::addPacketGfx8(const packet_t* packet) {
// Create legacy devices PM4 data
const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet;
slot_pm4_s data;
hsa_ext_amd_aql_profile_legacy_get_pm4(aql_packet, (void*)data.words);
api.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
// Compute the write index of queue and copy Aql packet into it
uint64_t que_idx = hsa_queue_load_write_index_relaxed(getQueue());
@@ -122,4 +122,9 @@ bool TestPMgr::initialize(int argc, char** argv) {
TestPMgr::TestPMgr(TestAql* t) : TestAql(t) {
dummySignal.handle = 0;
postSignal = dummySignal;
hsa_status_t status = hsa_init();
test_assert(status == HSA_STATUS_SUCCESS);
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api);
test_assert(status == HSA_STATUS_SUCCESS);
}
@@ -25,18 +25,20 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef _TEST_SMGR_H_
#define _TEST_SMGR_H_
#ifndef _TEST_PMGR_H_
#define _TEST_PMGR_H_
#include <atomic>
#include "hsa.h"
#include "test_aql.h"
#include "hsa_ven_amd_aqlprofile.h"
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
class TestPMgr : public TestAql {
public:
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
TestPMgr(TestAql* t);
explicit TestPMgr(TestAql* t);
bool run();
protected:
@@ -45,14 +47,16 @@ class TestPMgr : public TestAql {
hsa_signal_t dummySignal;
hsa_signal_t postSignal;
hsa_ven_amd_aqlprofile_1_00_pfn_t api;
virtual bool buildPackets() { return false; }
virtual bool dumpData() { return false; }
virtual bool initialize(int argc, char** argv);
private:
enum {
SLOT_PM4_SIZE_DW = HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
SLOT_PM4_SIZE_AQLP = HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
};
struct slot_pm4_s {
uint32_t words[SLOT_PM4_SIZE_DW];
@@ -64,4 +68,4 @@ class TestPMgr : public TestAql {
bool addPacketGfx9(const packet_t* packet);
};
#endif // _TEST_SMGR_H_
#endif // _TEST_PMGR_H_
Исполняемый файл
+30
Просмотреть файл
@@ -0,0 +1,30 @@
#!/bin/sh
# Runs the 'ctrl' test binary three times from the build directory:
# without profiling, with PMC enabled, and with SQTT enabled.
set -x

tbin=./test/ctrl

# Let the test binary find shared libraries in the current directory.
CDIR=$(pwd)
export LD_LIBRARY_PATH="$CDIR"
export HSA_ENABLE_SDMA=0
export HSA_EMULATE_AQL=1

echo
echo "Run simple convolution kernel"
unset ROCR_ENABLE_PMC
unset ROCR_ENABLE_SQTT
"$tbin"

echo
echo "Run with PMC"
export ROCR_ENABLE_PMC=1
unset ROCR_ENABLE_SQTT
"$tbin"

echo
echo "Run with SQTT"
unset ROCR_ENABLE_PMC
export ROCR_ENABLE_SQTT=1
"$tbin"

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше