hsa-runtime integration
Change-Id: I48968966ffe164218ebff88d0e3a1268e96bf1dd
Этот коммит содержится в:
коммит произвёл
Evgeny Shcherbakov
родитель
c533229bc1
Коммит
4174f07fd1
+3
-3
@@ -25,6 +25,6 @@ add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
|
||||
#
|
||||
# Style format
|
||||
#
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} ${API_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
+10
-5
@@ -6,9 +6,9 @@ Current library implementation supports only GFX9.
|
||||
The library source tree:
|
||||
- doc - Documentation, the API specification and the presentation
|
||||
- inc - Public API
|
||||
- hsa_ext_amd_aql_profile.h - AMD AQL profile library public API
|
||||
- hsa_ven_amd_aqlprofile.h - AMD AQL profile library public API
|
||||
- src - AMD AQL profile library sources
|
||||
- aqlprofile - AMD AQL profile library
|
||||
- core - the library sources
|
||||
- commandwriter - PM4 command writer originated from 'hsa-runtime/tools'
|
||||
- perfcounter - PM4 perfcounter manager originated from 'hsa-runtime/tools'
|
||||
- threadtrace - PM4 threadtrace manager originated from 'hsa-runtime/tools'
|
||||
@@ -19,7 +19,7 @@ The library source tree:
|
||||
|
||||
To build the library:
|
||||
|
||||
$ cd .../hsa-ext-aql-profile
|
||||
$ cd .../hsa-amd-aqlprofile
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake ..
|
||||
@@ -27,8 +27,9 @@ $ make
|
||||
|
||||
To run the test:
|
||||
|
||||
$ cd .../hsa-ext-aql-profile/build
|
||||
$ test/ctrl
|
||||
$ cd .../hsa-amd-aqlprofile/build
|
||||
$ export LD_LIBRARY_PATH=$PWD
|
||||
$ ./test/ctrl
|
||||
|
||||
To enable PMC profiling:
|
||||
|
||||
@@ -37,3 +38,7 @@ $ export ROCR_ENABLE_PMC=1
|
||||
To enable SQTT profiling:
|
||||
|
||||
$ export ROCR_ENABLE_SQTT=1
|
||||
|
||||
Or to use the script:
|
||||
|
||||
$ ./run.sh
|
||||
Двоичные данные
Двоичный файл не отображается.
Двоичные данные
Двоичный файл не отображается.
+9
-5
@@ -20,10 +20,9 @@ if ( NOT DEFINED PROJ_DIR )
|
||||
set ( ROOT_DIR ${PROJ_DIR}/.. )
|
||||
endif ()
|
||||
|
||||
set ( API_DIR ${ROOT_DIR}/inc )
|
||||
set ( HSA_RUNTIME_DIR ${PROJ_DIR}/../../.. )
|
||||
set ( HSA_RUNTIME_OSC_DIR ${HSA_RUNTIME_DIR}/opensrc/hsa-runtime )
|
||||
set ( CORE_UTIL_DIR ${HSA_RUNTIME_OSC_DIR}/core/util )
|
||||
set ( HSA_RUNTIME_DIR ${PROJ_DIR}/../../hsa-runtime )
|
||||
set ( API_DIR ${HSA_RUNTIME_DIR}/inc )
|
||||
set ( CORE_UTIL_DIR ${HSA_RUNTIME_DIR}/core/util )
|
||||
|
||||
include_directories ( ${ROOT_DIR} )
|
||||
|
||||
@@ -63,4 +62,9 @@ add_subdirectory ( ${PROJ_DIR}/perfcounter "${PROJECT_BINARY_DIR}/perfcounter" )
|
||||
# libraries that have been built in this regard
|
||||
#
|
||||
set ( TARGET_LIB "${TARGET_NAME}${ONLY64STR}" )
|
||||
add_subdirectory ( ${PROJ_DIR}/${TARGET_NAME} "${PROJECT_BINARY_DIR}/${TARGET_NAME}" )
|
||||
add_subdirectory ( ${PROJ_DIR}/core "${PROJECT_BINARY_DIR}/core" )
|
||||
|
||||
#
|
||||
# Creating the library link
|
||||
#
|
||||
execute_process ( COMMAND sh -xc "/bin/ln -s core/lib${TARGET_LIB}.so libhsa-amd-${TARGET_LIB}.so.1" )
|
||||
+177
-115
@@ -1,6 +1,9 @@
|
||||
#include <string>
|
||||
|
||||
#include "aql_profile.h"
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "pm4_factory.h"
|
||||
#include "cmdwriter.h" // commandwriter
|
||||
#include "perf_counter.h" // perfcounter
|
||||
@@ -9,6 +12,14 @@
|
||||
#include "logger.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
// ERR_CHECK(cond, err, msg): when 'cond' is true, streams 'msg' into ERR_LOGGING
// and returns 'err' from the enclosing function; otherwise does nothing.
// Note: the expansion contains a 'return', so it is only usable inside a function
// whose return type accepts 'err'.
#define ERR_CHECK(cond, err, msg) \
|
||||
{ \
|
||||
if (cond) { \
|
||||
ERR_LOGGING << msg; \
|
||||
return err; \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
@@ -31,7 +42,7 @@ class CommandBufferMgr {
|
||||
uint32_t align(const uint32_t& size) { return (size + align_mask) & ~align_mask; }
|
||||
|
||||
public:
|
||||
CommandBufferMgr(const profile_t* profile)
|
||||
explicit CommandBufferMgr(const profile_t* profile)
|
||||
: buffer(profile->command_buffer), postfix_size(0), info(NULL) {
|
||||
info = (info_t*)setPostfix(sizeof(info_t));
|
||||
}
|
||||
@@ -44,12 +55,16 @@ class CommandBufferMgr {
|
||||
postfix_size = size;
|
||||
buffer.size -= (delta < buffer.size) ? delta : buffer.size;
|
||||
}
|
||||
if (buffer.size == 0)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::setPostfix(): buffer size set to zero");
|
||||
return (buffer.size != 0) ? buffer.ptr + buffer.size : NULL;
|
||||
}
|
||||
|
||||
bool setPreSize(const uint32_t& size) {
|
||||
bool suc = (size <= buffer.size);
|
||||
if (suc) info->precmds_size = size;
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::setPreSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
@@ -62,6 +77,8 @@ class CommandBufferMgr {
|
||||
info->postcmds_size = size - info->precmds_size;
|
||||
suc = ((getPostOffset() + info->postcmds_size) <= buffer.size);
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::checkTotalSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
@@ -80,19 +97,55 @@ class CommandBufferMgr {
|
||||
}
|
||||
};
|
||||
|
||||
// Builds a CountersMap from the events of 'profile'.
// Each event in profile->events[0 .. event_count) is keyed by
// pm4_factory->getBlockId(event) and its counter_id is appended to that key's
// list, in the order the events appear in the profile.
// Returns the populated map by value.
static inline pm4_profile::CountersMap CountersMapCreate(const profile_t* profile,
|
||||
const Pm4Factory* pm4_factory) {
|
||||
pm4_profile::CountersMap countersMap;
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
countersMap[pm4_factory->getBlockId(p)].push_back(p->counter_id);
|
||||
}
|
||||
|
||||
return countersMap;
|
||||
}
|
||||
|
||||
typedef std::vector<const event_t*> EventsVec;
|
||||
// Produces the profile's events as a vector of event pointers, ordered by
// iterating the CountersMap returned by CountersMapCreate() (block by block,
// then counter by counter within a block).
// Returns the vector by value; the pointers refer into profile->events.
static inline EventsVec EventsVecCreate(const profile_t* profile, const Pm4Factory* pm4_factory) {
|
||||
pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory);
|
||||
|
||||
// Map each block id to an event pointer. insert() does not overwrite an
// existing key, so only the first event seen for a given block id is kept.
std::map<uint32_t, const event_t*> id_map;
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
id_map.insert(decltype(id_map)::value_type(pm4_factory->getBlockId(p), p));
|
||||
}
|
||||
|
||||
// Iterate through the list of blocks/counters to generate correct order events vector
|
||||
EventsVec eventsVec;
|
||||
for (pm4_profile::CountersMap::const_iterator block_it = countersMap.begin();
|
||||
block_it != countersMap.end(); ++block_it) {
|
||||
const uint32_t block_id = block_it->first;
|
||||
const pm4_profile::CountersVec& counters = block_it->second;
|
||||
const uint32_t counter_count = counters.size();
|
||||
|
||||
// The ind-th event of a block is taken as (first event of that block) + ind.
// NOTE(review): this pointer arithmetic appears to assume that all events of
// the same block are stored contiguously in profile->events; if events of
// different blocks are interleaved, the wrong events would be selected - confirm
// against callers.
for (uint32_t ind = 0; ind < counter_count; ++ind) {
|
||||
eventsVec.push_back(id_map[block_id] + ind);
|
||||
}
|
||||
}
|
||||
|
||||
return eventsVec;
|
||||
}
|
||||
|
||||
// Returns true when both events have equal block_name, block_index and
// counter_id fields; false otherwise.
static inline bool is_event_match(const event_t& event1, const event_t& event2) {
|
||||
return (event1.block_name == event2.block_name) && (event1.block_index == event2.block_index) &&
|
||||
(event1.counter_id == event2.counter_id);
|
||||
}
|
||||
|
||||
hsa_status_t default_pmcdata_callback(hsa_ext_amd_aql_profile_info_type_t info_type,
|
||||
hsa_ext_amd_aql_profile_info_data_t* info_data,
|
||||
hsa_status_t default_pmcdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* callback_data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
hsa_ext_amd_aql_profile_info_data_t* passed_data =
|
||||
reinterpret_cast<hsa_ext_amd_aql_profile_info_data_t*>(callback_data);
|
||||
hsa_ven_amd_aqlprofile_info_data_t* passed_data =
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
|
||||
|
||||
if (info_type == HSA_EXT_AQL_PROFILE_INFO_PMC_DATA) {
|
||||
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) {
|
||||
if (is_event_match(info_data->pmc_data.event, passed_data->pmc_data.event)) {
|
||||
if (passed_data->sample_id == UINT32_MAX) {
|
||||
passed_data->pmc_data.result += info_data->pmc_data.result;
|
||||
@@ -112,14 +165,14 @@ struct sqtt_ctrl_t {
|
||||
uint32_t writePtr;
|
||||
};
|
||||
|
||||
hsa_status_t default_sqttdata_callback(hsa_ext_amd_aql_profile_info_type_t info_type,
|
||||
hsa_ext_amd_aql_profile_info_data_t* info_data,
|
||||
hsa_status_t default_sqttdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* callback_data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
hsa_ext_amd_aql_profile_info_data_t* passed_data =
|
||||
reinterpret_cast<hsa_ext_amd_aql_profile_info_data_t*>(callback_data);
|
||||
hsa_ven_amd_aqlprofile_info_data_t* passed_data =
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
|
||||
|
||||
if (info_type == HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA) {
|
||||
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
|
||||
if (info_data->sample_id == passed_data->sample_id) {
|
||||
passed_data->sqtt_data = info_data->sqtt_data;
|
||||
status = HSA_STATUS_INFO_BREAK;
|
||||
@@ -129,83 +182,98 @@ hsa_status_t default_sqttdata_callback(hsa_ext_amd_aql_profile_info_type_t info_
|
||||
return status;
|
||||
}
|
||||
|
||||
Pm4Factory::tables_t Pm4Factory::tables;
|
||||
std::mutex Logger::mutex;
|
||||
Logger* Logger::instance = NULL;
|
||||
std::mutex Pm4Factory::mutex;
|
||||
Pm4Factory::instances_t Pm4Factory::instances;
|
||||
|
||||
// Teardown hook declared with DESTRUCTOR_API, which this file defines as
// __attribute__((destructor)). Calls Logger::Destroy() and then
// Pm4Factory::Destroy().
DESTRUCTOR_API void destructor() {
|
||||
Logger::Destroy();
|
||||
Pm4Factory::Destroy();
|
||||
}
|
||||
|
||||
} // aql_profile
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_validate_event(
|
||||
hsa_agent_t agent, const hsa_ext_amd_aql_profile_event_t* event, bool* result) {
|
||||
// Stores in *str a C-string pointer to the text returned by
// aql_profile::Logger::LastMessage() and always returns HSA_STATUS_SUCCESS.
// 'str' is dereferenced without a null check.
// NOTE(review): the pointer comes from c_str() on the result of LastMessage();
// its lifetime depends on whether LastMessage() returns a reference or a
// temporary, which is not visible here - confirm.
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_error_string(const char** str) {
|
||||
*str = aql_profile::Logger::LastMessage().c_str();
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
|
||||
const hsa_ext_amd_aql_profile_profile_t* profile, aql_profile::packet_t* aql_start_packet) {
|
||||
aql_profile::Logger logger;
|
||||
// Check if event is valid for the specific GPU
// Sets *result to true when the factory created for 'agent' returns a non-NULL
// block info for 'event'; otherwise *result stays false.
// Returns HSA_STATUS_SUCCESS unless a std::exception other than
// aql_profile::event_exception is caught, in which case HSA_STATUS_ERROR.
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
||||
hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t* event, bool* result) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
*result = false;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
if (pm4_factory->getBlockInfo(event) != NULL) *result = true;
|
||||
// An event_exception is only logged at info level: status remains SUCCESS and
// *result remains false.
} catch (aql_profile::event_exception& e) {
|
||||
INFO_LOGGING << e.what();
|
||||
// Any other std::exception is logged as an error and reported as a failure.
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, aql_profile::packet_t* aql_start_packet) {
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
if (pm4_factory == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter();
|
||||
if (cmdWriter == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
pm4_profile::DefaultCmdBuf commands;
|
||||
aql_profile::CommandBufferMgr cmdBufMgr(profile);
|
||||
if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR;
|
||||
|
||||
if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_PMC) {
|
||||
pm4_profile::Pmu* pmcMgr = pm4_factory->getPmcMgr();
|
||||
if (pmcMgr == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
pm4_profile::CountersMap countersMap;
|
||||
for (const hsa_ext_amd_aql_profile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
countersMap[pm4_factory->getBlockId(p)].push_back(p->counter_id);
|
||||
}
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr();
|
||||
|
||||
// Generate start commands
|
||||
const pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory);
|
||||
pmcMgr->begin(&commands, cmdWriter, countersMap);
|
||||
cmdBufMgr.setPreSize(commands.Size());
|
||||
|
||||
// Generate stop commands
|
||||
const uint32_t data_size =
|
||||
pmcMgr->end(&commands, cmdWriter, countersMap, profile->output_buffer.ptr);
|
||||
if (data_size == 0) return HSA_STATUS_ERROR;
|
||||
ERR_CHECK(data_size == 0, HSA_STATUS_ERROR, "PMC mgr end(): data size set to zero");
|
||||
assert(data_size <= profile->output_buffer.size);
|
||||
if (data_size > profile->output_buffer.size) return HSA_STATUS_ERROR;
|
||||
|
||||
} else if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_SQTT) {
|
||||
if (data_size > profile->output_buffer.size) {
|
||||
ERR_LOGGING << "data size assertion failed, data_size(" << data_size << "), buffer size("
|
||||
<< profile->output_buffer.size << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) {
|
||||
pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr();
|
||||
if (sqttMgr == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
pm4_profile::ThreadTraceConfig sqtt_config;
|
||||
sqttMgr->InitThreadTraceConfig(&sqtt_config);
|
||||
if (profile->parameters) {
|
||||
for (const hsa_ext_amd_aql_profile_parameters_t* p = profile->parameters;
|
||||
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
|
||||
p < (profile->parameters + profile->parameter_count); ++p) {
|
||||
switch (p->parameter_name) {
|
||||
case HSA_EXT_AQL_PROFILE_PARAM_COMPUTE_UNIT_TARGET:
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
|
||||
sqtt_config.threadTraceTargetCu = p->value;
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_PARAM_VM_ID_MASK:
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
|
||||
sqtt_config.threadTraceVmIdMask = p->value;
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_PARAM_MASK:
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
|
||||
sqtt_config.threadTraceMask = p->value;
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK:
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
|
||||
sqtt_config.threadTraceTokenMask = p->value;
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_PARAM_TOKEN_MASK2:
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
|
||||
sqtt_config.threadTraceTokenMask2 = p->value;
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING(logger) << "Bad SQTT parameter name (" << p->parameter_name << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
ERR_LOGGING << "Bad SQTT parameter name (" << p->parameter_name << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -213,10 +281,9 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
|
||||
|
||||
sqttMgr->setSqttDataBuff((uint8_t*)profile->output_buffer.ptr, profile->output_buffer.size);
|
||||
|
||||
// Control buffer registering
|
||||
const uint32_t status_size = sqttMgr->StatusSizeInfo();
|
||||
void* status_ptr = cmdBufMgr.setPostfix(status_size);
|
||||
if (status_ptr == NULL) return HSA_STATUS_ERROR;
|
||||
// Control buffer registering
|
||||
sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr);
|
||||
|
||||
// Generate start commands
|
||||
@@ -224,10 +291,12 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
|
||||
cmdBufMgr.setPreSize(commands.Size());
|
||||
// Generate stop commands
|
||||
sqttMgr->StopSession(&commands, cmdWriter);
|
||||
} else
|
||||
return HSA_STATUS_ERROR;
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (!cmdBufMgr.checkTotalSize(commands.Size())) return HSA_STATUS_ERROR;
|
||||
cmdBufMgr.checkTotalSize(commands.Size());
|
||||
|
||||
const aql_profile::descriptor_t pre_descr = cmdBufMgr.getPreDescr();
|
||||
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
|
||||
@@ -236,7 +305,7 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
|
||||
// Populate start aql packet
|
||||
aql_profile::populateAql(pre_descr.ptr, pre_descr.size, cmdWriter, aql_start_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING(logger) << e.what();
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -244,25 +313,18 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_start(
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling stop commands
|
||||
PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_stop(
|
||||
const hsa_ext_amd_aql_profile_profile_t* profile, aql_profile::packet_t* aql_stop_packet) {
|
||||
aql_profile::Logger logger;
|
||||
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_stop(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_stop_packet) {
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
if (pm4_factory == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter();
|
||||
if (cmdWriter == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
aql_profile::CommandBufferMgr cmdBufMgr(profile);
|
||||
if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR;
|
||||
|
||||
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
|
||||
// Populate stop aql packet
|
||||
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
|
||||
aql_profile::populateAql(post_descr.ptr, post_descr.size, cmdWriter, aql_stop_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING(logger) << e.what();
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -271,9 +333,7 @@ PUBLIC_API hsa_status_t hsa_ext_amd_aql_profile_stop(
|
||||
|
||||
// Legacy devices, converting of the profiling AQL packet to PM4 packet blob
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
|
||||
aql_profile::Logger logger;
|
||||
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
|
||||
try {
|
||||
// Populate GFX8 pm4 packet blob
|
||||
// Adding HSA barrier acquire packet
|
||||
@@ -283,7 +343,7 @@ hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet,
|
||||
// Adding HSA barrier release packet
|
||||
data = aql_profile::legacyAqlRelease(aql_packet, data);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING(logger) << e.what();
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -292,34 +352,33 @@ hsa_ext_amd_aql_profile_legacy_get_pm4(const aql_profile::packet_t* aql_packet,
|
||||
|
||||
// Method for getting the profile info
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ext_amd_aql_profile_get_info(const hsa_ext_amd_aql_profile_profile_t* profile,
|
||||
hsa_ext_amd_aql_profile_info_type_t attribute, void* value) {
|
||||
hsa_ven_amd_aqlprofile_get_info(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute, void* value) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
aql_profile::Logger logger;
|
||||
|
||||
try {
|
||||
switch (attribute) {
|
||||
case HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE:
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE:
|
||||
*(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE:
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE:
|
||||
*(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_INFO_PMC_DATA:
|
||||
reinterpret_cast<hsa_ext_amd_aql_profile_info_data_t*>(value)->pmc_data.result = 0;
|
||||
status = hsa_ext_amd_aql_profile_iterate_data(profile,
|
||||
aql_profile::default_pmcdata_callback, value);
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA:
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(value)->pmc_data.result = 0;
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::default_pmcdata_callback,
|
||||
value);
|
||||
break;
|
||||
case HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA:
|
||||
status = hsa_ext_amd_aql_profile_iterate_data(
|
||||
profile, aql_profile::default_sqttdata_callback, value);
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA:
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile,
|
||||
aql_profile::default_sqttdata_callback, value);
|
||||
break;
|
||||
default:
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
ERR_LOGGING(logger) << "Invalid attribute (" << attribute << ")";
|
||||
ERR_LOGGING << "Invalid attribute (" << attribute << ")";
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING(logger) << e.what();
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -328,29 +387,26 @@ hsa_ext_amd_aql_profile_get_info(const hsa_ext_amd_aql_profile_profile_t* profil
|
||||
|
||||
// Method for iterating the events output data
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* profile,
|
||||
hsa_ext_amd_aql_profile_data_callback_t callback, void* data) {
|
||||
hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
aql_profile::Logger logger;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
if (pm4_factory == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_PMC) {
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
uint32_t info_size = 0;
|
||||
void* info_data;
|
||||
uint64_t* samples = (uint64_t*)profile->output_buffer.ptr;
|
||||
const uint32_t sample_count = profile->output_buffer.size / sizeof(uint64_t);
|
||||
uint32_t sample_index = 0;
|
||||
|
||||
pm4_profile::Pmu* pmcMgr = pm4_factory->getPmcMgr();
|
||||
if (pmcMgr == NULL) return HSA_STATUS_ERROR;
|
||||
pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr();
|
||||
|
||||
for (const hsa_ext_amd_aql_profile_event_t* p = profile->events;
|
||||
p < (profile->events + profile->event_count); ++p) {
|
||||
const pm4_profile::GpuBlockInfo* block_info = pm4_factory->getBlockInfo(p);
|
||||
if (block_info == NULL) return HSA_STATUS_ERROR;
|
||||
aql_profile::EventsVec eventsVec = EventsVecCreate(profile, pm4_factory);
|
||||
for (aql_profile::EventsVec::const_iterator it = eventsVec.begin(); it != eventsVec.end();
|
||||
++it) {
|
||||
const hsa_ven_amd_aqlprofile_event_t* p = *it;
|
||||
const pm4_profile::CntlMethod method = pm4_factory->getBlockInfo(p)->method;
|
||||
// A perfcounter data sample per ShaderEngine
|
||||
const uint32_t block_samples_count = (method == pm4_profile::CntlMethodBySe ||
|
||||
@@ -359,46 +415,52 @@ hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* pr
|
||||
: 1;
|
||||
for (uint32_t i = 0; i < block_samples_count; ++i) {
|
||||
assert(sample_index < sample_count);
|
||||
if (sample_index >= sample_count) return HSA_STATUS_ERROR;
|
||||
if (sample_index >= sample_count) {
|
||||
ERR_LOGGING << "Bad sample index (" << sample_index << "/" << sample_count << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
hsa_ext_amd_aql_profile_info_data_t sample_info;
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = i;
|
||||
sample_info.pmc_data.event = *p;
|
||||
sample_info.pmc_data.result = samples[sample_index];
|
||||
status = callback(HSA_EXT_AQL_PROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
ERR_LOGGING(logger) << "PMC data callback error, sample_id(" << i << ") status("
|
||||
<< status << ")";
|
||||
ERR_LOGGING << "PMC data callback error, sample_id(" << i << ") status(" << status
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
++sample_index;
|
||||
}
|
||||
}
|
||||
} else if (profile->type == HSA_EXT_AQL_PROFILE_EVENT_SQTT) {
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) {
|
||||
pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr();
|
||||
if (sqttMgr == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
aql_profile::CommandBufferMgr cmdBufMgr(profile);
|
||||
if (cmdBufMgr.getSize() == 0) return HSA_STATUS_ERROR;
|
||||
|
||||
const uint32_t status_size = sqttMgr->StatusSizeInfo();
|
||||
// Control buffer was allocated as the CmdBuffer postfix partition
|
||||
const uint32_t status_size = sqttMgr->StatusSizeInfo();
|
||||
void* status_ptr = cmdBufMgr.setPostfix(status_size);
|
||||
if (status_ptr == NULL) return HSA_STATUS_ERROR;
|
||||
// Control buffer registering
|
||||
sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr);
|
||||
// Validate SQTT status and normalize WRPTR
|
||||
if (sqttMgr->Validate() == false) return HSA_STATUS_ERROR;
|
||||
if (sqttMgr->Validate() == false) {
|
||||
ERR_LOGGING << "SQTT data corrupted";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
const uint32_t se_number = sqttMgr->getNumSe();
|
||||
// Casting status pointer to SQTT control per ShaderEngine array
|
||||
aql_profile::sqtt_ctrl_t* sqtt_ctrl = (aql_profile::sqtt_ctrl_t*)status_ptr;
|
||||
assert(status_size == sizeof(aql_profile::sqtt_ctrl_t) * se_number);
|
||||
if (status_size != sizeof(aql_profile::sqtt_ctrl_t) * se_number) {
|
||||
const uint32_t status_size_exp = sizeof(aql_profile::sqtt_ctrl_t) * se_number;
|
||||
assert(status_size == status_size_exp);
|
||||
if (status_size != status_size_exp) {
|
||||
ERR_LOGGING << "Bad SQTT controll data structure"
|
||||
<< ", status_size(" << status_size << "), status_size_exp(" << status_size_exp
|
||||
<< "), se_number(" << se_number << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
// SQTT output buffer and capacity per ShaderEngine
|
||||
@@ -410,29 +472,29 @@ hsa_ext_amd_aql_profile_iterate_data(const hsa_ext_amd_aql_profile_profile_t* pr
|
||||
// written by hardware. The index is incremented by size of 32 bytes.
|
||||
uint32_t sample_size = sqtt_ctrl[i].writePtr * TT_WRITE_PTR_BLK;
|
||||
|
||||
hsa_ext_amd_aql_profile_info_data_t sample_info;
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = i;
|
||||
sample_info.sqtt_data.ptr = sample_ptr;
|
||||
sample_info.sqtt_data.size = sample_size;
|
||||
status = callback(HSA_EXT_AQL_PROFILE_INFO_SQTT_DATA, &sample_info, data);
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
ERR_LOGGING(logger) << "SQTT data callback error, sample_id(" << i << ") status("
|
||||
<< status << ")";
|
||||
ERR_LOGGING << "SQTT data callback error, sample_id(" << i << ") status(" << status
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
|
||||
sample_ptr += sample_capacity;
|
||||
}
|
||||
} else {
|
||||
ERR_LOGGING(logger) << "Bad profile type (" << profile->type << ")";
|
||||
status = HSA_STATUS_ERROR;
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING(logger) << e.what();
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
#ifndef _AQL_PROFILE_H_
|
||||
#define _AQL_PROFILE_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "hsa_ven_amd_aqlprofile.h"
|
||||
#include "aql_profile_exception.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
class CommandWriter;
|
||||
}
|
||||
|
||||
namespace aql_profile {
|
||||
typedef hsa_ven_amd_aqlprofile_descriptor_t descriptor_t;
|
||||
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
|
||||
typedef hsa_ven_amd_aqlprofile_info_type_t info_type_t;
|
||||
typedef hsa_ven_amd_aqlprofile_data_callback_t data_callback_t;
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef hsa_ven_amd_aqlprofile_event_t event_t;
|
||||
|
||||
void populateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_profile::CommandWriter* cmd_writer,
|
||||
packet_t* aql_packet);
|
||||
void* legacyAqlAcquire(const packet_t* aql_packet, void* data);
|
||||
void* legacyAqlRelease(const packet_t* aql_packet, void* data);
|
||||
void* legacyPm4(const packet_t* aql_packet, void* data);
|
||||
|
||||
// Exception type carrying an event_t value; a thin subclass of
// aql_profile_exc_val<event_t> that forwards the message 'm' and event 'ev'
// to the base-class constructor.
class event_exception : public aql_profile_exc_val<event_t> {
|
||||
public:
|
||||
event_exception(const std::string& m, const event_t& ev) : aql_profile_exc_val(m, ev) {}
|
||||
};
|
||||
|
||||
// Stream insertion for event_t: writes
// "event( block(<block_name>.<block_index>), Id(<counter_id>))" to 'os'
// and returns 'os'.
static std::ostream& operator<<(std::ostream& os, const event_t& ev) {
|
||||
os << "event( block(" << ev.block_name << "." << ev.block_index << "), Id(" << ev.counter_id
|
||||
<< "))";
|
||||
return os;
|
||||
}
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _AQL_PROFILE_H_
|
||||
@@ -0,0 +1,34 @@
|
||||
#ifndef _AQL_PROFILE_EXCEPTION_H_
|
||||
#define _AQL_PROFILE_EXCEPTION_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Exception carrying a plain text message.
// The message passed to the constructor is copied into 'str' and returned
// unchanged by what().
class aql_profile_exc_msg : public std::exception {
|
||||
public:
|
||||
explicit aql_profile_exc_msg(const std::string& msg) : str(msg) {}
|
||||
virtual const char* what() const throw() { return str.c_str(); }
|
||||
|
||||
protected:
|
||||
// Stored message text returned by what().
std::string str;
|
||||
};
|
||||
|
||||
// Exception carrying a message together with a formatted value of type T.
// The text returned by what() is "<msg>(<val>)", where <val> is produced by
// streaming 'val' into a std::ostringstream, so T must support operator<<
// on an output stream.
template <typename T> class aql_profile_exc_val : public std::exception {
|
||||
public:
|
||||
aql_profile_exc_val(const std::string& msg, const T& val) {
|
||||
std::ostringstream oss;
|
||||
oss << msg << "(" << val << ")";
|
||||
str = oss.str();
|
||||
}
|
||||
virtual const char* what() const throw() { return str.c_str(); }
|
||||
|
||||
protected:
|
||||
// Formatted message text returned by what().
std::string str;
|
||||
};
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _AQL_PROFILE_EXCEPTION_H_
|
||||
+14
-4
@@ -11,7 +11,7 @@
|
||||
namespace aql_profile {
|
||||
|
||||
// GFX8 block ID mapping table
|
||||
uint32_t Gfx8Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
|
||||
uint32_t Gfx8Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
pm4_profile::kHsaViCounterBlockIdCb0, pm4_profile::kHsaViCounterBlockIdCpf,
|
||||
pm4_profile::kHsaViCounterBlockIdDb0, pm4_profile::kHsaViCounterBlockIdGrbm,
|
||||
pm4_profile::kHsaViCounterBlockIdGrbmSe, pm4_profile::kHsaViCounterBlockIdPaSu,
|
||||
@@ -30,11 +30,21 @@ uint32_t Gfx8Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
|
||||
pm4_profile::kHsaViCounterBlockIdCpc};
|
||||
|
||||
pm4_profile::CommandWriter* Gfx8Factory::getCommandWriter() {
|
||||
return new pm4_profile::gfx8::Gfx8CmdWriter(false, true);
|
||||
auto p = new pm4_profile::gfx8::Gfx8CmdWriter(false, true);
|
||||
if (p == NULL) throw aql_profile_exc_msg("CommandWriter allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
pm4_profile::Pmu* Gfx8Factory::getPmcMgr() { return new pm4_profile::Gfx8PerfCounter(); }
|
||||
pm4_profile::PerfCounter* Gfx8Factory::getPmcMgr() {
|
||||
auto p = new pm4_profile::Gfx8PerfCounter();
|
||||
if (p == NULL) throw aql_profile_exc_msg("PerfCounter mgr allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() { return new pm4_profile::Gfx8ThreadTrace(); }
|
||||
pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() {
|
||||
auto p = new pm4_profile::Gfx8ThreadTrace();
|
||||
if (p == NULL) throw aql_profile_exc_msg("ThreadTrace mgr allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
} // aql_profile
|
||||
+15
-5
@@ -11,7 +11,7 @@
|
||||
namespace aql_profile {
|
||||
|
||||
// GFX9 block ID mapping table
|
||||
uint32_t Gfx9Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
|
||||
uint32_t Gfx9Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
pm4_profile::kHsaAiCounterBlockIdCb0,
|
||||
kBadBlockId /*CPF*/,
|
||||
pm4_profile::kHsaAiCounterBlockIdDb0,
|
||||
@@ -42,14 +42,24 @@ uint32_t Gfx9Factory::block_id_table[HSA_EXT_AQL_PROFILE_BLOCKS_NUMBER] = {
|
||||
pm4_profile::kHsaAiCounterBlockIdTcs,
|
||||
pm4_profile::kHsaAiCounterBlockIdWd,
|
||||
kBadBlockId /*CPG*/,
|
||||
kBadBlockId /*CPC*/};
|
||||
pm4_profile::kHsaAiCounterBlockIdCpc};
|
||||
|
||||
pm4_profile::CommandWriter* Gfx9Factory::getCommandWriter() {
|
||||
return new pm4_profile::gfx9::Gfx9CmdWriter(false, true);
|
||||
auto p = new pm4_profile::gfx9::Gfx9CmdWriter(false, true);
|
||||
if (p == NULL) throw aql_profile_exc_msg("CommandWriter allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
pm4_profile::Pmu* Gfx9Factory::getPmcMgr() { return new pm4_profile::Gfx9PerfCounter(); }
|
||||
pm4_profile::PerfCounter* Gfx9Factory::getPmcMgr() {
|
||||
auto p = new pm4_profile::Gfx9PerfCounter();
|
||||
if (p == NULL) throw aql_profile_exc_msg("PerfCounter mgr allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() { return new pm4_profile::Gfx9ThreadTrace(); }
|
||||
pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() {
|
||||
auto p = new pm4_profile::Gfx9ThreadTrace();
|
||||
if (p == NULL) throw aql_profile_exc_msg("ThreadTrace mgr allocation failed");
|
||||
return p;
|
||||
}
|
||||
|
||||
} // aql_profile
|
||||
+3
-2
@@ -1,8 +1,9 @@
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "aql_profile.h"
|
||||
#include "amd_aql_pm4_ib_packet.h"
|
||||
@@ -0,0 +1,137 @@
|
||||
#ifndef _LOGGER_H_
|
||||
#define _LOGGER_H_
|
||||
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/file.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Process-wide singleton logger.
//
// Values are streamed with operator<<. The first value streamed after a reset
// starts a new record prefixed with a timestamp, the pid and the tid; further
// values are appended to that record. After the begm manipulator, the streamed
// text is additionally captured per thread and can be read with LastMessage().
//
// File output is enabled only when the HSA_VEN_AMD_AQLPROFILE_LOG environment
// variable is set; its value is not used as a path, the file is always
// "/tmp/aql_profile_log.txt".
//
// Locking: the static 'mutex' is a non-recursive std::mutex, so no function
// that takes it may be reached while it is already held by the same thread.
// The constructor and destructor run under the lock taken by Instance() and
// Destroy() and therefore must not call resetStreaming() or put().
class Logger {
 public:
  // Streams one value into the current record (or starts a new record).
  template <typename T> Logger& operator<<(const T& m) {
    std::ostringstream oss;
    oss << m;
    if (!streaming)
      log(oss.str());
    else
      put(oss.str());
    streaming = true;
    return *this;
  }

  // Manipulator support: 'logger << &Logger::endl' invokes the member function.
  typedef void (Logger::*manip_t)();
  Logger& operator<<(manip_t f) {
    (this->*f)();
    return *this;
  }

  // Starts capturing the streamed text into the calling thread's message.
  void begm() { messaging = true; }
  // Ends the current record; the next streamed value starts a new one.
  void endl() { resetStreaming(); }

  // Returns the text captured for the calling thread. The reference points into
  // the logger's per-thread map and is used after the lock has been released.
  static const std::string& LastMessage() {
    Logger& logger = Instance();
    std::lock_guard<std::mutex> lck(mutex);
    return logger.message[GetTid()];
  }

  // Returns the singleton, creating it on first use.
  static Logger& Instance() {
    std::lock_guard<std::mutex> lck(mutex);
    if (instance == NULL) instance = new Logger();
    return *instance;
  }

  // Destroys the singleton; a later Instance() call creates a new one.
  static void Destroy() {
    std::lock_guard<std::mutex> lck(mutex);
    delete instance;
    instance = NULL;
  }

 private:
  static uint32_t GetPid() { return syscall(__NR_getpid); }
  static uint32_t GetTid() { return syscall(__NR_gettid); }

  // Runs with 'mutex' held by Instance(). The flags are fully set by the
  // initializer list, so resetStreaming() (which would lock 'mutex' again and
  // deadlock) is intentionally not called here.
  Logger() : file(NULL), dirty(false), streaming(false), messaging(false) {
    const char* path = getenv("HSA_VEN_AMD_AQLPROFILE_LOG");
    if (path != NULL) {
      file = fopen("/tmp/aql_profile_log.txt", "a");
    }
  }

  // Runs with 'mutex' held by Destroy(), so the trailing newline is written
  // through the non-locking helper instead of put().
  ~Logger() {
    if (file != NULL) {
      if (dirty) writeToFile("\n");
      fclose(file);
    }
  }

  // Clears the calling thread's captured message if capturing was active and
  // returns the logger to the "no record in progress" state.
  void resetStreaming() {
    std::lock_guard<std::mutex> lck(mutex);
    if (messaging) {
      message[GetTid()] = "";
    }
    messaging = false;
    streaming = false;
  }

  // Appends text to the captured message (when capturing) and to the log file
  // (when file logging is enabled).
  void put(const std::string& m) {
    std::lock_guard<std::mutex> lck(mutex);
    if (messaging) {
      message[GetTid()] += m;
    }
    if (file != NULL) {
      dirty = true;
      writeToFile(m);
    }
  }

  // Writes text to the log file under an flock() on the file descriptor.
  // Requires file != NULL. Does not take 'mutex'.
  void writeToFile(const std::string& m) {
    flock(fileno(file), LOCK_EX);
    fprintf(file, "%s", m.c_str());
    fflush(file);
    flock(fileno(file), LOCK_UN);
  }

  // Starts a new record: newline, "<date time pidN tidM> " prefix, then the text.
  void log(const std::string& m) {
    const time_t rawtime = time(NULL);
    tm tm_info;
    localtime_r(&rawtime, &tm_info);
    char tm_str[26];
    strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
    std::ostringstream oss;
    oss << "\n<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    put(oss.str());
  }

  FILE* file;      // log file, NULL when file logging is disabled
  bool dirty;      // true once something has been written to the file
  bool streaming;  // true while a record is in progress
  bool messaging;  // true while streamed text is captured into 'message'

  static std::mutex mutex;
  static Logger* instance;
  std::map<uint32_t, std::string> message;  // captured text, keyed by tid
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
// Starts an error record: ends any record in progress, writes
// "Error: <function>(): " and then begins capturing the text streamed after the
// macro. The manipulators are non-static member functions, so they are passed
// as pointers to members ('&Class::member').
#define ERR_LOGGING                                                  \
  (aql_profile::Logger::Instance() << &aql_profile::Logger::endl     \
                                   << "Error: " << __FUNCTION__      \
                                   << "(): " << &aql_profile::Logger::begm)
|
||||
// Starts an informational record: ends any record in progress, writes
// "Info: <function>(): " and then begins capturing the text streamed after the
// macro. The manipulators are non-static member functions, so they are passed
// as pointers to members ('&Class::member').
#define INFO_LOGGING                                                 \
  (aql_profile::Logger::Instance() << &aql_profile::Logger::endl     \
                                   << "Info: " << __FUNCTION__       \
                                   << "(): " << &aql_profile::Logger::begm)
|
||||
|
||||
#endif // _LOGGER_H_
|
||||
@@ -0,0 +1,157 @@
|
||||
#ifndef _PM4_FACTORY_H_
|
||||
#define _PM4_FACTORY_H_
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
#include "aql_profile.h"
|
||||
#include "gpu_block_info.h"
|
||||
#include "aql_profile_exception.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
class CommandWriter;
|
||||
class PerfCounter;
|
||||
class ThreadTrace;
|
||||
extern GpuBlockInfo Gfx9HwBlocks[];
|
||||
extern const uint32_t Gfx9HwBlockCount;
|
||||
extern GpuBlockInfo Gfx8HwBlocks[];
|
||||
extern const uint32_t Gfx8HwBlockCount;
|
||||
}
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
class BlockMap {
|
||||
public:
|
||||
typedef std::map<uint32_t, const pm4_profile::GpuBlockInfo*> map_t;
|
||||
typedef map_t::const_iterator iter_t;
|
||||
|
||||
void init(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) {
|
||||
if (block_map.size() == 0) fill(id_table, info_table, info_count);
|
||||
}
|
||||
|
||||
const pm4_profile::GpuBlockInfo* get(const uint32_t& id) const {
|
||||
iter_t it = block_map.find(id);
|
||||
return (it != block_map.end()) ? it->second : NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
void fill(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) {
|
||||
map_t info_map;
|
||||
for (uint32_t i = 0; i < info_count; ++i) {
|
||||
const pm4_profile::GpuBlockInfo& entry = info_table[i];
|
||||
info_map[entry.counterGroupId] = &entry;
|
||||
}
|
||||
for (uint32_t i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
iter_t it = info_map.find(id_table[i]);
|
||||
if (it != info_map.end()) block_map[i] = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
map_t block_map;
|
||||
};
|
||||
|
||||
class Pm4Factory {
|
||||
public:
|
||||
enum { kBadBlockId = UINT_MAX };
|
||||
|
||||
static Pm4Factory* Create(const hsa_agent_t agent);
|
||||
static Pm4Factory* Create(const profile_t* profile) { return Create(profile->agent); }
|
||||
static void Destroy();
|
||||
|
||||
virtual pm4_profile::CommandWriter* getCommandWriter() = 0;
|
||||
virtual pm4_profile::PerfCounter* getPmcMgr() = 0;
|
||||
virtual pm4_profile::ThreadTrace* getSqttMgr() = 0;
|
||||
|
||||
const pm4_profile::GpuBlockInfo* getBlockInfo(const event_t* event) const {
|
||||
const pm4_profile::GpuBlockInfo* info = block_map.get(event->block_name);
|
||||
if (info == NULL) throw event_exception(std::string("Bad block, "), *event);
|
||||
if (event->block_index >= info->maxInstanceCount)
|
||||
throw event_exception(std::string("Bad block index, "), *event);
|
||||
if (event->counter_id > info->maxEventId)
|
||||
throw event_exception(std::string("Bad event ID, "), *event);
|
||||
return info;
|
||||
}
|
||||
|
||||
uint32_t getBlockId(const event_t* event) const {
|
||||
return getBlockInfo(event)->counterGroupId + event->block_index;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit Pm4Factory(const BlockMap& map) : block_map(map) {}
|
||||
virtual ~Pm4Factory() {}
|
||||
|
||||
private:
|
||||
typedef std::map<std::string, Pm4Factory*> instances_t;
|
||||
|
||||
static std::mutex mutex;
|
||||
static instances_t instances;
|
||||
const BlockMap& block_map;
|
||||
};
|
||||
|
||||
class Gfx8Factory : public Pm4Factory {
|
||||
public:
|
||||
Gfx8Factory() : Pm4Factory(block_map) {
|
||||
block_map.init(block_id_table, pm4_profile::Gfx8HwBlocks, pm4_profile::Gfx8HwBlockCount);
|
||||
}
|
||||
pm4_profile::CommandWriter* getCommandWriter();
|
||||
pm4_profile::PerfCounter* getPmcMgr();
|
||||
pm4_profile::ThreadTrace* getSqttMgr();
|
||||
|
||||
private:
|
||||
static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
BlockMap block_map;
|
||||
};
|
||||
|
||||
class Gfx9Factory : public Pm4Factory {
|
||||
public:
|
||||
Gfx9Factory() : Pm4Factory(block_map) {
|
||||
block_map.init(block_id_table, pm4_profile::Gfx9HwBlocks, pm4_profile::Gfx9HwBlockCount);
|
||||
}
|
||||
pm4_profile::CommandWriter* getCommandWriter();
|
||||
pm4_profile::PerfCounter* getPmcMgr();
|
||||
pm4_profile::ThreadTrace* getSqttMgr();
|
||||
|
||||
private:
|
||||
static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
BlockMap block_map;
|
||||
};
|
||||
|
||||
// Returns the factory for the agent, creating and caching it by agent name on
// first use.
//
// Throws aql_profile_exc_val<std::string> when the agent name starts with
// "gfx801" or does not start with "gfx8"/"gfx9", and aql_profile_exc_msg when a
// cached entry holds a NULL factory.
inline Pm4Factory* Pm4Factory::Create(const hsa_agent_t agent) {
  std::lock_guard<std::mutex> lck(mutex);

  // Zero-initialised so that a failed query leaves an empty, terminated name
  // (reported below as an unsupported GFXIP) instead of uninitialised bytes.
  char agent_name[64] = {0};
  hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);

  instances_t::iterator it = instances.find(agent_name);

  if (it == instances.end()) {
    // 'it' is the end iterator here and must not be dereferenced: build the
    // factory first, then insert it and continue with the inserted entry.
    Pm4Factory* factory = NULL;
    if (strncmp(agent_name, "gfx801", 6) == 0) {
      throw aql_profile_exc_val<std::string>(std::string("GFX8 Carrizo is not supported "),
                                             agent_name);
    } else if (strncmp(agent_name, "gfx8", 4) == 0) {
      factory = new Gfx8Factory();
    } else if (strncmp(agent_name, "gfx9", 4) == 0) {
      factory = new Gfx9Factory();
    } else {
      throw aql_profile_exc_val<std::string>("Unsupported GFXIP", agent_name);
    }
    it = instances.insert(instances_t::value_type(agent_name, factory)).first;
  }

  if (it->second == NULL) throw aql_profile_exc_msg("Pm4Factory allocation failed");
  return it->second;
}
|
||||
|
||||
inline void Pm4Factory::Destroy() {
|
||||
std::lock_guard<std::mutex> lck(mutex);
|
||||
for (auto it : instances) delete it.second;
|
||||
instances.clear();
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _PM4_FACTORY_H_
|
||||
+2
-1
@@ -1,7 +1,8 @@
|
||||
#include <assert.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <assert.h>
|
||||
|
||||
#include "aql_profile.h"
|
||||
#include "cmdwriter.h"
|
||||
+4
-24
@@ -18,14 +18,6 @@ using namespace pm4_profile;
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
static char errorString[][64] = {{"No error"},
|
||||
{"unknow countergroup id"},
|
||||
{"no countergroup id"},
|
||||
{"invalid operation"},
|
||||
{"counter is not available"},
|
||||
{"countegroup error state"},
|
||||
{"countegroup is not completed"}};
|
||||
|
||||
Gfx8PerfCounter::Gfx8PerfCounter() {
|
||||
// Initialize the number of shader engines
|
||||
num_se_ = 4;
|
||||
@@ -33,8 +25,6 @@ Gfx8PerfCounter::Gfx8PerfCounter() {
|
||||
}
|
||||
|
||||
void Gfx8PerfCounter::Init() {
|
||||
error_code_ = 0;
|
||||
|
||||
// Initialize the value to use in resetting GRBM
|
||||
regGRBM_GFX_INDEX grbm_gfx_index;
|
||||
grbm_gfx_index.u32All = 0;
|
||||
@@ -49,6 +39,10 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
// Reset Grbm to its default state - broadcast
|
||||
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, reset_grbm_);
|
||||
|
||||
// Reset the counter list
|
||||
regCP_PERFMON_CNTL cp_perfmon_cntl = {0};
|
||||
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL__CI__VI, cp_perfmon_cntl.u32All);
|
||||
|
||||
// Iterate through the list of blocks to generate Pm4 commands to
|
||||
// program corresponding perf counters of each block
|
||||
for (CountersMap::const_iterator block_it = countersMap.begin(); block_it != countersMap.end();
|
||||
@@ -60,7 +54,6 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
// Iterate through each enabled perf counter and building
|
||||
// corresponding Pm4 commands to program the various control
|
||||
// registers involved
|
||||
|
||||
for (uint32_t ind = 0; ind < counter_count; ++ind) {
|
||||
const uint32_t counter_id = counters[ind];
|
||||
|
||||
@@ -89,9 +82,6 @@ void Gfx8PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
cp_perfcount_enable.u32All);
|
||||
|
||||
// Reset the counter list
|
||||
regCP_PERFMON_CNTL cp_perfmon_cntl;
|
||||
cp_perfmon_cntl.u32All = 0;
|
||||
cp_perfmon_cntl.bits.PERFMON_STATE = 0;
|
||||
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL__CI__VI, cp_perfmon_cntl.u32All);
|
||||
|
||||
// Start the counter list
|
||||
@@ -148,16 +138,6 @@ uint32_t Gfx8PerfCounter::end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
return total_counter_num * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
int Gfx8PerfCounter::getLastError() { return error_code_; }
|
||||
|
||||
std::string Gfx8PerfCounter::getErrorString(int error) {
|
||||
if ((error >= 0) && (error < kErrorCodeMax)) {
|
||||
std::string err_string(errorString[error]);
|
||||
return err_string;
|
||||
}
|
||||
return string("Error input code!");
|
||||
}
|
||||
|
||||
uint32_t Gfx8PerfCounter::ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal,
|
||||
uint32_t blkId, uint32_t blkCntrIdx) {
|
||||
regGRBM_GFX_INDEX grbm_gfx_index;
|
||||
+1
-7
@@ -11,7 +11,7 @@ class CommandWriter;
|
||||
// This class implements the VI PMU. It is responsible for setting up
|
||||
// CounterGroups to represent each VI hardware block which exposes performance
|
||||
// counters.
|
||||
class Gfx8PerfCounter : public pm4_profile::Pmu {
|
||||
class Gfx8PerfCounter : public pm4_profile::PerfCounter {
|
||||
public:
|
||||
Gfx8PerfCounter();
|
||||
|
||||
@@ -19,10 +19,6 @@ class Gfx8PerfCounter : public pm4_profile::Pmu {
|
||||
// for the blocks featured shader engines instancing
|
||||
uint32_t getNumSe() { return num_se_; }
|
||||
|
||||
int getLastError();
|
||||
|
||||
std::string getErrorString(int error);
|
||||
|
||||
void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap);
|
||||
|
||||
uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap,
|
||||
@@ -64,8 +60,6 @@ class Gfx8PerfCounter : public pm4_profile::Pmu {
|
||||
uint32_t* reg_val);
|
||||
|
||||
private:
|
||||
int error_code_;
|
||||
|
||||
// Indicates the number of Shader Engines Present
|
||||
uint32_t num_se_;
|
||||
|
||||
+3
-5
@@ -56,7 +56,7 @@ GpuBlockInfo Gfx9HwBlocks[] = {
|
||||
AI_COUNTER_NUM_PER_SPI, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SQ
|
||||
{"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
{"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 171,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_SQ_GS", kHsaAiCounterBlockIdSqGs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
@@ -251,10 +251,8 @@ GpuBlockInfo Gfx9HwBlocks[] = {
|
||||
|
||||
// Counter block CPC
|
||||
// Temp commented for Vega10
|
||||
/*
|
||||
{"AI_CPC", kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 24,
|
||||
AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
*/
|
||||
{"AI_CPC", kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 34,
|
||||
AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block IOMMUV2
|
||||
{"AI_IOMMUV2", kHsaAiCounterBlockIdIommuV2, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 25,
|
||||
+1
-5
@@ -192,8 +192,7 @@ typedef enum HsaAiCounterBlockId {
|
||||
// Temp commented out for Vega10
|
||||
// kHsaAiCounterBlockIdCpg,
|
||||
|
||||
// Temp commented out for Vega10
|
||||
// kHsaAiCounterBlockIdCpc,
|
||||
kHsaAiCounterBlockIdCpc,
|
||||
|
||||
// Counters retrieved by KFD
|
||||
kHsaAiCounterBlockIdIommuV2,
|
||||
@@ -233,12 +232,9 @@ extern GpuCounterRegInfo AiVgtCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiIaCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiMcCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiSrbmCounterRegAddr[];
|
||||
|
||||
// No Tcs Counter block on AI
|
||||
// extern GpuCounterRegInfo AiTcsCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiWdCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiCpgCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiCpcCounterRegAddr[];
|
||||
|
||||
extern GpuPrivCounterBlockId AiBlockIdSq;
|
||||
extern GpuPrivCounterBlockId AiBlockIdMc;
|
||||
+5
-29
@@ -19,14 +19,6 @@ using namespace pm4_profile::gfx9;
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
static char errorString[][64] = {{"No error"},
|
||||
{"unknow countergroup id"},
|
||||
{"no countergroup id"},
|
||||
{"invalid operation"},
|
||||
{"counter is not available"},
|
||||
{"countegroup error state"},
|
||||
{"countegroup is not completed"}};
|
||||
|
||||
Gfx9PerfCounter::Gfx9PerfCounter() {
|
||||
// Initialize the number of shader engines
|
||||
num_se_ = 4;
|
||||
@@ -34,8 +26,6 @@ Gfx9PerfCounter::Gfx9PerfCounter() {
|
||||
}
|
||||
|
||||
void Gfx9PerfCounter::Init() {
|
||||
error_code_ = 0;
|
||||
|
||||
// Initialize the value to use in resetting GRBM
|
||||
regGRBM_GFX_INDEX grbm_gfx_index;
|
||||
grbm_gfx_index.u32All = 0;
|
||||
@@ -54,6 +44,10 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
// On Vega this is needed to collect Perf Cntrs
|
||||
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmRLC_PERFMON_CLK_CNTL, 1);
|
||||
|
||||
// Reset the counter list
|
||||
regCP_PERFMON_CNTL cp_perfmon_cntl = {0};
|
||||
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL, cp_perfmon_cntl.u32All);
|
||||
|
||||
// Iterate through the list of blocks to generate Pm4 commands to
|
||||
// program corresponding perf counters of each block
|
||||
for (CountersMap::const_iterator block_it = countersMap.begin(); block_it != countersMap.end();
|
||||
@@ -65,7 +59,6 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
// Iterate through each enabled perf counter and building
|
||||
// corresponding Pm4 commands to program the various control
|
||||
// registers involved
|
||||
|
||||
for (uint32_t ind = 0; ind < counter_count; ++ind) {
|
||||
const uint32_t counter_id = counters[ind];
|
||||
|
||||
@@ -93,9 +86,6 @@ void Gfx9PerfCounter::begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
cmdWriter->BuildWriteShRegPacket(cmdBuff, mmCOMPUTE_PERFCOUNT_ENABLE, cp_perfcount_enable.u32All);
|
||||
|
||||
// Reset the counter list
|
||||
regCP_PERFMON_CNTL cp_perfmon_cntl;
|
||||
cp_perfmon_cntl.u32All = 0;
|
||||
cp_perfmon_cntl.bits.PERFMON_STATE = 0;
|
||||
cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmCP_PERFMON_CNTL, cp_perfmon_cntl.u32All);
|
||||
|
||||
// Start the counter list
|
||||
@@ -156,16 +146,6 @@ uint32_t Gfx9PerfCounter::end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
return total_counter_num * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
int Gfx9PerfCounter::getLastError() { return error_code_; }
|
||||
|
||||
std::string Gfx9PerfCounter::getErrorString(int error) {
|
||||
if ((error >= 0) && (error < kErrorCodeMax)) {
|
||||
std::string err_string(errorString[error]);
|
||||
return err_string;
|
||||
}
|
||||
return string("Error input code!");
|
||||
}
|
||||
|
||||
uint32_t Gfx9PerfCounter::ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal,
|
||||
uint32_t blkId, uint32_t blkCntrIdx) {
|
||||
regGRBM_GFX_INDEX grbm_gfx_index;
|
||||
@@ -737,17 +717,15 @@ uint32_t Gfx9PerfCounter::BuildCounterSelRegister(uint32_t cntrIdx, uint32_t* re
|
||||
*/
|
||||
|
||||
// Temp commented for Vega10
|
||||
/*
|
||||
case kHsaAiCounterBlockIdCpc: {
|
||||
regCPC_PERFCOUNTER0_SELECT cpc_perf_counter_select;
|
||||
cpc_perf_counter_select.u32All = 0;
|
||||
cpc_perf_counter_select.bits.PERF_SEL = blkCntrIdx;
|
||||
cpc_perf_counter_select.bits.CNTR_SEL0 = blkCntrIdx;
|
||||
regVal[0] = cpc_perf_counter_select.u32All;
|
||||
regAddr[0] = AiCpcCounterRegAddr[cntrIdx].counterSelRegAddr;
|
||||
regIdx = 1;
|
||||
break;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
case kHsaAiCounterBlockIdMc: {
|
||||
@@ -1316,7 +1294,6 @@ uint32_t Gfx9PerfCounter::BuildCounterReadRegisters(uint32_t reg_index, uint32_t
|
||||
*/
|
||||
|
||||
// Temp commented for Vega10
|
||||
/*
|
||||
case kHsaAiCounterBlockIdCpc: {
|
||||
reg_addr[reg_num] = mmGRBM_GFX_INDEX;
|
||||
reg_val[reg_num] = reset_grbm_;
|
||||
@@ -1331,7 +1308,6 @@ uint32_t Gfx9PerfCounter::BuildCounterReadRegisters(uint32_t reg_index, uint32_t
|
||||
reg_num++;
|
||||
break;
|
||||
}
|
||||
*/
|
||||
|
||||
// IommuV2, MC, kernel driver counters are retrieved via
|
||||
// KFD implementation
|
||||
+3
-11
@@ -11,23 +11,17 @@ class CommandWriter;
|
||||
// This class implements the AI PMU. It is responsible for setting up
|
||||
// CounterGroups to represent each AI hardware block which exposes performance
|
||||
// counters.
|
||||
class Gfx9PerfCounter : public pm4_profile::Pmu {
|
||||
class Gfx9PerfCounter : public pm4_profile::PerfCounter {
|
||||
public:
|
||||
Gfx9PerfCounter();
|
||||
|
||||
// Returns number of shader engines per block
|
||||
// for the blocks featured shader engines instancing
|
||||
uint32_t getNumSe() { return num_se_; }
|
||||
|
||||
int getLastError();
|
||||
|
||||
std::string getErrorString(int error);
|
||||
|
||||
void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap);
|
||||
|
||||
uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap,
|
||||
void* dataBuff);
|
||||
|
||||
uint32_t getNumSe() { return num_se_; }
|
||||
|
||||
private:
|
||||
void Init();
|
||||
|
||||
@@ -64,8 +58,6 @@ class Gfx9PerfCounter : public pm4_profile::Pmu {
|
||||
uint32_t* reg_val);
|
||||
|
||||
private:
|
||||
int error_code_;
|
||||
|
||||
// Indicates the number of Shader Engines Present
|
||||
uint32_t num_se_;
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
#ifndef _HSA_PERF_H_
|
||||
#define _HSA_PERF_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace pm4_profile {
|
||||
class DefaultCmdBuf;
|
||||
class CommandWriter;
|
||||
|
||||
typedef std::vector<uint32_t> CountersVec;
|
||||
typedef std::map<uint32_t, CountersVec> CountersMap;
|
||||
|
||||
class PerfCounter {
|
||||
public:
|
||||
virtual ~PerfCounter() {}
|
||||
|
||||
// Generate start profiling commands.
|
||||
virtual void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
const CountersMap& countersMap) = 0;
|
||||
|
||||
// Generate stop profiling commands.
|
||||
// Return actual required data buffer size.
|
||||
virtual uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
const CountersMap& countersMap, void* dataBuff) = 0;
|
||||
|
||||
// Returns number of shader engines per block
|
||||
// for the blocks featured shader engines instancing
|
||||
virtual uint32_t getNumSe() = 0;
|
||||
};
|
||||
} // namespace pm4_profile
|
||||
#endif // _HSA_PERF_H_
|
||||
+7
-2
@@ -28,7 +28,6 @@ include_directories ( ${TEST_DIR}/${TEST_NAME} )
|
||||
set ( LIB_NAME "${TEST_NAME}${ONLY64STR}" )
|
||||
add_library ( ${LIB_NAME} STATIC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
|
||||
target_link_libraries( ${LIB_NAME} c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
|
||||
set ( TEST_LIBS ${LIB_NAME} )
|
||||
|
||||
#
|
||||
@@ -37,7 +36,13 @@ set ( TEST_LIBS ${LIB_NAME} )
|
||||
set ( SRC_LIST ${TEST_DIR}/ctrl/test.cpp )
|
||||
set ( SRC_LIST ${SRC_LIST} ${TEST_DIR}/ctrl/test_pmgr.cpp )
|
||||
set ( SRC_LIST ${SRC_LIST} ${TEST_DIR}/ctrl/test_hsa.cpp )
|
||||
set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} ${TARGET_LIB} )
|
||||
set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} )
|
||||
set ( EXE_NAME "ctrl" )
|
||||
add_executable ( ${EXE_NAME} ${SRC_LIST} )
|
||||
target_link_libraries( ${EXE_NAME} ${LIB_LIST} c stdc++ dl pthread rt atomic )
|
||||
|
||||
#
|
||||
# Copy the test files
|
||||
#
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
|
||||
+2
-2
@@ -30,14 +30,14 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_rsrc_factory.h"
|
||||
#include "hsa_ext_amd_aql_profile.h"
|
||||
#include "hsa_ven_amd_aqlprofile.h"
|
||||
|
||||
// Test AQL interface
|
||||
class TestAql {
|
||||
TestAql* const test_aql;
|
||||
|
||||
public:
|
||||
TestAql(TestAql* t = 0) : test_aql(t) {}
|
||||
explicit TestAql(TestAql* t = 0) : test_aql(t) {}
|
||||
virtual ~TestAql() {}
|
||||
|
||||
TestAql* testAql() { return test_aql; }
|
||||
+1
-1
@@ -6,7 +6,7 @@
|
||||
if (!(cond)) { \
|
||||
std::cout << "ASSERT FAILED(" << #cond << ") at \"" << __FILE__ << "\" line " << __LINE__ \
|
||||
<< std::endl; \
|
||||
abort(); \
|
||||
exit(-1); \
|
||||
} \
|
||||
}
|
||||
|
||||
+1
-1
@@ -36,7 +36,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
class TestHSA : public TestAql {
|
||||
public:
|
||||
// Constructor
|
||||
TestHSA(TestKernel* test) : test_(test), name_(test->Name()) {
|
||||
explicit TestHSA(TestKernel* test) : test_(test), name_(test->Name()) {
|
||||
total_time_taken_ = 0;
|
||||
setup_time_taken_ = 0;
|
||||
dispatch_time_taken_ = 0;
|
||||
+2
-3
@@ -29,18 +29,17 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define _TEST_PGEN_H_
|
||||
|
||||
#include "test_pmgr.h"
|
||||
#include "hsa_ext_amd_aql_profile.h"
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class TestPGen : public TestPMgr {
|
||||
protected:
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
|
||||
protected:
|
||||
packet_t* PrePacket() { return reinterpret_cast<packet_t*>(&prePacket); }
|
||||
packet_t* PostPacket() { return reinterpret_cast<packet_t*>(&postPacket); }
|
||||
|
||||
public:
|
||||
TestPGen(TestAql* t) : TestPMgr(t) {}
|
||||
explicit TestPGen(TestAql* t) : TestPMgr(t) {}
|
||||
};
|
||||
|
||||
#endif // _TEST_PGEN_H_
|
||||
+45
-25
@@ -31,11 +31,13 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "test_assert.h"
|
||||
#include "test_pgen.h"
|
||||
|
||||
hsa_status_t TestPGenPMC_Callback(hsa_ext_amd_aql_profile_info_type_t info_type,
|
||||
hsa_ext_amd_aql_profile_info_data_t* info_data,
|
||||
#include <vector>
|
||||
|
||||
hsa_status_t TestPGenPMC_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* callback_data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> passed_data_t;
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
|
||||
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
|
||||
return status;
|
||||
}
|
||||
@@ -45,29 +47,30 @@ class TestPGenPMC : public TestPGen {
|
||||
const static uint32_t buffer_alignment = 0x1000; // 4K
|
||||
|
||||
hsa_agent_t agent;
|
||||
hsa_ext_amd_aql_profile_profile_t profile;
|
||||
hsa_ext_amd_aql_profile_event_t events[2];
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
hsa_ven_amd_aqlprofile_event_t* events;
|
||||
|
||||
bool buildPackets() { return true; }
|
||||
|
||||
bool dumpData() {
|
||||
std::cout << "TestPGenPMC::dumpData :" << std::endl;
|
||||
|
||||
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> callback_data_t;
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
|
||||
|
||||
callback_data_t data;
|
||||
hsa_ext_amd_aql_profile_iterate_data(&profile, TestPGenPMC_Callback, &data);
|
||||
api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenPMC_Callback, &data);
|
||||
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
|
||||
std::cout << "> sample(" << dec << it->sample_id << ") block("
|
||||
<< it->pmc_data.event.block_name << "_" << it->pmc_data.event.block_index
|
||||
<< ") result(" << hex << it->pmc_data.result << ")" << std::endl;
|
||||
std::cout << dec << "event( block(" << it->pmc_data.event.block_name << "_"
|
||||
<< it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id
|
||||
<< ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; }
|
||||
explicit TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; }
|
||||
|
||||
bool initialize(int arg_cnt, char** arg_list) {
|
||||
if (!TestPMgr::initialize(arg_cnt, arg_list)) return false;
|
||||
@@ -85,31 +88,43 @@ class TestPGenPMC : public TestPGen {
|
||||
// Instantiation of the profile object
|
||||
// //////////////////////////////////////////////////////////////
|
||||
// Set the event fields
|
||||
events[0].block_name = HSA_EXT_AQL_PROFILE_BLOCK_SQ;
|
||||
events[0].block_index = 0;
|
||||
events[0].counter_id = 0x4; // SQ_SQ_PERF_SEL_WAVES
|
||||
events[1].block_name = HSA_EXT_AQL_PROFILE_BLOCK_SQ;
|
||||
events[1].block_index = 0;
|
||||
events[1].counter_id = 0xe; // SQ_SQ_PERF_SEL_ITEMS
|
||||
const hsa_ven_amd_aqlprofile_event_t events_arr[] = {
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 14 /*ITEMS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 47 /*WAVE_READY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 1 /*CYCLE*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 3 /*REQ*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 22 /*WRITEBACK*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS_COUNT*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 8 /*ME1_STALL_WAIT_ON_RCIU_READ*/},
|
||||
};
|
||||
const size_t event_count = sizeof(events_arr) / sizeof(hsa_ven_amd_aqlprofile_event_t);
|
||||
events = new hsa_ven_amd_aqlprofile_event_t[event_count];
|
||||
memcpy(events, events_arr, sizeof(events_arr));
|
||||
|
||||
// Initialize the profile
|
||||
memset(&profile, 0, sizeof(profile));
|
||||
profile.agent = agent;
|
||||
profile.type = HSA_EXT_AQL_PROFILE_EVENT_PMC;
|
||||
profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
|
||||
|
||||
// set enabled events list
|
||||
profile.events = events;
|
||||
profile.event_count = 2;
|
||||
profile.event_count = event_count;
|
||||
|
||||
// Profile buffers attributes
|
||||
command_buffer_alignment = buffer_alignment;
|
||||
status = hsa_ext_amd_aql_profile_get_info(
|
||||
&profile, HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
|
||||
status = api.hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* str = "";
|
||||
api.hsa_ven_amd_aqlprofile_error_string(&str);
|
||||
std::cout << "aqlprofile err: " << str << std::endl;
|
||||
}
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
output_buffer_alignment = buffer_alignment;
|
||||
status = hsa_ext_amd_aql_profile_get_info(&profile, HSA_EXT_AQL_PROFILE_INFO_PMC_DATA_SIZE,
|
||||
&output_buffer_size);
|
||||
status = api.hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE, &output_buffer_size);
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Application is allocating the command buffer
|
||||
@@ -128,12 +143,17 @@ class TestPGenPMC : public TestPGen {
|
||||
memset(profile.output_buffer.ptr, 0x77, output_buffer_size);
|
||||
|
||||
// Populating the AQL start packet
|
||||
status = hsa_ext_amd_aql_profile_start(&profile, PrePacket());
|
||||
status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* str = "";  // initialized (as in the get_info error path) so the message below never streams an unset pointer
|
||||
api.hsa_ven_amd_aqlprofile_error_string(&str);
|
||||
std::cout << "aqlprofile err: " << str << std::endl;
|
||||
}
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
if (status != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
// Populating the AQL stop packet
|
||||
status = hsa_ext_amd_aql_profile_stop(&profile, PostPacket());
|
||||
status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket());
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
+13
-12
@@ -31,15 +31,16 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include "test_assert.h"
|
||||
#include "test_pgen.h"
|
||||
|
||||
hsa_status_t TestPGenSQTT_Callback(hsa_ext_amd_aql_profile_info_type_t info_type,
|
||||
hsa_ext_amd_aql_profile_info_data_t* info_data,
|
||||
hsa_status_t TestPGenSQTT_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* callback_data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> passed_data_t;
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
|
||||
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
|
||||
return status;
|
||||
}
|
||||
@@ -50,17 +51,17 @@ class TestPGenSQTT : public TestPGen {
|
||||
const static uint32_t buffer_size = 0x2000000; // 32M
|
||||
|
||||
hsa_agent_t agent;
|
||||
hsa_ext_amd_aql_profile_profile_t profile;
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
|
||||
bool buildPackets() { return true; }
|
||||
|
||||
bool dumpData() {
|
||||
std::cout << "TestPGenSQTT::dumpData :" << std::endl;
|
||||
|
||||
typedef std::vector<hsa_ext_amd_aql_profile_info_data_t> callback_data_t;
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
|
||||
|
||||
callback_data_t data;
|
||||
hsa_ext_amd_aql_profile_iterate_data(&profile, TestPGenSQTT_Callback, &data);
|
||||
api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenSQTT_Callback, &data);
|
||||
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
|
||||
std::cout << "> sample(" << dec << it->sample_id << ") ptr(" << hex << it->sqtt_data.ptr
|
||||
<< ") size(" << dec << it->sqtt_data.size << ")" << std::endl;
|
||||
@@ -93,7 +94,7 @@ class TestPGenSQTT : public TestPGen {
|
||||
}
|
||||
|
||||
public:
|
||||
TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; }
|
||||
explicit TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; }
|
||||
|
||||
bool initialize(int arg_cnt, char** arg_list) {
|
||||
if (!TestPMgr::initialize(arg_cnt, arg_list)) return false;
|
||||
@@ -116,7 +117,7 @@ class TestPGenSQTT : public TestPGen {
|
||||
// Initialize the profile
|
||||
memset(&profile, 0, sizeof(profile));
|
||||
profile.agent = agent;
|
||||
profile.type = HSA_EXT_AQL_PROFILE_EVENT_SQTT;
|
||||
profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT;
|
||||
|
||||
// set parameters
|
||||
// profile.parameters = &event;
|
||||
@@ -124,8 +125,8 @@ class TestPGenSQTT : public TestPGen {
|
||||
|
||||
// Profile buffers attributes
|
||||
command_buffer_alignment = buffer_alignment;
|
||||
status = hsa_ext_amd_aql_profile_get_info(
|
||||
&profile, HSA_EXT_AQL_PROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
|
||||
status = api.hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
output_buffer_alignment = buffer_alignment;
|
||||
@@ -146,12 +147,12 @@ class TestPGenSQTT : public TestPGen {
|
||||
profile.output_buffer.size = output_buffer_size;
|
||||
|
||||
// Populating the AQL start packet
|
||||
status = hsa_ext_amd_aql_profile_start(&profile, PrePacket());
|
||||
status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket());
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
if (status != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
// Populating the AQL stop packet
|
||||
status = hsa_ext_amd_aql_profile_stop(&profile, PostPacket());
|
||||
status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket());
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
+6
-1
@@ -61,7 +61,7 @@ bool TestPMgr::addPacketGfx8(const packet_t* packet) {
|
||||
// Create legacy devices PM4 data
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet;
|
||||
slot_pm4_s data;
|
||||
hsa_ext_amd_aql_profile_legacy_get_pm4(aql_packet, (void*)data.words);
|
||||
api.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(getQueue());
|
||||
@@ -122,4 +122,9 @@ bool TestPMgr::initialize(int argc, char** argv) {
|
||||
TestPMgr::TestPMgr(TestAql* t) : TestAql(t) {
|
||||
dummySignal.handle = 0;
|
||||
postSignal = dummySignal;
|
||||
|
||||
hsa_status_t status = hsa_init();
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api);
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
+10
-6
@@ -25,18 +25,20 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_SMGR_H_
|
||||
#define _TEST_SMGR_H_
|
||||
#ifndef _TEST_PMGR_H_
|
||||
#define _TEST_PMGR_H_
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "hsa.h"
|
||||
#include "test_aql.h"
|
||||
#include "hsa_ven_amd_aqlprofile.h"
|
||||
|
||||
// TestPMgr: TestAql-derived test class that holds the aqlprofile extension API table and adds profile packets to the queue
|
||||
class TestPMgr : public TestAql {
|
||||
public:
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
TestPMgr(TestAql* t);
|
||||
explicit TestPMgr(TestAql* t);
|
||||
bool run();
|
||||
|
||||
protected:
|
||||
@@ -45,14 +47,16 @@ class TestPMgr : public TestAql {
|
||||
hsa_signal_t dummySignal;
|
||||
hsa_signal_t postSignal;
|
||||
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t api;
|
||||
|
||||
virtual bool buildPackets() { return false; }
|
||||
virtual bool dumpData() { return false; }
|
||||
virtual bool initialize(int argc, char** argv);
|
||||
|
||||
private:
|
||||
enum {
|
||||
SLOT_PM4_SIZE_DW = HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
|
||||
SLOT_PM4_SIZE_AQLP = HSA_EXT_AQL_PROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
|
||||
SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
|
||||
SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
|
||||
};
|
||||
struct slot_pm4_s {
|
||||
uint32_t words[SLOT_PM4_SIZE_DW];
|
||||
@@ -64,4 +68,4 @@ class TestPMgr : public TestAql {
|
||||
bool addPacketGfx9(const packet_t* packet);
|
||||
};
|
||||
|
||||
#endif // _TEST_SMGR_H_
|
||||
#endif // _TEST_PMGR_H_
|
||||
Исполняемый файл
+30
@@ -0,0 +1,30 @@
|
||||
#!/bin/sh
# Runs the test binary three times: with both ROCR_ENABLE_PMC and
# ROCR_ENABLE_SQTT unset, with ROCR_ENABLE_PMC=1, and with ROCR_ENABLE_SQTT=1.
# Commands are traced with 'set -x'.
set -x

tbin=./test/ctrl

# The current directory becomes the only entry of the library search path.
CDIR=$(pwd)
export LD_LIBRARY_PATH="$CDIR"

export HSA_ENABLE_SDMA=0
export HSA_EMULATE_AQL=1

echo
echo "Run simple convolution kernel"
unset ROCR_ENABLE_PMC
unset ROCR_ENABLE_SQTT
eval $tbin

echo
echo "Run with PMC"
export ROCR_ENABLE_PMC=1
unset ROCR_ENABLE_SQTT
eval $tbin

echo
echo "Run with SQTT"
unset ROCR_ENABLE_PMC
export ROCR_ENABLE_SQTT=1
eval $tbin
|
||||
|
||||
|
||||
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Ссылка в новой задаче
Block a user