Extend Rocr Samples to allow collection of Perf Cntrs

Change-Id: I9c7e75128fca28b23ec54efab00bf5d32c95a877
Этот коммит содержится в:
Ramesh Errabolu
2017-02-21 23:16:51 -06:00
родитель 470750cc3c
Коммит 315ae6439b
3 изменённых файлов: 292 добавлений и 0 удалений
+167
Просмотреть файл
@@ -0,0 +1,167 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <cassert>
#include <iostream>
#include <vector>
#include <string>
#include "hsa.h"
#include "tools/inc/hsa_ext_profiler.h"
#include "tools/inc/amd_hsa_tools_interfaces.h"
#include "hsa_perf_cntrs.hpp"
using namespace std;
// Pre-dispatch hook: begins a perf counter session on the PMU whose handle
// is reachable through usrArg.
//
// @param dispParam  Dispatch descriptor handed to the hook; its pre_dispatch
//                   flag is expected to be set, and its queue and
//                   aql_translation_handle are forwarded to the PMU
// @param usrArg     Address of the hsa_ext_tools_pmu_t to begin a session on
void PreDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
  assert((dispParam != NULL) && "Pre Dispatch Callback Param is Missing");
  assert((usrArg != NULL) && "Pre Dispatch Callback User Arg is Missing");

  // assert() compiles to nothing under NDEBUG, so guard the pointers
  // explicitly instead of dereferencing null in release builds
  if ((dispParam == NULL) || (usrArg == NULL)) {
    std::cerr << "Pre Dispatch Callback invoked with a null argument" << std::endl;
    return;
  }
  assert((dispParam->pre_dispatch) && "Pre Dispatch Callback Param is Malformed");

  hsa_ext_tools_pmu_t* perfMgr = static_cast<hsa_ext_tools_pmu_t*>(usrArg);
  hsa_status_t status = hsa_ext_tools_pmu_begin(*perfMgr, dispParam->queue,
                                                dispParam->aql_translation_handle, true);

  // Report the failure even when asserts are disabled; the hook returns
  // void so the status cannot be propagated to the caller
  if (status != HSA_STATUS_SUCCESS) {
    std::cerr << "Error in beginning Perf Cntr Session" << std::endl;
  }
  assert((status == HSA_STATUS_SUCCESS) && "Error in beginning Perf Cntr Session");
}
void PostDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
assert((!dispParam->pre_dispatch) && "Post Dispatch Callback Param is Malformed");
hsa_ext_tools_pmu_t* perfMgr = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
hsa_status_t status = hsa_ext_tools_pmu_end(*perfMgr, dispParam->queue,
dispParam->aql_translation_handle);
assert((status == HSA_STATUS_SUCCESS) && "Error in endning Perf Cntr Session");
}
// Default constructor: the Perf Cntr Manager handle starts out null and the
// counter list starts out empty
RocrPerfCntrApp::RocrPerfCntrApp()
    : perfMgr_(NULL) {}
// Destructor of the class. Frees the CntrInfo descriptors held in the
// counter list, which are heap-allocated with new in Init().
//
// NOTE(review): the PMU handle and the per-block counter handles are not
// released here - the matching release entry points of the tools API still
// need to be wired in; confirm their names against hsa_ext_profiler.h.
RocrPerfCntrApp::~RocrPerfCntrApp() {
  typedef std::vector<CntrInfo*>::iterator CntrIter;
  for (CntrIter it = cntrList_.begin(); it != cntrList_.end(); ++it) {
    delete *it;
  }

  // Drop the now-dangling pointers
  cntrList_.clear();
}
// Return the number of perf counters
uint32_t RocrPerfCntrApp::GetNumPerfCntrs( ) {
return uint32_t(cntrList_.size());
}
// Return the descriptor of the perf counter at the specified index.
//
// @param idx  Zero-based position in the counter list
// @return     Pointer to the descriptor, or NULL when idx is not less than
//             GetNumPerfCntrs()
CntrInfo* RocrPerfCntrApp::GetPerfCntr(uint32_t idx) {
  // vector::operator[] is unchecked; reject out-of-range indices up front
  if (idx >= cntrList_.size()) {
    return NULL;
  }
  return cntrList_[idx];
}
// Print the various fields of Perf Cntrs being programmed
bool RocrPerfCntrApp::PrintCntrs( ) {
CntrInfo *info;
int size = uint32_t(cntrList_.size());
for (int idx = 0; idx < size; idx++) {
info = cntrList_[idx];
std::cout << std::endl;
std::cout << "Rocr Perf Cntr Id: " << info->cntrId << std::endl;
std::cout << "Rocr Perf Cntr Name: " << info->cntrName << std::endl;
std::cout << "Rocr Perf Cntr Blk Id: " << info->blkId << std::endl;
std::cout << "Rocr Perf Cntr Value: " << info->cntrResult << std::endl;
std::cout << "Rocr Perf Cntr Validation: " << info->cnfType << std::endl;
std::cout << std::endl;
}
return true;
}
// Initialize the list of perf counters and program them on the device.
//
// Adds two SQ block counters (waves and work-items) to the list, creates
// the Perf Cntr Manager for the agent, then for every listed counter:
// resolves its block handle, creates the counter in that block, sets its
// event index and enables it.
//
// block id of kHsaAiCounterBlockSQ = 14 == 0x0E
//
// @param agent  Agent on which the Perf Cntr Manager is created
// @return       HSA_STATUS_SUCCESS when every step succeeded, otherwise the
//               status of the first failing call (setup stops at that point)
hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) {
  const uint32_t kSqBlockId = 0x0E;

  // Initialize the list of Perf Cntrs
  cntrList_.reserve(23);

  // Event for number of Waves
  cntrList_.push_back(new CntrInfo(0x4, "SQ_SQ_PERF_SEL_WAVES", NULL,
                                   kSqBlockId, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact));

  // Event for number of Threads
  cntrList_.push_back(new CntrInfo(0xE, "SQ_SQ_PERF_SEL_ITEMS", NULL,
                                   kSqBlockId, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact));

  // Create an instance of Perf Mgr. The asserts below keep the debug-build
  // behaviour; the explicit checks make release builds (NDEBUG) stop and
  // report instead of carrying on with invalid handles.
  hsa_status_t status = hsa_ext_tools_create_pmu(agent, &perfMgr_);
  assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr Mgr");
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Bind each counter descriptor to its perf block handle and create an
  // instance of the counter in that block
  for (size_t idx = 0; idx < cntrList_.size(); ++idx) {
    CntrInfo* info = cntrList_[idx];

    // Obtain the handle of perf block
    if (info->blkHndl == NULL) {
      status = hsa_ext_tools_get_counter_block_by_id(perfMgr_, info->blkId, &info->blkHndl);
      assert((status == HSA_STATUS_SUCCESS) && "Error in getting Perf Cntr Blk Hndl");
      if (status != HSA_STATUS_SUCCESS) {
        return status;
      }
    }

    // Create an instance of counter in the perf block
    status = hsa_ext_tools_create_counter(info->blkHndl, &info->cntrHndl);
    assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr in Perf Blk");
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }

    // Update the Event Index property of counter
    uint32_t cntrProp = HSA_EXT_TOOLS_COUNTER_PARAMETER_EVENT_INDEX;
    status = hsa_ext_tools_set_counter_parameter(info->cntrHndl, cntrProp,
                                                 sizeof(uint32_t),
                                                 static_cast<void*>(&info->cntrId));
    assert((status == HSA_STATUS_SUCCESS) && "Error in updating Perf Cntr Property Event Index");
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }

    // Enable the updated perf counter
    status = hsa_ext_tools_set_counter_enabled(info->cntrHndl, true);
    assert((status == HSA_STATUS_SUCCESS) && "Error in enabing Perf Cntr");
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  return status;
}
// Hook the Pre and Post dispatch callbacks onto the given queue and hand
// both of them the address of this object's Perf Cntr Manager handle as
// their user argument. Failures are caught by assert only.
void RocrPerfCntrApp::RegisterCallbacks(hsa_queue_t *queue) {
  const hsa_status_t hookStatus =
      hsa_ext_tools_set_callback_functions(queue, PreDispatchCallback, PostDispatchCallback);
  assert((hookStatus == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callbacks");

  const hsa_status_t argStatus =
      hsa_ext_tools_set_callback_arguments(queue, &perfMgr_, &perfMgr_);
  assert((argStatus == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callback Params");
}
// Block until the Perf Cntr Manager reports completion of collection.
// The timeout argument is 5000 (presumably milliseconds - TODO confirm
// against the tools API). Returns the status of the wait call.
hsa_status_t RocrPerfCntrApp::Wait() {
  const hsa_status_t waitStatus = hsa_ext_tools_pmu_wait_for_completion(perfMgr_, 5000);
  assert((waitStatus == HSA_STATUS_SUCCESS) && "Error in Waiting for Perf Cntr Completion");
  return waitStatus;
}
// Retrieve the result of every perf counter in the list and print it.
//
// The value of each counter is stored into its descriptor (cntrResult) and
// written to stdout. No comparison against expectedResult / cnfType is
// performed here.
//
// @return  HSA_STATUS_SUCCESS when every counter could be read, otherwise
//          the status of the first counter whose read failed (remaining
//          counters are still attempted)
hsa_status_t RocrPerfCntrApp::Validate() {
  hsa_status_t firstError = HSA_STATUS_SUCCESS;

  for (size_t idx = 0; idx < cntrList_.size(); ++idx) {
    CntrInfo* info = cntrList_[idx];
    const hsa_status_t status =
        hsa_ext_tools_get_counter_result(info->cntrHndl, &info->cntrResult);

    if (status != HSA_STATUS_SUCCESS) {
      // Keep the earliest failure; a later successful read must not mask it
      if (firstError == HSA_STATUS_SUCCESS) {
        firstError = status;
      }
      std::cerr << "Error in reading result of Perf Cntr Id: " << info->cntrId << std::endl;
      continue;
    }

    std::cout << "Value of Perf Cntr is: " << info->cntrResult << std::endl;
  }

  return firstError;
}
+115
Просмотреть файл
@@ -0,0 +1,115 @@
#ifndef ROCR_PERF_CNTR_APP_H_
#define ROCR_PERF_CNTR_APP_H_
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
#include "hsa.h"
#include "tools/inc/hsa_ext_profiler.h"
/// Kind of check to apply to a counter value once it has been collected
enum CntrValCnfType {
  /// No counter value validation should be performed
  CntrValCnf_None = 0,

  /// Counter value should be an exact match to expectedResult
  CntrValCnf_Exact = 1,

  /// Counter value should be greater than expectedResult
  CntrValCnf_GreaterThan = 2,

  /// Counter value should be less than expectedResult
  CntrValCnf_LessThan = 3
};
/// Struct used to encapsulate Counter Info
typedef struct CntrInfo {

  /// Id of counter in hardware block
  uint32_t cntrId;

  /// Name of counter, always NUL-terminated (at most 71 characters kept)
  char cntrName[72];

  /// Handle of perf counter
  hsa_ext_tools_counter_t cntrHndl;

  /// Id of hardware block containing the counter
  uint32_t blkId;

  /// Handle of counter block
  hsa_ext_tools_counter_block_t blkHndl;

  /// Expected value of perf counter
  uint64_t expectedResult;

  /// Value of perf counter as collected
  uint64_t cntrResult;

  /// Type of validation upon completion of dispatch
  CntrValCnfType cnfType;

  /// Populate every field of the descriptor.
  ///
  /// @param cntrId     Id of counter in hardware block
  /// @param cntrName   Name of counter; may be NULL (stored as an empty
  ///                   string) and is truncated to fit the name buffer
  /// @param cntrHndl   Initial handle of perf counter
  /// @param blkId      Id of hardware block containing the counter
  /// @param blkHndl    Initial handle of counter block
  /// @param expResult  Expected value of perf counter
  /// @param result     Initial value stored as the counter result
  /// @param cnfType    Type of validation upon completion of dispatch
  CntrInfo(uint32_t cntrId, const char* cntrName, void* cntrHndl,
           uint32_t blkId, void* blkHndl,
           uint64_t expResult, uint64_t result, CntrValCnfType cnfType) {
    this->cntrId = cntrId;
    this->cntrHndl = cntrHndl;
    this->blkId = blkId;
    this->blkHndl = blkHndl;
    this->expectedResult = expResult;
    this->cntrResult = result;
    this->cnfType = cnfType;

    // Zero the whole buffer first so the name is terminated no matter how
    // many characters are copied, then copy at most size-1 characters so an
    // over-long name cannot overrun the buffer
    memset(this->cntrName, 0, sizeof(this->cntrName));
    if (cntrName != NULL) {
      strncpy(this->cntrName, cntrName, sizeof(this->cntrName) - 1);
    }
  }

} CntrInfo;
/// Sample helper that owns a list of perf counter descriptors and a
/// Perf Cntr Manager handle used to program and read them
class RocrPerfCntrApp {
 public:
  /// Constructor of the class. The list of perf counters is populated
  /// later, by Init()
  RocrPerfCntrApp();

  /// Destructor of the class
  ~RocrPerfCntrApp();

  /// Return the number of perf counters
  uint32_t GetNumPerfCntrs();

  /// Return the descriptor of the perf counter at specified index
  CntrInfo* GetPerfCntr(uint32_t idx);

  /// Print the list of perf counters
  bool PrintCntrs();

  /// Initialize the list of perf counters for the given agent
  hsa_status_t Init(hsa_agent_t agent);

  /// Register Pre and Post dispatch callbacks on the given queue
  void RegisterCallbacks(hsa_queue_t* queue);

  /// Wait for perf counter collection to complete
  hsa_status_t Wait();

  /// Retrieve the result of each perf counter and print it
  hsa_status_t Validate();

 private:
  /// Descriptors of the perf counters handled by this object
  std::vector<CntrInfo*> cntrList_;

  /// Handle of Perf Cntr Manager
  hsa_ext_tools_pmu_t perfMgr_;
};
#endif // ROCR_PERF_CNTR_APP_H_
+10
Просмотреть файл
@@ -11,6 +11,7 @@
#include "hsa.h"
#include "hsa_rsrc_factory.hpp"
#include "hsa_ext_finalize.h"
#include "tools/inc/hsa_ext_profiler.h"
#include "HSAILAmdExt.h"
#include "common.hpp"
@@ -199,6 +200,15 @@ bool HsaRsrcFactory::CreateQueue(AgentInfo *agent_info,
uint32_t num_pkts, hsa_queue_t **queue) {
hsa_status_t status;
// Code to create a Profile Queue object
if (num_pkts == UINT32_MAX) {
status = hsa_ext_tools_queue_create_profiled(agent_info->dev_id,
512, HSA_QUEUE_TYPE_SINGLE, NULL,
NULL, UINT32_MAX, UINT32_MAX, queue);
return (status == HSA_STATUS_SUCCESS);
}
status = hsa_queue_create(agent_info->dev_id, num_pkts,
HSA_QUEUE_TYPE_MULTI, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);