diff --git a/samples/common/hsa_perf_cntrs.cpp b/samples/common/hsa_perf_cntrs.cpp
new file mode 100644
index 0000000000..24b38c80cd
--- /dev/null
+++ b/samples/common/hsa_perf_cntrs.cpp
@@ -0,0 +1,187 @@
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <iostream>
+#include <vector>
+
+#include "hsa.h"
+#include "tools/inc/hsa_ext_profiler.h"
+#include "tools/inc/amd_hsa_tools_interfaces.h"
+
+#include "hsa_perf_cntrs.hpp"
+
+using namespace std;
+
+// Pre-dispatch callback: begins a perf counter session for the dispatch.
+// usrArg must point to the hsa_ext_tools_pmu_t handle, which is what
+// RegisterCallbacks() registers as the callback argument
+void PreDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
+  assert((dispParam->pre_dispatch) && "Pre Dispatch Callback Param is Malformed");
+
+  hsa_ext_tools_pmu_t* perfMgr = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
+  hsa_status_t status = hsa_ext_tools_pmu_begin(*perfMgr, dispParam->queue,
+                                   dispParam->aql_translation_handle, true);
+  assert((status == HSA_STATUS_SUCCESS) && "Error in beginning Perf Cntr Session");
+  (void)status;  // Only read by the assert; unused when NDEBUG is defined
+}
+
+// Post-dispatch callback: ends the perf counter session of the dispatch.
+// usrArg must point to the hsa_ext_tools_pmu_t handle, which is what
+// RegisterCallbacks() registers as the callback argument
+void PostDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
+  assert((!dispParam->pre_dispatch) && "Post Dispatch Callback Param is Malformed");
+
+  hsa_ext_tools_pmu_t* perfMgr = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
+  hsa_status_t status = hsa_ext_tools_pmu_end(*perfMgr, dispParam->queue,
+                                   dispParam->aql_translation_handle);
+  assert((status == HSA_STATUS_SUCCESS) && "Error in endning Perf Cntr Session");
+  (void)status;  // Only read by the assert; unused when NDEBUG is defined
+}
+
+// Constructor of the class. The PMU handle stays NULL until Init() creates it
+RocrPerfCntrApp::RocrPerfCntrApp( ) : perfMgr_(NULL) {
+
+}
+
+// Destructor of the class. Ideally it should delete the
+// PMU and its counters; nothing is released here yet
+RocrPerfCntrApp::~RocrPerfCntrApp( ) {
+
+}
+
+// Return the number of perf counters
+uint32_t RocrPerfCntrApp::GetNumPerfCntrs( ) {
+  return uint32_t(cntrList_.size());
+}
+
+// Return the handle of perf counter at specified index.
+// idx is not range checked and must be less than GetNumPerfCntrs()
+CntrInfo* RocrPerfCntrApp::GetPerfCntr(uint32_t idx) {
+  return cntrList_[idx];
+}
+
+// Print the various fields of Perf Cntrs being programmed.
+// Always returns true
+bool RocrPerfCntrApp::PrintCntrs( ) {
+
+  const uint32_t size = GetNumPerfCntrs();
+  for (uint32_t idx = 0; idx < size; idx++) {
+    const CntrInfo* info = cntrList_[idx];
+    std::cout << std::endl;
+    std::cout << "Rocr Perf Cntr Id: " << info->cntrId << std::endl;
+    std::cout << "Rocr Perf Cntr Name: " << info->cntrName << std::endl;
+    std::cout << "Rocr Perf Cntr Blk Id: " << info->blkId << std::endl;
+    std::cout << "Rocr Perf Cntr Value: " << info->cntrResult << std::endl;
+    std::cout << "Rocr Perf Cntr Validation: " << info->cnfType << std::endl;
+    std::cout << std::endl;
+  }
+  return true;
+}
+
+// Initialize the list of perf counters, create the PMU for the agent, then
+// create, configure and enable one counter per list entry.
+// block id of kHsaAiCounterBlockSQ = 14 == 0x0E
+// Returns the first failing status, else the status of the last call made
+hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) {
+
+  // Initialize the list of Perf Cntrs
+  // Add SQ counter for number of waves
+  CntrInfo* info = NULL;
+  cntrList_.reserve(23);
+
+  // Event for number of Waves
+  info = new CntrInfo(0x4, "SQ_SQ_PERF_SEL_WAVES", NULL,
+                      0x0E, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact);
+  cntrList_.push_back(info);
+
+  // Event for number of Threads
+  info = new CntrInfo(0xE, "SQ_SQ_PERF_SEL_ITEMS", NULL,
+                      0x0E, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact);
+  cntrList_.push_back(info);
+
+  // Create an instance of Perf Mgr
+  hsa_status_t status;
+  status = hsa_ext_tools_create_pmu(agent, &perfMgr_);
+  assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr Mgr");
+  if (status != HSA_STATUS_SUCCESS) return status;  // asserts vanish under NDEBUG
+
+  // Process each counter from the list as necessary
+  // each counter descriptor with its perf block handle
+  // and create an instance of counter in that block
+  uint32_t size = GetNumPerfCntrs();
+  for (uint32_t idx = 0; idx < size; idx++) {
+    info = GetPerfCntr(idx);
+
+    // Obtain the handle of perf block
+    if (info->blkHndl == NULL) {
+      status = hsa_ext_tools_get_counter_block_by_id(perfMgr_, info->blkId, &info->blkHndl);
+      assert((status == HSA_STATUS_SUCCESS) && "Error in getting Perf Cntr Blk Hndl");
+      if (status != HSA_STATUS_SUCCESS) return status;
+    }
+
+    // Create an instance of counter in the perf block
+    status = hsa_ext_tools_create_counter(info->blkHndl, &info->cntrHndl);
+    assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr in Perf Blk");
+    if (status != HSA_STATUS_SUCCESS) return status;
+
+    // Update the Event Index property of counter
+    uint32_t cntrProp = HSA_EXT_TOOLS_COUNTER_PARAMETER_EVENT_INDEX;
+    status = hsa_ext_tools_set_counter_parameter(info->cntrHndl, cntrProp,
+                                   sizeof(uint32_t), (void*)&info->cntrId);
+    assert((status == HSA_STATUS_SUCCESS) && "Error in updating Perf Cntr Property Event Index");
+    if (status != HSA_STATUS_SUCCESS) return status;
+
+    // Enable the updated perf counter
+    status = hsa_ext_tools_set_counter_enabled(info->cntrHndl, true);
+    assert((status == HSA_STATUS_SUCCESS) && "Error in enabing Perf Cntr");
+    if (status != HSA_STATUS_SUCCESS) return status;
+  }
+
+  return status;
+}
+
+// Register Pre and Post dispatch callbacks on the queue. The address of
+// perfMgr_ is passed as the argument of both callbacks and they dereference
+// it, so this object has to stay alive while the callbacks can be invoked
+void RocrPerfCntrApp::RegisterCallbacks(hsa_queue_t *queue){
+
+  hsa_status_t status;
+  status = hsa_ext_tools_set_callback_functions(queue, PreDispatchCallback, PostDispatchCallback);
+  assert((status == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callbacks");
+  status = hsa_ext_tools_set_callback_arguments(queue, &perfMgr_, &perfMgr_);
+  assert((status == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callback Params");
+  (void)status;  // Only read by the asserts; unused when NDEBUG is defined
+  return;
+}
+
+// Wait for perf counter collection to complete. The 5000 argument is
+// presumably a timeout - TODO confirm its unit against the tools API
+hsa_status_t RocrPerfCntrApp::Wait() {
+
+  hsa_status_t status;
+  status = hsa_ext_tools_pmu_wait_for_completion(perfMgr_, 5000);
+  assert((status == HSA_STATUS_SUCCESS) && "Error in Waiting for Perf Cntr Completion");
+  return status;
+}
+
+// Retrieve and print the value of every perf counter. The values are not
+// compared against expectedResult. Returns the first failing status, or
+// HSA_STATUS_SUCCESS if every counter result could be retrieved
+hsa_status_t RocrPerfCntrApp::Validate() {
+
+  // Keep the first failure rather than letting a later successful
+  // retrieval overwrite it
+  hsa_status_t result = HSA_STATUS_SUCCESS;
+  uint32_t size = GetNumPerfCntrs();
+  for (uint32_t idx = 0; idx < size; idx++) {
+    CntrInfo* info = GetPerfCntr(idx);
+    hsa_status_t status = hsa_ext_tools_get_counter_result(info->cntrHndl, &info->cntrResult);
+    if ((status != HSA_STATUS_SUCCESS) && (result == HSA_STATUS_SUCCESS)) {
+      result = status;
+    }
+    std::cout << "Value of Perf Cntr is: " << info->cntrResult << std::endl;
+  }
+
+  return result;
+}
diff --git a/samples/common/hsa_perf_cntrs.hpp b/samples/common/hsa_perf_cntrs.hpp
new file mode 100644
index 0000000000..ee3c68d527
--- /dev/null
+++ b/samples/common/hsa_perf_cntrs.hpp
@@ -0,0 +1,119 @@
+#ifndef ROCR_PERF_CNTR_APP_H_
+#define ROCR_PERF_CNTR_APP_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include <vector>
+
+#include "hsa.h"
+#include "tools/inc/hsa_ext_profiler.h"
+
+/// Kind of check to apply to a counter value once a dispatch has completed
+typedef enum CntrValCnfType {
+
+  ///< no counter value validation should be performed
+  CntrValCnf_None,
+
+  ///< counter value should be an exact match to expectedResult
+  CntrValCnf_Exact,
+
+  ///< counter value should be greater than expectedResult
+  CntrValCnf_GreaterThan,
+
+  ///< counter value should be less than expectedResult
+  CntrValCnf_LessThan
+
+} CntrValCnfType;
+
+/// Struct used to encapsulate Counter Info
+typedef struct CntrInfo {
+
+  ///< Id of counter in hardware block
+  uint32_t cntrId;
+
+  ///< Name of counter, always NUL terminated
+  char cntrName[72];
+
+  ///< Handle of perf counter
+  hsa_ext_tools_counter_t cntrHndl;
+
+  ///< Id of hardware block containing the counter
+  uint32_t blkId;
+
+  ///< Handle of counter block
+  hsa_ext_tools_counter_block_t blkHndl;
+
+  ///< Expected value of perf counter
+  uint64_t expectedResult;
+
+  ///< Actual value retrieved for the perf counter
+  uint64_t cntrResult;
+
+  ///< Type of validation upon completion of dispatch
+  CntrValCnfType cnfType;
+
+  /// Builds a counter descriptor. cntrName is copied into the fixed size
+  /// buffer, truncated if it does not fit; a NULL name yields an empty string
+  CntrInfo(uint32_t cntrId, const char* cntrName, void* cntrHndl,
+           uint32_t blkId, void* blkHndl,
+           uint64_t expResult, uint64_t result, CntrValCnfType cnfType) {
+    this->cntrId = cntrId;
+    this->cntrHndl = cntrHndl;
+    this->blkId = blkId;
+    this->blkHndl = blkHndl;
+    this->expectedResult = expResult;
+    this->cntrResult = result;
+    this->cnfType = cnfType;
+
+    // Bounded copy that always leaves the name NUL terminated; strncpy
+    // zero fills whatever part of the buffer the name does not use
+    const size_t maxLen = sizeof(this->cntrName) - 1;
+    strncpy(this->cntrName, (cntrName != NULL) ? cntrName : "", maxLen);
+    this->cntrName[maxLen] = '\0';
+  }
+
+} CntrInfo;
+
+class RocrPerfCntrApp {
+
+ public:
+
+  // Constructor of the class. The list of perf counters that will be used
+  // to program the device is built later, by Init()
+  RocrPerfCntrApp( );
+
+  // Destructor of the class
+  ~RocrPerfCntrApp( );
+
+  // Return the number of perf counters
+  uint32_t GetNumPerfCntrs();
+
+  // Return the handle of perf counter at specified index
+  CntrInfo* GetPerfCntr(uint32_t idx);
+
+  // Print the list of perf counters
+  bool PrintCntrs();
+
+  // Initialize the list of perf counters
+  hsa_status_t Init(hsa_agent_t agent);
+
+  // Register Pre and Post dispatch callbacks
+  void RegisterCallbacks(hsa_queue_t *queue);
+
+  // Wait for perf counter collection to complete
+  hsa_status_t Wait();
+
+  // Retrieve and print perf counter values
+  hsa_status_t Validate();
+
+ private:
+
+  // List of perf counter descriptors, filled in by Init()
+  std::vector<CntrInfo*> cntrList_;
+
+  // Handle of Perf Cntr Manager
+  hsa_ext_tools_pmu_t perfMgr_;
+};
+
+#endif // ROCR_PERF_CNTR_APP_H_
diff --git a/samples/common/hsa_rsrc_factory.cpp b/samples/common/hsa_rsrc_factory.cpp index 7291369d3d..f7f0a9208c 100755 --- a/samples/common/hsa_rsrc_factory.cpp +++ b/samples/common/hsa_rsrc_factory.cpp @@ -11,6 +11,7 @@ #include "hsa.h" #include "hsa_rsrc_factory.hpp" #include "hsa_ext_finalize.h" +#include "tools/inc/hsa_ext_profiler.h" #include "HSAILAmdExt.h" #include "common.hpp" @@ -199,6 +200,15 @@ bool HsaRsrcFactory::CreateQueue(AgentInfo *agent_info, uint32_t num_pkts, hsa_queue_t **queue) { hsa_status_t status; + + // Code to create a Profile Queue object + if (num_pkts == UINT32_MAX) { + status = hsa_ext_tools_queue_create_profiled(agent_info->dev_id, + 512, HSA_QUEUE_TYPE_SINGLE, NULL, + NULL, UINT32_MAX, 
UINT32_MAX, queue); + return (status == HSA_STATUS_SUCCESS); + } + status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, queue);