Files
rocm-systems/rocclr/runtime/device/rocm/roccounters.hpp
T
foreman 5616270bbd P4 to Git Change 1611614 by wchau@wchau_OCL_boltzmann on 2018/09/27 12:54:16
SWDEV-164014 - OCLPerfCounters results in "LLVM ERROR: out of memory"
	- use generic hsa_ven_amd_aqlprofile_pfn_t to point to extension function table

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccounters.hpp#3 edit
2018-09-27 13:15:51 -04:00

171 строка
5.4 KiB
C++

//
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef ROCCOUNTERS_HPP_
#define ROCCOUNTERS_HPP_
#include "top.hpp"
#include "device/device.hpp"
#include "device/rocm/rocdevice.hpp"
#include "hsa_ven_amd_aqlprofile.h"
namespace roc {
class VirtualGPU;
class PerfCounterProfile;
//! Performance counter implementation on GPU
class PerfCounter : public device::PerfCounter {
public:
enum {
ROC_UNSUPPORTED = 0,
ROC_GFX8,
ROC_GFX9
};
//! The performance counter info
struct Info : public amd::EmbeddedObject {
uint blockIndex_; //!< Index of the block to configure
uint counterIndex_; //!< Index of the hardware counter
uint eventIndex_; //!< Event you wish to count with the counter
};
//! Constructor for the ROC PerfCounter object
PerfCounter(const Device& device, //!< A ROC device object
cl_uint blockIndex, //!< HW block index
cl_uint counterIndex, //!< Counter index (Counter register) within the block
cl_uint eventIndex); //!< Event index (Counter selection) for profiling
//! Destructor for the ROCM PerfCounter object
virtual ~PerfCounter();
//! Returns the specific information about the counter
uint64_t getInfo(uint64_t infoType //!< The type of returned information
) const;
//! Returns the GPU device, associated with the current object
const Device& dev() const { return roc_device_; }
//! Returns the gfx version
const uint32_t gfxVersion() const { return gfxVersion_; }
//! Returns the profile reference
PerfCounterProfile* profileRef() const { return profileRef_; }
//! Update the profile associated with the counter
void setProfile(PerfCounterProfile* profileRef);
private:
//! Disable default copy constructor
PerfCounter(const PerfCounter&);
//! Disable default operator=
PerfCounter& operator=(const PerfCounter&);
//! Returns the ROC performance counter descriptor
const Info* info() const { return &info_; }
const Device& roc_device_; //!< The backend device
Info info_; //!< The info structure for perfcounter
hsa_ven_amd_aqlprofile_event_t event_; //!< event information
PerfCounterProfile* profileRef_; //!< perf counter profile object
uint32_t gfxVersion_; //!< The IP version of the device
};
//! Performance counter profile
//!
//! Holds the AQL-profile extension function table, the HSA profile context,
//! the list of events added to it and the start/stop PM4 packets.
class PerfCounterProfile : public amd::ReferenceCountedObject {
 public:
  //! Default constructor
  //!
  //! Zero-initializes the API table, the profile context, both AQL packets and
  //! the completion signal, then binds the profile to the device's HSA agent
  //! and selects the PMC event type.
  PerfCounterProfile(const Device& device)
      : api_(),
        roc_device_(device) {
    memset(&profile_, 0, sizeof(profile_));
    profile_.agent = roc_device_.getBackendDevice();
    profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
    memset(&prePacket_, 0, sizeof(prePacket_));
    memset(&postPacket_, 0, sizeof(postPacket_));
    memset(&completionSignal_, 0, sizeof(completionSignal_));
  }

  //! Destructor for the PerfCounter profile object
  virtual ~PerfCounterProfile();

  //! Get the API tables
  //!
  //! Checks that version 1.0 of the AQL-profile extension is supported by both
  //! the system and this device's agent, then loads the extension function
  //! table into api_.
  //! \return true if the extension is available and the table was loaded,
  //!         false otherwise (including when any HSA query itself fails).
  bool Create() {
    hsa_agent_t agent = roc_device_.getBackendDevice();

    // Start from "unsupported" so a failed query can never be misread as
    // support through an indeterminate value.
    bool system_support = false;
    bool agent_support = false;

    if (hsa_system_extension_supported(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0,
                                       &system_support) != HSA_STATUS_SUCCESS ||
        !system_support) {
      return false;
    }

    if (hsa_agent_extension_supported(HSA_EXTENSION_AMD_AQLPROFILE, agent, 1, 0,
                                      &agent_support) != HSA_STATUS_SUCCESS ||
        !agent_support) {
      return false;
    }

    // The table length must describe the storage actually passed in;
    // sizeof(api_) keeps the two in sync whatever type api_ is declared as.
    return hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
                                                hsa_ven_amd_aqlprofile_VERSION_MAJOR,
                                                sizeof(api_),
                                                &api_) == HSA_STATUS_SUCCESS;
  }

  //! Returns the performance counter vector (a copy of the internal list)
  std::vector<PerfCounter*> perfCounters() { return perfCounters_; }

  //! Add the event of performance counter object to the profile context object
  void addEvent(hsa_ven_amd_aqlprofile_event_t event) { events_.push_back(event); }

  //! Create the start packet for performance counter
  hsa_ext_amd_aql_pm4_packet_t* createStartPacket();

  //! Create the stop packet for performance counter
  hsa_ext_amd_aql_pm4_packet_t* createStopPacket();

  //! Create the profile context object
  bool initialize();

  //! Return the extension API table
  const hsa_ven_amd_aqlprofile_1_00_pfn_t* api() const { return &api_; }

  //! Return the profile context object
  const hsa_ven_amd_aqlprofile_profile_t* profile() const { return &profile_; }

  //! Return the start AQL packet
  hsa_ext_amd_aql_pm4_packet_t* prePacket() { return &prePacket_; }

  //! Return the stop AQL packet
  hsa_ext_amd_aql_pm4_packet_t* postPacket() { return &postPacket_; }

 private:
  //! Copy construction is not allowed
  PerfCounterProfile(const PerfCounterProfile&) = delete;

  //! Copy assignment is not allowed
  PerfCounterProfile& operator=(const PerfCounterProfile&) = delete;

  hsa_ven_amd_aqlprofile_1_00_pfn_t api_;               //!< The extension API table
  const Device& roc_device_;                            //!< The backend device
  std::vector<PerfCounter*> perfCounters_;              //!< Perf counters associated with the profile
  std::vector<hsa_ven_amd_aqlprofile_event_t> events_;  //!< Events information
  hsa_ven_amd_aqlprofile_profile_t profile_;            //!< HSA profile context object
  hsa_ext_amd_aql_pm4_packet_t prePacket_;              //!< aql packet for starting perf counter
  hsa_ext_amd_aql_pm4_packet_t postPacket_;             //!< aql packet for stopping the perf counter
  hsa_signal_t completionSignal_;                       //!< signal of completion
};
} // namespace roc
#endif // ROCCOUNTERS_HPP_