Add SPM support for RGP
The RGP protocol supports SPM (streamed performance counter) collection. Enable it in the PAL backend.
Change-Id: I0fa17334addad037ba6689d11fff0993f7899e66
[ROCm/clr commit: 234a94f838]
This commit is contained in:
@@ -51,8 +51,12 @@ RgpCaptureMgr::RgpCaptureMgr(Pal::IPlatform* platform, const Device& device)
|
||||
max_sqtt_disp_(device_.settings().rgpSqttDispCount_),
|
||||
trace_gpu_mem_limit_(0),
|
||||
global_disp_count_(1), // Must start from 1 according to RGP spec
|
||||
se_mask_(0),
|
||||
perf_counter_mem_limit_(0),
|
||||
perf_counter_frequency_(0),
|
||||
trace_enabled_(false),
|
||||
inst_tracing_enabled_(false) {
|
||||
inst_tracing_enabled_(false),
|
||||
perf_counters_enabled_(false) {
|
||||
memset(&trace_, 0, sizeof(trace_));
|
||||
}
|
||||
|
||||
@@ -213,6 +217,8 @@ void RgpCaptureMgr::Finalize() {
|
||||
rgp_server_->DisableTraces();
|
||||
}
|
||||
|
||||
dev_driver_server_->GetDriverControlServer()->StartLateDeviceInit();
|
||||
|
||||
// Finalize the devmode manager
|
||||
dev_driver_server_->Finalize();
|
||||
|
||||
@@ -436,6 +442,51 @@ Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu) {
|
||||
|
||||
trace_gpu_mem_limit_ = traceParameters.gpuMemoryLimitInMb * 1024 * 1024;
|
||||
inst_tracing_enabled_ = traceParameters.flags.enableInstructionTokens;
|
||||
se_mask_ = traceParameters.seMask;
|
||||
|
||||
// Setup streamed performance counters
|
||||
perf_counters_enabled_ = (traceParameters.flags.enableSpm != 0);
|
||||
|
||||
DevDriver::RGPProtocol::ServerSpmConfig counter_config = {};
|
||||
DevDriver::Vector<DevDriver::RGPProtocol::ServerSpmCounterId> counters(
|
||||
dev_driver_server_->GetMessageChannel()->GetAllocCb());
|
||||
rgp_server_->QuerySpmConfig(&counter_config, &counters);
|
||||
|
||||
Pal::PerfExperimentProperties perf_properties = {};
|
||||
|
||||
result = gpu->dev().iDev()->GetPerfExperimentProperties(&perf_properties);
|
||||
|
||||
// Querying performance properties should never fail
|
||||
assert(result == Pal::Result::Success);
|
||||
|
||||
perf_counter_frequency_ = counter_config.sampleFrequency;
|
||||
perf_counter_mem_limit_ = counter_config.memoryLimitInMb * 1024 * 1024;
|
||||
|
||||
perf_counter_ids_.clear();
|
||||
|
||||
for (size_t idx = 0; idx < counters.Size(); ++idx) {
|
||||
const DevDriver::RGPProtocol::ServerSpmCounterId server_counter = counters[idx];
|
||||
const Pal::GpuBlockPerfProperties& block_perf_prop =
|
||||
perf_properties.blocks[server_counter.blockId];
|
||||
|
||||
if (server_counter.instanceId == DevDriver::RGPProtocol::kSpmAllInstancesId) {
|
||||
for (uint32_t instance = 0; instance < block_perf_prop.instanceCount; ++instance) {
|
||||
GpuUtil::PerfCounterId counter_id = {};
|
||||
counter_id.block = static_cast<Pal::GpuBlock>(server_counter.blockId);
|
||||
counter_id.instance = instance;
|
||||
counter_id.eventId = server_counter.eventId;
|
||||
|
||||
perf_counter_ids_.push_back(counter_id);
|
||||
}
|
||||
} else {
|
||||
GpuUtil::PerfCounterId counter_id = {};
|
||||
counter_id.block = static_cast<Pal::GpuBlock>(server_counter.blockId);
|
||||
counter_id.instance = server_counter.instanceId;
|
||||
counter_id.eventId = server_counter.eventId;
|
||||
|
||||
perf_counter_ids_.push_back(counter_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Notify the RGP server that we are starting a trace
|
||||
if (rgp_server_->BeginTrace() != DevDriver::Result::Success) {
|
||||
@@ -504,11 +555,21 @@ Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu) {
|
||||
GpuUtil::GpaSampleConfig sampleConfig = {};
|
||||
|
||||
sampleConfig.type = GpuUtil::GpaSampleType::Trace;
|
||||
// Configure SQTT
|
||||
sampleConfig.sqtt.gpuMemoryLimit = trace_gpu_mem_limit_;
|
||||
sampleConfig.sqtt.seMask = 0xF;
|
||||
sampleConfig.sqtt.seMask = se_mask_;
|
||||
|
||||
sampleConfig.sqtt.flags.enable = true;
|
||||
sampleConfig.sqtt.flags.supressInstructionTokens = (inst_tracing_enabled_ == false);
|
||||
|
||||
// Configure SPM
|
||||
if (perf_counters_enabled_ && !perf_counter_ids_.empty()) {
|
||||
sampleConfig.perfCounters.gpuMemoryLimit = perf_counter_mem_limit_;
|
||||
sampleConfig.perfCounters.spmTraceSampleInterval = perf_counter_frequency_;
|
||||
sampleConfig.perfCounters.numCounters = perf_counter_ids_.size();
|
||||
sampleConfig.perfCounters.pIds = perf_counter_ids_.data();
|
||||
}
|
||||
|
||||
// Fill GPU commands
|
||||
gpu->eventBegin(MainEngine);
|
||||
result = trace_.gpa_session_->BeginSample(
|
||||
|
||||
@@ -59,6 +59,8 @@ enum class RgpSqqtBarrierReason : uint32_t {
|
||||
#ifdef PAL_GPUOPEN_OCL
|
||||
// gpuopen headers
|
||||
#include "gpuopen.h"
|
||||
// gpuutil headers
|
||||
#include "gpuUtil/palGpaSession.h"
|
||||
|
||||
// PAL forward declarations
|
||||
namespace Pal {
|
||||
@@ -68,11 +70,6 @@ class IQueueSemaphore;
|
||||
struct PalPublicSettings;
|
||||
} // namespace Pal
|
||||
|
||||
// GpuUtil forward declarations
|
||||
namespace GpuUtil {
|
||||
class GpaSession;
|
||||
};
|
||||
|
||||
// GPUOpen forward declarations
|
||||
namespace DevDriver {
|
||||
class DevDriverServer;
|
||||
@@ -375,8 +372,20 @@ class RgpCaptureMgr {
|
||||
uint32_t trace_gpu_mem_limit_;
|
||||
uint32_t global_disp_count_;
|
||||
|
||||
bool trace_enabled_; // True if tracing is currently enabled (master flag)
|
||||
bool inst_tracing_enabled_; // Enable instruction-level SQTT tokens
|
||||
uint32_t se_mask_; // Shader engine mask
|
||||
uint64_t perf_counter_mem_limit_; // Memory limit for perf counters
|
||||
uint32_t perf_counter_frequency_; // Counter sample frequency
|
||||
|
||||
std::vector<GpuUtil::PerfCounterId> perf_counter_ids_; // List of perf counter ids
|
||||
|
||||
// Capture-state flags, overlaid with the raw 32-bit word value_ through an anonymous union.
// NOTE(review): only trace_enabled_ is declared with a ": 1" bit-field width; inst_tracing_enabled_
// and perf_counters_enabled_ are full uint32_t members, so value_ (32 bits) cannot alias all
// three flags. Confirm whether ": 1" was intended on the other two members.
union {
|
||||
struct {
|
||||
uint32_t trace_enabled_ : 1; // True if tracing is currently enabled (master flag)
|
||||
uint32_t inst_tracing_enabled_; // Enable instruction-level SQTT tokens
|
||||
uint32_t perf_counters_enabled_; // True if perf counters are enabled
|
||||
};
|
||||
// Raw word sharing storage with the start of the flag struct above.
uint32_t value_;
|
||||
};
|
||||
|
||||
PAL_DISALLOW_DEFAULT_CTOR(RgpCaptureMgr);
|
||||
PAL_DISALLOW_COPY_AND_ASSIGN(RgpCaptureMgr);
|
||||
|
||||
Reference in New Issue
Block a user