Add SPM support for RGP

The RGP protocol supports SPM (streamed performance counter) collection. Enable it in the PAL backend.

Change-Id: I0fa17334addad037ba6689d11fff0993f7899e66


[ROCm/clr commit: 234a94f838]
This commit is contained in:
German Andryeyev
2020-10-28 14:40:01 -04:00
parent e24333b132
commit f4e62df4cf
2 changed files with 79 additions and 9 deletions
+63 -2
View File
@@ -51,8 +51,12 @@ RgpCaptureMgr::RgpCaptureMgr(Pal::IPlatform* platform, const Device& device)
max_sqtt_disp_(device_.settings().rgpSqttDispCount_),
trace_gpu_mem_limit_(0),
global_disp_count_(1), // Must start from 1 according to RGP spec
se_mask_(0),
perf_counter_mem_limit_(0),
perf_counter_frequency_(0),
trace_enabled_(false),
inst_tracing_enabled_(false) {
inst_tracing_enabled_(false),
perf_counters_enabled_(false) {
memset(&trace_, 0, sizeof(trace_));
}
@@ -213,6 +217,8 @@ void RgpCaptureMgr::Finalize() {
rgp_server_->DisableTraces();
}
dev_driver_server_->GetDriverControlServer()->StartLateDeviceInit();
// Finalize the devmode manager
dev_driver_server_->Finalize();
@@ -436,6 +442,51 @@ Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu) {
trace_gpu_mem_limit_ = traceParameters.gpuMemoryLimitInMb * 1024 * 1024;
inst_tracing_enabled_ = traceParameters.flags.enableInstructionTokens;
se_mask_ = traceParameters.seMask;
// Setup streamed performance counters
perf_counters_enabled_ = (traceParameters.flags.enableSpm != 0);
DevDriver::RGPProtocol::ServerSpmConfig counter_config = {};
DevDriver::Vector<DevDriver::RGPProtocol::ServerSpmCounterId> counters(
dev_driver_server_->GetMessageChannel()->GetAllocCb());
rgp_server_->QuerySpmConfig(&counter_config, &counters);
Pal::PerfExperimentProperties perf_properties = {};
result = gpu->dev().iDev()->GetPerfExperimentProperties(&perf_properties);
// Querying performance properties should never fail
assert(result == Pal::Result::Success);
perf_counter_frequency_ = counter_config.sampleFrequency;
perf_counter_mem_limit_ = counter_config.memoryLimitInMb * 1024 * 1024;
perf_counter_ids_.clear();
for (size_t idx = 0; idx < counters.Size(); ++idx) {
const DevDriver::RGPProtocol::ServerSpmCounterId server_counter = counters[idx];
const Pal::GpuBlockPerfProperties& block_perf_prop =
perf_properties.blocks[server_counter.blockId];
if (server_counter.instanceId == DevDriver::RGPProtocol::kSpmAllInstancesId) {
for (uint32_t instance = 0; instance < block_perf_prop.instanceCount; ++instance) {
GpuUtil::PerfCounterId counter_id = {};
counter_id.block = static_cast<Pal::GpuBlock>(server_counter.blockId);
counter_id.instance = instance;
counter_id.eventId = server_counter.eventId;
perf_counter_ids_.push_back(counter_id);
}
} else {
GpuUtil::PerfCounterId counter_id = {};
counter_id.block = static_cast<Pal::GpuBlock>(server_counter.blockId);
counter_id.instance = server_counter.instanceId;
counter_id.eventId = server_counter.eventId;
perf_counter_ids_.push_back(counter_id);
}
}
// Notify the RGP server that we are starting a trace
if (rgp_server_->BeginTrace() != DevDriver::Result::Success) {
@@ -504,11 +555,21 @@ Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu) {
GpuUtil::GpaSampleConfig sampleConfig = {};
sampleConfig.type = GpuUtil::GpaSampleType::Trace;
// Configure SQTT
sampleConfig.sqtt.gpuMemoryLimit = trace_gpu_mem_limit_;
sampleConfig.sqtt.seMask = 0xF;
sampleConfig.sqtt.seMask = se_mask_;
sampleConfig.sqtt.flags.enable = true;
sampleConfig.sqtt.flags.supressInstructionTokens = (inst_tracing_enabled_ == false);
// Configure SPM
if (perf_counters_enabled_ && !perf_counter_ids_.empty()) {
sampleConfig.perfCounters.gpuMemoryLimit = perf_counter_mem_limit_;
sampleConfig.perfCounters.spmTraceSampleInterval = perf_counter_frequency_;
sampleConfig.perfCounters.numCounters = perf_counter_ids_.size();
sampleConfig.perfCounters.pIds = perf_counter_ids_.data();
}
// Fill GPU commands
gpu->eventBegin(MainEngine);
result = trace_.gpa_session_->BeginSample(
+16 -7
View File
@@ -59,6 +59,8 @@ enum class RgpSqqtBarrierReason : uint32_t {
#ifdef PAL_GPUOPEN_OCL
// gpuopen headers
#include "gpuopen.h"
// gpuutil headers
#include "gpuUtil/palGpaSession.h"
// PAL forward declarations
namespace Pal {
@@ -68,11 +70,6 @@ class IQueueSemaphore;
struct PalPublicSettings;
} // namespace Pal
// GpuUtil forward declarations
namespace GpuUtil {
class GpaSession;
};
// GPUOpen forward declarations
namespace DevDriver {
class DevDriverServer;
@@ -375,8 +372,20 @@ class RgpCaptureMgr {
uint32_t trace_gpu_mem_limit_;
uint32_t global_disp_count_;
bool trace_enabled_; // True if tracing is currently enabled (master flag)
bool inst_tracing_enabled_; // Enable instruction-level SQTT tokens
uint32_t se_mask_; // Shader engine mask
uint64_t perf_counter_mem_limit_; // Memory limit for perf counters
uint32_t perf_counter_frequency_; // Counter sample frequency
std::vector<GpuUtil::PerfCounterId> perf_counter_ids_; // List of perf counter ids
// Capture-state flags. Each flag is a single-bit field so that all of them
// pack into the one 32-bit word aliased by value_; a flag declared without a
// width would occupy its own uint32_t and fall outside the value_ overlay.
union {
  struct {
    uint32_t trace_enabled_ : 1;          // True if tracing is currently enabled (master flag)
    uint32_t inst_tracing_enabled_ : 1;   // Enable instruction-level SQTT tokens
    uint32_t perf_counters_enabled_ : 1;  // True if perf counters are enabled
  };
  uint32_t value_;  // All of the flags above viewed as one 32-bit word
};
PAL_DISALLOW_DEFAULT_CTOR(RgpCaptureMgr);
PAL_DISALLOW_COPY_AND_ASSIGN(RgpCaptureMgr);