SWDEV-286150 - Add detailed thread trace support in RGP

- Create hash values for binaries
- Add the binaries into RGP trace
- Add corresponding hash value for every dispatch

Change-Id: I2c3ce004d69f37d0d46bc4744e12f24273517f5e
Bu işleme şunda yer alıyor:
German Andryeyev
2021-11-03 16:24:04 -04:00
ebeveyn b2de4f625c
işleme 2a298f2ec3
5 değiştirilmiş dosya ile 110 ekleme ve 26 silme
+8 -8
Dosyayı Görüntüle
@@ -1125,6 +1125,14 @@ bool Device::initializeHeapResources() {
}
}
// Update RGP capture manager
if (rgpCaptureMgr_ != nullptr) {
if (!rgpCaptureMgr_->Update(platform_)) {
delete rgpCaptureMgr_;
rgpCaptureMgr_ = nullptr;
}
}
// Create a synchronized transfer queue
xferQueue_ = new VirtualGPU(*this);
if (!(xferQueue_ && xferQueue_->create(false))) {
@@ -1136,14 +1144,6 @@ bool Device::initializeHeapResources() {
return false;
}
xferQueue_->enableSyncedBlit();
// Update RGP capture manager
if (rgpCaptureMgr_ != nullptr) {
if (!rgpCaptureMgr_->Update(platform_)) {
delete rgpCaptureMgr_;
rgpCaptureMgr_ = nullptr;
}
}
}
return true;
}
+38
Dosyayı Görüntüle
@@ -76,6 +76,28 @@ RgpCaptureMgr* RgpCaptureMgr::Create(Pal::IPlatform* platform, const Device& dev
return mgr;
}
// ================================================================================================
uint64_t RgpCaptureMgr::AddElfBinary(const void* exe_binary, size_t exe_binary_size,
const void* elf_binary, size_t elf_binary_size,
Pal::IGpuMemory* pGpuMemory, size_t offset) {
GpuUtil::ElfBinaryInfo elfBinaryInfo = {};
elfBinaryInfo.pBinary = exe_binary;
elfBinaryInfo.binarySize = exe_binary_size; ///< FAT Elf binary size.
elfBinaryInfo.pGpuMemory = pGpuMemory; ///< GPU Memory where the compiled ISA resides.
elfBinaryInfo.offset = static_cast<Pal::gpusize>(offset);
elfBinaryInfo.originalHash = DevDriver::MetroHash::MetroHash64(
reinterpret_cast<const DevDriver::uint8*>(elf_binary), elf_binary_size);
elfBinaryInfo.compiledHash = DevDriver::MetroHash::MetroHash64(
reinterpret_cast<const DevDriver::uint8*>(exe_binary), exe_binary_size);
assert(trace_.gpa_session_ != nullptr);
trace_.gpa_session_->RegisterElfBinary(elfBinaryInfo);
return elfBinaryInfo.originalHash;
}
// ================================================================================================
bool RgpCaptureMgr::Init(Pal::IPlatform* platform) {
if (dev_driver_server_ == nullptr) {
@@ -413,6 +435,9 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size
}
}
}
// Write the hash value
WriteComputeBindMarker(gpu, kernel.prog().ApiHash());
WriteUserEventMarker(gpu, RgpSqttMarkerUserEventObjectName, kernel.name());
// Write dispatch marker
WriteEventWithDimsMarker(gpu, apiEvent, static_cast<uint32_t>(x), static_cast<uint32_t>(y),
@@ -893,6 +918,19 @@ void RgpCaptureMgr::WriteUserEventMarker(const VirtualGPU* gpu,
WriteMarker(gpu, user_event_, markerSize);
}
// ================================================================================================
// Inserts a compute bind marker.
//   gpu      - virtual GPU whose main-engine command buffer ID is recorded in the marker
//   api_hash - 64-bit hash copied into the marker's apiPsoHash field
void RgpCaptureMgr::WriteComputeBindMarker(const VirtualGPU* gpu, uint64_t api_hash) const {
  // Value of RgpSqttMarkerPipelineBind::bindPoint that selects the compute bind point
  constexpr uint32_t kComputeBindPoint = 1;

  RgpSqttMarkerPipelineBind marker = {};
  marker.identifier = RgpSqttMarkerIdentifierBindPipeline;
  marker.cbID = gpu->queue(MainEngine).cmdBufId();
  marker.bindPoint = kComputeBindPoint;

  // The hash is copied bytewise into the two-dword field, so the sizes must agree
  static_assert(sizeof(marker.apiPsoHash) == sizeof(api_hash),
                "apiPsoHash must be exactly as wide as the 64-bit API hash");
  memcpy(marker.apiPsoHash, &api_hash, sizeof(api_hash));

  WriteMarker(gpu, &marker, sizeof(marker));
}
} // namespace pal
#endif // PAL_GPUOPEN_OCL
+54 -18
Dosyayı Görüntüle
@@ -68,6 +68,7 @@ class ICmdBuffer;
class IFence;
class IQueueSemaphore;
struct PalPublicSettings;
class IGpuMemory;  // Forward declaration; spelled to match the Pal::IGpuMemory uses in this header
} // namespace Pal
// GPUOpen forward declarations
@@ -91,22 +92,22 @@ namespace pal {
// ================================================================================================
// RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1)
enum RgpSqttMarkerIdentifier : uint32_t {
RgpSqttMarkerIdentifierEvent = 0x0,
RgpSqttMarkerIdentifierCbStart = 0x1,
RgpSqttMarkerIdentifierCbEnd = 0x2,
RgpSqttMarkerIdentifierBarrierStart = 0x3,
RgpSqttMarkerIdentifierBarrierEnd = 0x4,
RgpSqttMarkerIdentifierUserEvent = 0x5,
RgpSqttMarkerIdentifierGeneralApi = 0x6,
RgpSqttMarkerIdentifierSync = 0x7,
RgpSqttMarkerIdentifierPresent = 0x8,
RgpSqttMarkerIdentifierLayoutTransition = 0x9,
RgpSqttMarkerIdentifierRenderPass = 0xA,
RgpSqttMarkerIdentifierReserved2 = 0xB,
RgpSqttMarkerIdentifierReserved3 = 0xC,
RgpSqttMarkerIdentifierReserved4 = 0xD,
RgpSqttMarkerIdentifierReserved5 = 0xE,
RgpSqttMarkerIdentifierReserved6 = 0xF
RgpSqttMarkerIdentifierEvent = 0x0,
RgpSqttMarkerIdentifierCbStart = 0x1,
RgpSqttMarkerIdentifierCbEnd = 0x2,
RgpSqttMarkerIdentifierBarrierStart = 0x3,
RgpSqttMarkerIdentifierBarrierEnd = 0x4,
RgpSqttMarkerIdentifierUserEvent = 0x5,
RgpSqttMarkerIdentifierGeneralApi = 0x6,
RgpSqttMarkerIdentifierSync = 0x7,
RgpSqttMarkerIdentifierPresent = 0x8,
RgpSqttMarkerIdentifierLayoutTransition = 0x9,
RgpSqttMarkerIdentifierRenderPass = 0xA,
RgpSqttMarkerIdentifierReserved2 = 0xB,
RgpSqttMarkerIdentifierBindPipeline = 0xC,
RgpSqttMarkerIdentifierReserved4 = 0xD,
RgpSqttMarkerIdentifierReserved5 = 0xE,
RgpSqttMarkerIdentifierReserved6 = 0xF
};
// ================================================================================================
@@ -238,6 +239,33 @@ struct RgpSqttMarkerBarrierEnd {
};
};
// ================================================================================================
// RgpSqttMarkerPipelineBind - RGP SQ thread-tracing marker written whenever a pipeline is bound (Table 12).
// The marker is three dwords: a packed header dword followed by a two-dword API PSO hash.
struct RgpSqttMarkerPipelineBind {
// Header dword: the bit-fields below total 32 bits and share storage with dword01
union {
struct {
uint32_t identifier : 4; // Identifier for this marker
uint32_t extDwords : 3; // Number of extra dwords following this marker
uint32_t bindPoint : 1; // The bind point of the pipeline within a queue
// 0 = graphics bind point
// 1 = compute bind point
uint32_t cbID : 20; // A command buffer ID encoded as per Table 13.
uint32_t reserved : 4; // Reserved
};
uint32_t dword01; // The first dword (raw view of the bit-fields above)
};
// Hash payload: accessible as a two-element array or as two separate dwords
union {
uint32_t apiPsoHash[2]; // The API PSO hash of the pipeline being bound
struct {
uint32_t dword02; // The second dword
uint32_t dword03; // The third dword
};
};
};
// RGP SQTT Instrumentation Specification version (API-independent)
constexpr uint32_t RgpSqttInstrumentationSpecVersion = 1;
@@ -312,7 +340,8 @@ class RgpCaptureMgr {
Pal::Result TimedQueueSubmit(Pal::IQueue* queue, uint64_t cmdId,
const Pal::SubmitInfo& submitInfo) const;
bool Update(Pal::IPlatform* platform);
// Registers a binary with the GPA session and returns the hash computed over elf_binary.
// exe_binary is recorded as the binary itself, together with the GPU memory object and offset.
uint64_t AddElfBinary(const void* exe_binary, size_t exe_binary_size, const void* elf_binary,
size_t elf_binary_size, Pal::IGpuMemory* pGpuMemory, size_t offset);
private:
// Steps that an RGP trace goes through
enum class TraceStatus {
@@ -360,6 +389,7 @@ class RgpCaptureMgr {
uint32_t y, uint32_t z) const;
void WriteUserEventMarker(const VirtualGPU* gpu, RgpSqttMarkerUserEventType eventType,
const std::string& name) const;
void WriteComputeBindMarker(const VirtualGPU* gpu, uint64_t api_hash) const;
const Device& device_;
DevDriver::DevDriverServer* dev_driver_server_;
@@ -411,8 +441,14 @@ class RgpCaptureMgr {
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, size_t z) {}
void PostDispatch(VirtualGPU* gpu) {}
void FinishRGPTrace(VirtualGPU* gpu, bool aborted) {}
bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const { return true; }
bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const {
return true;
}
bool Update(Pal::IPlatform* platform) const { return true; }
// Stub used when RGP support is compiled out: nothing is registered and the hash is 0.
// The return type matches the real RgpCaptureMgr::AddElfBinary, so a caller that stores
// the result as an API hash gets 0 rather than a converted boolean.
uint64_t AddElfBinary(const void* exe_binary, size_t exe_binary_size, const void* elf_binary,
                      size_t elf_binary_size, Pal::IGpuMemory* pGpuMemory, size_t offset) {
  return 0;
}
};
} // namespace pal
#endif // PAL_GPUOPEN_OCL
+6
Dosyayı Görüntüle
@@ -802,6 +802,12 @@ bool LightningProgram::setKernels(void* binary, size_t binSize,
return true;
}
// Collect the information about compiled binary
if (palDevice().rgpCaptureMgr() != nullptr) {
apiHash_ = palDevice().rgpCaptureMgr()->AddElfBinary(binary, binSize, binary, binSize,
codeSegGpu_->iMem(), codeSegGpu_->offset());
}
for (auto& kit : kernels()) {
LightningKernel* kernel = static_cast<LightningKernel*>(kit.second);
if (!kernel->postLoad()) {
+4
Dosyayı Görüntüle
@@ -200,6 +200,9 @@ class HSAILProgram : public device::Program {
return executable_->GetSymbol(symbol_name, agent);
}
//! Accessor for the program's API hash value (used for RGP thread trace)
uint64_t ApiHash() const {
  return apiHash_;
}
protected:
bool saveBinaryAndSetType(type_t type);
@@ -246,6 +249,7 @@ class HSAILProgram : public device::Program {
//!< in the program by individual kernel
uint maxVgprs_; //!< Maximum number of VGPR(s) used
//!< in the program by individual kernel
uint64_t apiHash_ = 0; //!< API hash value for RGP thread trace
std::list<Sampler*> staticSamplers_; //!< List of internal static samplers