SWDEV-286150 - Add detailed thread trace support in RGP
- Create hash values for binaries
- Add the binaries to the RGP trace
- Add the corresponding hash value for every dispatch

Change-Id: I2c3ce004d69f37d0d46bc4744e12f24273517f5e
Bu işleme şunda yer alıyor:
@@ -1125,6 +1125,14 @@ bool Device::initializeHeapResources() {
|
||||
}
|
||||
}
|
||||
|
||||
// Update RGP capture manager
|
||||
if (rgpCaptureMgr_ != nullptr) {
|
||||
if (!rgpCaptureMgr_->Update(platform_)) {
|
||||
delete rgpCaptureMgr_;
|
||||
rgpCaptureMgr_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Create a synchronized transfer queue
|
||||
xferQueue_ = new VirtualGPU(*this);
|
||||
if (!(xferQueue_ && xferQueue_->create(false))) {
|
||||
@@ -1136,14 +1144,6 @@ bool Device::initializeHeapResources() {
|
||||
return false;
|
||||
}
|
||||
xferQueue_->enableSyncedBlit();
|
||||
|
||||
// Update RGP capture manager
|
||||
if (rgpCaptureMgr_ != nullptr) {
|
||||
if (!rgpCaptureMgr_->Update(platform_)) {
|
||||
delete rgpCaptureMgr_;
|
||||
rgpCaptureMgr_ = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -76,6 +76,28 @@ RgpCaptureMgr* RgpCaptureMgr::Create(Pal::IPlatform* platform, const Device& dev
|
||||
return mgr;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
uint64_t RgpCaptureMgr::AddElfBinary(const void* exe_binary, size_t exe_binary_size,
|
||||
const void* elf_binary, size_t elf_binary_size,
|
||||
Pal::IGpuMemory* pGpuMemory, size_t offset) {
|
||||
GpuUtil::ElfBinaryInfo elfBinaryInfo = {};
|
||||
elfBinaryInfo.pBinary = exe_binary;
|
||||
elfBinaryInfo.binarySize = exe_binary_size; ///< FAT Elf binary size.
|
||||
elfBinaryInfo.pGpuMemory = pGpuMemory; ///< GPU Memory where the compiled ISA resides.
|
||||
elfBinaryInfo.offset = static_cast<Pal::gpusize>(offset);
|
||||
|
||||
elfBinaryInfo.originalHash = DevDriver::MetroHash::MetroHash64(
|
||||
reinterpret_cast<const DevDriver::uint8*>(elf_binary), elf_binary_size);
|
||||
|
||||
elfBinaryInfo.compiledHash = DevDriver::MetroHash::MetroHash64(
|
||||
reinterpret_cast<const DevDriver::uint8*>(exe_binary), exe_binary_size);
|
||||
|
||||
assert(trace_.gpa_session_ != nullptr);
|
||||
|
||||
trace_.gpa_session_->RegisterElfBinary(elfBinaryInfo);
|
||||
return elfBinaryInfo.originalHash;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool RgpCaptureMgr::Init(Pal::IPlatform* platform) {
|
||||
if (dev_driver_server_ == nullptr) {
|
||||
@@ -413,6 +435,9 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size
|
||||
}
|
||||
}
|
||||
}
|
||||
// Write the hash value
|
||||
WriteComputeBindMarker(gpu, kernel.prog().ApiHash());
|
||||
|
||||
WriteUserEventMarker(gpu, RgpSqttMarkerUserEventObjectName, kernel.name());
|
||||
// Write dispatch marker
|
||||
WriteEventWithDimsMarker(gpu, apiEvent, static_cast<uint32_t>(x), static_cast<uint32_t>(y),
|
||||
@@ -893,6 +918,19 @@ void RgpCaptureMgr::WriteUserEventMarker(const VirtualGPU* gpu,
|
||||
WriteMarker(gpu, user_event_, markerSize);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Inserts a compute bind marker
|
||||
void RgpCaptureMgr::WriteComputeBindMarker(const VirtualGPU* gpu, uint64_t api_hash) const {
|
||||
RgpSqttMarkerPipelineBind marker = {};
|
||||
|
||||
marker.identifier = RgpSqttMarkerIdentifierBindPipeline;
|
||||
marker.cbID = gpu->queue(MainEngine).cmdBufId();;
|
||||
marker.bindPoint = 1;
|
||||
|
||||
memcpy(marker.apiPsoHash, &api_hash, sizeof(api_hash));
|
||||
WriteMarker(gpu, &marker, sizeof(marker));
|
||||
}
|
||||
|
||||
} // namespace pal
|
||||
|
||||
#endif // PAL_GPUOPEN_OCL
|
||||
|
||||
@@ -68,6 +68,7 @@ class ICmdBuffer;
|
||||
class IFence;
|
||||
class IQueueSemaphore;
|
||||
struct PalPublicSettings;
|
||||
class IGpuMemory;
|
||||
} // namespace Pal
|
||||
|
||||
// GPUOpen forward declarations
|
||||
@@ -91,22 +92,22 @@ namespace pal {
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1)
// The values cover 0x0-0xF, i.e. they fit the 4-bit identifier field used by
// RgpSqttMarkerPipelineBind. Value 0xC identifies the pipeline-bind marker.
enum RgpSqttMarkerIdentifier : uint32_t {
  RgpSqttMarkerIdentifierEvent = 0x0,
  RgpSqttMarkerIdentifierCbStart = 0x1,
  RgpSqttMarkerIdentifierCbEnd = 0x2,
  RgpSqttMarkerIdentifierBarrierStart = 0x3,
  RgpSqttMarkerIdentifierBarrierEnd = 0x4,
  RgpSqttMarkerIdentifierUserEvent = 0x5,
  RgpSqttMarkerIdentifierGeneralApi = 0x6,
  RgpSqttMarkerIdentifierSync = 0x7,
  RgpSqttMarkerIdentifierPresent = 0x8,
  RgpSqttMarkerIdentifierLayoutTransition = 0x9,
  RgpSqttMarkerIdentifierRenderPass = 0xA,
  RgpSqttMarkerIdentifierReserved2 = 0xB,
  RgpSqttMarkerIdentifierBindPipeline = 0xC,
  RgpSqttMarkerIdentifierReserved4 = 0xD,
  RgpSqttMarkerIdentifierReserved5 = 0xE,
  RgpSqttMarkerIdentifierReserved6 = 0xF
};
|
||||
|
||||
// ================================================================================================
|
||||
@@ -238,6 +239,33 @@ struct RgpSqttMarkerBarrierEnd {
|
||||
};
|
||||
};
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerPipelineBind - RGP SQ thread-tracing marker written whenever a pipeline is bound
// (Table 12). Layout: one packed header dword followed by the two-dword API PSO hash.
struct RgpSqttMarkerPipelineBind {
  union {
    struct {
      uint32_t identifier : 4;  // Identifier for this marker
      uint32_t extDwords : 3;   // Number of extra dwords following this marker
      uint32_t bindPoint : 1;   // The bind point of the pipeline within a queue
                                // 0 = graphics bind point
                                // 1 = compute bind point
      uint32_t cbID : 20;       // A command buffer ID encoded as per Table 13.
      uint32_t reserved : 4;    // Reserved
    };

    uint32_t dword01;  // The first dword
  };

  union {
    uint32_t apiPsoHash[2];  // The API PSO hash of the pipeline being bound
    struct {
      uint32_t dword02;  // The second dword
      uint32_t dword03;  // The third dword
    };
  };
};

// The marker is written out using sizeof(marker), so it must stay exactly three dwords with
// no padding.
static_assert(sizeof(RgpSqttMarkerPipelineBind) == 3 * sizeof(uint32_t),
              "RgpSqttMarkerPipelineBind must be exactly three dwords");
|
||||
|
||||
|
||||
// RGP SQTT Instrumentation Specification version (API-independent)
|
||||
constexpr uint32_t RgpSqttInstrumentationSpecVersion = 1;
|
||||
|
||||
@@ -312,7 +340,8 @@ class RgpCaptureMgr {
|
||||
Pal::Result TimedQueueSubmit(Pal::IQueue* queue, uint64_t cmdId,
|
||||
const Pal::SubmitInfo& submitInfo) const;
|
||||
bool Update(Pal::IPlatform* platform);
|
||||
|
||||
uint64_t AddElfBinary(const void* exe_binary, size_t exe_binary_size, const void* elf_binary,
|
||||
size_t elf_binary_size, Pal::IGpuMemory* pGpuMemory, size_t offset);
|
||||
private:
|
||||
// Steps that an RGP trace goes through
|
||||
enum class TraceStatus {
|
||||
@@ -360,6 +389,7 @@ class RgpCaptureMgr {
|
||||
uint32_t y, uint32_t z) const;
|
||||
void WriteUserEventMarker(const VirtualGPU* gpu, RgpSqttMarkerUserEventType eventType,
|
||||
const std::string& name) const;
|
||||
void WriteComputeBindMarker(const VirtualGPU* gpu, uint64_t api_hash) const;
|
||||
|
||||
const Device& device_;
|
||||
DevDriver::DevDriverServer* dev_driver_server_;
|
||||
@@ -411,8 +441,14 @@ class RgpCaptureMgr {
|
||||
// Stub with an empty body: does nothing and ignores every argument.
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, size_t z) {}
|
||||
// Stub with an empty body: does nothing and ignores its argument.
void PostDispatch(VirtualGPU* gpu) {}
|
||||
// Stub with an empty body: does nothing and ignores both arguments.
void FinishRGPTrace(VirtualGPU* gpu, bool aborted) {}
|
||||
// Stub: registers nothing and always returns true. All arguments are ignored; in particular
// nothing is written through debug_vmid.
bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const {
  return true;
}
|
||||
// Stub: ignores platform and always returns true.
bool Update(Pal::IPlatform* platform) const { return true; }
|
||||
bool AddElfBinary(const void* exe_binary, size_t exe_binary_size, const void* elf_binary,
|
||||
size_t elf_binary_size, Pal::IGpuMemory* pGpuMemory, size_t offset) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace pal
|
||||
#endif // PAL_GPUOPEN_OCL
|
||||
|
||||
@@ -802,6 +802,12 @@ bool LightningProgram::setKernels(void* binary, size_t binSize,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Collect the information about compiled binary
|
||||
if (palDevice().rgpCaptureMgr() != nullptr) {
|
||||
apiHash_ = palDevice().rgpCaptureMgr()->AddElfBinary(binary, binSize, binary, binSize,
|
||||
codeSegGpu_->iMem(), codeSegGpu_->offset());
|
||||
}
|
||||
|
||||
for (auto& kit : kernels()) {
|
||||
LightningKernel* kernel = static_cast<LightningKernel*>(kit.second);
|
||||
if (!kernel->postLoad()) {
|
||||
|
||||
@@ -200,6 +200,9 @@ class HSAILProgram : public device::Program {
|
||||
return executable_->GetSymbol(symbol_name, agent);
|
||||
}
|
||||
|
||||
//! Returns the API hash value of the program for RGP thread trace
//! (the apiHash_ member, which is initialized to 0)
|
||||
uint64_t ApiHash() const { return apiHash_; }
|
||||
|
||||
protected:
|
||||
bool saveBinaryAndSetType(type_t type);
|
||||
|
||||
@@ -246,6 +249,7 @@ class HSAILProgram : public device::Program {
|
||||
//!< in the program by individual kernel
|
||||
uint maxVgprs_; //!< Maximum number of VGPR(s) used
|
||||
//!< in the program by individual kernel
|
||||
uint64_t apiHash_ = 0; //!< API hash value for RGP thread trace
|
||||
|
||||
std::list<Sampler*> staticSamplers_; //!< List of internal static samplers
|
||||
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle