/*
 **************************************************************************************************
 *
 * Trade secret of Advanced Micro Devices, Inc.
 * Copyright (c) 2016, Advanced Micro Devices, Inc., (unpublished)
 *
 * All rights reserved. This notice is intended as a precaution against inadvertent publication
 * and does not imply publication or any waiver of confidentiality. The year included in
 * the foregoing notice is the year of creation of the work.
 *
 **************************************************************************************************
 */

#include "device/pal/palgpuopen.hpp"
#include "device/pal/paldevice.hpp"
#include "device/pal/palvirtual.hpp"
#include "device/pal/palprogram.hpp"
#include "device/pal/palkernel.hpp"
#include "device/pal/palblit.hpp"

// PAL headers
#include "palCmdAllocator.h"
#include "palFence.h"
#include "palQueueSemaphore.h"

// gpuutil headers
#include "gpuUtil/palGpaSession.h"

// gpuopen headers
#include "devDriverServer.h"
#include "msgChannel.h"
#include "msgTransport.h"
#include "protocols/rgpServer.h"
#include "protocols/driverControlServer.h"

namespace pal {

// ================================================================================================
// Constructs the capture manager in an idle, not-yet-initialized state.
//
// platform - PAL platform; only used here to look up the developer driver server.
// device   - Device this manager captures for; a reference to it is kept.
//
// No trace resources are created here; that happens in Init().
RgpCaptureMgr::RgpCaptureMgr(Pal::IPlatform* platform, const Device& device)
    : device_(device),
      dev_driver_server_(platform->GetDevDriverServer()),
      user_event_(nullptr),
      num_prep_disp_(0),
      max_sqtt_disp_(device_.settings().rgpSqttDispCount_),
      trace_gpu_mem_limit_(0),
      global_disp_count_(1),  // Must start from 1 according to RGP spec
      trace_enabled_(false),
      inst_tracing_enabled_(false) {
  // The RGP server is only looked up in Init(). Make sure the pointer has a defined value until
  // then, so that an early Init() failure never leaves it indeterminate.
  rgp_server_ = nullptr;

  // Zero the whole trace state (status, counters, queue pointers, GPA session pointer).
  memset(&trace_, 0, sizeof(trace_));
}

// ================================================================================================
// Releases all trace resources still owned by the manager.
RgpCaptureMgr::~RgpCaptureMgr() { DestroyRGPTracing(); }

// ================================================================================================
// Creates the GPU Open Developer Mode manager class.
RgpCaptureMgr* RgpCaptureMgr::Create(Pal::IPlatform* platform, const Device& device) { RgpCaptureMgr* mgr = new RgpCaptureMgr(platform, device); if (mgr != nullptr && !mgr->Init(platform)) { delete mgr; mgr = nullptr; } return mgr; } // ================================================================================================ bool RgpCaptureMgr::Init(Pal::IPlatform* platform) { if (dev_driver_server_ == nullptr) { return false; } const Settings& settings = device_.settings(); // Tell RGP that the server (i.e. the driver) supports tracing if requested. rgp_server_ = dev_driver_server_->GetRGPServer(); if (rgp_server_ == nullptr) { return false; } // Finalize RGP settings Finalize(); bool result = true; // Fail initialization of trace resources if SQTT tracing has been force-disabled from // the panel (this will consequently fail the trace), or if the chosen device's gfxip // does not support SQTT. // // It's necessary to check this during RGP tracing init in addition to devmode init because // during the earlier devmode init we may be in a situation where some enumerated physical // devices support tracing and others do not. 
if (GpuSupportsTracing(device_.properties(), settings) == false) { result = false; } // Create a GPA session object for this trace session if (result) { assert(trace_.gpa_session_ == nullptr); const uint32_t api_version = settings.oclVersion_; trace_.gpa_session_ = new GpuUtil::GpaSession( platform, device_.iDev(), api_version >> 4, // OCL API version major api_version & 0xf, // OCL API version minor RgpSqttInstrumentationSpecVersion, RgpSqttInstrumentationApiVersion); if (trace_.gpa_session_ == nullptr) { result = false; } } // Initialize the GPA session if (result && (trace_.gpa_session_->Init() != Pal::Result::Success)) { result = false; } if (result) { user_event_ = new RgpSqttMarkerUserEventWithString; if (nullptr == user_event_) { result = false; } } if (!result) { // If we've failed to initialize tracing, permanently disable traces if (rgp_server_ != nullptr) { rgp_server_->DisableTraces(); trace_enabled_ = false; } // Clean up if we failed DestroyRGPTracing(); } else { PostDeviceCreate(); } return result; } // ================================================================================================ // This function finds out all the queues in the device that we have to synchronize for RGP-traced // frames and initializes resources for them. bool RgpCaptureMgr::RegisterTimedQueue( uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const { bool result = true; // Get the OS context handle for this queue (this is a thing that RGP needs on DX clients; // it may be optional for Vulkan, but we provide it anyway if available). Pal::KernelContextInfo kernelContextInfo = {}; Pal::Result palResult = iQueue->QueryKernelContextInfo(&kernelContextInfo); // Ensure we've acquired the debug VMID (note that some platforms do not // implement this function, so don't fail the whole trace if so) *debug_vmid = kernelContextInfo.flags.hasDebugVmid; // Register the queue with the GPA session class for timed queue operation support. 
if (trace_.gpa_session_->RegisterTimedQueue(iQueue, queue_id, kernelContextInfo.contextIdentifier) != Pal::Result::Success) { result = false; } return result; } // ================================================================================================ Pal::Result RgpCaptureMgr::TimedQueueSubmit( Pal::IQueue* queue, uint64_t cmdId, const Pal::SubmitInfo& submitInfo) const { // Fill in extra meta-data information to associate the API command buffer data with // the generated timing information. GpuUtil::TimedSubmitInfo timedSubmitInfo = {}; Pal::uint64 apiCmdBufIds = cmdId; Pal::uint32 sqttCmdBufIds = 0; timedSubmitInfo.pApiCmdBufIds = &apiCmdBufIds; timedSubmitInfo.pSqttCmdBufIds = &sqttCmdBufIds; timedSubmitInfo.frameIndex = 0; // Do a timed submit of all the command buffers Pal::Result result = trace_.gpa_session_->TimedSubmit(queue, submitInfo, timedSubmitInfo); // Punt to non-timed submit if a timed submit fails (or is not supported) if (result != Pal::Result::Success) { result = queue->Submit(submitInfo); } return result; } // ================================================================================================ // Called during initial device enumeration prior to calling Pal::IDevice::CommitSettingsAndInit(). // // This finalizes the developer driver manager. void RgpCaptureMgr::Finalize() { // Figure out if the gfxip supports tracing. We decide tracing if there is at least one // enumerated GPU that can support tracing. Since we don't yet know if that GPU will be // picked as the target of an eventual VkDevice, this check is imperfect. // In mixed-GPU situations where an unsupported GPU is picked for tracing, // trace capture will fail with an error. 
bool hw_support_tracing = false; if ((rgp_server_->EnableTraces() == DevDriver::Result::Success)) { if (GpuSupportsTracing(device_.properties(), device_.settings())) { hw_support_tracing = true; } } if (hw_support_tracing == false) { rgp_server_->DisableTraces(); } // Finalize the devmode manager dev_driver_server_->Finalize(); // Figure out if tracing support should be enabled or not trace_enabled_ = (rgp_server_ != nullptr) && rgp_server_->TracesEnabled(); } // ================================================================================================ // Waits for the driver to be resumed if it's currently paused. void RgpCaptureMgr::WaitForDriverResume() { auto* pDriverControlServer = dev_driver_server_->GetDriverControlServer(); assert(pDriverControlServer != nullptr); pDriverControlServer->WaitForDriverResume(); } // ================================================================================================ // Called before a swap chain presents. This signals a frame-end boundary and // is used to coordinate RGP trace start/stop. void RgpCaptureMgr::PostDispatch(VirtualGPU* gpu) { if (rgp_server_->TracesEnabled()) { // If there's currently a trace running, submit the trace-end command buffer if (trace_.status_ == TraceStatus::Running) { amd::ScopedLock traceLock(&trace_mutex_); trace_.sqtt_disp_count_++; if (trace_.sqtt_disp_count_ >= max_sqtt_disp_) { Pal::Result res = EndRGPHardwareTrace(gpu); if (Pal::Result::ErrorIncompatibleQueue == res) { // continue until we find the right queue... } else if (Pal::Result::Success == res) { trace_.sqtt_disp_count_ = 0; } else { FinishRGPTrace(gpu, true); } } } if (IsQueueTimingActive()) { // Call TimedQueuePresent() to insert commands that collect GPU timestamp. Pal::IQueue* pPalQueue = gpu->queue(MainEngine).iQueue_; // Currently nothing in the PresentInfo struct is used for inserting a timed present marker. 
GpuUtil::TimedQueuePresentInfo timedPresentInfo = {}; //Pal::Result result = trace_.gpa_session_->TimedQueuePresent(pPalQueue, timedPresentInfo); //assert(result == Pal::Result::Success); } } } // ================================================================================================ Pal::Result RgpCaptureMgr::CheckForTraceResults() { assert(trace_.status_ == TraceStatus::WaitingForResults); Pal::Result result = Pal::Result::NotReady; // Check if trace results are ready if (trace_.gpa_session_->IsReady() && // GPA session is ready (trace_.begin_queue_->isDone(&trace_.end_event_))) // "Trace end" cmdbuf has retired { bool success = false; // Fetch required trace data size from GPA session size_t traceDataSize = 0; void* pTraceData = nullptr; trace_.gpa_session_->GetResults(trace_.gpa_sample_id_, &traceDataSize, nullptr); // Allocate memory for trace data if (traceDataSize > 0) { pTraceData = amd::AlignedMemory::allocate(traceDataSize, 256); } if (pTraceData != nullptr) { // Get trace data from GPA session if (trace_.gpa_session_->GetResults(trace_.gpa_sample_id_, &traceDataSize, pTraceData) == Pal::Result::Success) { // Transmit trace data to anyone who's listening auto devResult = rgp_server_->WriteTraceData( static_cast(pTraceData), traceDataSize); success = (devResult == DevDriver::Result::Success); } amd::AlignedMemory::deallocate(pTraceData); } if (success) { result = Pal::Result::Success; } } return result; } // ================================================================================================ // Called after a swap chain presents. This signals a (next) frame-begin boundary and is // used to coordinate RGP trace start/stop. void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, size_t z) { // Wait for the driver to be resumed in case it's been paused. 
WaitForDriverResume(); if (rgp_server_->TracesEnabled()) { amd::ScopedLock traceLock(&trace_mutex_); // Check if there's an RGP trace request pending and we're idle if ((trace_.status_ == TraceStatus::Idle) && rgp_server_->IsTracePending()) { // Attempt to start preparing for a trace if (PrepareRGPTrace(gpu) == Pal::Result::Success) { // Attempt to start the trace immediately if we do not need to prepare if (num_prep_disp_ == 0) { if (BeginRGPTrace(gpu) != Pal::Result::Success) { FinishRGPTrace(gpu, true); } } } } else if (trace_.status_ == TraceStatus::Preparing) { // Wait some number of "preparation frames" before starting the trace in order to get enough // timer samples to sync CPU/GPU clock domains. trace_.prepared_disp_count_++; // Take a calibration timing measurement sample for this frame. trace_.gpa_session_->SampleTimingClocks(); // Start the SQTT trace if we've waited a sufficient number of preparation frames if (trace_.prepared_disp_count_ >= num_prep_disp_) { Pal::Result result = BeginRGPTrace(gpu); if (Pal::Result::ErrorIncompatibleQueue == result) { // Let's wait until the app will reach the same queue } else if (result != Pal::Result::Success) { FinishRGPTrace(gpu, true); } } } // Check if we're ending a trace waiting for SQTT to turn off. // If SQTT has turned off, end the trace else if (trace_.status_ == TraceStatus::WaitingForSqtt) { Pal::Result result = Pal::Result::Success; if (trace_.begin_queue_->isDone(&trace_.end_sqtt_event_)) { result = EndRGPTrace(gpu); } else { // todo: There is a wait inside the trace end for now result = EndRGPTrace(gpu); } if (result != Pal::Result::Success) { FinishRGPTrace(gpu, true); } } // Check if we're waiting for final trace results. 
else if (trace_.status_ == TraceStatus::WaitingForResults) { Pal::Result result = CheckForTraceResults(); // Results ready: finish trace if (result == Pal::Result::Success) { FinishRGPTrace(gpu, false); } // Error while computing results: abort trace else if (result != Pal::Result::NotReady) { FinishRGPTrace(gpu, true); } } if (trace_.status_ == TraceStatus::Running) { RgpSqttMarkerEventType apiEvent = RgpSqttMarkerEventType::CmdNDRangeKernel; if (kernel.prog().isInternal()) { constexpr RgpSqttMarkerEventType ApiEvents[KernelBlitManager::BlitTotal] = { RgpSqttMarkerEventType::CmdCopyImage, RgpSqttMarkerEventType::CmdCopyImage, RgpSqttMarkerEventType::CmdCopyImageToBuffer, RgpSqttMarkerEventType::CmdCopyBufferToImage, RgpSqttMarkerEventType::CmdCopyBuffer, RgpSqttMarkerEventType::CmdCopyBuffer, RgpSqttMarkerEventType::CmdCopyBuffer, RgpSqttMarkerEventType::CmdCopyBuffer, RgpSqttMarkerEventType::CmdFillBuffer, RgpSqttMarkerEventType::CmdFillImage, RgpSqttMarkerEventType::CmdScheduler }; for (uint i = 0; i < KernelBlitManager::BlitTotal; ++i) { if (kernel.name().compare(BlitName[i]) == 0) { apiEvent = ApiEvents[i]; break; } } } WriteUserEventMarker(gpu, RgpSqttMarkerUserEventObjectName, kernel.name()); // Write disaptch marker WriteEventWithDimsMarker(gpu, apiEvent, static_cast(x), static_cast(y), static_cast(z)); } } global_disp_count_++; } // ================================================================================================ // This function starts preparing for an RGP trace. Preparation involves some N frames of // lead-up time during which timing samples are accumulated to synchronize CPU and GPU clock domains. // // This function transitions from the Idle state to the Preparing state. Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu) { assert(trace_.status_ == TraceStatus::Idle); // We can only trace using a single device at a time currently, so recreate RGP trace // resources against this new one if the device is changing. 
Pal::Result result = Pal::Result::Success; const auto traceParameters = rgp_server_->QueryTraceParameters(); num_prep_disp_ = traceParameters.captureStartIndex; uint32_t capture_disp = traceParameters.captureStopIndex - traceParameters.captureStartIndex; // Validate if the captured dispatches are in the range if ((capture_disp > 0) && (capture_disp < max_sqtt_disp_)) { max_sqtt_disp_ = capture_disp; } trace_gpu_mem_limit_ = traceParameters.gpuMemoryLimitInMb * 1024 * 1024; inst_tracing_enabled_ = traceParameters.flags.enableInstructionTokens; // Notify the RGP server that we are starting a trace if (rgp_server_->BeginTrace() != DevDriver::Result::Success) { result = Pal::Result::ErrorUnknown; } // Tell the GPA session class we're starting a trace if (result == Pal::Result::Success) { GpuUtil::GpaSessionBeginInfo info = {}; info.flags.enableQueueTiming = true;// trace_.queueTimingEnabled; result = trace_.gpa_session_->Begin(info); } trace_.prepared_disp_count_ = 0; trace_.sqtt_disp_count_ = 0; // Sample the timing clocks prior to starting a trace. if (result == Pal::Result::Success) { trace_.gpa_session_->SampleTimingClocks(); } if (result == Pal::Result::Success) { // Remember which queue started the trace trace_.prepare_queue_ = gpu; trace_.begin_queue_ = nullptr; trace_.status_ = TraceStatus::Preparing; } else { // We failed to prepare for the trace so abort it. if (rgp_server_ != nullptr) { const DevDriver::Result devDriverResult = rgp_server_->AbortTrace(); // AbortTrace should always succeed unless we've used the api incorrectly. assert(devDriverResult == DevDriver::Result::Success); } } return result; } // ================================================================================================ // This function begins an RGP trace by initializing all dependent resources and submitting // the "begin trace" information command buffer. // // This function transitions from the Preparing state to the Running state. 
//
// gpu - Virtual GPU (queue) the SQTT sample is started on. It has to be the queue that was
//       recorded by PrepareRGPTrace().
//
// Returns ErrorIncompatibleQueue (leaving all state untouched) when called for any other queue,
// Success otherwise.
Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu) {
  assert(trace_.status_ == TraceStatus::Preparing);
  assert(trace_enabled_);

  // The trace may only begin on the queue that prepared it, because the command buffer engine
  // type must match.
  if (gpu != trace_.prepare_queue_) {
    return Pal::Result::ErrorIncompatibleQueue;
  }

  // Describe a GPA tracing sample with SQTT enabled
  GpuUtil::GpaSampleConfig config = {};
  config.type = GpuUtil::GpaSampleType::Trace;
  config.sqtt.gpuMemoryLimit = trace_gpu_mem_limit_;
  config.sqtt.seMask = 0xF;
  config.sqtt.flags.enable = true;
  config.sqtt.flags.supressInstructionTokens = !inst_tracing_enabled_;

  // Record the begin-sample commands on the main engine, bracketed by the begin-SQTT event
  gpu->eventBegin(MainEngine);
  trace_.gpa_sample_id_ =
      trace_.gpa_session_->BeginSample(gpu->queue(MainEngine).iCmd(), config);
  gpu->eventEnd(MainEngine, trace_.begin_sqtt_event_);

  // Update the global GPU event and flush, which submits the trace-begin commands
  constexpr bool kFlush = true;
  gpu->setGpuEvent(trace_.begin_sqtt_event_, kFlush);

  // The trace is now active; remember which queue started it
  trace_.status_ = TraceStatus::Running;
  trace_.begin_queue_ = gpu;

  return Pal::Result::Success;
}

// ================================================================================================
// This function submits the command buffer to stop SQTT tracing. Full tracing still continues.
//
// This function transitions from the Running state to the WaitingForSqtt state.
//
// gpu - Virtual GPU (queue) the end-sample commands are recorded on. It has to be the queue
//       that began the trace.
//
// Returns ErrorIncompatibleQueue (leaving all state untouched) when called for any other queue,
// Success otherwise.
Pal::Result RgpCaptureMgr::EndRGPHardwareTrace(VirtualGPU* gpu) {
  assert(trace_.status_ == TraceStatus::Running);

  // SQTT has to start and end on the same queue because it's critical that both operations are
  // executed in order.
  if (trace_.begin_queue_ != gpu) {
    return Pal::Result::ErrorIncompatibleQueue;
  }

  assert(trace_.gpa_session_ != nullptr);

  // Record the commands that finish the SQTT sample, bracketed by the end-SQTT event
  gpu->eventBegin(MainEngine);
  trace_.gpa_session_->EndSample(gpu->queue(MainEngine).iCmd(), trace_.gpa_sample_id_);
  gpu->eventEnd(MainEngine, trace_.end_sqtt_event_);

  // Update the global GPU event and flush
  constexpr bool kFlush = true;
  gpu->setGpuEvent(trace_.end_sqtt_event_, kFlush);

  trace_.status_ = TraceStatus::WaitingForSqtt;

  // Optionally block until the trace is done. Note: required for SDMA data write back
  if (device_.settings().rgpSqttWaitIdle_) {
    gpu->waitForEvent(&trace_.end_sqtt_event_);
  }

  return Pal::Result::Success;
}

// ================================================================================================
// This function ends a running RGP trace.
//
// This function transitions from the WaitingForSqtt state to WaitingForResults state.
//
// gpu - Virtual GPU (queue) whose SDMA engine records the commands that end the GPA session.
//
// Returns the result of GpaSession::End(). The state only advances when that call succeeded.
Pal::Result RgpCaptureMgr::EndRGPTrace(VirtualGPU* gpu) {
  assert(trace_.status_ == TraceStatus::WaitingForSqtt);
  assert(trace_.gpa_session_ != nullptr);

  // Let the GPA session record whatever is needed to end the session. The commands go to the
  // SDMA engine and are bracketed by the end event.
  gpu->eventBegin(SdmaEngine);
  const Pal::Result status = trace_.gpa_session_->End(gpu->queue(SdmaEngine).iCmd());
  gpu->eventEnd(SdmaEngine, trace_.end_event_);

  if (status != Pal::Result::Success) {
    return status;
  }

  // Update the global GPU event and flush, which submits the trace-end commands
  constexpr bool kFlush = true;
  gpu->setGpuEvent(trace_.end_event_, kFlush);

  trace_.status_ = TraceStatus::WaitingForResults;

  // Optionally block until the transfer is done
  if (device_.settings().rgpSqttWaitIdle_) {
    gpu->waitForEvent(&trace_.end_event_);
  }

  return status;
}

// ================================================================================================
// This function resets and possibly cancels a currently active (between begin/end) RGP trace.
// It frees any dependent resources.
void RgpCaptureMgr::FinishRGPTrace(VirtualGPU* gpu, bool aborted) { if (trace_.prepare_queue_ == nullptr) { return; } // Finish the trace if the queue was destroyed before // OCL reached the number of captured dispatches if ((trace_.sqtt_disp_count_ != 0) && (gpu != nullptr)) { EndRGPHardwareTrace(gpu); } // Inform RGP protocol that we're done with the trace, either by aborting it or finishing normally if (aborted) { rgp_server_->AbortTrace(); } else { rgp_server_->EndTrace(); } if (trace_.gpa_session_ != nullptr) { trace_.gpa_session_->Reset(); } // Reset tracing state to idle trace_.prepared_disp_count_ = 0; trace_.sqtt_disp_count_ = 0; trace_.gpa_sample_id_ = 0; trace_.status_ = TraceStatus::Idle; trace_.prepare_queue_ = nullptr; trace_.begin_queue_ = nullptr; } // ================================================================================================ // Destroys device-persistent RGP resources void RgpCaptureMgr::DestroyRGPTracing() { if (trace_.status_ != TraceStatus::Idle) { FinishRGPTrace(nullptr, true); } delete user_event_; // Destroy the GPA session if (trace_.gpa_session_ != nullptr) { //Util::Destructor(trace_.gpa_session_); delete trace_.gpa_session_; trace_.gpa_session_ = nullptr; } memset(&trace_, 0, sizeof(trace_)); } // ================================================================================================ // Returns true if the given device properties/settings support tracing. bool RgpCaptureMgr::GpuSupportsTracing( const Pal::DeviceProperties& props, const Settings& settings) { return props.gfxipProperties.flags.supportRgpTraces && !settings.rgpSqttForceDisable_; } // ================================================================================================ // Called when a new device is created. This will preallocate reusable RGP trace resources // for that device. 
void RgpCaptureMgr::PostDeviceCreate() { amd::ScopedLock traceLock(&trace_mutex_); auto* pDriverControlServer = dev_driver_server_->GetDriverControlServer(); assert(pDriverControlServer != nullptr); // If the driver hasn't been marked as fully initialized yet, mark it now. // We consider the time after the logical device creation to be the fully initialized driver // position. This is mainly because PAL is fully initialized at this point and we also know // whether or not the debug vmid has been acquired. External tools use this information to // decide when it's reasonable to make certain requests of the driver through protocol functions. if (pDriverControlServer->IsDriverInitialized() == false) { pDriverControlServer->FinishDriverInitialization(); } } // ================================================================================================ // Called prior to a device's being destroyed. This will free persistent RGP trace resources for // that device. void RgpCaptureMgr::PreDeviceDestroy() { amd::ScopedLock traceLock(&trace_mutex_); // If we are idle, we can re-initialize trace resources based on the new device. if (trace_.status_ == TraceStatus::Idle) { DestroyRGPTracing(); } } // ================================================================================================ // Sets up an Event marker's basic data. 
RgpSqttMarkerEvent RgpCaptureMgr::BuildEventMarker( const VirtualGPU* gpu, RgpSqttMarkerEventType api_type) const { RgpSqttMarkerEvent marker = {}; marker.identifier = RgpSqttMarkerIdentifierEvent; marker.apiType = static_cast(api_type); marker.cmdID = trace_.current_event_id_++; marker.cbID = gpu->queue(MainEngine).cmdBufId(); return marker; } // ================================================================================================ void RgpCaptureMgr::WriteMarker(const VirtualGPU* gpu, const void* data, size_t data_size) const { assert((data_size % sizeof(uint32_t)) == 0); assert((data_size / sizeof(uint32_t)) > 0); gpu->queue(MainEngine).iCmd()->CmdInsertRgpTraceMarker( static_cast(data_size / sizeof(uint32_t)), data); } // ================================================================================================ // Inserts an RGP pre-dispatch marker void RgpCaptureMgr::WriteEventWithDimsMarker( const VirtualGPU* gpu, RgpSqttMarkerEventType apiType, uint32_t x, uint32_t y, uint32_t z) const { assert(apiType != RgpSqttMarkerEventType::Invalid); RgpSqttMarkerEventWithDims eventWithDims = {}; eventWithDims.event = BuildEventMarker(gpu, apiType); eventWithDims.event.hasThreadDims = 1; eventWithDims.threadX = x; eventWithDims.threadY = y; eventWithDims.threadZ = z; WriteMarker(gpu, &eventWithDims, sizeof(eventWithDims)); } // ================================================================================================ void RgpCaptureMgr::WriteBarrierStartMarker( const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const { if (rgp_server_->TracesEnabled() && (trace_.status_ == TraceStatus::Running)) { amd::ScopedLock traceLock(&trace_mutex_); RgpSqttMarkerBarrierStart marker = {}; marker.identifier = RgpSqttMarkerIdentifierBarrierStart; marker.cbId = trace_.begin_queue_->queue(MainEngine).cmdBufId(); marker.dword02 = data.reason; marker.internal = true; WriteMarker(gpu, &marker, sizeof(marker)); } } // 
================================================================================================ void RgpCaptureMgr::WriteBarrierEndMarker( const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const { if (rgp_server_->TracesEnabled() && (trace_.status_ == TraceStatus::Running)) { amd::ScopedLock traceLock(&trace_mutex_); // Copy the operations part and include the same data from previous markers // within the same barrier sequence to create a full picture of all cache // syncs and pipeline stalls. auto operations = data.operations; operations.pipelineStalls.u16All |= 0; operations.caches.u16All |= 0; RgpSqttMarkerBarrierEnd marker = {}; marker.identifier = RgpSqttMarkerIdentifierBarrierEnd; marker.cbId = trace_.begin_queue_->queue(MainEngine).cmdBufId(); marker.waitOnEopTs = operations.pipelineStalls.waitOnEopTsBottomOfPipe; marker.vsPartialFlush = operations.pipelineStalls.vsPartialFlush; marker.psPartialFlush = operations.pipelineStalls.psPartialFlush; marker.csPartialFlush = operations.pipelineStalls.csPartialFlush; marker.pfpSyncMe = operations.pipelineStalls.pfpSyncMe; marker.syncCpDma = operations.pipelineStalls.syncCpDma; marker.invalTcp = operations.caches.invalTcp; marker.invalSqI = operations.caches.invalSqI$; marker.invalSqK = operations.caches.invalSqK$; marker.flushTcc = operations.caches.flushTcc; marker.invalTcc = operations.caches.invalTcc; marker.flushCb = operations.caches.flushCb; marker.invalCb = operations.caches.invalCb; marker.flushDb = operations.caches.flushDb; marker.invalDb = operations.caches.invalDb; marker.numLayoutTransitions = 0; WriteMarker(gpu, &marker, sizeof(marker)); } } // ================================================================================================ // Inserts a user event string marker void RgpCaptureMgr::WriteUserEventMarker( const VirtualGPU* gpu, RgpSqttMarkerUserEventType eventType, const std::string& name) const { memset(user_event_, 0, sizeof(RgpSqttMarkerUserEventWithString)); 
user_event_->header.identifier = RgpSqttMarkerIdentifierUserEvent; user_event_->header.dataType = eventType; size_t markerSize = sizeof(user_event_->header); if ((eventType != RgpSqttMarkerUserEventPop)) { size_t strLength = std::min(name.size(), RgpSqttMaxUserEventStringLengthInDwords * sizeof(uint32_t)); for (uint32_t charIdx = 0; charIdx < strLength; ++charIdx) { uint32_t c = static_cast(name[charIdx]); user_event_->stringData[charIdx / 4] |= (c << (8 * (charIdx % 4))); user_event_->stringLength = static_cast(strLength); } // Every data type other than Pop includes a string length markerSize += sizeof(uint32_t); // Include string length (padded up to the nearest dword) markerSize += sizeof(uint32_t) * ((strLength + sizeof(uint32_t) - 1) / sizeof(uint32_t)); } WriteMarker(gpu, user_event_, markerSize); } }; // namespace vk