0633d8d8ce
Reverts ROCm/rocm-systems#1866 (re-landing https://github.com/ROCm/rocm-systems/pull/1728) This broke Windows builds at https://github.com/ROCm/rocm-systems/actions/workflows/therock-ci.yml?query=branch%3Adevelop+event%3Apush, I think intentionally? We need a plan for rolling out such changes without build breaks. Sample logs: https://github.com/ROCm/rocm-systems/actions/runs/19371422209/job/55428130376#step:14:6597 ``` [ocl-clr] [134/153] Building CXX object rocclr\CMakeFiles\rocclr.dir\device\pal\palubercapturemgr.cpp.obj [ocl-clr] FAILED: rocclr/CMakeFiles/rocclr.dir/device/pal/palubercapturemgr.cpp.obj [ocl-clr] ccache "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\bin\Hostx64\x64\cl.exe" /nologo /TP -DATI_OS_WIN -DCL_TARGET_OPENCL_VERSION=220 -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DCOMGR_DYN_DLL -DGPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION=42 -DHAVE_CL2_HPP -DLITTLEENDIAN_CPU -DOPENCL_C_MAJOR=2 -DOPENCL_C_MINOR=0 -DOPENCL_MAJOR=2 -DOPENCL_MINOR=1 -DPAL_BUILD_RDF=1 -DPAL_CLIENT_INTERFACE_MAJOR_VERSION=932 -DPAL_DEVELOPER_BUILD=0 -DPAL_GPUOPEN_OCL -DPAL_KMT_BUILD=1 -DROCCLR_VERSION_GITHASH=\"38294ab\" -DWITH_PAL_DEVICE -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\.. 
-IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\compiler\lib -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\compiler\lib\include -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\compiler\lib\backends\common -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\elf -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\include -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\..\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\..\..\.. 
-IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\..\..\..\amdocl -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\core -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\util -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\devdriver\shared\legacy\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\devdriver\third_party\dd_crc32\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\metrohash\src -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\loader -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\hsail-compiler\lib\loaders\elf\utils\libelf\..\..\..\..\..\lib\loaders\elf\utils\common -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\hsail-compiler\lib\loaders\elf\utils\libelf\..\..\..\..\..\lib\loaders\elf\utils\common\win32 -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\hsail-compiler\lib\loaders\elf\utils\libelf\..\..\..\..\..\lib\loaders\elf\utils\libelf -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\libamdhsacode -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\libamdhsacode\..\..\include -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\libamdhsacode\..\..\hsail-tools\libHSAIL -external:IB:\build\compiler\amd-comgr\dist\include -external:W0 /DWIN32 /D_WINDOWS /EHsc /DWIN32 /D_WINDOWS 
/EHsc /O2 /Ob2 /DNDEBUG -std:c++20 -MD /wd4267 /wd4244 /wd4996 /MT /showIncludes /Forocclr\CMakeFiles\rocclr.dir\device\pal\palubercapturemgr.cpp.obj /Fdrocclr\CMakeFiles\rocclr.dir\rocclr.pdb /FS -c C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp [ocl-clr] cl : Command line warning D9025 : overriding '/MD' with '/MT' [ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp(152): error C2039: 'RegisterTraceStateChangeCallback': is not a member of 'GpuUtil::TraceSession' [ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil\palTraceSession.h(372): note: see declaration of 'GpuUtil::TraceSession' [ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp(195): error C2039: 'UnregisterTraceStateChangeCallback': is not a member of 'GpuUtil::TraceSession' [ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil\palTraceSession.h(372): note: see declaration of 'GpuUtil::TraceSession' [ocl-clr] [135/153] Building CXX object rocclr\CMakeFiles\rocclr.dir\device\pal\paldevicegl.cpp.obj ```
156 строки
7.2 KiB
C++
156 строки
7.2 KiB
C++
/*
 ***********************************************************************************************************************
 *
 *  Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a copy
 *  of this software and associated documentation files (the "Software"), to deal
 *  in the Software without restriction, including without limitation the rights
 *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *  copies of the Software, and to permit persons to whom the Software is
 *  furnished to do so, subject to the following conditions:
 *
 *  The above copyright notice and this permission notice shall be included in all
 *  copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 *  SOFTWARE.
 *
 **********************************************************************************************************************/
|
|
|
|
#pragma once
|
|
|
|
#include "palTraceSession.h"
|
|
|
|
// Forward declarations of PAL types, so that this header can name them by pointer without pulling in
// their full definitions.
// NOTE(review): Device is not referenced by the declarations visible in this header; it is kept as-is in case
// other code relies on the declaration.
namespace Pal
{
class IPlatform;
class IQueue;
class ICmdBuffer;
class Device;
}
|
|
|
|
namespace GpuUtil
|
|
{
|
|
|
|
/// Supported render operations used to advance the trace
|
|
enum RenderOp : Pal::uint8
|
|
{
|
|
RenderOpDraw = (1u << 0),
|
|
RenderOpDispatch = (1u << 1)
|
|
};
|
|
|
|
/// Structure used to batch submit render operations on queue submission
|
|
/// This struct should have a `*Count` field for each @ref RenderOp enumeration above
|
|
struct RenderOpCounts
|
|
{
|
|
Pal::uint32 drawCount;
|
|
Pal::uint32 dispatchCount;
|
|
};
|
|
|
|
/// Version number returned by RenderOpTraceController::GetVersion().
constexpr Pal::uint32 RenderOpTraceControllerVersion = 4;
/// Controller name returned by RenderOpTraceController::GetName().
constexpr char        RenderOpTraceControllerName[]  = "renderop";
|
|
|
|
// =====================================================================================================================
|
|
class RenderOpTraceController : public ITraceController
|
|
{
|
|
public:
|
|
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 896
|
|
using RenderOp = GpuUtil::RenderOp;
|
|
#endif
|
|
RenderOpTraceController(Pal::IPlatform* pPlatform, Pal::IDevice* pDevice);
|
|
virtual ~RenderOpTraceController();
|
|
|
|
virtual const char* GetName() const override { return RenderOpTraceControllerName; }
|
|
virtual Pal::uint32 GetVersion() const override { return RenderOpTraceControllerVersion; }
|
|
|
|
virtual void OnConfigUpdated(DevDriver::StructuredValue* pJsonConfig) override;
|
|
virtual Pal::Result OnTraceRequested() override;
|
|
|
|
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 908
|
|
virtual Pal::Result OnPreparationGpuWork(Pal::uint32 gpuIndex, Pal::ICmdBuffer** ppCmdBuf) override;
|
|
#endif
|
|
virtual Pal::Result OnBeginGpuWork(Pal::uint32 gpuIndex, Pal::ICmdBuffer** ppCmdBuffer) override;
|
|
virtual Pal::Result OnEndGpuWork(Pal::uint32 gpuIndex, Pal::ICmdBuffer** ppCmdBuffer) override;
|
|
virtual Pal::Result OnEndPostambleGpuWork(
|
|
Pal::uint32 gpuIndex,
|
|
Pal::ICmdBuffer** ppCmdBuffer) override;
|
|
|
|
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 896
|
|
void RecordRenderOp(Pal::IQueue* pQueue, RenderOp renderOp);
|
|
#endif
|
|
|
|
void FinishTrace();
|
|
|
|
// Cancel the trace currently in progress.
|
|
virtual Pal::Result OnTraceCanceled() override;
|
|
|
|
/// This function must be called by client drivers implementing the RenderOp controller.
|
|
/// On every queue submission, this function is called with the cumulative counts of render operations
|
|
/// recorded into that queue's command buffers.
|
|
/// Based on the controller's internal mask, set by the user during trace configuration,
|
|
/// the trace controller may advance its state.
|
|
void RecordRenderOps(Pal::IQueue* pQueue, const RenderOpCounts& renderOpCounts);
|
|
|
|
// Force a controller update
|
|
virtual void OnUpdated() override { OnRenderOpUpdated(0); }
|
|
|
|
virtual Pal::IQueue* GetTraceQueue() const override { return m_pQueue; }
|
|
|
|
private:
|
|
/// Controls whether the trace proceeds on absolute render op counts or relative
|
|
enum class CaptureMode : Pal::uint8
|
|
{
|
|
Relative = 0, ///< Relative to when the trace request is received
|
|
Absolute ///< Absolute render op index
|
|
};
|
|
|
|
Pal::Result AcceptTrace();
|
|
Pal::Result BeginTrace();
|
|
|
|
Pal::Result SubmitBeginTraceGpuWork() const;
|
|
Pal::Result SubmitEndTraceGpuWork();
|
|
Pal::Result SubmitEndPostambleGpuWork();
|
|
|
|
Pal::Result WaitForTraceEndGpuWorkCompletion() const;
|
|
Pal::Result CreateFence(Pal::IFence** ppFence) const;
|
|
Pal::Result CreateCommandBuffer(bool traceEnd, Pal::ICmdBuffer** ppCmdBuf) const;
|
|
Pal::Result CreateCmdAllocator();
|
|
|
|
void OnRenderOpUpdated(Pal::uint64 countRecorded);
|
|
void FreeResources();
|
|
void AbortTrace();
|
|
|
|
Pal::IPlatform* const m_pPlatform; // Platform associated with this TraceController
|
|
Pal::IDevice* m_pDevice; // Device associated with this TraceController
|
|
Pal::ICmdAllocator* m_pCmdAllocator; // Command allocator for the TraceController
|
|
|
|
TraceSession* m_pTraceSession; // TraceSession owning this TraceController
|
|
Pal::uint64 m_supportedGpuMask; // Bit mask of GPU indices that are capable of participating in the trace
|
|
Pal::uint8 m_renderOpMask; // Bitmask of RenderOp modes, indicating which are accepted
|
|
CaptureMode m_captureMode; // Modality for determining the starting renderop index of the trace
|
|
Pal::uint64 m_renderOpCount; // The "global" count, incremented on every render op
|
|
Pal::uint64 m_prepStartRenderOp; // Relative or absolute render op number indicating trace begin
|
|
Pal::uint64 m_numPrepRenderOps; // Number of "warm-up" frames before the start frame
|
|
Pal::uint64 m_captureRenderOpCount; // Number of frames to wait before ending the trace
|
|
Pal::uint64 m_renderOpTraceAccepted; // The frame number when the trace was accepted
|
|
|
|
Util::Mutex m_renderOpLock; // Lock over UpdateFrame/OnFrameUpdated
|
|
Pal::IQueue* m_pQueue; // The queue being used to submit Begin/End GPU trace command buffers
|
|
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 908
|
|
Pal::ICmdBuffer* m_pCmdBufTracePrepare; // Command buffer for recording during the prep phase
|
|
#endif
|
|
Pal::ICmdBuffer* m_pCmdBufTraceBegin; // Command buffer to submit Trace Begin
|
|
Pal::ICmdBuffer* m_pCmdBufTraceEnd; // Command buffer to submit Trace End
|
|
Pal::ICmdBuffer* m_pCmdBufPostambleEnd; // Command buffer to submit Postamble End
|
|
Pal::IFence* m_pFenceTraceEnd; // Fence to wait for Trace End command buffer completion
|
|
Pal::IFence* m_pFencePostambleEnd; // Fence to wait for Postamble End command buffer completion
|
|
};
|
|
|
|
} // namespace GpuUtil
|