fa772be675
## Overview and rationale This reverts https://github.com/ROCm/rocm-systems/pull/1886, which... * Re-applies https://github.com/ROCm/rocm-systems/pull/1866 * Reverts https://github.com/ROCm/rocm-systems/pull/1728 (So it restores the [`amdgpu-windows-interop/`](https://github.com/ROCm/rocm-systems/tree/develop/shared/amdgpu-windows-interop) folder back to the state from a few weeks ago) The rationale for this change is at https://github.com/ROCm/rocm-systems/pull/1866: > Last PAL update broke applications on gfx12 Windows. ## Cross-repository change details That PR failed to build but was merged with this explanation: > TheRock CI Windows build fails as expected with this revert. > > References to these PAL members need to be stripped out in a patch on TheRock. > > ``` > 11.3 C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp(152): error C2039: 'RegisterTraceStateChangeCallback': is not a member of 'GpuUtil::TraceSession' > 11.4 C:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil\palTraceSession.h(372): note: see declaration of 'GpuUtil::TraceSession' > 11.4 C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp(195): error C2039: 'UnregisterTraceStateChangeCallback': is not a member of 'GpuUtil::TraceSession' > 11.4 C:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil\palTraceSession.h(372): note: see declaration of 'GpuUtil::TraceSession' > ``` The patch in TheRock was updated in https://github.com/ROCm/TheRock/pull/2154. This rolls forward by updating the ref for TheRock. 
That original PR could have been sequenced differently to avoid a build break - perhaps by * Pointing to a branch in TheRock with the patch rebased * Deleting the patch in the workflows here but holding a local copy of the patch to be applied in workflows * Landing the patch as a normal commit instead of carrying it at all ## Test plan 1. Watch TheRock CI here (https://github.com/ROCm/rocm-systems/actions/runs/19447202693/job/55644411119?pr=1893) 2. Build locally: ```bash # In rocm-systems git am --whitespace=nowarn D:\projects\TheRock\patches\amd-mainline\rocm-systems\0001-Revert-SWDEV-543498-Some-compute-Ubertrace-profiles-.patch git am --whitespace=nowarn D:\projects\TheRock\patches\amd-mainline\rocm-systems\0003-Use-is_versioned-true-consistently-in-both-Comgr-Loa.patch git am --whitespace=nowarn D:\projects\TheRock\patches\amd-mainline\rocm-systems\0006-Explicitly-load-libamdhip64.so.7.patch # Note: the build fails with the observed errors if patch 0001 is not applied! # In TheRock cmake -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=cl.exe -DCMAKE_CXX_COMPILER=cl.exe \ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DPython3_EXECUTABLE=d:/projects/TheRock/.venv/Scripts/python \ -DTHEROCK_ROCM_SYSTEMS_SOURCE_DIR=d:/projects/TheRock/../rocm-systems \ # IMPORTANT -DTHEROCK_AMDGPU_FAMILIES=gfx110X-all \ -DBUILD_TESTING=ON \ -DTHEROCK_ENABLE_ALL=ON \ -Damd-llvm_BUILD_TYPE=RelWithDebInfo \ -S D:/projects/TheRock \ -B D:/projects/TheRock/build \ -G Ninja cmake --build D:/projects/TheRock/build --target hip-clr # [build] Build finished with exit code 0 cmake --build D:/projects/TheRock/build --target ocl-clr+dist # [build] Build finished with exit code 0 ```
213 строки
8.7 KiB
C++
213 строки
8.7 KiB
C++
/*
|
|
***********************************************************************************************************************
|
|
*
|
|
* Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in all
|
|
* copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*
|
|
**********************************************************************************************************************/
|
|
|
|
#pragma once
|
|
|
|
#include "palGpaSession.h"
|
|
#include "palGpuUtil.h"
|
|
#include "palTraceSession.h"
|
|
#include "palVector.h"
|
|
#include "palHashSet.h"
|
|
#include "palMutex.h"
|
|
|
|
// Forward declarations of PAL interface classes, so this header can name them without their full definitions.
namespace Pal
{
class IDevice;
class IPlatform;
class IShaderLibrary;
} // namespace Pal
|
|
|
|
// Forward declaration of the GPA session class.
namespace GpuUtil { class GpaSession; } // namespace GpuUtil
|
|
|
|
namespace GpuUtil
|
|
{
|
|
|
|
namespace TraceChunk
|
|
{
|
|
|
|
/// "CodeObject" RDF chunk identifier & version
|
|
constexpr char CodeObjectChunkId[TextIdentifierSize] = "CodeObject";
|
|
constexpr Pal::uint32 CodeObjectChunkVersion = 2;
|
|
|
|
/// Header for the "CodeObject" RDF chunk
|
|
struct CodeObjectHeader
|
|
{
|
|
Pal::uint32 pciId; /// The ID of the GPU the trace was run on
|
|
Pal::ShaderHash codeObjectHash; /// Hash of the Code Object binary
|
|
};
|
|
|
|
/// "COLoadEvent" RDF chunk identifier & version
|
|
constexpr char CodeObjectLoadEventChunkId[TextIdentifierSize] = "COLoadEvent";
|
|
constexpr Pal::uint32 CodeObjectLoadEventChunkVersion = 3;
|
|
|
|
struct CodeObjectLoadEventHeader
|
|
{
|
|
Pal::uint32 count; /// Number of load events in this chunk
|
|
};
|
|
|
|
/// Describes whether a load event was into GPU memory or from.
|
|
enum class CodeObjectLoadEventType : Pal::uint32
|
|
{
|
|
LoadToGpuMemory = 0, /// Code Object was loaded into GPU memory
|
|
UnloadFromGpuMemory = 1 /// Code Object was unloaded from GPU memory
|
|
};
|
|
|
|
/// Describes one or more GPU load/unload(s) of a Code Object. Payload for "COLoadEvent" RDF chunk.
|
|
struct CodeObjectLoadEvent
|
|
{
|
|
Pal::uint32 pciId; /// The ID of the GPU the trace was run on
|
|
CodeObjectLoadEventType eventType; /// Type of loader event
|
|
Pal::uint64 baseAddress; /// Base address where the Code Object was loaded
|
|
Pal::ShaderHash codeObjectHash; /// Hash of the (un)loaded Code Object binary
|
|
Pal::uint64 timestamp; /// CPU timestamp of this event being triggered
|
|
};
|
|
|
|
/// "PsoCorrelation" RDF chunk identifier & version
|
|
constexpr char PsoCorrelationChunkId[TextIdentifierSize] = "PsoCorrelation";
|
|
constexpr Pal::uint32 PsoCorrelationChunkVersion = 3;
|
|
|
|
struct PsoCorrelationHeader
|
|
{
|
|
Pal::uint32 count; /// Number of PSO correlations in this chunk
|
|
};
|
|
|
|
/// Payload for the "PsoCorrelation" RDF chunks
|
|
struct PsoCorrelation
|
|
{
|
|
Pal::uint32 pciId; /// The ID of the GPU the trace was run on
|
|
Pal::uint64 apiPsoHash; /// Hash of the API-level Pipeline State Object
|
|
Pal::PipelineHash internalPipelineHash; /// Hash of all inputs to the pipeline compiler
|
|
char apiLevelObjectName[64]; /// Debug object name (null-terminated)
|
|
};
|
|
|
|
/// "COCorrelation" RDF chunk identifier & version
|
|
constexpr char CodeObjectCorrelationChunkId[TextIdentifierSize] = "COCorrelation";
|
|
constexpr uint32_t CodeObjectCorrelationChunkVersion = 4;
|
|
|
|
struct CodeObjectCorrelationHeader
|
|
{
|
|
Pal::uint32 count; /// Number of Code Object Correlations in this chunk
|
|
};
|
|
|
|
/// Payload for the "CodeObjectCorrelation" RDF chunks
|
|
struct CodeObjectCorrelation
|
|
{
|
|
Pal::PipelineHash internalPipelineHash; /// Hash of all inputs to the pipeline compiler
|
|
Pal::ShaderHash codeObjectHash; /// Hash of the Code Object binary in the CO Database
|
|
Pal::uint32 containsMetadata : 1; /// 1 if the code object contains metadata, 0 otherwise
|
|
Pal::uint32 reserved : 31; /// Bitflags reserved for future use
|
|
};
|
|
|
|
} // namespace TraceChunk
|
|
|
|
/// CodeObject Trace Source name & version
|
|
constexpr char CodeObjectTraceSourceName[] = "codeobject";
|
|
constexpr Pal::uint32 CodeObjectTraceSourceVersion = 3;
|
|
|
|
// =====================================================================================================================
|
|
class CodeObjectTraceSource : public ITraceSource
|
|
{
|
|
public:
|
|
CodeObjectTraceSource(Pal::IPlatform* pPlatform);
|
|
~CodeObjectTraceSource();
|
|
|
|
// ==== TraceSource Native Functions ========================================================================== //
|
|
Pal::Result RegisterPipeline(const Pal::IPipeline* pPipeline, const RegisterPipelineInfo& clientInfo);
|
|
Pal::Result UnregisterPipeline(const Pal::IPipeline* pPipeline);
|
|
|
|
Pal::Result RegisterLibrary(const Pal::IShaderLibrary* pLibrary, const RegisterLibraryInfo& clientInfo);
|
|
Pal::Result UnregisterLibrary(const Pal::IShaderLibrary* pLibrary);
|
|
|
|
Pal::Result RegisterElfBinary(const ElfBinaryInfo& elfBinaryInfo);
|
|
Pal::Result UnregisterElfBinary(const ElfBinaryInfo& elfBinaryInfo);
|
|
|
|
// ==== Base Class Overrides =================================================================================== //
|
|
virtual void OnConfigUpdated(DevDriver::StructuredValue* pJsonConfig) override { }
|
|
|
|
virtual Pal::uint64 QueryGpuWorkMask() const override { return 0; }
|
|
|
|
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 908
|
|
virtual void OnTraceAccepted(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override { }
|
|
#else
|
|
virtual void OnTraceAccepted() override { }
|
|
#endif
|
|
virtual void OnTraceBegin(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override { }
|
|
virtual void OnTraceEnd(Pal::uint32 gpuIndex, Pal::ICmdBuffer* pCmdBuf) override { }
|
|
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 939
|
|
virtual void OnPostambleEnd(
|
|
Pal::uint32 gpuIndex,
|
|
Pal::ICmdBuffer* pCmdBuf) override { }
|
|
#endif
|
|
virtual void OnTraceFinished() override;
|
|
|
|
virtual const char* GetName() const override { return CodeObjectTraceSourceName; }
|
|
virtual Pal::uint32 GetVersion() const override { return CodeObjectTraceSourceVersion; }
|
|
|
|
private:
|
|
Pal::Result RegisterSinglePipeline(const Pal::IPipeline* pPipeline, const RegisterPipelineInfo& clientInfo);
|
|
Pal::Result UnregisterSinglePipeline(const Pal::IPipeline* pPipeline);
|
|
|
|
Pal::Result AddCodeObjectLoadEvent(
|
|
const Pal::IShaderLibrary* pLibrary,
|
|
TraceChunk::CodeObjectLoadEventType eventType);
|
|
Pal::Result AddCodeObjectLoadEvent(
|
|
const Pal::IPipeline* pLibrary,
|
|
TraceChunk::CodeObjectLoadEventType eventType);
|
|
Pal::Result AddCodeObjectLoadEvent(
|
|
const ElfBinaryInfo& elfBinaryInfo,
|
|
TraceChunk::CodeObjectLoadEventType eventType);
|
|
|
|
Pal::Result WriteCodeObjectChunks();
|
|
Pal::Result WriteLoaderEventsChunk();
|
|
Pal::Result WritePsoCorrelationChunk();
|
|
Pal::Result WriteCoCorrelationChunk();
|
|
|
|
struct CodeObjectDatabaseRecord
|
|
{
|
|
Pal::uint32 recordSize;
|
|
Pal::ShaderHash codeObjectHash;
|
|
};
|
|
|
|
Pal::IPlatform* const m_pPlatform;
|
|
|
|
Util::RWLock m_registerPipelineLock;
|
|
Util::Vector<CodeObjectDatabaseRecord*, 1, Pal::IPlatform> m_codeObjectRecords;
|
|
Util::Vector<TraceChunk::CodeObjectLoadEvent, 1, Pal::IPlatform> m_loadEventRecords;
|
|
Util::Vector<TraceChunk::PsoCorrelation, 1, Pal::IPlatform> m_psoCorrelationRecords;
|
|
Util::Vector<TraceChunk::CodeObjectCorrelation, 1, Pal::IPlatform> m_coCorrelationRecords;
|
|
|
|
// API hashes -> internal pipeline hash (-> child code object hashes)
|
|
Util::HashSet<Pal::uint64, Pal::IPlatform, Util::JenkinsHashFunc> m_registeredApiHashes;
|
|
Util::HashSet<Pal::uint64, Pal::IPlatform, Util::JenkinsHashFunc> m_registeredPipelines;
|
|
Util::HashSet<Pal::uint64, Pal::IPlatform, Util::JenkinsHashFunc> m_registeredCoHashes;
|
|
|
|
};
|
|
|
|
} // namespace GpuUtil
|
|
|