Files
rocm-systems/shared/amdgpu-windows-interop/pal/inc/util/palBuddyAllocator.h
T
Scott Todd 0633d8d8ce Revert "Revert "Update amdgpu-windows-interop with latest changes 20251105 (#…" (#1886)
Reverts ROCm/rocm-systems#1866 (re-landing https://github.com/ROCm/rocm-systems/pull/1728)

The revert (#1866) broke Windows builds, see https://github.com/ROCm/rocm-systems/actions/workflows/therock-ci.yml?query=branch%3Adevelop+event%3Apush. I think that was intentional? Either way, we need a plan for rolling out such changes without build breaks.

Sample logs: https://github.com/ROCm/rocm-systems/actions/runs/19371422209/job/55428130376#step:14:6597
```
[ocl-clr] [134/153] Building CXX object rocclr\CMakeFiles\rocclr.dir\device\pal\palubercapturemgr.cpp.obj
[ocl-clr] FAILED: rocclr/CMakeFiles/rocclr.dir/device/pal/palubercapturemgr.cpp.obj 
[ocl-clr] ccache "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\bin\Hostx64\x64\cl.exe"  /nologo /TP -DATI_OS_WIN -DCL_TARGET_OPENCL_VERSION=220 -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DCOMGR_DYN_DLL -DGPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION=42 -DHAVE_CL2_HPP -DLITTLEENDIAN_CPU -DOPENCL_C_MAJOR=2 -DOPENCL_C_MINOR=0 -DOPENCL_MAJOR=2 -DOPENCL_MINOR=1 -DPAL_BUILD_RDF=1 -DPAL_CLIENT_INTERFACE_MAJOR_VERSION=932 -DPAL_DEVELOPER_BUILD=0 -DPAL_GPUOPEN_OCL -DPAL_KMT_BUILD=1 -DROCCLR_VERSION_GITHASH=\"38294ab\" -DWITH_PAL_DEVICE -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\compiler\lib -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\compiler\lib\include -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\compiler\lib\backends\common -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\elf -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\include -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\..\.. -IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\..\..\.. 
-IC:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\opencl\khronos\headers\opencl2.2\CL\..\..\..\..\amdocl -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\core -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\util -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\devdriver\shared\legacy\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\devdriver\third_party\dd_crc32\inc -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\shared\metrohash\src -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\loader -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\hsail-compiler\lib\loaders\elf\utils\libelf\..\..\..\..\..\lib\loaders\elf\utils\common -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\hsail-compiler\lib\loaders\elf\utils\libelf\..\..\..\..\..\lib\loaders\elf\utils\common\win32 -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\hsail-compiler\lib\loaders\elf\utils\libelf\..\..\..\..\..\lib\loaders\elf\utils\libelf -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\libamdhsacode -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\libamdhsacode\..\..\include -IC:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\sc\HSAIL\ext\libamdhsacode\..\..\hsail-tools\libHSAIL -external:IB:\build\compiler\amd-comgr\dist\include -external:W0 /DWIN32 /D_WINDOWS /EHsc /DWIN32 /D_WINDOWS  
/EHsc /O2 /Ob2 /DNDEBUG -std:c++20 -MD /wd4267 /wd4244 /wd4996 /MT /showIncludes /Forocclr\CMakeFiles\rocclr.dir\device\pal\palubercapturemgr.cpp.obj /Fdrocclr\CMakeFiles\rocclr.dir\rocclr.pdb /FS -c C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp
[ocl-clr] cl : Command line warning D9025 : overriding '/MD' with '/MT'
[ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp(152): error C2039: 'RegisterTraceStateChangeCallback': is not a member of 'GpuUtil::TraceSession'
[ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil\palTraceSession.h(372): note: see declaration of 'GpuUtil::TraceSession'
[ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\projects\clr\rocclr\device\pal\palubercapturemgr.cpp(195): error C2039: 'UnregisterTraceStateChangeCallback': is not a member of 'GpuUtil::TraceSession'
[ocl-clr] C:\home\runner\_work\rocm-systems\rocm-systems\shared\amdgpu-windows-interop\pal\inc\gpuUtil\palTraceSession.h(372): note: see declaration of 'GpuUtil::TraceSession'
[ocl-clr] [135/153] Building CXX object rocclr\CMakeFiles\rocclr.dir\device\pal\paldevicegl.cpp.obj
```
2025-11-17 14:27:09 -08:00

203 řádky
9.4 KiB
C++

/*
***********************************************************************************************************************
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palBuddyAllocator.h
* @brief PAL utility BuddyAllocator class declaration.
***********************************************************************************************************************
*/
#pragma once
#include "palUtil.h"
#include "palHashSet.h"
#include "palHashMap.h"
#include "palMutex.h"
namespace Util
{
/**
***********************************************************************************************************************
* @brief Buddy Allocator
*
* Responsible for managing small GPU memory requests by allocating a large base allocation and dividing it into
* appropriately sized suballocation blocks.
***********************************************************************************************************************
*/
template <typename Allocator>
class BuddyAllocator
{
public:
/// Constructor.
///
/// @param [in] pAllocator The allocator that will allocate memory if required.
/// @param [in] baseAllocSize The size of the base allocation this buddy allocator suballocates.
/// @param [in] minAllocSize The size of the smallest block this buddy allocator can allocate.
BuddyAllocator(
Allocator* pAllocator,
gpusize baseAllocSize,
gpusize minAllocSize);
~BuddyAllocator();
/// Initializes the buddy allocator.
///
/// @returns Success if the buddy allocator has been successfully initialized.
Result Init();
/// Suballocates a block from the base allocation that this buddy allocator manages. Expects @ref ClaimGpuMemory to
/// be called directly before it. If a memory manager with multiple buddyAllocators is used, use pattern should
/// be: Iterate through buddyAllocators calling ClaimGpuMemory, if one returns @ref Success break out of the loop,
/// then call Allocate on that buddyAllocator. If none return @ref Success, then a new buddyAllocator needs to be
/// created. The purpose of splitting up buddyAllocator selection and Allocation is to reduce lock contention in
/// multithreaded memory managers.
///
/// @param [in] size The size of the requested suballocation.
/// @param [in] alignment The alignment requirements of the requested suballocation.
/// @param [out] pOffset The offset the suballocated block starts within the base allocation.
///
/// @returns Success if the allocation succeeded, @ref ErrorOutOfMemory if there isn't enough system memory to
/// fulfill the request, or @ref ErrorOutOfGpuMemory if there isn't a large enough block free in the
/// base allocation to fulfill the request.
///
/// @warning Unless @ref ClaimGpuMemory is called before every single call, the results of @ref Allocate will
/// be invalid. If @ref ClaimGpuMemory returns @ref Success, then @ref ErrorOutOfGpuMemory will never be
/// returned.
Result Allocate(
gpusize size,
gpusize alignment,
gpusize* pOffset);
/// Frees a previously allocated suballocation.
///
/// @param [in] offset The offset the suballocated block starts within the base allocation.
/// @param [in] size Optional parameter specifying the size of the original allocation.
/// @param [in] alignment Optional parameter specifying the alignment of the original allocation.
void Free(
gpusize offset,
gpusize size = 0,
gpusize alignment = 0);
/// Tells whether the base allocation is completely free. If the returned value is true then the caller is safe
/// to deallocate the base allocation.
bool IsEmpty() const
{
return (m_numSuballocations == 0);
}
/// Returns the size of the largest allocation that can be suballocated with this buddy allocator.
gpusize MaximumAllocationSize() const;
/// Claims (doesn't allocate) some memory, used to quickly determine if a pool of memory has availible memory.
/// Doesn't affect internal state unless Result::Success is returned
///
/// @param [in] size The size of the requested suballocation.
/// @param [in] alignment The alignment requirements of the requested suballocation.
///
/// @returns Success if there is enough memory in this buddyAllocator to allocate the requested size of memory,
/// @ref ErrorOutOfGpuMemory if there is not enough memory
///
/// @warning Unless this is called to test availible memory before every call to Allocate, then the results will not
/// be valid.
Result ClaimGpuMemory(
gpusize size,
gpusize alignment);
/// Checks if @ref ClaimGpuMemory can actually claim memory, can be used to find the best fit pool. This function
/// does NOT acquire a lock on the structures ClaimGpuMemory uses, and does NOT claim or allocate the memory.
///
/// @param [in] size The size of the requested suballocation.
/// @param [in] alignment The alignment requirements of the requested suballocation.
/// @param [out] pKval The highest kval that will need to be split will be stored here.
///
/// @returns Success if there is enough memory in this buddyAllocator to allocate the requested size of memory,
/// @ref ErrorOutOfGpuMemory if there is not enough memory
///
Result CheckIfOpenMemory(
gpusize size,
gpusize alignment,
uint32* pKval);
private:
typedef Util::HashSet<gpusize, Allocator, JenkinsHashFunc> FreeSet;
typedef Util::HashMap<gpusize, uint32, Allocator, JenkinsHashFunc> UsedMap;
Result GetNextFreeBlock(
uint32 kval,
gpusize* pOffset);
Result FreeBlock(gpusize offset);
static constexpr gpusize KvalToSize(uint32 kVal) { return (1ull << kVal); }
static uint32 SizeToKval(gpusize size) { return Log2(size); }
Allocator* const m_pAllocator;
const uint32 m_baseAllocKval;
const uint32 m_minKval;
// Array of hashSets of blocks that are free at each level
FreeSet* m_pFreeBlockSets;
// Hashmap of blocks that are used, key=offset, value=level (kval)
UsedMap* m_pUsedBlockMap;
// List of the free memory at each level
uint32* m_pNumFreeList;
// The highest Kval that has at least 1 free block (used in ClaimGpuMemory)
uint32 m_highestFreeKval;
uint32 m_numSuballocations;
// mutex on altering the numFreeList
Util::Mutex m_numFreeMutex;
// mutex on the used block map
Util::Mutex m_usedBlockMapMutex;
// array of mutexes, one for each freeBlockSet
Util::Mutex* m_pFreeSetMutexes;
// mutex on the freeing. Serialize freeing blocks and don't allow allocating blocks while one is freeing. Based on
// testing, applications typically don't try to free and allocate memory at the same time, and almost all of the
// memory freeing is done at the end of the application.
Util::RWLock m_freeLock;
// Set to true if ClaimGpuMemory is ever called on this buddyAllocator. This signals to free to not merge blocks
// if m_pNumFreeList[kval - m_minKval] = 0
bool m_usedClaim;
// HashSet and HashMap utility functions
Result InsertToFreeSet(gpusize offset, uint32 kval);
bool GetKvalUsed(gpusize offset, uint32* pKval);
Result SetKvalUsed(gpusize offset, uint32 kval);
Result PopFromFreeSet(gpusize* pOffset, uint32 kval);
bool IsOffsetFree(gpusize offset, uint32 kval);
Result RemoveOffsetFromFreeSet(gpusize offset, uint32 kval);
Result RemoveOffsetFromUsedMap(gpusize offset);
PAL_DISALLOW_COPY_AND_ASSIGN(BuddyAllocator);
PAL_DISALLOW_DEFAULT_CTOR(BuddyAllocator);
};
} // Util