Files
rocm-systems/projects/clr/rocclr/device/pal/palsettings.hpp
T
2025-09-02 15:05:18 +01:00

142 rader
6.2 KiB
C++

/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "top.hpp"
#include "library.hpp"
#include "palDevice.h"
/*! \addtogroup pal PAL Resource Implementation
* @{
*/
//! PAL Device Implementation
namespace amd::pal {
//! Device settings
//! Device settings
//!
//! PAL-specific settings, layered on top of the common device::Settings
//! base class. All data members are public and are declared in a fixed order;
//! do not reorder them or change the bit-field widths without auditing users
//! of value_.
class Settings : public device::Settings {
 public:
  //! Debug GPU flags (bit mask values; see debugFlags_)
  enum DebugGpuFlags {
    CheckForILSource = 0x00000001,
    StubCLPrograms = 0x00000002,  //!< Enables OpenCL programs stubbing
    LockGlobalMemory = 0x00000004,
  };

  //! Blit engine selection (presumably the values held by blitEngine_ - confirm)
  enum BlitEngineType {
    BlitEngineDefault = 0x00000000,
    BlitEngineHost = 0x00000001,
    BlitEngineCAL = 0x00000002,
    BlitEngineKernel = 0x00000003,
  };

  //! Host memory access flags (presumably used with hostMemDirectAccess_ - confirm)
  enum HostMemFlags {
    HostMemDisable = 0x00000000,
    HostMemBuffer = 0x00000001,
    HostMemImage = 0x00000002,
  };

  //! Single-bit feature switches. The 19 flags plus 13 reserved bits add up to
  //! 32 bits; value_ overlays the same storage as the bit-fields.
  union {
    struct {
      uint remoteAlloc_ : 1;          //!< Allocate remote memory for the heap
      uint stagedXferRead_ : 1;       //!< Uses a staged buffer read
      uint stagedXferWrite_ : 1;      //!< Uses a staged buffer write
      uint disablePersistent_ : 1;    //!< Disables using persistent memory for staging
      uint imageSupport_ : 1;         //!< Report images support
      uint doublePrecision_ : 1;      //!< Enables double precision support
      uint use64BitPtr_ : 1;          //!< Use 64bit pointers on GPU
      uint force32BitOcl20_ : 1;      //!< Force 32bit apps to take CLANG/HSAIL path on GPU
      uint imageDMA_ : 1;             //!< Enable direct image DMA transfers
      uint threadTraceEnable_ : 1;    //!< Thread trace enable
      uint svmAtomics_ : 1;           //!< SVM device atomics
      uint svmFineGrainSystem_ : 1;   //!< SVM fine grain system support
      uint useDeviceQueue_ : 1;       //!< Submit to separate device queue
      uint rgpSqttWaitIdle_ : 1;      //!< Wait for idle after SQTT trace
      uint rgpSqttForceDisable_ : 1;  //!< Disables SQTT
      uint enableHwP2P_ : 1;          //!< Forces HW P2P path for testing
      uint imageBufferWar_ : 1;       //!< Image buffer workaround for Gfx10
      uint disableSdma_ : 1;          //!< Disable SDMA support
      uint alwaysResident_ : 1;       //!< Make resources resident at allocation time
      uint reserved_ : 13;            //!< Unused bits padding the flags to 32 bits
    };
    uint value_;  //!< All of the flags above viewed as one integer
  };

  uint oclVersion_;              //!< Reported OpenCL version support
  uint debugFlags_;              //!< Debug GPU flags
  uint hwLDSSize_;               //!< HW local data store size
  uint maxWorkGroupSize_;        //!< Requested workgroup size for this device
  uint preferredWorkGroupSize_;  //!< Requested preferred workgroup size for this device
  uint blitEngine_;              //!< Blit engine type
  uint cacheLineSize_;           //!< Cache line size in bytes
  uint cacheSize_;               //!< L1 cache size in bytes
  uint numComputeRings_;         //!< 0 - disabled, 1 , 2,.. - the number of compute rings
  uint numDeviceEvents_;         //!< The number of device events
  uint numWaitEvents_;           //!< The number of wait events for device enqueue
  uint hostMemDirectAccess_;     //!< Enables direct access to the host memory
  uint numScratchWavesPerCu_;    //!< Maximum number of waves when scratch is enabled

  size_t xferBufSize_;         //!< Transfer buffer size for image copy optimization
  size_t pinnedXferSize_;      //!< Pinned buffer size for transfer
  size_t pinnedMinXferSize_;   //!< Minimal buffer size for pinned transfer
  size_t cpDmaCopySizeMax_;    //!< Threshold for CP DMA path in copy
  size_t resourceCacheSize_;   //!< Resource cache size in MB
  size_t numMemDependencies_;  //!< The array size for memory dependencies tracking

  uint64_t maxAllocSize_;  //!< Maximum single allocation size
  uint rgpSqttDispCount_;  //!< The number of dispatches captured in SQTT
  uint maxCmdBuffers_;     //!< Maximum number of command buffers allocated per queue
  uint mallPolicy_;        //!< 0 - default, 1 - always bypass, 2 - always put

  uint64_t subAllocationMinSize_;    //!< Minimum size allowed for suballocations
  uint64_t subAllocationMaxSize_;    //!< Maximum size allowed with suballocations
  uint64_t subAllocationChunkSize_;  //!< Chunk size for suballocations

  amd::LibrarySelector libSelector_;  //!< Select linking libraries for compiler
  size_t prepinnedMinSize_;           //!< Minimal memory size for prepinned transfer
  uint32_t limit_blit_wg_;            //!< The number of workgroups for blit execution

  //! Default constructor
  Settings();

  //! Creates settings
  //!
  //! \return a bool status (presumably true on success - confirm against the
  //!         implementation, which is not part of this header)
  bool create(const Pal::DeviceProperties& palProp,       //!< PAL device properties
              const Pal::GpuMemoryHeapProperties* heaps,  //!< PAL heap settings
              const Pal::WorkStationCaps& wscaps,         //!< PAL workstation settings
              const amd::Isa& isa,                        //!< ISA description of this device
              bool reportAsOCL12Device = false            //!< Report As OpenCL1.2 Device
  );

 private:
  //! Copy construction is disabled. Explicitly deleted so that any attempted
  //! copy (including from members and friends) is rejected at compile time
  //! instead of surfacing as an unresolved symbol at link time.
  Settings(const Settings&) = delete;

  //! Copy assignment is disabled (see the copy constructor note)
  Settings& operator=(const Settings&) = delete;

  //! Overrides current settings based on registry/environment
  void override();

  //! Local shorthand for the base class's KernelArgImpl type
  using KernelArgImpl = device::Settings::KernelArgImpl;
};
/*@}*/ // end of group pal
} // namespace amd::pal