SWDEV-507967 - Deprecate gfx9, gfx8, gfx7 on Windows

PAL_CLIENT_INTERFACE_MAJOR_VERSION from 872 --> 910

Change-Id: I03dfa2924ccdae4c2f13f09d5f34ee58298e1343
This commit is contained in:
agunashe
2025-01-07 11:04:44 -08:00
committed by Todd tiantuo Li
parent 199e464402
commit ea804e16f8
10 changed files with 18 additions and 136 deletions
+1 -1
View File
@@ -20,7 +20,7 @@
set(PAL_CLIENT "OCL")
set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 872)
set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 910)
set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION 42)
set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION 0)
set(AMD_DK_ROOT $ENV{DK_ROOT})
+1 -3
View File
@@ -673,8 +673,6 @@ class Settings : public amd::HeapObject {
uint customHostAllocator_ : 1; //!< True if device has custom host allocator
// that replaces generic OS allocation routines
uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format
uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program
uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device
uint useLightning_ : 1; //!< Enable LC path for this device
@@ -689,7 +687,7 @@ class Settings : public amd::HeapObject {
uint gwsInitSupported_:1; //!< Check if GWS is supported on this machine.
uint kernel_arg_opt_: 1; //!< Enables kernel arg optimization for blit kernels
uint kernel_arg_impl_ : 2; //!< Kernel argument implementation
uint reserved_ : 7;
uint reserved_ : 12;
};
uint value_;
};
+2 -6
View File
@@ -1926,12 +1926,8 @@ std::vector<std::string> Program::ProcessOptions(amd::option::Options* options)
// Set options for the standard device specific options
// All our devices support these options now
if (device().settings().reportFMAF_) {
optionsVec.push_back("-DFP_FAST_FMAF=1");
}
if (device().settings().reportFMA_) {
optionsVec.push_back("-DFP_FAST_FMA=1");
}
optionsVec.push_back("-DFP_FAST_FMAF=1");
optionsVec.push_back("-DFP_FAST_FMA=1");
} else {
if (!isHIP()) {
+5 -7
View File
@@ -1066,7 +1066,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
bool result = false;
amd::Image::Format newFormat(gpuMem(dstMemory).desc().format_);
bool swapLayer =
(dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY;
// Find unsupported formats
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
@@ -1388,7 +1388,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
bool result = false;
amd::Image::Format newFormat(gpuMem(srcMemory).desc().format_);
bool swapLayer =
(srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY;
// Find unsupported formats
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
@@ -1655,16 +1655,14 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
// Program source origin
int32_t srcOrg[4] = {(int32_t)srcOrigin[0], (int32_t)srcOrigin[1], (int32_t)srcOrigin[2], 0};
if ((gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
dev().settings().gfx10Plus_) {
if (gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
srcOrg[3] = 1;
}
setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
// Program destination origin
int32_t dstOrg[4] = {(int32_t)dstOrigin[0], (int32_t)dstOrigin[1], (int32_t)dstOrigin[2], 0};
if ((gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
dev().settings().gfx10Plus_) {
if (gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
dstOrg[3] = 1;
}
setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg);
@@ -2329,7 +2327,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
Memory* memView = &gpuMem(memory);
amd::Image::Format newFormat(gpuMem(memory).owner()->asImage()->getImageFormat());
bool swapLayer =
(memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY;
// Program the kernels workload depending on the fill dimensions
fillType = FillImage;
-21
View File
@@ -667,27 +667,6 @@ static constexpr std::array<std::pair<int, int>, 140> gfx10BlockIdPal = {{
void PerfCounter::convertInfo() {
switch (dev().ipLevel()) {
case Pal::GfxIpLevel::GfxIp7:
if (info_.blockIndex_ < ciBlockIdOrcaToPal.size()) {
auto p = ciBlockIdOrcaToPal[info_.blockIndex_];
info_.blockIndex_ = std::get<0>(p);
info_.counterIndex_ = std::get<1>(p);
}
break;
case Pal::GfxIpLevel::GfxIp8:
if (info_.blockIndex_ < viBlockIdOrcaToPal.size()) {
auto p = viBlockIdOrcaToPal[info_.blockIndex_];
info_.blockIndex_ = std::get<0>(p);
info_.counterIndex_ = std::get<1>(p);
}
break;
case Pal::GfxIpLevel::GfxIp9:
if (info_.blockIndex_ < gfx9BlockIdPal.size()) {
auto p = gfx9BlockIdPal[info_.blockIndex_];
info_.blockIndex_ = std::get<0>(p);
info_.counterIndex_ = std::get<1>(p);
}
break;
case Pal::GfxIpLevel::GfxIp10_1:
case Pal::GfxIpLevel::GfxIp10_3:
case Pal::GfxIpLevel::GfxIp11_0:
+2 -20
View File
@@ -84,21 +84,6 @@ struct PalDevice {
static constexpr PalDevice supportedPalDevices[] = {
// GFX Version PAL GFX IP Level PAL Name PAL ASIC Revision
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo},
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol},
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland},
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Tonga", Pal::AsicRevision::Tonga}, // Also Tongapro (generated code is for Tonga)
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Fiji", Pal::AsicRevision::Fiji},
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Ellesmere", Pal::AsicRevision::Polaris10}, // Ellesmere
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Baffin", Pal::AsicRevision::Polaris11}, // Baffin
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris12}, // Lexa
{8, 1, 0, Pal::GfxIpLevel::GfxIp8_1, "Stoney", Pal::AsicRevision::Stoney},
{9, 0, 0, Pal::GfxIpLevel::GfxIp9, "gfx900", Pal::AsicRevision::Vega10},
{9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven},
{9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12},
{9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20},
{9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven2},
{9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir},
{10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10},
{10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12},
{10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14},
@@ -391,10 +376,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
if (settings().checkExtension(ClKhrFp64)) {
info_.doubleFPConfig_ = info_.singleFPConfig_ | CL_FP_DENORM;
}
if (settings().reportFMA_) {
info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
}
info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
if (settings().checkExtension(ClKhrFp16)) {
info_.halfFPConfig_ = info_.singleFPConfig_;
@@ -587,7 +569,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
if (settings().svmFineGrainSystem_) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
}
if (amd::IS_HIP && ipLevel_ >= Pal::GfxIpLevel::GfxIp9) {
if (amd::IS_HIP) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
+3 -8
View File
@@ -1043,12 +1043,7 @@ bool Resource::CreateInterop(CreateParams* params) {
//! and OGL decompresses 24bit DEPTH into D24S8 for OGL compatibility
if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
(desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
if (dev().settings().gfx10Plus_) {
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
} else {
hwState_[1] &= ~0x3c000000;
hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
}
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
}
hwState_[8] = GetHSAILImageFormatType(desc().format_);
hwState_[9] = GetHSAILImageOrderType(desc().format_);
@@ -1253,7 +1248,7 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
// Force remote allocation if it was requested in the settings
if (dev().settings().remoteAlloc_ && ((memoryType() == Local) || (memoryType() == Persistent))) {
if (dev().settings().apuSystem_ && dev().settings().viPlus_) {
if (dev().settings().apuSystem_) {
desc_.type_ = Remote;
} else {
desc_.type_ = RemoteUSWC;
@@ -1512,7 +1507,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
// Make sure linear pitch in bytes is 4 bytes aligned
if (((gpuMemoryRowPitch % 4) != 0) ||
// another DRM restriction... SI has 4 pixels
(gpuMemoryOffset % 4 != 0) || (dev().settings().sdamPageFaultWar_ && (imageOffsetx != 0))) {
(gpuMemoryOffset % 4 != 0) || (imageOffsetx != 0)) {
return false;
}
}
+1 -63
View File
@@ -67,10 +67,6 @@ Settings::Settings() {
pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE)
? defaultMinXferSize * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi;
// Disable FP_FAST_FMA defines by default
reportFMAF_ = false;
reportFMA_ = false;
// GPU device by default
apuSystem_ = false;
@@ -93,11 +89,6 @@ Settings::Settings() {
// Use image DMA if requested
imageDMA_ = GPU_IMAGE_DMA;
// Disable ASIC specific features by default
viPlus_ = false;
aiPlus_ = false;
gfx10Plus_ = false;
// Number of compute rings.
numComputeRings_ = 0;
@@ -116,9 +107,6 @@ Settings::Settings() {
// Don't support Denormals for single precision by default
singleFpDenorm_ = false;
// Disable SDMA workaround by default
sdamPageFaultWar_ = false;
// SQTT buffer size in bytes
rgpSqttDispCount_ = PAL_RGP_DISP_COUNT;
rgpSqttWaitIdle_ = true;
@@ -201,8 +189,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
case Pal::AsicRevision::Navi14:
case Pal::AsicRevision::Navi12:
case Pal::AsicRevision::Navi10:
case Pal::AsicRevision::Navi10_A0:
gfx10Plus_ = true;
useLightning_ = GPU_ENABLE_LC;
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
if (useLightning_) {
@@ -219,51 +205,14 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
// GFX10.1 HW doesn't support custom pitch. Enable double copy workaround
imageBufferWar_ = GPU_IMAGE_BUFFER_WAR;
}
// Fall through to AI (gfx9) ...
case Pal::AsicRevision::Vega20:
// Enable HW P2P path for Vega20+. Runtime still relies on KMD/PAL for support
enableHwP2P_ = true;
case Pal::AsicRevision::Vega12:
case Pal::AsicRevision::Vega10:
case Pal::AsicRevision::Raven:
case Pal::AsicRevision::Raven2:
case Pal::AsicRevision::Renoir:
aiPlus_ = true;
enableCoopGroups_ = IS_LINUX;
enableCoopMultiDeviceGroups_ = IS_LINUX;
if (useLightning_) {
singleFpDenorm_ = true;
}
// Fall through to VI ...
case Pal::AsicRevision::Carrizo:
case Pal::AsicRevision::Bristol:
case Pal::AsicRevision::Stoney:
case Pal::AsicRevision::Iceland:
case Pal::AsicRevision::Tonga:
case Pal::AsicRevision::Fiji:
case Pal::AsicRevision::Polaris10:
case Pal::AsicRevision::Polaris11:
case Pal::AsicRevision::Polaris12:
// Keep this false even though we have support
// singleFpDenorm_ = true;
viPlus_ = true;
// SDMA may have memory access outside of
// the valid buffer range and cause a page fault
sdamPageFaultWar_ = true;
enableExtension(ClKhrFp16);
// Fall through to CI ...
case Pal::AsicRevision::Kalindi:
case Pal::AsicRevision::Godavari:
case Pal::AsicRevision::Spectre:
case Pal::AsicRevision::Spooky:
case Pal::AsicRevision::Bonaire:
case Pal::AsicRevision::Hawaii:
case Pal::AsicRevision::HawaiiPro:
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
reportFMAF_ = false;
if ((palProp.revision == Pal::AsicRevision::Hawaii) || aiPlus_) {
reportFMAF_ = true;
}
// Cache line size is 64 bytes
cacheLineSize_ = 64;
// L1 cache size is 16KB
@@ -293,13 +242,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
} else {
maxAllocSize_ = 3ULL * Gi;
}
// Note: More than 4 command buffers may cause a HW hang
// with HWSC on pre-gfx9 devices in OCLPerfKernelArguments
if (!aiPlus_) {
maxCmdBuffers_ = 4;
}
supportRA_ = false;
numMemDependencies_ = GPU_NUM_MEM_DEPENDENCY;
break;
@@ -345,7 +287,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
if (hwLDSSize_ == 0) {
// Use hardcoded values for now, since PAL properties aren't available with offline devices
hwLDSSize_ = (IS_LINUX || amd::IS_HIP || gfx10Plus_) ? 64 * Ki: 32 * Ki;
hwLDSSize_ = (IS_LINUX || amd::IS_HIP) ? 64 * Ki: 32 * Ki;
}
imageSupport_ = true;
@@ -357,10 +299,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
// HW doesn't support untiled image writes
// hostMemDirectAccess_ |= HostMemImage;
// Report FP_FAST_FMA define if double precision HW
reportFMA_ = true;
reportFMAF_ = true;
if (doublePrecision_) {
// Enable KHR double precision extension
enableExtension(ClKhrFp64);
+1 -5
View File
@@ -65,21 +65,17 @@ class Settings : public device::Settings {
uint use64BitPtr_ : 1; //!< Use 64bit pointers on GPU
uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU
uint imageDMA_ : 1; //!< Enable direct image DMA transfers
uint viPlus_ : 1; //!< VI and post VI features
uint aiPlus_ : 1; //!< AI and post AI features
uint gfx10Plus_ : 1; //!< gfx10 and post gfx10 features
uint threadTraceEnable_ : 1; //!< Thread trace enable
uint svmAtomics_ : 1; //!< SVM device atomics
uint svmFineGrainSystem_ : 1; //!< SVM fine grain system support
uint useDeviceQueue_ : 1; //!< Submit to separate device queue
uint sdamPageFaultWar_ : 1; //!< SDMA page fault workaround
uint rgpSqttWaitIdle_ : 1; //!< Wait for idle after SQTT trace
uint rgpSqttForceDisable_ : 1; //!< Disables SQTT
uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing
uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10
uint disableSdma_ : 1; //!< Disable SDMA support
uint alwaysResident_ : 1; //!< Make resources resident at allocation time
uint reserved_ : 10;
uint reserved_ : 13;
};
uint value_;
};
+2 -2
View File
@@ -36,7 +36,7 @@ TimeStamp::~TimeStamp() {}
void TimeStamp::begin() {
if (!flags_.beginIssued_) {
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
gpu().iCmd()->CmdWriteTimestamp(Pal::PipelineStageFlag::PipelineStageBottomOfPipe, *iMem_,
memOffset_ + CommandStartTime * sizeof(uint64_t));
flags_.beginIssued_ = true;
}
@@ -44,7 +44,7 @@ void TimeStamp::begin() {
void TimeStamp::end() {
CondLog(!flags_.beginIssued_, "We didn't issue a begin operation!");
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
gpu().iCmd()->CmdWriteTimestamp(Pal::PipelineStageFlag::PipelineStageBottomOfPipe, *iMem_,
memOffset_ + CommandEndTime * sizeof(uint64_t));
flags_.endIssued_ = true;
}