SWDEV-507967 - Deprecate gfx9, gfx8, gfx7 on Windows
Bump PAL_CLIENT_INTERFACE_MAJOR_VERSION from 872 to 910. Change-Id: I03dfa2924ccdae4c2f13f09d5f34ee58298e1343
This commit is contained in:
committed by
Todd tiantuo Li
parent
199e464402
commit
ea804e16f8
@@ -20,7 +20,7 @@
|
||||
|
||||
set(PAL_CLIENT "OCL")
|
||||
|
||||
set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 872)
|
||||
set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 910)
|
||||
set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION 42)
|
||||
set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION 0)
|
||||
set(AMD_DK_ROOT $ENV{DK_ROOT})
|
||||
|
||||
@@ -673,8 +673,6 @@ class Settings : public amd::HeapObject {
|
||||
uint customHostAllocator_ : 1; //!< True if device has custom host allocator
|
||||
// that replaces generic OS allocation routines
|
||||
uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format
|
||||
uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program
|
||||
uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program
|
||||
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
|
||||
uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device
|
||||
uint useLightning_ : 1; //!< Enable LC path for this device
|
||||
@@ -689,7 +687,7 @@ class Settings : public amd::HeapObject {
|
||||
uint gwsInitSupported_:1; //!< Check if GWS is supported on this machine.
|
||||
uint kernel_arg_opt_: 1; //!< Enables kernel arg optimization for blit kernels
|
||||
uint kernel_arg_impl_ : 2; //!< Kernel argument implementation
|
||||
uint reserved_ : 7;
|
||||
uint reserved_ : 12;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
@@ -1926,12 +1926,8 @@ std::vector<std::string> Program::ProcessOptions(amd::option::Options* options)
|
||||
|
||||
// Set options for the standard device specific options
|
||||
// All our devices support these options now
|
||||
if (device().settings().reportFMAF_) {
|
||||
optionsVec.push_back("-DFP_FAST_FMAF=1");
|
||||
}
|
||||
if (device().settings().reportFMA_) {
|
||||
optionsVec.push_back("-DFP_FAST_FMA=1");
|
||||
}
|
||||
optionsVec.push_back("-DFP_FAST_FMAF=1");
|
||||
optionsVec.push_back("-DFP_FAST_FMA=1");
|
||||
} else {
|
||||
|
||||
if (!isHIP()) {
|
||||
|
||||
@@ -1066,7 +1066,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
bool result = false;
|
||||
amd::Image::Format newFormat(gpuMem(dstMemory).desc().format_);
|
||||
bool swapLayer =
|
||||
(dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
|
||||
dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
|
||||
// Find unsupported formats
|
||||
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
|
||||
@@ -1388,7 +1388,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
bool result = false;
|
||||
amd::Image::Format newFormat(gpuMem(srcMemory).desc().format_);
|
||||
bool swapLayer =
|
||||
(srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
|
||||
srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
|
||||
// Find unsupported formats
|
||||
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
|
||||
@@ -1655,16 +1655,14 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
|
||||
|
||||
// Program source origin
|
||||
int32_t srcOrg[4] = {(int32_t)srcOrigin[0], (int32_t)srcOrigin[1], (int32_t)srcOrigin[2], 0};
|
||||
if ((gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
|
||||
dev().settings().gfx10Plus_) {
|
||||
if (gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
||||
srcOrg[3] = 1;
|
||||
}
|
||||
setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
|
||||
|
||||
// Program destination origin
|
||||
int32_t dstOrg[4] = {(int32_t)dstOrigin[0], (int32_t)dstOrigin[1], (int32_t)dstOrigin[2], 0};
|
||||
if ((gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
|
||||
dev().settings().gfx10Plus_) {
|
||||
if (gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
||||
dstOrg[3] = 1;
|
||||
}
|
||||
setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg);
|
||||
@@ -2329,7 +2327,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
|
||||
Memory* memView = &gpuMem(memory);
|
||||
amd::Image::Format newFormat(gpuMem(memory).owner()->asImage()->getImageFormat());
|
||||
bool swapLayer =
|
||||
(memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
|
||||
memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
|
||||
// Program the kernels workload depending on the fill dimensions
|
||||
fillType = FillImage;
|
||||
|
||||
@@ -667,27 +667,6 @@ static constexpr std::array<std::pair<int, int>, 140> gfx10BlockIdPal = {{
|
||||
|
||||
void PerfCounter::convertInfo() {
|
||||
switch (dev().ipLevel()) {
|
||||
case Pal::GfxIpLevel::GfxIp7:
|
||||
if (info_.blockIndex_ < ciBlockIdOrcaToPal.size()) {
|
||||
auto p = ciBlockIdOrcaToPal[info_.blockIndex_];
|
||||
info_.blockIndex_ = std::get<0>(p);
|
||||
info_.counterIndex_ = std::get<1>(p);
|
||||
}
|
||||
break;
|
||||
case Pal::GfxIpLevel::GfxIp8:
|
||||
if (info_.blockIndex_ < viBlockIdOrcaToPal.size()) {
|
||||
auto p = viBlockIdOrcaToPal[info_.blockIndex_];
|
||||
info_.blockIndex_ = std::get<0>(p);
|
||||
info_.counterIndex_ = std::get<1>(p);
|
||||
}
|
||||
break;
|
||||
case Pal::GfxIpLevel::GfxIp9:
|
||||
if (info_.blockIndex_ < gfx9BlockIdPal.size()) {
|
||||
auto p = gfx9BlockIdPal[info_.blockIndex_];
|
||||
info_.blockIndex_ = std::get<0>(p);
|
||||
info_.counterIndex_ = std::get<1>(p);
|
||||
}
|
||||
break;
|
||||
case Pal::GfxIpLevel::GfxIp10_1:
|
||||
case Pal::GfxIpLevel::GfxIp10_3:
|
||||
case Pal::GfxIpLevel::GfxIp11_0:
|
||||
|
||||
@@ -84,21 +84,6 @@ struct PalDevice {
|
||||
|
||||
static constexpr PalDevice supportedPalDevices[] = {
|
||||
// GFX Version PAL GFX IP Level PAL Name PAL ASIC Revision
|
||||
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo},
|
||||
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol},
|
||||
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland},
|
||||
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Tonga", Pal::AsicRevision::Tonga}, // Also Tongapro (generated code is for Tonga)
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Fiji", Pal::AsicRevision::Fiji},
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Ellesmere", Pal::AsicRevision::Polaris10}, // Ellesmere
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Baffin", Pal::AsicRevision::Polaris11}, // Baffin
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris12}, // Lexa
|
||||
{8, 1, 0, Pal::GfxIpLevel::GfxIp8_1, "Stoney", Pal::AsicRevision::Stoney},
|
||||
{9, 0, 0, Pal::GfxIpLevel::GfxIp9, "gfx900", Pal::AsicRevision::Vega10},
|
||||
{9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven},
|
||||
{9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12},
|
||||
{9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20},
|
||||
{9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven2},
|
||||
{9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir},
|
||||
{10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10},
|
||||
{10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12},
|
||||
{10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14},
|
||||
@@ -391,10 +376,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
if (settings().checkExtension(ClKhrFp64)) {
|
||||
info_.doubleFPConfig_ = info_.singleFPConfig_ | CL_FP_DENORM;
|
||||
}
|
||||
|
||||
if (settings().reportFMA_) {
|
||||
info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
|
||||
}
|
||||
info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
|
||||
|
||||
if (settings().checkExtension(ClKhrFp16)) {
|
||||
info_.halfFPConfig_ = info_.singleFPConfig_;
|
||||
@@ -587,7 +569,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
if (settings().svmFineGrainSystem_) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
|
||||
}
|
||||
if (amd::IS_HIP && ipLevel_ >= Pal::GfxIpLevel::GfxIp9) {
|
||||
if (amd::IS_HIP) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
|
||||
|
||||
@@ -1043,12 +1043,7 @@ bool Resource::CreateInterop(CreateParams* params) {
|
||||
//! and OGL decompresses 24bit DEPTH into D24S8 for OGL compatibility
|
||||
if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
|
||||
(desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
|
||||
if (dev().settings().gfx10Plus_) {
|
||||
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
|
||||
} else {
|
||||
hwState_[1] &= ~0x3c000000;
|
||||
hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
|
||||
}
|
||||
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
|
||||
}
|
||||
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
||||
@@ -1253,7 +1248,7 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear
|
||||
|
||||
// Force remote allocation if it was requested in the settings
|
||||
if (dev().settings().remoteAlloc_ && ((memoryType() == Local) || (memoryType() == Persistent))) {
|
||||
if (dev().settings().apuSystem_ && dev().settings().viPlus_) {
|
||||
if (dev().settings().apuSystem_) {
|
||||
desc_.type_ = Remote;
|
||||
} else {
|
||||
desc_.type_ = RemoteUSWC;
|
||||
@@ -1512,7 +1507,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
// Make sure linear pitch in bytes is 4 bytes aligned
|
||||
if (((gpuMemoryRowPitch % 4) != 0) ||
|
||||
// another DRM restriction... SI has 4 pixels
|
||||
(gpuMemoryOffset % 4 != 0) || (dev().settings().sdamPageFaultWar_ && (imageOffsetx != 0))) {
|
||||
(gpuMemoryOffset % 4 != 0) || (imageOffsetx != 0)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,10 +67,6 @@ Settings::Settings() {
|
||||
pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE)
|
||||
? defaultMinXferSize * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi;
|
||||
|
||||
// Disable FP_FAST_FMA defines by default
|
||||
reportFMAF_ = false;
|
||||
reportFMA_ = false;
|
||||
|
||||
// GPU device by default
|
||||
apuSystem_ = false;
|
||||
|
||||
@@ -93,11 +89,6 @@ Settings::Settings() {
|
||||
// Use image DMA if requested
|
||||
imageDMA_ = GPU_IMAGE_DMA;
|
||||
|
||||
// Disable ASIC specific features by default
|
||||
viPlus_ = false;
|
||||
aiPlus_ = false;
|
||||
gfx10Plus_ = false;
|
||||
|
||||
// Number of compute rings.
|
||||
numComputeRings_ = 0;
|
||||
|
||||
@@ -116,9 +107,6 @@ Settings::Settings() {
|
||||
// Don't support Denormals for single precision by default
|
||||
singleFpDenorm_ = false;
|
||||
|
||||
// Disable SDMA workaround by default
|
||||
sdamPageFaultWar_ = false;
|
||||
|
||||
// SQTT buffer size in bytes
|
||||
rgpSqttDispCount_ = PAL_RGP_DISP_COUNT;
|
||||
rgpSqttWaitIdle_ = true;
|
||||
@@ -201,8 +189,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
case Pal::AsicRevision::Navi14:
|
||||
case Pal::AsicRevision::Navi12:
|
||||
case Pal::AsicRevision::Navi10:
|
||||
case Pal::AsicRevision::Navi10_A0:
|
||||
gfx10Plus_ = true;
|
||||
useLightning_ = GPU_ENABLE_LC;
|
||||
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
|
||||
if (useLightning_) {
|
||||
@@ -219,51 +205,14 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
// GFX10.1 HW doesn't support custom pitch. Enable double copy workaround
|
||||
imageBufferWar_ = GPU_IMAGE_BUFFER_WAR;
|
||||
}
|
||||
// Fall through to AI (gfx9) ...
|
||||
case Pal::AsicRevision::Vega20:
|
||||
// Enable HW P2P path for Vega20+. Runtime still relies on KMD/PAL for support
|
||||
enableHwP2P_ = true;
|
||||
case Pal::AsicRevision::Vega12:
|
||||
case Pal::AsicRevision::Vega10:
|
||||
case Pal::AsicRevision::Raven:
|
||||
case Pal::AsicRevision::Raven2:
|
||||
case Pal::AsicRevision::Renoir:
|
||||
aiPlus_ = true;
|
||||
enableCoopGroups_ = IS_LINUX;
|
||||
enableCoopMultiDeviceGroups_ = IS_LINUX;
|
||||
if (useLightning_) {
|
||||
singleFpDenorm_ = true;
|
||||
}
|
||||
// Fall through to VI ...
|
||||
case Pal::AsicRevision::Carrizo:
|
||||
case Pal::AsicRevision::Bristol:
|
||||
case Pal::AsicRevision::Stoney:
|
||||
case Pal::AsicRevision::Iceland:
|
||||
case Pal::AsicRevision::Tonga:
|
||||
case Pal::AsicRevision::Fiji:
|
||||
case Pal::AsicRevision::Polaris10:
|
||||
case Pal::AsicRevision::Polaris11:
|
||||
case Pal::AsicRevision::Polaris12:
|
||||
// Keep this false even though we have support
|
||||
// singleFpDenorm_ = true;
|
||||
viPlus_ = true;
|
||||
// SDMA may have memory access outside of
|
||||
// the valid buffer range and cause a page fault
|
||||
sdamPageFaultWar_ = true;
|
||||
enableExtension(ClKhrFp16);
|
||||
// Fall through to CI ...
|
||||
case Pal::AsicRevision::Kalindi:
|
||||
case Pal::AsicRevision::Godavari:
|
||||
case Pal::AsicRevision::Spectre:
|
||||
case Pal::AsicRevision::Spooky:
|
||||
case Pal::AsicRevision::Bonaire:
|
||||
case Pal::AsicRevision::Hawaii:
|
||||
case Pal::AsicRevision::HawaiiPro:
|
||||
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
|
||||
reportFMAF_ = false;
|
||||
if ((palProp.revision == Pal::AsicRevision::Hawaii) || aiPlus_) {
|
||||
reportFMAF_ = true;
|
||||
}
|
||||
// Cache line size is 64 bytes
|
||||
cacheLineSize_ = 64;
|
||||
// L1 cache size is 16KB
|
||||
@@ -293,13 +242,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
} else {
|
||||
maxAllocSize_ = 3ULL * Gi;
|
||||
}
|
||||
|
||||
// Note: More than 4 command buffers may cause a HW hang
|
||||
// with HWSC on pre-gfx9 devices in OCLPerfKernelArguments
|
||||
if (!aiPlus_) {
|
||||
maxCmdBuffers_ = 4;
|
||||
}
|
||||
|
||||
supportRA_ = false;
|
||||
numMemDependencies_ = GPU_NUM_MEM_DEPENDENCY;
|
||||
break;
|
||||
@@ -345,7 +287,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
|
||||
if (hwLDSSize_ == 0) {
|
||||
// Use hardcoded values for now, since PAL properties aren't available with offline devices
|
||||
hwLDSSize_ = (IS_LINUX || amd::IS_HIP || gfx10Plus_) ? 64 * Ki: 32 * Ki;
|
||||
hwLDSSize_ = (IS_LINUX || amd::IS_HIP) ? 64 * Ki: 32 * Ki;
|
||||
}
|
||||
|
||||
imageSupport_ = true;
|
||||
@@ -357,10 +299,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
// HW doesn't support untiled image writes
|
||||
// hostMemDirectAccess_ |= HostMemImage;
|
||||
|
||||
// Report FP_FAST_FMA define if double precision HW
|
||||
reportFMA_ = true;
|
||||
reportFMAF_ = true;
|
||||
|
||||
if (doublePrecision_) {
|
||||
// Enable KHR double precision extension
|
||||
enableExtension(ClKhrFp64);
|
||||
|
||||
@@ -65,21 +65,17 @@ class Settings : public device::Settings {
|
||||
uint use64BitPtr_ : 1; //!< Use 64bit pointers on GPU
|
||||
uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU
|
||||
uint imageDMA_ : 1; //!< Enable direct image DMA transfers
|
||||
uint viPlus_ : 1; //!< VI and post VI features
|
||||
uint aiPlus_ : 1; //!< AI and post AI features
|
||||
uint gfx10Plus_ : 1; //!< gfx10 and post gfx10 features
|
||||
uint threadTraceEnable_ : 1; //!< Thread trace enable
|
||||
uint svmAtomics_ : 1; //!< SVM device atomics
|
||||
uint svmFineGrainSystem_ : 1; //!< SVM fine grain system support
|
||||
uint useDeviceQueue_ : 1; //!< Submit to separate device queue
|
||||
uint sdamPageFaultWar_ : 1; //!< SDMA page fault workaround
|
||||
uint rgpSqttWaitIdle_ : 1; //!< Wait for idle after SQTT trace
|
||||
uint rgpSqttForceDisable_ : 1; //!< Disables SQTT
|
||||
uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing
|
||||
uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10
|
||||
uint disableSdma_ : 1; //!< Disable SDMA support
|
||||
uint alwaysResident_ : 1; //!< Make resources resident at allocation time
|
||||
uint reserved_ : 10;
|
||||
uint reserved_ : 13;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
@@ -36,7 +36,7 @@ TimeStamp::~TimeStamp() {}
|
||||
|
||||
void TimeStamp::begin() {
|
||||
if (!flags_.beginIssued_) {
|
||||
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
|
||||
gpu().iCmd()->CmdWriteTimestamp(Pal::PipelineStageFlag::PipelineStageBottomOfPipe, *iMem_,
|
||||
memOffset_ + CommandStartTime * sizeof(uint64_t));
|
||||
flags_.beginIssued_ = true;
|
||||
}
|
||||
@@ -44,7 +44,7 @@ void TimeStamp::begin() {
|
||||
|
||||
void TimeStamp::end() {
|
||||
CondLog(!flags_.beginIssued_, "We didn't issue a begin operation!");
|
||||
gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_,
|
||||
gpu().iCmd()->CmdWriteTimestamp(Pal::PipelineStageFlag::PipelineStageBottomOfPipe, *iMem_,
|
||||
memOffset_ + CommandEndTime * sizeof(uint64_t));
|
||||
flags_.endIssued_ = true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user