Files
rocm-systems/rocclr/device/gpu/gslbe/src/rt/GSLDevice.cpp
T
Satyanvesh Dittakavi e20dd61932 SWDEV-306939 - Fix vdi errors/warnings by CppCheck
Change-Id: I56d910f8363787f1050d5d7e8064ed553c5827fd
2022-01-12 00:22:16 -05:00

1663 řádky
55 KiB
C++

/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "gsl_ctx.h"
#include "GSLDevice.h"
#include "EventQueue.h"
#include "ini_export.h"
#include "GSLContext.h"
#include "cm_if.h"
#include "utils/flags.hpp"
#include "query/QueryObject.h"
#include "memory/MemObject.h"
#include "sampler/SamplerObject.h"
#include "texture/TextureResourceObject.h"
#include "../iol/iodrv_if.h"
extern gslMemObjectAttribTiling g_CALBETiling_Tiled;
/// Destroys the given GSL memory object through the device context (m_cs).
///
/// @param mem  Memory object handed to gsCtx::destroyMemObject().
void
CALGSLDevice::resFree(gslMemObject mem) const
{
    //! @note: GSL device isn't thread safe, so the call is made under the
    //! device-ops lock.
    amd::ScopedLock deviceLock(gslDeviceOps());

    m_cs->destroyMemObject(mem);
}
void CALGSLDevice::Initialize()
{
m_adp = 0;
m_cs = 0;
m_rs = 0;
m_textureResource = 0;
m_textureSampler = 0;
m_target = (CALtarget)0xffffffff;
m_srcDRMDMAMem = NULL ;
m_dstDRMDMAMem = NULL ;
m_flags = 0;
m_nativeDisplayHandle = NULL;
m_deviceMode = GSL_DEVICE_MODE_GFX;
m_gpuIndex = 0;
m_chainIndex = 0;
m_forcedComputeEngineID = GSL_ENGINEID_INVALID;
m_vpuMask = 1;
gslDeviceOps_ = NULL;
}
/// Default constructor: puts the members handled by Initialize() into their
/// reset state. No adapter is opened here; that is done by open().
CALGSLDevice::CALGSLDevice()
{
    this->Initialize();
}
/// Destructor: frees the power-parameter tables attached to the static
/// runtime config and deletes the device-ops lock.
CALGSLDevice::~CALGSLDevice()
{
    assert(m_adp == 0); /// CALBE client must call close explicitly. Check that here

    // Tables filled in by parsePowerParam() during open(); each one is
    // released with osMemFree() when it is flagged as holding a value.
    gslRuntimeConfigUint32pValue* const powerTables[] = {
        &m_scfg.sclkActivityThresholdPtr,
        &m_scfg.sclkDownHysteresisPtr,
        &m_scfg.sclkUpHysteresisPtr,
        &m_scfg.packagePowerLimitPtr,
        &m_scfg.mclkActivityThresholdPtr,
        &m_scfg.mclkUpHysteresisPtr,
        &m_scfg.mclkDownHysteresisPtr,
    };
    const size_t numPowerTables = sizeof(powerTables) / sizeof(powerTables[0]);

    for (size_t idx = 0; idx < numPowerTables; ++idx) {
        if (powerTables[idx]->hasValue) {
            osMemFree(powerTables[idx]->value);
        }
    }

    delete gslDeviceOps_;
}
/// @return The adapter pointer currently held by the device (m_adp); this is
///         0 whenever no adapter is open.
gsl::gsAdaptor*
CALGSLDevice::getNative() const
{
    gsl::gsAdaptor* const adaptor = m_adp;
    return adaptor;
}
/// @return The maximum texture size cached in m_maxtexturesize by
///         SetupContext(), converted to uint32.
uint32
CALGSLDevice::getMaxTextureSize() const
{
    const uint32 maxSize = static_cast<uint32>(m_maxtexturesize);
    return maxSize;
}
/// Fills the cached device attributes (m_attribs) from the given context and
/// from the adapter's ASIC info.
///
/// @param cs  Context used to query memory sizes, clocks, topology and the
///            board/driver strings. m_adp must already be open because the
///            ASIC info is read from it.
void
CALGSLDevice::getAttribs_int(gsl::gsCtx* cs)
{
    m_attribs.struct_size = sizeof(CALdeviceattribs);
    m_attribs.target = m_target;

    // All heap sizes are reported in megabytes.
    gslMemInfo memInfo;
    cs->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
    m_attribs.localRAM = (uint32)((memInfo.cardMemTotalBytes + memInfo.cardExtMemTotalBytes) / (1024 * 1024));
    m_attribs.uncachedRemoteRAM = (uint32)(memInfo.agpMemTotalBytes / (1024 * 1024));
    m_attribs.cachedRemoteRAM = (uint32)(memInfo.agpMemTotalCacheableBytes / (1024 * 1024));
    m_attribs.totalVisibleHeap = (uint32) (memInfo.cardMemTotalBytes / (1024 * 1024));
    m_attribs.totalInvisibleHeap = (uint32) (memInfo.cardExtMemTotalBytes / (1024 * 1024));
    m_attribs.totalDirectHeap = (uint32) (memInfo.directTotalBytes / (1024 * 1024));
    m_attribs.totalCoherentHeap = (uint32) (memInfo.coherentTotalBytes / (1024 * 1024));
    m_attribs.totalRemoteSharedHeap = (uint32) (memInfo.sharedTotalBytes / (1024 * 1024));
    m_attribs.totalCachedRemoteSharedHeap = (uint32) (memInfo.sharedCacheableTotalBytes / (1024 * 1024));
    m_attribs.totalSDIHeap = (uint32) (memInfo.busAddressableTotalBytes / (1024 * 1024));

    // Clocks and shader topology.
    m_attribs.engineClock = cs->getMaxEngineClock();
    m_attribs.memoryClock = cs->getMaxMemoryClock();
    m_attribs.numberOfSIMD = cs->getNumSIMD();
    m_attribs.numberOfCUsperShaderArray = cs->getNumCUsPerShaderArray();
    m_attribs.wavefrontSize = cs->getWaveFrontSize();
    m_attribs.doublePrecision = cs->getIsDoublePrecisionSupported();
    m_attribs.memBusWidth = cs->getVramBitWidth();
    m_attribs.numMemBanks = cs->getVramBanks();
    m_attribs.isWorkstation = cs->getIsWorkstation();
    m_attribs.numberOfShaderEngines = cs->getNumShaderEngines();
    m_attribs.pciTopologyInformation = m_adp->getLocationId();

    // strncpy() does not write a terminator when the source fills the whole
    // count, so terminate explicitly. A NULL source yields an empty string
    // instead of being dereferenced.
    // NOTE(review): assumes boardName / driverStore are char arrays of
    // CAL_ASIC_INFO_MAX_LEN / CAL_DRIVER_STORE_MAX_LEN elements - confirm
    // against the CALdeviceattribs declaration.
    const uint8* boardName = cs->getString(GSL_GS_RENDERER);
    if (boardName != NULL) {
        ::strncpy(m_attribs.boardName, (const char*)boardName, CAL_ASIC_INFO_MAX_LEN * sizeof(char) - 1);
        m_attribs.boardName[CAL_ASIC_INFO_MAX_LEN - 1] = '\0';
    } else {
        m_attribs.boardName[0] = '\0';
    }

    const uint8* driverStore = cs->getString(GSL_GS_DRIVER_STORE_PATH);
    if (driverStore != NULL) {
        ::strncpy(m_attribs.driverStore, (const char*)driverStore, CAL_DRIVER_STORE_MAX_LEN * sizeof(char) - 1);
        m_attribs.driverStore[CAL_DRIVER_STORE_MAX_LEN - 1] = '\0';
    } else {
        m_attribs.driverStore[0] = '\0';
    }

    // Timer information.
    m_attribs.counterFreq = cs->getCounterFreq();
    m_attribs.nanoSecondsPerTick = 1000000000.0 / cs->getCounterFreq();
    m_attribs.longIdleDetect = cs->getLongIdleDetect();

    // Capabilities taken from the adapter's ASIC info.
    m_attribs.svmAtomics = m_adp->pAsicInfo->svmAtomics;
    m_attribs.vaStart = static_cast<CALuint64>(m_adp->pAsicInfo->vaStart);
    m_attribs.vaEnd = static_cast<CALuint64>(m_adp->pAsicInfo->vaEnd);
    m_attribs.numOfVpu = m_adp->pAsicInfo->numberOfVPU;
    m_attribs.isOpenCL200Device = m_adp->pAsicInfo->bIsOpen2Device;
    m_attribs.isSVMFineGrainSystem = m_adp->pAsicInfo->svmFineGrainSystem;
    m_attribs.isWDDM2Enabled = m_adp->pAsicInfo->vaAvailable && m_adp->pAsicInfo->bNoVATranslation;

    // Remaining per-ASIC identification.
    m_attribs.maxRTCUs = cs->getMaxRTCUs();
    m_attribs.asicRevision = cs->getChipRev();
    m_attribs.pcieDeviceID = cs->getAsicDID();
    m_attribs.pcieRevisionID = cs->getPciRevID();
}
// Reads one unsigned integer from the front of a comma separated list such as
//     0x24, 0x9235, 0x123,...
// The number is converted with strtoul() using base 0, so decimal, octal and
// hexadecimal spellings are accepted.
//
// pStr   (in)  Current position in the list; may be NULL.
// pValue (out) Receives the converted number when both pStr and pValue are
//              non-NULL; otherwise it is left untouched.
//
// Return  Pointer to the character following the next comma, or NULL when
//         pStr is NULL or contains no further comma.
static char* getNextValue(char* pStr, uint32* pValue)
{
    if (pStr == NULL) {
        return NULL;
    }

    if (pValue != NULL) {
        *pValue = strtoul(pStr, NULL, 0);
    }

    char* const comma = strchr(pStr, ',');
    return (comma != NULL) ? (comma + 1) : NULL;
}
// Parse string element to extract <deviceid, revisionid, clientid, value> 4-tuples of
// comma-separated uint values and allocate and write those values to output array.
//
// Example input string:
//     0x67A0,0x00,0x06,0x28,0x67A1,0x00,0x06,0x28
//
// element (in)     The input string; NULL or empty yields no tuples.
// values  (in/out) Must be NULL on entry. On success it receives an array of
//                  4 * <return value> uint32 entries allocated with osMemAlloc().
//
// Only complete tuples are counted and copied. Anything after the last
// complete tuple (a trailing comma or a partial tuple) is ignored; the copy
// pass used to keep writing such leftovers past the end of the allocation.
//
// Return the number of 4-tuple entries successfully parsed.
static uint32 parse4TupleValues(const char* element, uint32*& values)
{
    // Early out if something else (e.g oglPanel) has already set this value
    if (values != NULL) {
        return 0;
    }
    if (element == NULL) {
        return 0;
    }
    const size_t strSize = strlen(element);
    if (0 == strSize) {
        return 0;
    }

    // getNextValue() takes a mutable pointer, so work on a private copy.
    char* const str = (char*)alloca(sizeof(char) * (strSize + 1));
    memcpy(str, element, strSize);
    str[strSize] = '\0';

    // Pass 1: count complete tuples. A tuple is complete once a separator has
    // been found after each of its first three fields.
    uint32 numTuples = 0;
    char* pCurStr = str;
    while (pCurStr != NULL) {
        for (int field = 0; (field < 3) && (pCurStr != NULL); ++field) {
            pCurStr = getNextValue(pCurStr, NULL);
        }
        if (pCurStr != NULL) {
            pCurStr = getNextValue(pCurStr, NULL);
            numTuples++;
        }
    }
    if (numTuples == 0) {
        return 0;
    }

    // Allocate
    values = (uint32*)osMemAlloc(numTuples * sizeof(uint32) * 4);
    if (!values) {
        return 0;
    }

    // Pass 2: copy exactly the values that were counted. Pass 1 guarantees
    // the cursor is non-NULL for every one of them, so each slot is written.
    const uint32 numValues = numTuples * 4;
    pCurStr = str;
    for (uint32 i = 0; i < numValues; ++i) {
        pCurStr = getNextValue(pCurStr, &values[i]);
    }
    return numTuples;
}
static void
CALGSLDevice::parsePowerParam(const char* element, gslRuntimeConfigUint32Value& pwrCount, gslRuntimeConfigUint32pValue& pwrPointer)
{
uint32 count = 0;
uint32* values = NULL;
count = parse4TupleValues(element, values);
if (0 != count) {
pwrCount.hasValue = true;
pwrCount.value = count;
pwrPointer.hasValue = true;
pwrPointer.value = values;
}
}
/// Opens the GSL device for the given GPU index.
///
/// Builds the static/dynamic runtime configuration, opens the adapter, rejects
/// unsupported ASICs, caches the device attributes through a temporary
/// context, and then either closes the adapter again (lazy initialization) or
/// performs the full initialization immediately.
///
/// @param gpuIndex  Index of the GPU to open.
/// @param openData  Caller-supplied open parameters (power strings, OpenCL 1.2
///                  reporting, high-performance state).
/// @return true on success, false when the adapter cannot be opened or the
///         ASIC is not supported by this back-end.
bool
CALGSLDevice::open(uint32 gpuIndex, OpenParams& openData)
{
    // close() does not delete the lock, so keep the one from a previous
    // open() instead of allocating a new one and leaking the old.
    if (NULL == gslDeviceOps_) {
        gslDeviceOps_ = new amd::Monitor("GSL Device Ops Lock", true);
    }
    if (NULL == gslDeviceOps_) {
        return false;
    }

    unsigned int chainIndex = 0;
#ifdef ATI_OS_WIN
    m_gpuIndex = gpuIndex;
    m_usePerVPUAdapterModel = true;
    m_PerformLazyDeviceInit = true;
#else
    void * nativeHandle;
    gslDeviceMode deviceMode;
    gsAdaptor::getDeviceInitData(gpuIndex, &deviceMode, &chainIndex, &nativeHandle);
    m_nativeDisplayHandle = nativeHandle;
    m_deviceMode = deviceMode;
#endif
    m_chainIndex = chainIndex;
    m_vpuMask = 1 << chainIndex;

    //
    // CALBE is required to explicitly manage multiple opens and closes
    // assert on the condition for correct usage
    //
    assert(m_adp == 0);

    memset(&m_dcfg, 0, sizeof(m_dcfg));
    extern void getConfigFromFile(gslStaticRuntimeConfig &scfg, gslDynamicRuntimeConfig &dcfg);
    getConfigFromFile(m_scfg, m_dcfg);

    // Static runtime configuration.
    m_scfg.UsePerVPUAdapterModel.hasValue = true;
    m_scfg.UsePerVPUAdapterModel.value = m_usePerVPUAdapterModel;
    m_scfg.DX10SamplerResources.hasValue = true;
    m_scfg.DX10SamplerResources.value = true;
    m_scfg.vpuMask.hasValue = true;
    m_scfg.vpuMask.value = m_vpuMask;
    m_scfg.bEnableHighPerformanceState.hasValue = true;
    m_scfg.bEnableHighPerformanceState.value = openData.enableHighPerformanceState;
    m_scfg.bEnableReusableMemCache.hasValue = true;
    m_scfg.bEnableReusableMemCache.value = false;

    // Power-management tables supplied by the caller as 4-tuple strings.
    parsePowerParam(openData.sclkThreshold, m_scfg.sclkActivityThresholdCount, m_scfg.sclkActivityThresholdPtr);
    parsePowerParam(openData.downHysteresis, m_scfg.sclkDownHysteresisCount, m_scfg.sclkDownHysteresisPtr);
    parsePowerParam(openData.upHysteresis, m_scfg.sclkUpHysteresisCount, m_scfg.sclkUpHysteresisPtr);
    parsePowerParam(openData.powerLimit, m_scfg.packagePowerLimitCount, m_scfg.packagePowerLimitPtr);
    parsePowerParam(openData.mclkThreshold, m_scfg.mclkActivityThresholdCount, m_scfg.mclkActivityThresholdPtr);
    parsePowerParam(openData.mclkUpHyst, m_scfg.mclkUpHysteresisCount, m_scfg.mclkUpHysteresisPtr);
    parsePowerParam(openData.mclkDownHyst, m_scfg.mclkDownHysteresisCount, m_scfg.mclkDownHysteresisPtr);

    // Dynamic runtime configuration.
    m_dcfg.disableMarkUsedInCmdBuf.hasValue = true;
    m_dcfg.disableMarkUsedInCmdBuf.value = false;

    // Enable immediate memory release
    m_dcfg.immediateMemoryRelease.hasValue = true;
    m_dcfg.immediateMemoryRelease.value = true;

    m_dcfg.bEnableSvm.hasValue = true;
    m_dcfg.bEnableSvm.value = openData.reportAsOCL12Device ? false : OPENCL_MAJOR >= 2;

    m_dcfg.bEnableFlatAddressing.hasValue = true;
#if defined(ATI_BITS_32) && defined(ATI_OS_LINUX)
    m_dcfg.bEnableFlatAddressing.value = false;
#else
    m_dcfg.bEnableFlatAddressing.value = openData.reportAsOCL12Device ? false : (OPENCL_MAJOR >= 2);
#endif

    if (GPU_ENABLE_HW_DEBUG) {
        m_dcfg.nPatchDumpLevel.hasValue = true;
        m_dcfg.nPatchDumpLevel.value |= NPATCHDUMPLEVEL_BITS_HW_DEBUG;
    }

    //we can use environment variable CAL_ENABLE_ASYNC_DMA to force dma on or off when we need it
    char *s = NULL;
    if((s = getenv("CAL_ENABLE_ASYNC_DMA")))
    {
        m_dcfg.drmdmaMode.hasValue = true;
        m_dcfg.drmdmaMode.value = (atoi(s) == 0) ? GSL_CONFIG_DRMDMA_MODE_FORCE_OFF : GSL_CONFIG_DRMDMA_MODE_DEFAULT;
    }

    // Use GPU_USE_SYNC_OBJECTS to force syncobject on or off when we need it
    m_dcfg.syncObjectMode.hasValue = true;
    m_dcfg.syncObjectMode.value = (GPU_USE_SYNC_OBJECTS) ?
        GSL_CONFIG_SYNCOBJECT_MODE_ON : GSL_CONFIG_SYNCOBJECT_MODE_OFF;

    // Use OCL_SET_SVM_SIZE to set SVM size we need
    m_dcfg.ndevSVMSize.hasValue = true;
    m_dcfg.ndevSVMSize.value = OCL_SET_SVM_SIZE;

    // Use GPU_IFH_MODE to test with IFH mode enabled
    m_dcfg.DropFlush.hasValue = true;
    m_dcfg.DropFlush.value = (GPU_IFH_MODE == 1);

    // Enable TC compatible htile mode. It's HW feature for VI+ and controlled in HWL.
    // Depth interop doesn't support TC compatible htile mode, but OCL needs correct tiling setup.
    m_dcfg.bEnableTCCompatibleHtile.hasValue = true;
    m_dcfg.bEnableTCCompatibleHtile.value = true;

    int32 asic_id = 0;
    if (!SetupAdapter(asic_id))
    {
        return false;
    }

    // Disable gfx10+ ASICs in GSL
    if (asic_id >= GSL_ATIASIC_ID_NAVI10)
    {
        // SetupAdapter() succeeded, so the adapter is still open here;
        // release it before reporting the device as unsupported.
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
        return false;
    }

    if (!SetupContext(asic_id))
    {
        return false;
    }

    if (m_PerformLazyDeviceInit)
    {
        // close the adaptor
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
    }
    else
    {
        PerformFullInitialization();
    }
    return true;
}
/// Tears the device down: flushes the context, frees the DMA flush surfaces,
/// destroys the helper objects and the context, and finally closes the
/// adapter. Steps whose object does not exist are skipped.
void
CALGSLDevice::close()
{
    m_fullInitialized = false;

    // Submit outstanding work before anything is destroyed.
    if (NULL != m_cs)
    {
        m_cs->Flush();
    }

    // Release the 1x1 DMA flush surfaces, destination first.
    if (m_dstDRMDMAMem)
    {
        resFree(m_dstDRMDMAMem);
        m_dstDRMDMAMem = NULL;
    }
    if (m_srcDRMDMAMem)
    {
        resFree(m_srcDRMDMAMem);
        m_srcDRMDMAMem = NULL;
    }

    // Destroy everything owned by the context, then the context itself.
    if (NULL != m_cs)
    {
        m_cs->destroyTextureResource(m_textureResource);
        m_cs->destroySampler(m_textureSampler);

        m_cs->destroyQuery(m_mapQuery);
        m_cs->destroyQuery(m_mapDMAQuery);

        m_cs->setRenderState(0);
        m_cs->destroyRenderState(m_rs);
        m_cs->destroySubAllocDesc();
        m_rs = 0;

        m_adp->deleteContext(m_cs);
        m_cs = 0;
    }

    if (NULL != m_adp)
    {
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
    }
}
/// Opens the adapter if it is not open yet.
///
/// @param ValidateOnly  When true and m_initLite is set, the adapter is
///                      opened in "lite" mode; otherwise a regular open is
///                      requested.
void
CALGSLDevice::PerformAdapterInitialization(bool ValidateOnly)
{
    // Win10 initialization is more exhaustive.
    // @ToDo Check if Win7 can be simplified as well
    // If we end up creating a paging fence on Win10 in IOL the slave may not go idle
    const bool useLiteInit = ValidateOnly && m_initLite;
    PerformAdapterInitialization_int(useLiteInit);
}
/// Closes an adapter that was opened only for validation.
///
/// Nothing happens unless lite initialization is in use, the caller asked for
/// validation only, and the device has not been fully initialized.
///
/// @param ValidateOnly  True when the matching PerformAdapterInitialization()
///                      call was made for validation only.
void CALGSLDevice::CloseInitializedAdapter(bool ValidateOnly)
{
    // @ToDo Check if Win7 can be simplified as well
    // The adapter shouldnt be destroyed if its created when bindExternalDevice is called
    // during context creation
    const bool closeNow = m_initLite && ValidateOnly && !m_fullInitialized;
    if (!closeNow)
    {
        return;
    }

    //! @note: GSL device isn't thread safe
    amd::ScopedLock deviceLock(gslDeviceOps());

    // close the adaptor
    gsAdaptor::closeAdaptor(m_adp);
    m_adp = 0;
}
/// Const entry point for the (possibly deferred) full device initialization.
/// Takes the device-ops lock and forwards to PerformFullInitialization_int(),
/// which needs a mutable object; hence the const_cast.
void
CALGSLDevice::PerformFullInitialization() const
{
    //! @note: GSL device isn't thread safe
    amd::ScopedLock deviceLock(gslDeviceOps());

    const_cast<CALGSLDevice*>(this)->PerformFullInitialization_int();
}
/// Opens the adapter and derives the engine configuration from it.
///
/// @param asic_id  (out) ASIC id reported by the adapter; only written when
///                 the adapter could be opened.
/// @return true when the adapter is open and usable. On every failure path
///         the adapter is closed again and m_adp is reset to 0.
bool
CALGSLDevice::SetupAdapter(int32 &asic_id)
{
#ifdef ATI_OS_WIN
    m_initLite = true;
#endif
    PerformAdapterInitialization_int(m_initLite);
    if (m_adp == 0)
    {
        return false;
    }

    asic_id = m_adp->getAsicID();
    if (asic_id < GSL_ATIASIC_ID_TAHITI_P)
    {
        LogPrintfInfo("Unsupported legacy ASIC(%d) found!\n", asic_id);
        // close the adaptor
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
        return false;
    }

    const bool hasDmaEngine = m_adp->findDMAEngine();
    const bool hasComputeEngine = m_adp->findComputeEngine();
    m_canDMA = hasDmaEngine;
    m_adp->queryAvailableEngines(&m_nEngines, m_engines);

    //Disable DRMDMA on CFX mode for linux on all GPUs.
#ifdef ATI_OS_LINUX
    if (m_adp->getNumLinkedVPUs() > 1)
    {
        m_canDMA = ATIGL_FALSE;
    }
#endif

    //The sDMA L2T reading invalid address bug is fixed starting from Vega10,
    //and page-fault is not enabled for pre-VI, so we need to workaround
    //the bug for ASICs in between.
    m_isSDMAL2TConstrained = (asic_id < GSL_ATIASIC_ID_GREENLAND) &&
                             (asic_id >= GSL_ATIASIC_ID_BONAIRE_M);

    // ASICs older than Tahiti were rejected above, so every remaining ASIC
    // defaults to using a compute ring.
    m_computeRing = true;
    if (!flagIsDefault(GPU_NUM_COMPUTE_RINGS))
    {
        m_computeRing = (GPU_NUM_COMPUTE_RINGS != 0);
    }

    // Optionally pin the compute ring to a specific engine, provided that
    // engine is among the ones the adapter reported.
    if ((!flagIsDefault(GPU_SELECT_COMPUTE_RINGS_ID)) && (m_computeRing))
    {
        const gslEngineID engineID =
            static_cast<gslEngineID>(GPU_SELECT_COMPUTE_RINGS_ID + GSL_ENGINEID_COMPUTE0);
        if ((engineID >= GSL_ENGINEID_COMPUTE0) && (engineID <= GSL_ENGINEID_COMPUTE7))
        {
            for (uint i = 0; i < m_nEngines; ++i) {
                if (m_engines[i].id == engineID){
                    m_isComputeRingIDForced = true;
                    m_forcedComputeEngineID = engineID;
                    break;
                }
            }
        }
    }

    if (m_computeRing && !hasComputeEngine)
    {
        // A compute ring is required but the adapter has none. Close the
        // adapter like the legacy-ASIC failure path does, so a failed setup
        // never leaves it open.
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
        return false;
    }
    return true;
}
/// Creates a temporary compute context to identify the ASIC and cache its
/// attributes, then deletes that context again.
///
/// @param asic_id  ASIC id obtained from SetupAdapter(); selects the CAL
///                 target and the ELF machine id. It is not modified.
/// @return true on success. On failure (no context, or an ASIC id without a
///         mapping) the adapter is closed and m_adp is reset to 0.
bool
CALGSLDevice::SetupContext(int32 &asic_id)
{
    gsl::gsCtx* temp_cs = m_adp->createComputeContext(m_computeRing ? (m_isComputeRingIDForced ? m_forcedComputeEngineID :
                                                      getFirstAvailableComputeEngineID()) : GSL_ENGINEID_3DCOMPUTE0,
                                                      m_canDMA ? GSL_ENGINEID_DRMDMA0 : GSL_ENGINEID_INVALID, m_initLite);
    // Defensive: do not dereference a context that could not be created.
    // Clean up the same way the unsupported-ASIC path below does.
    if (temp_cs == 0)
    {
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
        return false;
    }
    temp_cs->getMainSubCtx()->setVPUMask(m_vpuMask);

    m_maxtexturesize = temp_cs->getMaxTextureSize();

    // Map the ASIC id onto the CAL target and the ELF machine id.
    switch (asic_id)
    {
    case GSL_ATIASIC_ID_TAHITI_P:
        m_target = CAL_TARGET_TAHITI;
        m_elfmachine = ED_ATI_CAL_MACHINE_TAHITI_ISA;
        break;
    case GSL_ATIASIC_ID_PITCAIRN_PM:
        m_target = CAL_TARGET_PITCAIRN;
        m_elfmachine = ED_ATI_CAL_MACHINE_PITCAIRN_ISA;
        break;
    case GSL_ATIASIC_ID_CAPEVERDE_M:
        m_target = CAL_TARGET_CAPEVERDE;
        m_elfmachine = ED_ATI_CAL_MACHINE_CAPEVERDE_ISA;
        break;
    case GSL_ATIASIC_ID_OLAND_M:
        m_target = CAL_TARGET_OLAND;
        m_elfmachine = ED_ATI_CAL_MACHINE_OLAND_ISA;
        break;
    case GSL_ATIASIC_ID_HAINAN_M:
        m_target = CAL_TARGET_HAINAN;
        m_elfmachine = ED_ATI_CAL_MACHINE_HAINAN_ISA;
        break;
    case GSL_ATIASIC_ID_BONAIRE_M:
        m_target = CAL_TARGET_BONAIRE;
        m_elfmachine = ED_ATI_CAL_MACHINE_BONAIRE_ISA;
        break;
    case GSL_ATIASIC_ID_SPECTRE:
        m_target = CAL_TARGET_SPECTRE;
        m_elfmachine = ED_ATI_CAL_MACHINE_SPECTRE_ISA;
        break;
    case GSL_ATIASIC_ID_SPOOKY:
        m_target = CAL_TARGET_SPOOKY;
        m_elfmachine = ED_ATI_CAL_MACHINE_SPOOKY_ISA;
        break;
    case GSL_ATIASIC_ID_KALINDI:
        m_target = CAL_TARGET_KALINDI;
        m_elfmachine = ED_ATI_CAL_MACHINE_KALINDI_ISA;
        break;
    case GSL_ATIASIC_ID_HAWAII_P:
        m_target = CAL_TARGET_HAWAII;
        m_elfmachine = ED_ATI_CAL_MACHINE_HAWAII_ISA;
        break;
    case GSL_ATIASIC_ID_ICELAND_M:
        m_target = CAL_TARGET_ICELAND;
        m_elfmachine = ED_ATI_CAL_MACHINE_ICELAND_ISA;
        break;
    case GSL_ATIASIC_ID_TONGA_P:
        m_target = CAL_TARGET_TONGA;
        m_elfmachine = ED_ATI_CAL_MACHINE_TONGA_ISA;
        break;
    case GSL_ATIASIC_ID_GODAVARI:
        m_target = CAL_TARGET_GODAVARI;
        m_elfmachine = ED_ATI_CAL_MACHINE_GODAVARI_ISA;
        break;
    case GSL_ATIASIC_ID_FIJI_P:
        m_target = CAL_TARGET_FIJI;
        m_elfmachine = ED_ATI_CAL_MACHINE_FIJI_ISA;
        break;
    case GSL_ATIASIC_ID_CARRIZO:
        m_target = CAL_TARGET_CARRIZO;
        m_elfmachine = ED_ATI_CAL_MACHINE_CARRIZO_ISA;
        break;
    case GSL_ATIASIC_ID_ELLESMERE:
        m_target = CAL_TARGET_ELLESMERE;
        m_elfmachine = ED_ATI_CAL_MACHINE_ELLESMERE_ISA;
        break;
    case GSL_ATIASIC_ID_BAFFIN:
        m_target = CAL_TARGET_BAFFIN;
        m_elfmachine = ED_ATI_CAL_MACHINE_BAFFIN_ISA;
        break;
    case GSL_ATIASIC_ID_GREENLAND:
        m_target = CAL_TARGET_GREENLAND;
        m_elfmachine = ED_ATI_CAL_MACHINE_GREENLAND_ISA;
        break;
    case GSL_ATIASIC_ID_STONEY:
        m_target = CAL_TARGET_STONEY;
        m_elfmachine = ED_ATI_CAL_MACHINE_STONEY_ISA;
        break;
    case GSL_ATIASIC_ID_LEXA:
        m_target = CAL_TARGET_LEXA;
        m_elfmachine = ED_ATI_CAL_MACHINE_LEXA_ISA;
        break;
    case GSL_ATIASIC_ID_RAVEN:
        m_target = CAL_TARGET_RAVEN;
        m_elfmachine = ED_ATI_CAL_MACHINE_RAVEN_ISA;
        break;
    case GSL_ATIASIC_ID_RAVEN2:
        m_target = CAL_TARGET_RAVEN2;
        m_elfmachine = ED_ATI_CAL_MACHINE_RAVEN2_ISA;
        break;
    case GSL_ATIASIC_ID_RENOIR:
        m_target = CAL_TARGET_RENOIR;
        m_elfmachine = ED_ATI_CAL_MACHINE_RENOIR_ISA;
        break;
    case GSL_ATIASIC_ID_POLARIS22:
        m_target = CAL_TARGET_POLARIS22;
        m_elfmachine = ED_ATI_CAL_MACHINE_POLARIS22_ISA;
        break;
    case GSL_ATIASIC_ID_VEGA12:
        m_target = CAL_TARGET_VEGA12;
        m_elfmachine = ED_ATI_CAL_MACHINE_VEGA12_ISA;
        break;
    case GSL_ATIASIC_ID_VEGA20:
        m_target = CAL_TARGET_VEGA20;
        m_elfmachine = ED_ATI_CAL_MACHINE_VEGA20_ISA;
        break;
    default:
        // 6XX is not supported
        m_adp->deleteContext(temp_cs);
        gsAdaptor::closeAdaptor(m_adp);
        m_adp = 0;
        assert(0);
        return false;
    }

    //cache device details
    getAttribs_int(temp_cs);
    temp_cs->getMemInfo(&m_memInfo, GSL_MEMINFO_BASIC);
    assert(temp_cs->getVMMode());

    // The context was only needed for the queries above.
    m_adp->deleteContext(temp_cs);
    return true;
}
/// Opens the adapter when none is open yet; does nothing otherwise.
///
/// @param initLite  Forwarded to openAdaptorByIndex() when the per-VPU adapter
///                  model is in use; unused on the native display handle path.
void
CALGSLDevice::PerformAdapterInitialization_int(bool initLite)
{
    if (m_adp != 0)
    {
        // Already open.
        return;
    }

    if (m_usePerVPUAdapterModel)
    {
        m_adp = gsAdaptor::openAdaptorByIndex<gsl::gsAdaptor>(m_gpuIndex, &m_scfg, &m_dcfg, initLite);
    }
    else
    {
        m_adp = gsAdaptor::openAdaptor(m_nativeDisplayHandle, m_chainIndex, &m_scfg, &m_dcfg);
    }
    assert(m_adp != 0);
}
void
CALGSLDevice::PerformFullInitialization_int()
{
m_fullInitialized = true;
if (m_adp == 0)
{
PerformAdapterInitialization_int(false);
}
if (m_cs == 0)
{
m_cs = m_adp->createComputeContext(m_computeRing ? (m_isComputeRingIDForced ? m_forcedComputeEngineID :
getFirstAvailableComputeEngineID()) : GSL_ENGINEID_3DCOMPUTE0,
m_canDMA ? GSL_ENGINEID_DRMDMA0 : GSL_ENGINEID_INVALID, false);
m_cs->getMainSubCtx()->setVPUMask(m_vpuMask);
//
// Check if the command stream has a DMA connection and allow DMA if there
// is a connection and we can actually DMA
//
bool dmaConnection = m_cs->getDrmDma0Ctx() && m_cs->getDrmDma0Ctx()->ioInfo.iolConnection;
m_allowDMA = (dmaConnection && m_canDMA);
m_rs = m_cs->createRenderState();
m_cs->setRenderState(m_rs);
m_cs->Flush();
m_cs->createSubAllocDesc();
m_mapQuery = m_cs->createQuery(GSL_SYNC_ATI);
m_mapDMAQuery = m_cs->createQuery(GSL_DRMDMA_SYNC_ATI);
// Allocate 1x1 FART and Vid memory for DMA flush
CALresourceDesc desc;
memset(&desc, 0, sizeof(CALresourceDesc));
desc.type = GSL_MOA_MEMORY_AGP;
desc.size.width = 1;
desc.size.height = 1;
desc.format = CM_SURF_FMT_R32F;
desc.channelOrder = GSL_CHANNEL_ORDER_R;
desc.dimension = GSL_MOA_TEXTURE_2D;
m_srcDRMDMAMem = resAlloc(&desc);
desc.type = GSL_MOA_MEMORY_CARD_EXT_NONEXT;
m_dstDRMDMAMem = resAlloc(&desc);
m_cs->setDMAFlushBuf(m_srcDRMDMAMem, m_dstDRMDMAMem, 4 /*size of CM_SURF_FMT_R32F*/);
m_PerformLazyDeviceInit = false;
m_textureResource = m_cs->createTextureResource();
m_textureSampler = m_cs->createSampler();
// This is a temporary w/a for a CP uCode bug in HWS mode.
// due to this bug, CP uCode loops through a RUNLIST unless
// there is a submission on all queues in HWS mode.
// To force CP uCode to exit the loop, the below code creates
// temporary contexts and submits a packet (via setRenderState)
// to each available compute engines except the first one which
// already had submission in the above code.
// @todo: remove this code once the bug is fixed in CP uCode
if (m_adp->isHWSSupported()) {
gslEngineID usedComputeEngineID = m_isComputeRingIDForced ?
m_forcedComputeEngineID :
getFirstAvailableComputeEngineID();
for (uint i = 0; i < m_nEngines; ++i) {
if (m_engines[i].id >= GSL_ENGINEID_COMPUTE0 &&
m_engines[i].id <= GSL_ENGINEID_COMPUTE7 &&
m_engines[i].id != usedComputeEngineID) {
gsl::gsCtx* cs_temp;
gslRenderState rs_temp;
cs_temp = m_adp->createComputeContext(m_engines[i].id,
m_canDMA ? GSL_ENGINEID_DRMDMA0 : GSL_ENGINEID_INVALID, false);
cs_temp->getMainSubCtx()->setVPUMask(m_vpuMask);
rs_temp = cs_temp->createRenderState();
cs_temp->setRenderState(rs_temp);
cs_temp->Flush();
cs_temp->setRenderState(0);
cs_temp->destroyRenderState(rs_temp);
m_adp->deleteContext(cs_temp);
}
}
}
}
}
/// Issues a begin/end pair on the given sync query and fetches its result,
/// asserting that the reported value is 1.
///
/// @param cs      Context the query is issued on.
/// @param target  Query target; GSL_DRMDMA_SYNC_ATI restricts the engine mask
///                to the two DRMDMA engines, any other target uses
///                GSL_ENGINEMASK_ALL_BUT_UVD_VCE.
/// @param object  Query object to use.
void
Wait(gsl::gsCtx* cs, gslQueryTarget target, gslQueryObject object)
{
    uint32 engineMask;
    if (target == GSL_DRMDMA_SYNC_ATI) {
        engineMask = GSL_ENGINE_MASK(GSL_ENGINEID_DRMDMA0) | GSL_ENGINE_MASK(GSL_ENGINEID_DRMDMA1);
    } else {
        engineMask = GSL_ENGINEMASK_ALL_BUT_UVD_VCE;
    }

    object->BeginQuery(cs, target, 0, engineMask);
    object->EndQuery(cs, 0);

    uint64 result;
    object->GetResult(cs, &result);
    assert(result == 1);
}
/// Decides whether a surface with the given tiling should be mapped through
/// the aperture.
///
/// @param tiling  Tiling mode of the surface.
/// @return false for GSL_MOA_TILING_LINEAR and GSL_MOA_TILING_LINEAR_GENERAL,
///         true for every other tiling mode.
///
/// @note The definition must not carry `static`: that keyword is only allowed
///       on the in-class declaration of a member function, and it cannot be
///       combined with the `const` qualifier used here.
bool
CALGSLDevice::ResolveAperture(const gslMemObjectAttribTiling tiling) const
{
    // Don't ask for aperture if the tiling is linear.
    const bool isLinear = (GSL_MOA_TILING_LINEAR == tiling) ||
                          (GSL_MOA_TILING_LINEAR_GENERAL == tiling);

    // Use aperture for everything else.
    return !isLinear;
}
/// Allocates a GSL memory object as described by a CAL resource descriptor.
///
/// @param desc  Resource description (location, dimension, size, format,
///              flags, SVM / system-memory details).
/// @return The new memory object, or 0 when creation failed or the dimension
///         is not one of the handled types.
gslMemObject
CALGSLDevice::resAlloc(const CALresourceDesc* desc) const
{
    //! @note: GSL device isn't thread safe
    amd::ScopedLock deviceLock(gslDeviceOps());

    assert(m_cs != 0);

    gslMemObject mem = 0;
    uint32 flags = desc->flags;

    gslMemObjectAttribs attribs(
        GSL_MOA_TEXTURE_1D,      // type
        GSL_MOA_MEMORY_CARD_EXT_NONEXT,  // location XXX
        (flags & CAL_RESALLOC_GLOBAL_BUFFER) ? GSL_MOA_TILING_LINEAR : g_CALBETiling_Tiled,  // tiling
        GSL_MOA_DISPLAYABLE_NO,  // displayable
        ATIGL_FALSE,             // mipmap
        1,                       // samples
        0,                       // cpu_address
        GSL_MOA_SIGNED_NO,       // signed_format
        GSL_MOA_FORMAT_DERIVED,  // numFormat
        DRIVER_MODULE_GLL,       // module
        GSL_ALLOCATION_INSTANCED // alloc_type
    );

    // Placement attributes are taken over from the descriptor.
    attribs.location = desc->type;
    attribs.vaBase = desc->vaBase;
    attribs.section = desc->section;
    attribs.isAllocSVM = desc->isAllocSVM;
    attribs.isAllocExecute = desc->isAllocExecute;
    attribs.minAlignment = desc->minAlignment;

    //!@note GSL asserts with tiled 1D images of any type.
    const bool isOneDimensional =
        (desc->dimension == GSL_MOA_BUFFER) ||
        (desc->dimension == GSL_MOA_TEXTURE_1D) ||
        (desc->dimension == GSL_MOA_TEXTURE_1D_ARRAY) ||
        (desc->dimension == GSL_MOA_TEXTURE_BUFFER);
    if (isOneDimensional)
    {
        attribs.tiling = GSL_MOA_TILING_LINEAR;
    }

    if (desc->type == GSL_MOA_MEMORY_SYSTEM)
    {
        // CPU addres and size for pinning
        attribs.cpu_address = desc->systemMemory;
        attribs.size = desc->systemMemorySize;

        // Use linear general if width isn't aligned
        attribs.tiling = ((desc->size.width % 64) == 0) ? GSL_MOA_TILING_LINEAR
                                                        : GSL_MOA_TILING_LINEAR_GENERAL;
    }
    else if (desc->type == GSL_MOA_MEMORY_CARD_EXTERNAL_PHYSICAL)
    {
        attribs.cpu_address = (void*)desc->busAddress;
    }

    // Don't ask for aperture if the tiling is linear.
    attribs.useAperture = ResolveAperture(attribs.tiling);
    attribs.channelOrder = desc->channelOrder;
    attribs.type = desc->dimension;

    if (desc->mipLevels > 1) {
        attribs.levels = desc->mipLevels;
        attribs.mipmap = true;
    }

    switch (desc->dimension)
    {
    case GSL_MOA_BUFFER:
    case GSL_MOA_TEXTURE_1D:
        mem = m_cs->createMemObject1D(desc->format, desc->size.width, &attribs);
        break;
    case GSL_MOA_TEXTURE_BUFFER:
        attribs.type = GSL_MOA_TEXTURE_BUFFER;
        mem = m_cs->createMemObject1D(desc->format, desc->size.width, &attribs);
        break;
    case GSL_MOA_TEXTURE_2D:
        mem = m_cs->createMemObject2D(desc->format, desc->size.width, (uint32)desc->size.height, &attribs);
        break;
    case GSL_MOA_TEXTURE_3D:
    case GSL_MOA_TEXTURE_2D_ARRAY:
        mem = m_cs->createMemObject3D(desc->format, desc->size.width,
            (uint32)desc->size.height, (uint32)desc->size.depth, &attribs);
        break;
    case GSL_MOA_TEXTURE_1D_ARRAY:
        // The array size is carried in size.height and passed as the depth.
        mem = m_cs->createMemObject3D(desc->format, desc->size.width,
            1, (uint32)desc->size.height, &attribs);
        break;
    default:
        break;
    }

#ifdef ATI_OS_WIN
    const bool isSvmSection = (desc->section == GSL_SECTION_SVM || desc->section == GSL_SECTION_SVM_ATOMICS);
    if (isSvmSection && mem == NULL) {
        //svm allocation failure, try one more time after wait.
        Wait(m_cs, GSL_SYNC_ATI, m_mapQuery);
        mem = m_cs->createMemObject1D(desc->format, desc->size.width, &attribs);
    }
#endif
    return mem;
}
/// Creates a view (alias) onto an existing memory object.
///
/// With CAL_RESALLOCSLICEVIEW_LEVEL and/or CAL_RESALLOCSLICEVIEW_LAYER set in
/// flags, the view is the selected mip level / layer sub-object. Otherwise an
/// offset view of type resType is created at the given element offset.
///
/// @param res           Base memory object.
/// @param size          Dimensions of the offset view.
/// @param offset        Offset into res in elements of res's format.
/// @param format        Surface format of the offset view.
/// @param channelOrder  Channel order of the view.
/// @param resType       Object type of the offset view.
/// @param level         Mip level, used with the LEVEL flag.
/// @param layer         Layer index, used with the LAYER flag.
/// @param flags         CAL_RESALLOCSLICEVIEW_* flags (tiling / level / layer).
/// @param bytePitch     Pitch in bytes, or (uint64)-1 to derive it from res.
/// @return The view object, or 0 when the offset violates the alignment
///         requirement or resType is not handled.
gslMemObject
CALGSLDevice::resAllocView(gslMemObject res, gslResource3D size, size_t offset, cmSurfFmt format,
    gslChannelOrder channelOrder, gslMemObjectAttribType resType, uint32 level, uint32 layer, uint32 flags,
    uint64 bytePitch) const
{
    assert(m_cs != 0);

    //! @note: GSL device isn't thread safe
    amd::ScopedLock deviceLock(gslDeviceOps());

    gslMemObjectAttribs attribs(
        GSL_MOA_TEXTURE_2D,      // type.     Filled in below based on the base type.
        GSL_MOA_MEMORY_ALIAS,    // location. Filled in below based on the base location.
        GSL_MOA_TILING_LINEAR,   // tiling.   Filled in below based on the flags passed in.
        GSL_MOA_DISPLAYABLE_NO,  // displayable
        ATIGL_FALSE,             // mipmap
        1,                       // samples
        0,                       // cpu_address
        GSL_MOA_SIGNED_NO,       // signed_format
        GSL_MOA_FORMAT_DERIVED,  // numFormat
        DRIVER_MODULE_GLL,       // module
        GSL_ALLOCATION_INSTANCED // alloc_type
    );
    attribs.bytePitch = bytePitch;
    attribs.section = res->getAttribs().section;
    attribs.isAllocSVM = res->getAttribs().isAllocSVM;
    attribs.isAllocExecute = res->getAttribs().isAllocExecute;

    // Need to get the alignment info from hwl.
    // Not sure hwl is correct though. Linear aligned 256b, tiled 8kb according to the address library.
    uint32 alignment;
    switch (flags & ~CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER)
    {
    case CAL_RESALLOCSLICEVIEW_LINEAR_ALIGNED:
        alignment = 256;
        attribs.tiling = GSL_MOA_TILING_LINEAR;
        break;
    case CAL_RESALLOCSLICEVIEW_LINEAR_UNALIGNED:
        alignment = 1;
        attribs.tiling = GSL_MOA_TILING_LINEAR_GENERAL;
        break;
    default:
        // GSL asserts if this tiled mode is differnt from the original surface.
        // (For example, original is GSL_MOA_TILING_MACRO and the new one is GSL_MOA_TILING_TILED)
        // Use the original mode for view allocation.
        attribs.tiling = res->getAttribs().tiling;
        alignment = (attribs.tiling == GSL_MOA_TILING_LINEAR ||
                     attribs.tiling == GSL_MOA_TILING_LINEAR_GENERAL) ? 256 : 8192;
        break;
    }

    // Check any alignment restrictions.
    uint64 resPitch = res->getPitch();
    cmSurfFmt baseFormat = res->getFormat();
    uint32 elementSize = cmGetSurfElementSize(baseFormat);
    uint64 offsetInBytes = static_cast<uint64>(offset) * elementSize;
    if ((offsetInBytes % alignment) != 0)
    {
        return 0;   //offset doesn't match alignment requirements.
    }

    if (attribs.bytePitch == (uint64)-1)
    {
        attribs.bytePitch = resPitch * elementSize;
    }

    // alias has same location as the base resource.
    attribs.type = res->getAttribs().type;
    attribs.location = res->getAttribs().location;
    attribs.displayable = res->getAttribs().displayable;
    attribs.channelOrder = channelOrder;

    // Level / layer views are sub-objects of the base resource and are
    // returned directly, without an extra offset view on top.
    gslMemObject subObject = res;
    bool isSubImageView = false;
    if (flags & CAL_RESALLOCSLICEVIEW_LEVEL)
    {
        const gsSubImageParam levelParam(level);
        subObject = m_cs->createSubMemObject(res, GSL_LEVEL, levelParam);
        attribs.bytePitch = static_cast<size_t>(subObject->getPitch()) *
            (subObject->getBitsPerElement() / 8);
        isSubImageView = true;
    }
    if (flags & CAL_RESALLOCSLICEVIEW_LAYER)
    {
        const gsSubImageParam layerParam(layer);
        gslMemObject layerObject = m_cs->createSubMemObject(subObject, GSL_LAYER, layerParam);
        if (subObject != res)
        {
            // The intermediate level object is no longer needed.
            m_cs->destroyMemObject(subObject);
        }
        subObject = layerObject;
        isSubImageView = true;
    }
    if (isSubImageView) {
        // If level/layer object was created, then don't need an extra view
        return subObject;
    }

    // From here on no sub-object exists, so the view is created on res itself.
    gslMemObject view = NULL;
    attribs.type = resType;
    switch (resType)
    {
    case GSL_MOA_BUFFER:
    case GSL_MOA_TEXTURE_1D:
    case GSL_MOA_TEXTURE_BUFFER:
        view = m_cs->createOffsetMemObject1D(res, offsetInBytes, format,
            size.width, &attribs);
        break;
    case GSL_MOA_TEXTURE_2D:
        view = m_cs->createOffsetMemObject2D(res, offsetInBytes, format,
            size.width, (uint32)size.height, &attribs);
        break;
    case GSL_MOA_TEXTURE_3D:
    case GSL_MOA_TEXTURE_2D_ARRAY:
        view = m_cs->createOffsetMemObject3D(res, offsetInBytes, format,
            size.width, (uint32)size.height, (uint32)size.depth, &attribs);
        break;
    case GSL_MOA_TEXTURE_1D_ARRAY:
        // The array size is carried in size.height and passed as the depth.
        view = m_cs->createOffsetMemObject3D(res, offsetInBytes, format,
            size.width, 1, (uint32)size.height, &attribs);
        break;
    default:
        break;
    }
    return view;
}
// Bookkeeping record allocated by resMapLocal() when a map goes through a
// temporary staging surface; per the comment there it is kept for the
// matching unmap call.
struct GSLDeviceMemMap
{
// Temporary linear surface created for the mapping.
gslMemObject mem;
// NOTE(review): presumably the map access flags of the request - the code
// that fills this field is outside the visible part of the file; confirm.
uint32 flags;
};
void*
CALGSLDevice::resMapLocal(size_t& pitch,
gslMemObject mem,
gslMapAccessType flags)
{
// No map really necessary if IOMMUv2 is being used, return the surface address directly
// as CPU can write to it for Linear tiled surfaces only
if (mem->getAttribs().isAllocSVM && mem->getAttribs().tiling <= GSL_MOA_TILING_LINEAR)
{
return (void*)mem->getImage(0)->surf.addr.getAddress();
}
//! @note: GSL device isn't thread safe
amd::ScopedLock k(gslDeviceOps());
void* pPtr = NULL;
gslMemObjectAttribLocation location = mem->getAttribs().location;
if ((location == GSL_MOA_MEMORY_CARD_LOCKABLE) || !m_allowDMA)
{
// direct lock
if (location == GSL_MOA_MEMORY_CARD_LOCKABLE) {
// Get tiling mode and resolve the aperture settings.
bool useAperture = ResolveAperture(mem->getAttribs().tiling);
pPtr = mem->map(m_cs, GSL_MAP_NOSYNC, GSL_GPU_0, false, useAperture);
}
else
{
pPtr = mem->map(m_cs, flags, GSL_GPU_0, true, false);
}
if (pPtr == NULL)
{
return NULL;
}
// obtain the pitch of the buffer
pitch = static_cast<size_t>(mem->getPitch());
}
else
{
// Allocate map structure for the unmap call
GSLDeviceMemMap* memMap = static_cast<GSLDeviceMemMap*>(malloc(sizeof(GSLDeviceMemMap)));
if (memMap == NULL)
{
return NULL;
}
uint64 width = mem->getRectWidth();
intp height = mem->getRectHeight();
cmSurfFmt format = mem->getFormat();
gslMemObjectAttribType dstType = mem->getAttribs().type;
gslMemObjectAttribs attribsDest(
dstType, // type
GSL_MOA_MEMORY_REMOTE_CACHEABLE, // location
GSL_MOA_TILING_LINEAR, // tiling
GSL_MOA_DISPLAYABLE_NO, // displayable
ATIGL_FALSE, // mipmap
1, // samples
0, // cpu_address
GSL_MOA_SIGNED_NO, // signed_format
GSL_MOA_FORMAT_DERIVED, // numFormat
DRIVER_MODULE_GLL, // module
GSL_ALLOCATION_INSTANCED // alloc_type
);
attribsDest.channelOrder = mem->getAttribs().channelOrder;
// Create the target destination buffer
memMap->mem = m_cs->createMemObject2D(format, width, (uint32)height, &attribsDest);
if (memMap->mem == NULL)
{
attribsDest.location = GSL_MOA_MEMORY_AGP;
memMap->mem = m_cs->createMemObject2D(format, width, (uint32)height, &attribsDest);
if (memMap->mem == NULL)
{
free(memMap);
return NULL;
}
}
// set the pointer to it as the return buffer
pPtr = memMap->mem->map(m_cs, GSL_MAP_NOSYNC, GSL_GPU_0, false, false);
if (pPtr == 0)
{
m_cs->destroyMemObject(memMap->mem);
free(memMap);
return NULL;
}
// obtain the pitch of the temporary buffer
pitch = static_cast<size_t>(memMap->mem->getPitch());
// For write only cases, we don't care about the data
if (flags != GSL_MAP_WRITE_ONLY)
{
uint64 surfaceSize = memMap->mem->getSurfaceSize();
uint64 dstSize = mem->getSurfaceSize();
surfaceSize = (surfaceSize > dstSize) ? dstSize : surfaceSize;
//! @todo Workaround strange GSL/CMM-QS behavior. OCL doesn't require a sync,
//! because resource isn't busy on the CAL device. However without sync there are less CBs available
//! Conformanace multidevice test will create around 60 queues, instead of 70
uint32 mode = (IS_LINUX) ? GSL_SYNCUPLOAD_SYNC_WAIT | GSL_SYNCUPLOAD_SYNC_START : 0;
m_cs->DMACopy(mem, 0, memMap->mem, 0, surfaceSize, mode, NULL);
Wait(m_cs, GSL_DRMDMA_SYNC_ATI, m_mapDMAQuery);
}
m_hack.insert(std::pair<gslMemObject, intp>(mem, (intp) memMap));
memMap->flags = flags;
}
return pPtr;
}
//! Releases a CPU mapping previously obtained from resMapLocal().
//!
//!  - SVM allocation with linear tiling: nothing to do, the map returned the
//!    surface address directly.
//!  - No staging record in m_hack: the resource was mapped directly, so it is
//!    unmapped (followed by a sync unless it lives in lockable card memory).
//!  - Staging record found: the staging surface is unmapped, copied back into
//!    the resource unless the map was read only, then destroyed together with
//!    its record.
//!
//! \param mem  resource that was passed to resMapLocal()
void
CALGSLDevice::resUnmapLocal(gslMemObject mem)
{
    // No unmap necessary with IOMMUv2 as the map operation directly returned the base surface
    // System VA, which the CPU can write to (linear tiled surfaces only)
    if (mem->getAttribs().isAllocSVM && mem->getAttribs().tiling <= GSL_MOA_TILING_LINEAR)
    {
        return;
    }

    //! @note: GSL device isn't thread safe
    amd::ScopedLock k(gslDeviceOps());

    // Find the pairing
    Hack::iterator iter = m_hack.find(mem);
    if (iter == m_hack.end())
    {
        // We didn't find a pair, so it's a direct map
        mem->unmap(m_cs);
        //! @todo: GSL doesn't wait for CB on unmap,
        //! thus the data isn't really available to all engines
        if (mem->getAttribs().location != GSL_MOA_MEMORY_CARD_LOCKABLE)
        {
            Wait(m_cs, GSL_SYNC_ATI, m_mapQuery);
        }
        return;
    }

    // The record pointer is stored in the map as an integer (see the insert in
    // resMapLocal), so an integer-to-pointer conversion is required here;
    // static_cast cannot perform it.
    GSLDeviceMemMap* memMap = reinterpret_cast<GSLDeviceMemMap*>(iter->second);
    m_hack.erase(iter);

    memMap->mem->unmap(m_cs);

    // Read only maps have nothing to write back
    if (memMap->flags != GSL_MAP_READ_ONLY)
    {
        // Copy no more than the smaller of the two surfaces
        uint64 surfaceSize = memMap->mem->getSurfaceSize();
        uint64 dstSize = mem->getSurfaceSize();
        surfaceSize = (surfaceSize > dstSize) ? dstSize : surfaceSize;

        //! @todo Workaround strange GSL/CMM-QS behavior. OCL doesn't require a sync,
        //! because resource isn't busy on the CAL device. However without sync there are fewer CBs available.
        //! Conformance multidevice test will create around 60 queues, instead of 70
        uint32 mode = (IS_LINUX) ? GSL_SYNCUPLOAD_SYNC_WAIT | GSL_SYNCUPLOAD_SYNC_START : 0;
        m_cs->DMACopy(memMap->mem, 0, mem, 0, surfaceSize, mode, NULL);
        Wait(m_cs, GSL_DRMDMA_SYNC_ATI, m_mapDMAQuery);
    }

    m_cs->destroyMemObject(memMap->mem);

    // The record was allocated with malloc() in resMapLocal, so it must be
    // released with free(); using delete on it is undefined behavior.
    free(memMap);
}
//! Creates a virtual heap memory object.
//!
//! The object is created as a 1D surface with the CM_SURF_FMT_R32I format,
//! type GSL_MOA_VIRTUAL_HEAP, system memory location and linear tiling.
//!
//! \param size  size forwarded to createMemObject1D()
//! \return the memory object returned by createMemObject1D()
gslMemObject
CALGSLDevice::resGetHeap(size_t size) const
{
    assert(m_cs != 0);

    //! @note: GSL device isn't thread safe
    amd::ScopedLock k(gslDeviceOps());

    gslMemObjectAttribs heapAttribs(
        GSL_MOA_VIRTUAL_HEAP,       // type
        GSL_MOA_MEMORY_SYSTEM,      // location
        GSL_MOA_TILING_LINEAR,      // tiling
        GSL_MOA_DISPLAYABLE_NO,     // displayable
        ATIGL_FALSE,                // mipmap
        1,                          // samples
        0,                          // cpu_address
        GSL_MOA_SIGNED_NO,          // signed_format
        GSL_MOA_FORMAT_DERIVED,     // numFormat
        DRIVER_MODULE_GLL,          // module
        GSL_ALLOCATION_INSTANCED,   // alloc_type
        0,                          // channel_order
        0                           // size of cpu_address
    );

    return m_cs->createMemObject1D(CM_SURF_FMT_R32I, size, &heapAttribs);
}
//! Maps a remote resource for CPU access.
//!
//! For an SVM allocation with linear tiling the surface address is returned
//! directly and \p pitch is left untouched. Otherwise the pitch is reported
//! and the resource is mapped without synchronization.
//!
//! \param pitch  [out] pitch of the mapped surface
//! \param mem    resource to map
//! \param flags  requested access type (not used by this implementation)
//! \return CPU pointer as returned by the map call
void*
CALGSLDevice::resMapRemote(
    size_t& pitch,
    gslMemObject mem,
    gslMapAccessType flags) const
{
    // No map really necessary if IOMMUv2 is being used: hand back the surface address,
    // since the CPU can write to it directly (linear tiled surfaces only)
    const bool directSvmAccess = mem->getAttribs().isAllocSVM &&
                                 (mem->getAttribs().tiling <= GSL_MOA_TILING_LINEAR);
    if (directSvmAccess)
    {
        return (void*)mem->getImage(0)->surf.addr.getAddress();
    }

    //! @note: GSL device isn't thread safe
    amd::ScopedLock k(gslDeviceOps());

    pitch = static_cast<size_t>(mem->getPitch());

    void* mapped = mem->map(m_cs, GSL_MAP_NOSYNC, GSL_GPU_0, false, false);
    return mapped;
}
//! Releases a CPU mapping previously obtained from resMapRemote().
//!
//! \param mem  resource that was passed to resMapRemote()
void
CALGSLDevice::resUnmapRemote(gslMemObject mem) const
{
    // With IOMMUv2 the map operation returned the base surface System VA directly
    // (linear tiled surfaces only), so only other resources need a real unmap
    const bool directSvmAccess = mem->getAttribs().isAllocSVM &&
                                 (mem->getAttribs().tiling <= GSL_MOA_TILING_LINEAR);
    if (!directSvmAccess)
    {
        //! @note: GSL device isn't thread safe
        amd::ScopedLock k(gslDeviceOps());
        mem->unmap(m_cs);
    }
}
//! Largest transfer size, in bytes (128KB), for which GetCopyType() picks CPDMA
//! when DMA is also allowed
#define CPDMA_THRESHOLD 131072

//! Selects the copy engine/mode for a transfer between two memory objects.
//!
//! \param srcMem          source memory object
//! \param destMem         destination memory object
//! \param srcOffset       source offsets; only element 0 is examined
//! \param destOffset      destination offsets; only element 0 is examined
//! \param allowDMA        true if DRMDMA transfers may be selected
//! \param flags           copy flags, compared against CAL_MEMCOPY_ASYNC
//! \param size            if nonzero, caps the source surface size used for the
//!                        CPDMA threshold test; also multiplied by the bytes per
//!                        pixel to form the linear byte pitch in the tiled tests
//! \param enableCopyRect  true prevents the size based CPDMA selection
//! \return USE_CPDMA, USE_DRMDMA_T2L, USE_DRMDMA_L2T, USE_DRMDMA or USE_NONE
CopyType
CALGSLDevice::GetCopyType(
    gslMemObject srcMem,
    gslMemObject destMem,
    size_t* srcOffset,
    size_t* destOffset,
    bool allowDMA,
    uint32 flags,
    size_t size,
    bool enableCopyRect) const
{
    const gslMemObjectAttribTiling srcTiling = srcMem->getAttribs().tiling;
    const gslMemObjectAttribTiling dstTiling = destMem->getAttribs().tiling;
    const gslMemObjectAttribType srcType = srcMem->getAttribs().type;
    const gslMemObjectAttribType dstType = destMem->getAttribs().type;

    // Source size, capped by the requested size when one was given
    uint64 srcSize = srcMem->getSurfaceSize();
    if ((size != 0) && (srcSize > size))
    {
        srcSize = size;
    }

    // CPDMA isn't possible for anything other than a 1D texture or a buffer,
    // as it does a blind blob copy without regard to padding
    const bool srcIsLinearKind = (srcTiling == GSL_MOA_TILING_LINEAR) ||
                                 (srcTiling == GSL_MOA_TILING_LINEAR_GENERAL);
    const bool dstIsLinearKind = (dstTiling == GSL_MOA_TILING_LINEAR) ||
                                 (dstTiling == GSL_MOA_TILING_LINEAR_GENERAL);
    const bool dstIsFlat = (dstType == GSL_MOA_TEXTURE_1D) || (dstType == GSL_MOA_BUFFER);
    const bool cpDmaPossible = srcIsLinearKind && dstIsLinearKind && dstIsFlat &&
                               (srcType == dstType);

    if (!allowDMA && !cpDmaPossible)
    {
        return USE_NONE;
    }

    //
    // Use CPDMA for small synchronous transfers (up to 128KB), or whenever DMA isn't allowed
    //
    const bool smallSyncCopy = (flags != CAL_MEMCOPY_ASYNC) &&
                               (srcSize <= CPDMA_THRESHOLD) &&
                               !enableCopyRect;
    if (cpDmaPossible && (smallSyncCopy || !allowDMA))
    {
        return USE_CPDMA;
    }

    // ### Check for Particular kind of DRMDMA here
    const bool image2DWithBuffer =
        ((srcType == GSL_MOA_TEXTURE_2D) && (dstType == GSL_MOA_BUFFER)) ||
        ((dstType == GSL_MOA_TEXTURE_2D) && (srcType == GSL_MOA_BUFFER));

    if (allowDMA && image2DWithBuffer)
    {
        CopyType tiledCopy = USE_NONE;
        const bool srcLinear = (srcTiling == GSL_MOA_TILING_LINEAR);
        const bool dstLinear = (dstTiling == GSL_MOA_TILING_LINEAR);

        if (!srcLinear && dstLinear)
        {
            // Tiled to linear
            intp srcBits = srcMem->getBitsPerElement();
            uint64 bytesPerPixel = srcBits / 8;
            uint64 pitchInBytes = size * bytesPerPixel;

            // The linear pitch in bytes must be 4 bytes aligned;
            // another DRM restriction... SI has 4 pixels
            const bool aligned = ((pitchInBytes % 4) == 0) &&
                                 (srcOffset[0] % 4 == 0) &&
                                 (destOffset[0] % 4 == 0);

            // The sDMA T2L cases we need to avoid are when the tiled_x
            // is not a multiple of the bytes per pixel.
            if (aligned &&
                (!m_isSDMAL2TConstrained || (srcOffset[0] % bytesPerPixel == 0)))
            {
                tiledCopy = USE_DRMDMA_T2L;
            }
        }
        else if (srcLinear && !dstLinear)
        {
            // Linear to tiled
            intp dstBits = destMem->getBitsPerElement();
            uint64 bytesPerPixel = dstBits / 8;
            uint64 pitchInBytes = size * bytesPerPixel;

            // The linear pitch in bytes must be 4 bytes aligned;
            // another DRM restriction... SI has 4 pixels
            const bool aligned = ((pitchInBytes % 4) == 0) &&
                                 (destOffset[0] % 4 == 0) &&
                                 (srcOffset[0] % 4 == 0);

            // The sDMA L2T cases we need to avoid are when the tiled_x
            // is not a multiple of the bytes per pixel.
            if (aligned &&
                (!m_isSDMAL2TConstrained || (destOffset[0] % bytesPerPixel == 0)))
            {
                tiledCopy = USE_DRMDMA_L2T;
            }
        }
        return tiledCopy;
    }

    // Plain DRMDMA needs matching resource types
    return (dstType == srcType) ? USE_DRMDMA : USE_NONE;
}
//! Computes the scratch buffer size to allocate for the given register count.
//!
//! The size reported by CalcAllScratchBufferSizes() for the fragment stage is
//! divided by 4 and rounded down to a 64K boundary, with a floor of 64K
//! whenever the reported size is nonzero.
//!
//! \param regNum  scratch space requested for the shader stage
//! \return scratch buffer size, or 0 if no scratch is required
uint64
CALGSLDevice::calcScratchBufferSize(uint32 regNum) const
{
    //!@todo should be CM_COMPUTE_SHADER (and the compute program target);
    //! the fragment stage is used for now
    const gslProgramTargetEnum target = GSL_FRAGMENT_PROGRAM;
    uint32 enabledShadersFlag = CM_FRAGMENT_SHADER_BIT;

    // Determine the scratch size we need to allocate
    cmScratchSpaceNeededPerShaderStage scratchSpacePerShaderStage;
    memset(&scratchSpacePerShaderStage, 0, sizeof(scratchSpacePerShaderStage));
    scratchSpacePerShaderStage.scratchSpace[CM_FRAGMENT_SHADER] = regNum;

    uint64 scratchBufferSizes[gslProgramTarget_COUNT];
    memset(scratchBufferSizes, 0, sizeof(scratchBufferSizes));

    m_cs->CalcAllScratchBufferSizes(enabledShadersFlag, scratchSpacePerShaderStage,
                                    scratchBufferSizes);

    // SWDEV-79308:
    // Reduce the total scratch buffer size by a factor of 4, which in effect reduces the
    // max. scratch waves from 32 to 8. This keeps the required total scratch buffer
    // size from exceeding the available local memory.
    if (scratchBufferSizes[target] > 0)
    {
        // The scratch buffer size needs to be 64K aligned, so round the reduced
        // size down to a 64K boundary; a plain shift does not preserve alignment.
        const uint64 alignMask64K = ~static_cast<uint64>(0xFFFF);
        scratchBufferSizes[target] = (scratchBufferSizes[target] >> 2) & alignMask64K;
        if (scratchBufferSizes[target] == 0) { // assign minimum scratch buffer size of 64K
            scratchBufferSizes[target] = 0x10000;
        }
    }
    return scratchBufferSizes[target];
}
//! Converts a GSL channel order value into a packed channel swizzle, in place.
//!
//! On entry *channelOrder holds one of the GSL_CHANNEL_ORDER_* values; on
//! return it holds the first 32 bits of the matching channelSwizzle. An
//! unknown channel order asserts and yields an all-SWIZZLE_ZERO swizzle.
//!
//! \note The definition must not carry the 'static' specifier: a storage class
//! is not allowed on an out-of-class member function definition, and it cannot
//! be combined with the 'const' qualifier.
//!
//! \param channelOrder  [in,out] channel order to convert
void
CALGSLDevice::convertInputChannelOrder(intp*channelOrder) const
{
    // Set the default to indicate that we don't want to override the channel order:
    // all components zero means default.
    channelSwizzle chanSwiz = {SWIZZLE_ZERO,
                               SWIZZLE_ZERO,
                               SWIZZLE_ZERO,
                               SWIZZLE_ZERO};

    switch (*channelOrder) {
    case GSL_CHANNEL_ORDER_R:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_ZERO;
        chanSwiz.b = SWIZZLE_ZERO;
        chanSwiz.a = SWIZZLE_ONE;
        break;
    case GSL_CHANNEL_ORDER_A:
        chanSwiz.r = SWIZZLE_ZERO;
        chanSwiz.g = SWIZZLE_ZERO;
        chanSwiz.b = SWIZZLE_ZERO;
        chanSwiz.a = SWIZZLE_COMPONENT0;
        break;
    case GSL_CHANNEL_ORDER_RG:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_COMPONENT1;
        chanSwiz.b = SWIZZLE_ZERO;
        chanSwiz.a = SWIZZLE_ONE;
        break;
    case GSL_CHANNEL_ORDER_RA:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_ZERO;
        chanSwiz.b = SWIZZLE_ZERO;
        chanSwiz.a = SWIZZLE_COMPONENT1;
        break;
    // Three components in order, alpha forced to one
    case GSL_CHANNEL_ORDER_RGB:
    case GSL_CHANNEL_ORDER_SRGB:
    case GSL_CHANNEL_ORDER_SRGBX:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_COMPONENT1;
        chanSwiz.b = SWIZZLE_COMPONENT2;
        chanSwiz.a = SWIZZLE_ONE;
        break;
    // Four components in order
    case GSL_CHANNEL_ORDER_RGBA:
    case GSL_CHANNEL_ORDER_SRGBA:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_COMPONENT1;
        chanSwiz.b = SWIZZLE_COMPONENT2;
        chanSwiz.a = SWIZZLE_COMPONENT3;
        break;
    case GSL_CHANNEL_ORDER_ARGB:
        chanSwiz.r = SWIZZLE_COMPONENT1;
        chanSwiz.g = SWIZZLE_COMPONENT2;
        chanSwiz.b = SWIZZLE_COMPONENT3;
        chanSwiz.a = SWIZZLE_COMPONENT0;
        break;
    // Red and blue exchanged
    case GSL_CHANNEL_ORDER_BGRA:
    case GSL_CHANNEL_ORDER_SBGRA:
        chanSwiz.r = SWIZZLE_COMPONENT2;
        chanSwiz.g = SWIZZLE_COMPONENT1;
        chanSwiz.b = SWIZZLE_COMPONENT0;
        chanSwiz.a = SWIZZLE_COMPONENT3;
        break;
    // First component replicated to every channel
    case GSL_CHANNEL_ORDER_REPLICATE_R:
    case GSL_CHANNEL_ORDER_INTENSITY:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_COMPONENT0;
        chanSwiz.b = SWIZZLE_COMPONENT0;
        chanSwiz.a = SWIZZLE_COMPONENT0;
        break;
    case GSL_CHANNEL_ORDER_LUMINANCE:
        chanSwiz.r = SWIZZLE_COMPONENT0;
        chanSwiz.g = SWIZZLE_COMPONENT0;
        chanSwiz.b = SWIZZLE_COMPONENT0;
        chanSwiz.a = SWIZZLE_ONE;
        break;
    default: assert(0); break;
    };

    // Hand the swizzle back through the in/out parameter as its raw 32 bit image
    *channelOrder = *(uint32 *)&chanSwiz;
}
//! Fills the caller supplied buffer with the texture SRD of an image.
//!
//! \param mem          image memory object
//! \param hwState      destination buffer, forwarded to getTextureSrd()
//! \param hwStateSize  size of the destination buffer, forwarded to getTextureSrd()
void
CALGSLDevice::fillImageHwState(gslMemObject mem, void* hwState, uint32 hwStateSize) const
{
    //! @note: GSL device isn't thread safe
    amd::ScopedLock k(gslDeviceOps());

    // Translate the image's channel order into the packed swizzle
    intp swizzle = mem->getAttribs().channelOrder;
    convertInputChannelOrder(&swizzle);

    m_textureResource->updateDepthTextureParam(mem);

    const char* swizzleBytes = reinterpret_cast<const char*>(&swizzle);
    m_textureResource->getTextureSrd(m_cs, mem, swizzleBytes, hwState, hwStateSize);
}
void
CALGSLDevice::fillSamplerHwState(bool unnorm, uint32 min, uint32 mag, uint32 addr,
float minLod, float maxLod, void* hwState, uint32 hwStateSize) const
{
amd::ScopedLock k(gslDeviceOps());
m_textureSampler->setUnnormalizedMode(m_cs, unnorm);
m_textureSampler->setMinFilter(m_cs, static_cast<gslTexParameterParamMinFilter>(min));
m_textureSampler->setMagFilter(m_cs, static_cast<gslTexParameterParamMagFilter>(mag));
m_textureSampler->setWrap(m_cs, GSL_TEXTURE_WRAP_S, static_cast<gslTexParameterParamWrap>(addr));
m_textureSampler->setWrap(m_cs, GSL_TEXTURE_WRAP_T, static_cast<gslTexParameterParamWrap>(addr));
m_textureSampler->setWrap(m_cs, GSL_TEXTURE_WRAP_R, static_cast<gslTexParameterParamWrap>(addr));
m_textureSampler->setMinLOD(m_cs, static_cast<float32>(minLod));
m_textureSampler->setMaxLOD(m_cs, static_cast<float32>(maxLod));
m_textureSampler->getSamplerSrd(m_cs, hwState, hwStateSize);
}
//! Forwards a clock mode request to the adapter.
//!
//! \param clockModeInfo  request structure handed to requestClockModeInfo()
//! \return true if the adapter reports GSL_SETCLOCK_SUCCESS or
//!         GSL_SETCLOCK_QUERY_ONLY, false otherwise
bool
CALGSLDevice::gslSetClockMode(GSLClockModeInfo * clockModeInfo)
{
    // The adapter interface takes the request as an untyped pointer
    const uint32 status = m_adp->requestClockModeInfo(static_cast<void*>(clockModeInfo));

    return (status == GSL_SETCLOCK_SUCCESS) || (status == GSL_SETCLOCK_QUERY_ONLY);
}