From 083b8ee0b41c074875e18cb37a03386edca2ab11 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 22 Sep 2017 11:05:41 -0400 Subject: [PATCH] P4 to Git Change 1462147 by asalmanp@asalmanp-ocl-stg on 2017/09/22 10:49:57 SWDEV-132899 - [OCL][GFX10] Add support for GFX10 Adjusting WaveFrontSize for Null Devices based on the gfxip (the WaveFrontSize is 32 for gfxip10) ReviewBoardURL = http://ocltc.amd.com/reviews/r/13486/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#63 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#37 edit [ROCm/clr commit: cb4585939dd4c104ca0f494dceb29071b98fe25a] --- projects/clr/rocclr/runtime/device/pal/paldevice.cpp | 6 ++++-- projects/clr/rocclr/runtime/device/pal/palkernel.cpp | 8 +++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index 768c25bec0..2e448255d9 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -229,6 +229,8 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, // Runtime doesn't know what local size could be on the real board info_.maxGlobalVariableSize_ = static_cast(512 * Mi); + info_.wavefrontWidth_ = (ipLevel >= Pal::GfxIpLevel::GfxIp10) ? 32 : 64; + return true; } @@ -1796,7 +1798,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) { uint sb = vgpu->hwRing(); static const uint WaveSizeLimit = ((1 << 21) - 256); const uint threadSizeLimit = - WaveSizeLimit / properties().gfxipProperties.shaderCore.wavefrontSize; + WaveSizeLimit / info().wavefrontWidth_; if (regNum > threadSizeLimit) { LogError("Requested private memory is bigger than HW supports!"); regNum = threadSizeLimit; @@ -1819,7 +1821,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) { uint32_t numTotalCUs = info().maxComputeUnits_; uint32_t numMaxWaves = settings().numScratchWavesPerCu_ * numTotalCUs; scratchBuf->size_ = - static_cast(properties().gfxipProperties.shaderCore.wavefrontSize) * + static_cast(info().wavefrontWidth_) * scratchBuf->regNum_ * numMaxWaves * sizeof(uint32_t); scratchBuf->size_ = std::min(scratchBuf->size_, info().maxMemAllocSize_); scratchBuf->size_ = std::min(scratchBuf->size_, uint64_t(3 * Gi)); diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index 526e5199a0..a99b9bb93f 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -406,7 +406,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) { workGroupInfo_.availableVGPRs_ = dev().properties().gfxipProperties.shaderCore.numAvailableVgprs; workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ = - dev().properties().gfxipProperties.shaderCore.wavefrontSize; + dev().info().wavefrontWidth_; } else { workGroupInfo_.availableLDSSize_ = 64 * Ki; workGroupInfo_.availableSGPRs_ = 104; @@ -652,8 +652,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { } // Copy wavefront size - workGroupInfo_.wavefrontSize_ = - prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize; + workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_; // Find total workgroup size if (workGroupInfo_.compileSize_[0] != 0) { workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] * @@ -1533,8 +1532,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { } // Copy wavefront size - workGroupInfo_.wavefrontSize_ = - prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize; + workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_; // Find total workgroup size if (workGroupInfo_.compileSize_[0] != 0) { workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *