P4 to Git Change 1462147 by asalmanp@asalmanp-ocl-stg on 2017/09/22 10:49:57

SWDEV-132899 - [OCL][GFX10] Add support for GFX10

	Adjust the wavefront size reported for null devices based on the GFXIP level (32 for GFXIP 10 and later, 64 otherwise).

	ReviewBoardURL = http://ocltc.amd.com/reviews/r/13486/

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#37 edit


[ROCm/clr commit: cb4585939d]
Этот коммит содержится в:
foreman
2017-09-22 11:05:41 -04:00
родитель 7e076a69ae
Коммит 083b8ee0b4
2 изменённых файла: 7 добавлений и 7 удалений
+4 -2
Просмотреть файл
@@ -229,6 +229,8 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
// Runtime doesn't know what local size could be on the real board
info_.maxGlobalVariableSize_ = static_cast<size_t>(512 * Mi);
+info_.wavefrontWidth_ = (ipLevel >= Pal::GfxIpLevel::GfxIp10) ? 32 : 64;
return true;
}
@@ -1796,7 +1798,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
uint sb = vgpu->hwRing();
static const uint WaveSizeLimit = ((1 << 21) - 256);
const uint threadSizeLimit =
-WaveSizeLimit / properties().gfxipProperties.shaderCore.wavefrontSize;
+WaveSizeLimit / info().wavefrontWidth_;
if (regNum > threadSizeLimit) {
LogError("Requested private memory is bigger than HW supports!");
regNum = threadSizeLimit;
@@ -1819,7 +1821,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
uint32_t numTotalCUs = info().maxComputeUnits_;
uint32_t numMaxWaves = settings().numScratchWavesPerCu_ * numTotalCUs;
scratchBuf->size_ =
-static_cast<uint64_t>(properties().gfxipProperties.shaderCore.wavefrontSize) *
+static_cast<uint64_t>(info().wavefrontWidth_) *
scratchBuf->regNum_ * numMaxWaves * sizeof(uint32_t);
scratchBuf->size_ = std::min(scratchBuf->size_, info().maxMemAllocSize_);
scratchBuf->size_ = std::min(scratchBuf->size_, uint64_t(3 * Gi));
+3 -5
Просмотреть файл
@@ -406,7 +406,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
workGroupInfo_.availableVGPRs_ =
dev().properties().gfxipProperties.shaderCore.numAvailableVgprs;
workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ =
-dev().properties().gfxipProperties.shaderCore.wavefrontSize;
+dev().info().wavefrontWidth_;
} else {
workGroupInfo_.availableLDSSize_ = 64 * Ki;
workGroupInfo_.availableSGPRs_ = 104;
@@ -652,8 +652,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
}
// Copy wavefront size
-workGroupInfo_.wavefrontSize_ =
-prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize;
+workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
@@ -1533,8 +1532,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
}
// Copy wavefront size
-workGroupInfo_.wavefrontSize_ =
-prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize;
+workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *