P4 to Git Change 1462147 by asalmanp@asalmanp-ocl-stg on 2017/09/22 10:49:57
SWDEV-132899 - [OCL][GFX10] Add support for GFX10
Adjust the wavefront size for null devices based on the GFX IP level (the wavefront size is 32 for GFX IP 10 and later, and 64 otherwise)
ReviewBoardURL = http://ocltc.amd.com/reviews/r/13486/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#37 edit
[ROCm/clr commit: cb4585939d]
This commit is contained in:
@@ -229,6 +229,8 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
|
||||
// Runtime doesn't know what local size could be on the real board
|
||||
info_.maxGlobalVariableSize_ = static_cast<size_t>(512 * Mi);
|
||||
|
||||
info_.wavefrontWidth_ = (ipLevel >= Pal::GfxIpLevel::GfxIp10) ? 32 : 64;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1796,7 +1798,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
|
||||
uint sb = vgpu->hwRing();
|
||||
static const uint WaveSizeLimit = ((1 << 21) - 256);
|
||||
const uint threadSizeLimit =
|
||||
WaveSizeLimit / properties().gfxipProperties.shaderCore.wavefrontSize;
|
||||
WaveSizeLimit / info().wavefrontWidth_;
|
||||
if (regNum > threadSizeLimit) {
|
||||
LogError("Requested private memory is bigger than HW supports!");
|
||||
regNum = threadSizeLimit;
|
||||
@@ -1819,7 +1821,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
|
||||
uint32_t numTotalCUs = info().maxComputeUnits_;
|
||||
uint32_t numMaxWaves = settings().numScratchWavesPerCu_ * numTotalCUs;
|
||||
scratchBuf->size_ =
|
||||
static_cast<uint64_t>(properties().gfxipProperties.shaderCore.wavefrontSize) *
|
||||
static_cast<uint64_t>(info().wavefrontWidth_) *
|
||||
scratchBuf->regNum_ * numMaxWaves * sizeof(uint32_t);
|
||||
scratchBuf->size_ = std::min(scratchBuf->size_, info().maxMemAllocSize_);
|
||||
scratchBuf->size_ = std::min(scratchBuf->size_, uint64_t(3 * Gi));
|
||||
|
||||
@@ -406,7 +406,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
|
||||
workGroupInfo_.availableVGPRs_ =
|
||||
dev().properties().gfxipProperties.shaderCore.numAvailableVgprs;
|
||||
workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ =
|
||||
dev().properties().gfxipProperties.shaderCore.wavefrontSize;
|
||||
dev().info().wavefrontWidth_;
|
||||
} else {
|
||||
workGroupInfo_.availableLDSSize_ = 64 * Ki;
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
@@ -652,8 +652,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
|
||||
}
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ =
|
||||
prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize;
|
||||
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
|
||||
// Find total workgroup size
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
|
||||
@@ -1533,8 +1532,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
|
||||
}
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ =
|
||||
prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize;
|
||||
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
|
||||
// Find total workgroup size
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
|
||||
|
||||
Reference in a new issue
Block a user