From 4ea764d48569e333d40b5bad4a5d379620e4ea09 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 1 Aug 2014 14:32:36 -0400 Subject: [PATCH] P4 to Git Change 1062136 by jatang@jatang-opencl-hsa-stg2 on 2014/08/01 14:19:58 EPR #402950 - Properly align the scratch buffer to 64K. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#452 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#189 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/include/cal/calcl.h#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#95 edit [ROCm/clr commit: 268ff9830e3ae55324215c16c1652aa3220de92c] --- projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp | 5 ++--- projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp | 8 +++++++- .../runtime/device/gpu/gslbe/src/include/cal/calcl.h | 1 + .../rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp | 1 + 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index 1d088139e2..7bd01093fc 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -2289,9 +2289,8 @@ Device::allocScratch(uint regNum, const VirtualGPU* vgpu) ScopedLockVgpus lock(*this); std::vector& mems = scratch_[s]->memObjs_; - // Calculate the size of the new buffer + - // (64 Ki) for alignment with generic address space - size_t size = calcScratchBufferSize(regNum) + 64 * Ki; + // Calculate the size of the new buffer + size_t size = calcScratchBufferSize(regNum); scratch_[s]->destroyMemory(); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp index 6de10e50d1..ad30cf6862 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuresource.cpp @@ -346,6 +346,7 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap) bool useRowPitch = false; desc.vaBase = 0; + desc.minAlignment = 0; desc.section = GSL_SECTION_REGULAR; if (NULL != params && NULL != params->owner_) { //make sure params not NULL mcaddr svmPtr = reinterpret_cast(params->owner_->getSvmPtr()); @@ -453,7 +454,12 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap) // Check resource cache first for an appropriate resource gslRef_ = dev().resourceCache().findCalResource(&cal_); if (memType == Scratch) { - desc.vaBase = static_cast(0x100000000ULL); + if ((dev().settings().hsail_) || (dev().settings().oclVersion_ >= OpenCL20)) { + desc.minAlignment = 64 * Ki; + } + else { + desc.vaBase = static_cast(0x100000000ULL); + } } else if ((gslRef_ != NULL) && (!dev().settings().use64BitPtr_)) { // Make sure runtime didn't pick a resource with > 4GB address diff --git a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/include/cal/calcl.h b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/include/cal/calcl.h index 07f0080383..59dd8e1b92 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/include/cal/calcl.h +++ b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/include/cal/calcl.h @@ -145,6 +145,7 @@ typedef struct CALresourceDescRec { CALuint64 busAddress[2]; mcaddr vaBase; gslMemObjectAttribSection section; + CALuint minAlignment; } CALresourceDesc; typedef enum CALresallocsliceviewflagsRec { diff --git a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp index e5ae58c975..5515dbe0ee 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp @@ -741,6 +741,7 @@ CALGSLDevice::resAlloc(const CALresourceDesc* desc) const attribs.location = desc->type; attribs.vaBase = desc->vaBase; attribs.section = desc->section; + attribs.minAlignment = desc->minAlignment; //!@note GSL asserts with tiled 1D images of any type. if ((desc->dimension == GSL_MOA_BUFFER) ||