From b602575d442da5d5c20153c8c5400274c34fcc7d Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 8 Sep 2016 19:52:04 -0400 Subject: [PATCH] P4 to Git Change 1312566 by lmoriche@lmoriche_opencl_dev on 2016/09/08 18:25:02 SWDEV-94610 - Make sure each kernarg segment sits on a different cache line (align the kernargs on cache lines at minimum). Minor misc cleanups. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#13 edit [ROCm/clr commit: 3a61b24dd5bc8724f357dc2f0bdd14a135ca59ff] --- .../rocclr/runtime/device/rocm/rocdevice.cpp | 14 ++++++-- .../rocclr/runtime/device/rocm/rockernel.cpp | 6 ++-- .../rocclr/runtime/device/rocm/rockernel.hpp | 9 +----- .../rocclr/runtime/device/rocm/rocprogram.cpp | 32 +++++++++++++------ .../rocclr/runtime/device/rocm/rocvirtual.cpp | 5 --- 5 files changed, 38 insertions(+), 28 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp index 60cbb5788b..c01413ba18 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp @@ -817,8 +817,18 @@ Device::populateOCLDeviceConstants() info_.maxSamplers_ = 16; info_.bufferFromImageSupport_ = CL_FALSE; info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " "; - strcpy(info_.driverVersion_, "1.0 Provisional (hsa)"); - info_.version_ = "OpenCL " OPENCL_VERSION_STR " "; + + uint16_t major, minor; + if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MAJOR, &major) + != HSA_STATUS_SUCCESS + || hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MINOR, &minor) + != HSA_STATUS_SUCCESS) { + return false; + } + std::stringstream ss; + ss << major << "." << minor << " (hsa)"; + strcpy(info_.driverVersion_, ss.str().c_str()); + info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " "; info_.builtInKernels_ = ""; info_.linkerAvailable_ = true; diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp index f27931308a..b97714e978 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp @@ -689,16 +689,14 @@ Kernel::Kernel( const uint32_t workgroupGroupSegmentByteSize, const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize, - const uint32_t kernargSegmentAlignment, - uint extraArgsNum) + const uint32_t kernargSegmentAlignment) : device::Kernel(name), program_(prog), kernelCodeHandle_(kernelCodeHandle), workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize), workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize), kernargSegmentByteSize_(kernargSegmentByteSize), - kernargSegmentAlignment_(kernargSegmentAlignment), - extraArgumentsNum_(extraArgsNum) {} + kernargSegmentAlignment_(kernargSegmentAlignment) {} #if defined(WITH_LIGHTNING_COMPILER) bool Kernel::init_LC() diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp index 956d3be17e..747187fcc6 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp @@ -96,8 +96,7 @@ public: const uint32_t workgroupGroupSegmentByteSize, const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize, - const uint32_t kernargSegmentAlignment, - uint extraArgsNum); + const uint32_t kernargSegmentAlignment); const uint64_t& KernelCodeHandle() { return kernelCodeHandle_; @@ -144,11 +143,6 @@ public: return NULL; } - //! Max number of possible extra (hidden) kernel arguments - static const uint MaxExtraArgumentsNum = 6; - - uint extraArgumentsNum() const { return extraArgumentsNum_; } - //! Return printf info array const std::vector& printfInfo() const {return printf_;} @@ -172,7 +166,6 @@ private: const uint32_t kernargSegmentByteSize_; const uint32_t kernargSegmentAlignment_; size_t kernelDirectiveOffset_; - const uint extraArgumentsNum_; // Number of arguments in Kernenv std::vector printf_; }; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp index 66fae6e6d0..2ac2b9ee00 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp @@ -1093,9 +1093,6 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options) return false; } - // for OpenCL default hidden kernel arguments assuming there is no printf - size_t numHiddenKernelArgs = 3; // FIXME_Wilkin - Kernel *aKernel = new roc::Kernel( kernelName, this, @@ -1105,10 +1102,8 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options) // TODO: remove the workaround // add 24 bytes for global offsets as workaround for LC reporting // excluded the hidden arguments - amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + numHiddenKernelArgs * sizeof(size_t), - kernargSegmentAlignment, - numHiddenKernelArgs - ); + amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + 3 * sizeof(size_t), + amd::alignUp(kernargSegmentAlignment,device().info().globalMemCacheLineSize_)); if (!aKernel->init()) { return false; } @@ -1450,11 +1445,30 @@ std::string HSAILProgram::hsailOptions(amd::option::Options* options) { std::string hsailOptions; + //Set options for the standard device specific options + + hsailOptions.append(" -D__AMD__"); + + int major, minor; + ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor); + + std::stringstream ss; + ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10); + hsailOptions.append(ss.str()); + + if (device().info().imageSupport_ && options->oVariables->ImageSupport) { + hsailOptions.append(" -D__IMAGE_SUPPORT__"); + } + //This is just for legacy compiler code // All our devices support these options now - hsailOptions.append(" -DFP_FAST_FMAF"); - hsailOptions.append(" -DFP_FAST_FMA"); + if (options->oVariables->FastFMA) { + hsailOptions.append(" -DFP_FAST_FMA"); + } + if (options->oVariables->FastFMAF) { + hsailOptions.append(" -DFP_FAST_FMAF"); + } if (dev().deviceInfo().gfxipVersion_ < 900) { hsailOptions.append(" -cl-denorms-are-zero"); diff --git a/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp index 620509386e..b5473a5589 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocvirtual.cpp @@ -519,11 +519,6 @@ VirtualGPU::create(bool profilingEna) } device::BlitManager::Setup blitSetup; - #if defined(WITH_LIGHTNING_COMPILER) - // TODO: Wilkin - remove the setting of value_ after image kernels are available - blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now -#endif // defined(WITH_LIGHTNING_COMPILER) - blitMgr_ = new KernelBlitManager(*this, blitSetup); if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) { LogError("Could not create BlitManager!");