From 3a61b24dd5bc8724f357dc2f0bdd14a135ca59ff Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 8 Sep 2016 19:52:04 -0400
Subject: [PATCH] P4 to Git Change 1312566 by lmoriche@lmoriche_opencl_dev on
2016/09/08 18:25:02
SWDEV-94610 - Make sure each kernarg segment sits on a different cache line (align the kernargs to at least the cache-line size). Minor miscellaneous cleanups.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#13 edit
---
rocclr/runtime/device/rocm/rocdevice.cpp | 14 ++++++++--
rocclr/runtime/device/rocm/rockernel.cpp | 6 ++---
rocclr/runtime/device/rocm/rockernel.hpp | 9 +------
rocclr/runtime/device/rocm/rocprogram.cpp | 32 ++++++++++++++++-------
rocclr/runtime/device/rocm/rocvirtual.cpp | 5 ----
5 files changed, 38 insertions(+), 28 deletions(-)
diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp
index 60cbb5788b..c01413ba18 100644
--- a/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -817,8 +817,18 @@ Device::populateOCLDeviceConstants()
info_.maxSamplers_ = 16;
info_.bufferFromImageSupport_ = CL_FALSE;
info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
- strcpy(info_.driverVersion_, "1.0 Provisional (hsa)");
- info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
+
+ uint16_t major, minor;
+ if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MAJOR, &major)
+ != HSA_STATUS_SUCCESS
+ || hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MINOR, &minor)
+ != HSA_STATUS_SUCCESS) {
+ return false;
+ }
+ std::stringstream ss;
+ ss << major << "." << minor << " (hsa)";
+ strcpy(info_.driverVersion_, ss.str().c_str());
+ info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
info_.builtInKernels_ = "";
info_.linkerAvailable_ = true;
diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp
index f27931308a..b97714e978 100644
--- a/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/rocclr/runtime/device/rocm/rockernel.cpp
@@ -689,16 +689,14 @@ Kernel::Kernel(
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize,
const uint32_t kernargSegmentByteSize,
- const uint32_t kernargSegmentAlignment,
- uint extraArgsNum)
+ const uint32_t kernargSegmentAlignment)
: device::Kernel(name),
program_(prog),
kernelCodeHandle_(kernelCodeHandle),
workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
kernargSegmentByteSize_(kernargSegmentByteSize),
- kernargSegmentAlignment_(kernargSegmentAlignment),
- extraArgumentsNum_(extraArgsNum) {}
+ kernargSegmentAlignment_(kernargSegmentAlignment) {}
#if defined(WITH_LIGHTNING_COMPILER)
bool Kernel::init_LC()
diff --git a/rocclr/runtime/device/rocm/rockernel.hpp b/rocclr/runtime/device/rocm/rockernel.hpp
index 956d3be17e..747187fcc6 100644
--- a/rocclr/runtime/device/rocm/rockernel.hpp
+++ b/rocclr/runtime/device/rocm/rockernel.hpp
@@ -96,8 +96,7 @@ public:
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize,
const uint32_t kernargSegmentByteSize,
- const uint32_t kernargSegmentAlignment,
- uint extraArgsNum);
+ const uint32_t kernargSegmentAlignment);
const uint64_t& KernelCodeHandle() {
return kernelCodeHandle_;
@@ -144,11 +143,6 @@ public:
return NULL;
}
- //! Max number of possible extra (hidden) kernel arguments
- static const uint MaxExtraArgumentsNum = 6;
-
- uint extraArgumentsNum() const { return extraArgumentsNum_; }
-
//! Return printf info array
const std::vector& printfInfo() const {return printf_;}
@@ -172,7 +166,6 @@ private:
const uint32_t kernargSegmentByteSize_;
const uint32_t kernargSegmentAlignment_;
size_t kernelDirectiveOffset_;
- const uint extraArgumentsNum_; // Number of arguments in Kernenv
std::vector printf_;
};
diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp
index 66fae6e6d0..2ac2b9ee00 100644
--- a/rocclr/runtime/device/rocm/rocprogram.cpp
+++ b/rocclr/runtime/device/rocm/rocprogram.cpp
@@ -1093,9 +1093,6 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
return false;
}
- // for OpenCL default hidden kernel arguments assuming there is no printf
- size_t numHiddenKernelArgs = 3; // FIXME_Wilkin
-
Kernel *aKernel = new roc::Kernel(
kernelName,
this,
@@ -1105,10 +1102,8 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
// TODO: remove the workaround
// add 24 bytes for global offsets as workaround for LC reporting
// excluded the hidden arguments
- amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + numHiddenKernelArgs * sizeof(size_t),
- kernargSegmentAlignment,
- numHiddenKernelArgs
- );
+ amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + 3 * sizeof(size_t),
+ amd::alignUp(kernargSegmentAlignment,device().info().globalMemCacheLineSize_));
if (!aKernel->init()) {
return false;
}
@@ -1450,11 +1445,30 @@ std::string
HSAILProgram::hsailOptions(amd::option::Options* options)
{
std::string hsailOptions;
+
//Set options for the standard device specific options
+
+ hsailOptions.append(" -D__AMD__");
+
+ int major, minor;
+ ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor);
+
+ std::stringstream ss;
+ ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
+ hsailOptions.append(ss.str());
+
+ if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
+ hsailOptions.append(" -D__IMAGE_SUPPORT__");
+ }
+
//This is just for legacy compiler code
// All our devices support these options now
- hsailOptions.append(" -DFP_FAST_FMAF");
- hsailOptions.append(" -DFP_FAST_FMA");
+ if (options->oVariables->FastFMA) {
+ hsailOptions.append(" -DFP_FAST_FMA");
+ }
+ if (options->oVariables->FastFMAF) {
+ hsailOptions.append(" -DFP_FAST_FMAF");
+ }
if (dev().deviceInfo().gfxipVersion_ < 900) {
hsailOptions.append(" -cl-denorms-are-zero");
diff --git a/rocclr/runtime/device/rocm/rocvirtual.cpp b/rocclr/runtime/device/rocm/rocvirtual.cpp
index 620509386e..b5473a5589 100644
--- a/rocclr/runtime/device/rocm/rocvirtual.cpp
+++ b/rocclr/runtime/device/rocm/rocvirtual.cpp
@@ -519,11 +519,6 @@ VirtualGPU::create(bool profilingEna)
}
device::BlitManager::Setup blitSetup;
- #if defined(WITH_LIGHTNING_COMPILER)
- // TODO: Wilkin - remove the setting of value_ after image kernels are available
- blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
blitMgr_ = new KernelBlitManager(*this, blitSetup);
if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) {
LogError("Could not create BlitManager!");