P4 to Git Change 1312566 by lmoriche@lmoriche_opencl_dev on 2016/09/08 18:25:02
SWDEV-94610 - Make sure each kernarg segment sits on a different cache line (align the kernargs to at least the cache line size). Minor misc cleanups. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#13 edit
This commit is contained in:
@@ -817,8 +817,18 @@ Device::populateOCLDeviceConstants()
|
||||
info_.maxSamplers_ = 16;
|
||||
info_.bufferFromImageSupport_ = CL_FALSE;
|
||||
info_.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
|
||||
strcpy(info_.driverVersion_, "1.0 Provisional (hsa)");
|
||||
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
|
||||
|
||||
uint16_t major, minor;
|
||||
if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MAJOR, &major)
|
||||
!= HSA_STATUS_SUCCESS
|
||||
|| hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_VERSION_MINOR, &minor)
|
||||
!= HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
std::stringstream ss;
|
||||
ss << major << "." << minor << " (hsa)";
|
||||
strcpy(info_.driverVersion_, ss.str().c_str());
|
||||
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
|
||||
|
||||
info_.builtInKernels_ = "";
|
||||
info_.linkerAvailable_ = true;
|
||||
|
||||
@@ -689,16 +689,14 @@ Kernel::Kernel(
|
||||
const uint32_t workgroupGroupSegmentByteSize,
|
||||
const uint32_t workitemPrivateSegmentByteSize,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
const uint32_t kernargSegmentAlignment,
|
||||
uint extraArgsNum)
|
||||
const uint32_t kernargSegmentAlignment)
|
||||
: device::Kernel(name),
|
||||
program_(prog),
|
||||
kernelCodeHandle_(kernelCodeHandle),
|
||||
workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
|
||||
workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
|
||||
kernargSegmentByteSize_(kernargSegmentByteSize),
|
||||
kernargSegmentAlignment_(kernargSegmentAlignment),
|
||||
extraArgumentsNum_(extraArgsNum) {}
|
||||
kernargSegmentAlignment_(kernargSegmentAlignment) {}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
bool Kernel::init_LC()
|
||||
|
||||
@@ -96,8 +96,7 @@ public:
|
||||
const uint32_t workgroupGroupSegmentByteSize,
|
||||
const uint32_t workitemPrivateSegmentByteSize,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
const uint32_t kernargSegmentAlignment,
|
||||
uint extraArgsNum);
|
||||
const uint32_t kernargSegmentAlignment);
|
||||
|
||||
const uint64_t& KernelCodeHandle() {
|
||||
return kernelCodeHandle_;
|
||||
@@ -144,11 +143,6 @@ public:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Max number of possible extra (hidden) kernel arguments
|
||||
static const uint MaxExtraArgumentsNum = 6;
|
||||
|
||||
uint extraArgumentsNum() const { return extraArgumentsNum_; }
|
||||
|
||||
//! Return printf info array
|
||||
const std::vector<PrintfInfo>& printfInfo() const {return printf_;}
|
||||
|
||||
@@ -172,7 +166,6 @@ private:
|
||||
const uint32_t kernargSegmentByteSize_;
|
||||
const uint32_t kernargSegmentAlignment_;
|
||||
size_t kernelDirectiveOffset_;
|
||||
const uint extraArgumentsNum_; // Number of arguments in Kernenv
|
||||
std::vector<PrintfInfo> printf_;
|
||||
};
|
||||
|
||||
|
||||
@@ -1093,9 +1093,6 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
|
||||
return false;
|
||||
}
|
||||
|
||||
// for OpenCL default hidden kernel arguments assuming there is no printf
|
||||
size_t numHiddenKernelArgs = 3; // FIXME_Wilkin
|
||||
|
||||
Kernel *aKernel = new roc::Kernel(
|
||||
kernelName,
|
||||
this,
|
||||
@@ -1105,10 +1102,8 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
|
||||
// TODO: remove the workaround
|
||||
// add 24 bytes for global offsets as a workaround for LC reporting
|
||||
// a kernarg segment size that excludes the hidden arguments
|
||||
amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + numHiddenKernelArgs * sizeof(size_t),
|
||||
kernargSegmentAlignment,
|
||||
numHiddenKernelArgs
|
||||
);
|
||||
amd::alignUp(kernargSegmentByteSize, sizeof(size_t)) + 3 * sizeof(size_t),
|
||||
amd::alignUp(kernargSegmentAlignment,device().info().globalMemCacheLineSize_));
|
||||
if (!aKernel->init()) {
|
||||
return false;
|
||||
}
|
||||
@@ -1450,11 +1445,30 @@ std::string
|
||||
HSAILProgram::hsailOptions(amd::option::Options* options)
|
||||
{
|
||||
std::string hsailOptions;
|
||||
|
||||
// Set the standard device-specific options
|
||||
|
||||
hsailOptions.append(" -D__AMD__");
|
||||
|
||||
int major, minor;
|
||||
::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
|
||||
hsailOptions.append(ss.str());
|
||||
|
||||
if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
|
||||
hsailOptions.append(" -D__IMAGE_SUPPORT__");
|
||||
}
|
||||
|
||||
//This is just for legacy compiler code
|
||||
// All our devices support these options now
|
||||
hsailOptions.append(" -DFP_FAST_FMAF");
|
||||
hsailOptions.append(" -DFP_FAST_FMA");
|
||||
if (options->oVariables->FastFMA) {
|
||||
hsailOptions.append(" -DFP_FAST_FMA");
|
||||
}
|
||||
if (options->oVariables->FastFMAF) {
|
||||
hsailOptions.append(" -DFP_FAST_FMAF");
|
||||
}
|
||||
|
||||
if (dev().deviceInfo().gfxipVersion_ < 900) {
|
||||
hsailOptions.append(" -cl-denorms-are-zero");
|
||||
|
||||
@@ -519,11 +519,6 @@ VirtualGPU::create(bool profilingEna)
|
||||
}
|
||||
|
||||
device::BlitManager::Setup blitSetup;
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
// TODO: Wilkin - remove the setting of value_ after image kernels are available
|
||||
blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
blitMgr_ = new KernelBlitManager(*this, blitSetup);
|
||||
if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) {
|
||||
LogError("Could not create BlitManager!");
|
||||
|
||||
Verwijs in nieuw issue
Block a user