b9916d35f4
SWDEV-102698 - [OCL-LC-ROCm] Add code caching support to OpenCL program manager Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#146 edit ... //depot/stg/opencl/drivers/opencl/compiler/tools/Makefile#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/build/Makefile.runtime#65 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#205 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#280 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#49 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#261 edit
120 lines
3.5 KiB
C++
120 lines
3.5 KiB
C++
//
|
|
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#ifndef WITHOUT_GPU_BACKEND
|
|
|
|
#include "top.hpp"
|
|
#include "os/os.hpp"
|
|
#include "device/device.hpp"
|
|
#include "rocsettings.hpp"
|
|
#include "device/rocm/rocglinterop.hpp"
|
|
|
|
namespace roc {
|
|
|
|
// Fills in the default settings for the HSA (ROCm) device. Most values are
// taken from runtime flags; two of them are derived from the presence of
// environment variables.
Settings::Settings()
{
    // Double precision currently follows the CL_KHR_FP64 flag; switch this
    // to a plain 'true' once that flag is dropped.
    doublePrecision_ = ::CL_KHR_FP64;
    pollCompletion_ = ENVVAR_HSA_POLL_KERNEL_COMPLETION;

    enableLocalMemory_ = HSA_LOCAL_MEMORY_ENABLE;
    enableImageHandle_ = true;

    // Work-group size defaults.
    maxWorkGroupSize_ = 256;
    maxWorkGroupSize2DX_ = 16;
    maxWorkGroupSize2DY_ = 16;
    maxWorkGroupSize3DX_ = 4;
    maxWorkGroupSize3DY_ = 4;
    maxWorkGroupSize3DZ_ = 4;

    kernargPoolSize_ = HSA_KERNARG_POOL_SIZE;
    signalPoolSize_ = HSA_SIGNAL_POOL_SIZE;

    // System memory is programmed to be coherent by default. A user can
    // request non-coherent mode (for hardware that does not naturally
    // support coherency) by defining OPENCL_USE_NC_MEMORY_POLICY; only the
    // variable's presence is checked, its value is ignored.
    const bool ncPolicyRequested =
        (getenv("OPENCL_USE_NC_MEMORY_POLICY") != NULL);
    enableNCMode_ = ncPolicyRequested;

    // Partial dispatch is supported by default. Defining
    // OPENCL_DISABLE_PARTIAL_DISPATCH (any value) turns it off for devices
    // that lack the feature.
    //
    // @note Update appropriate field of device::Settings
    const bool partialDispatchAllowed =
        (getenv("OPENCL_DISABLE_PARTIAL_DISPATCH") == NULL);
    enablePartialDispatch_ = partialDispatchAllowed;
    partialDispatch_ = partialDispatchAllowed;

    //!< Maximum number of concurrent Virtual GPUs for the ROCm backend
    commandQueues_ = 100;

    // Code caching for compile and link steps when the Lightning Compiler
    // is in use.
    enableCodeCache_ = OCL_CODE_CACHE_ENABLE;

    // Request to reset the code cache storage; only meaningful when the
    // code cache itself is enabled.
    resetCodeCache_ = OCL_CODE_CACHE_RESET;
}
|
|
|
|
bool
|
|
Settings::create(bool doublePrecision)
|
|
{
|
|
customHostAllocator_ = true;
|
|
|
|
// Enable extensions
|
|
enableExtension(ClKhrByteAddressableStore);
|
|
enableExtension(ClKhrGlobalInt32BaseAtomics);
|
|
enableExtension(ClKhrGlobalInt32ExtendedAtomics);
|
|
enableExtension(ClKhrLocalInt32BaseAtomics);
|
|
enableExtension(ClKhrLocalInt32ExtendedAtomics);
|
|
enableExtension(ClKhrInt64BaseAtomics);
|
|
enableExtension(ClKhrInt64ExtendedAtomics);
|
|
enableExtension(ClKhr3DImageWrites);
|
|
enableExtension(ClAmdMediaOps);
|
|
enableExtension(ClAmdMediaOps2);
|
|
if(MesaInterop::Supported())
|
|
enableExtension(ClKhrGlSharing);
|
|
|
|
// Make sure device supports doubles
|
|
doublePrecision_ &= doublePrecision;
|
|
|
|
if (doublePrecision_) {
|
|
// Enable KHR double precision extension
|
|
enableExtension(ClKhrFp64);
|
|
#if !defined(WITH_LIGHTNING_COMPILER)
|
|
// Also enable AMD double precision extension?
|
|
enableExtension(ClAmdFp64);
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
}
|
|
|
|
enableExtension(ClKhrSubGroups);
|
|
|
|
enableExtension(ClKhrDepthImages);
|
|
supportDepthsRGB_ = true;
|
|
|
|
// Override current device settings
|
|
override();
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Settings::override()
|
|
{
|
|
if (!flagIsDefault(GPU_MAX_COMMAND_QUEUES)) {
|
|
commandQueues_ = GPU_MAX_COMMAND_QUEUES;
|
|
}
|
|
}
|
|
|
|
} // namespace roc
|
|
|
|
#endif // WITHOUT_GPU_BACKEND
|