51f3eeb8f2
EPR #410989 - [Project Brahma] - Report GPU devices as 1.2 until 2.0 is supported on Brahma stack. 1. Remove BRAHMA related definition from opencldefs since they are not needed. 2. Add the definition BRAHMA to runtimedefs since brahma stack is only for runtime/gpu 3. Report GPU devices as 1.2 until 2.0 is supported on Brahma stack. Affected files ... ... //depot/stg/opencl/drivers/opencl/opencldefs#157 edit ... //depot/stg/opencl/drivers/opencl/runtime/runtimedefs#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#227 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#6 edit
269 строки
16 KiB
C++
269 строки
16 KiB
C++
//
|
|
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#ifndef FLAGS_HPP_
|
|
#define FLAGS_HPP_
|
|
|
|
|
|
#define RUNTIME_FLAGS(debug,release,release_on_stg) \
|
|
\
|
|
debug(int, LOG_LEVEL, 0, \
|
|
"The default log level") \
|
|
debug(bool, BREAK_ON_LOG_WARNING, false, \
|
|
"Break each time an error is logged") \
|
|
debug(bool, BREAK_ON_LOG_ERROR, false, \
|
|
"Break each time an error is logged") \
|
|
debug(uint, DEBUG_GPU_FLAGS, 0, \
|
|
"The debug options for GPU device") \
|
|
debug(uint, GPU_MAX_COMMAND_QUEUES, 70, \
|
|
"The maximum number of concurrent Virtual GPUs") \
|
|
release(size_t, CQ_THREAD_STACK_SIZE, 256*Ki, /* @todo: that much! */ \
|
|
"The default command queue thread stack size") \
|
|
release(size_t, CPU_WORKER_THREAD_STACK_SIZE, 64*Ki, \
|
|
"The default CPU worker thread stack size") \
|
|
release(int, CPU_MAX_COMPUTE_UNITS, -1, \
|
|
"Override the number of computation units per CPU device") \
|
|
release(int, GPU_MAX_WORKGROUP_SIZE, 0, \
|
|
"Maximum number of workitems in a workgroup for GPU, 0 -use default") \
|
|
release(int, GPU_MAX_WORKGROUP_SIZE_2D_X, 0, \
|
|
"Maximum number of workitems in a 2D workgroup for GPU, x component, 0 -use default") \
|
|
release(int, GPU_MAX_WORKGROUP_SIZE_2D_Y, 0, \
|
|
"Maximum number of workitems in a 2D workgroup for GPU, y component, 0 -use default") \
|
|
release(int, GPU_MAX_WORKGROUP_SIZE_3D_X, 0, \
|
|
"Maximum number of workitems in a 3D workgroup for GPU, x component, 0 -use default") \
|
|
release(int, GPU_MAX_WORKGROUP_SIZE_3D_Y, 0, \
|
|
"Maximum number of workitems in a 3D workgroup for GPU, y component, 0 -use default") \
|
|
release(int, GPU_MAX_WORKGROUP_SIZE_3D_Z, 0, \
|
|
"Maximum number of workitems in a 3D workgroup for GPU, z component, 0 -use default") \
|
|
release(int, CPU_MAX_WORKGROUP_SIZE, 1024, \
|
|
"Maximum number of workitems in a workgroup for CPU") \
|
|
debug(bool, CPU_MEMORY_GUARD_PAGES, false, \
|
|
"Use guard pages for CPU memory") \
|
|
debug(size_t, CPU_MEMORY_GUARD_PAGE_SIZE, 64, \
|
|
"Size in KB of CPU memory guard page") \
|
|
debug(size_t, CPU_MEMORY_ALIGNMENT_SIZE, 256, \
|
|
"Size in bytes for the default alignment for guarded memory on CPU") \
|
|
debug(size_t, PARAMETERS_MIN_ALIGNMENT, 16, \
|
|
"Minimum alignment required for the abstract parameters stack") \
|
|
debug(size_t, MEMOBJ_BASE_ADDR_ALIGN, 4*Ki, \
|
|
"Alignment of the base address of any allocate memory object") \
|
|
release(cstring, GPU_DEVICE_NAME, "", \
|
|
"Select the device ordinal (will only report a single device)") \
|
|
release(cstring, GPU_DEVICE_ORDINAL, "", \
|
|
"Select the device ordinal (comma seperated list of available devices)") \
|
|
release(bool, REMOTE_ALLOC, false, \
|
|
"Use remote memory for the global heap allocation") \
|
|
release(int, GPU_INITIAL_HEAP_SIZE, 16, \
|
|
"Initial size of the GPU heap in MiB") \
|
|
release(uint, GPU_MAX_HEAP_SIZE, 100, \
|
|
"Set maximum size of the GPU heap to % of board memory") \
|
|
release(int, GPU_HEAP_GROWTH_INCREMENT, 8, \
|
|
"Amount to grow the GPU heap by in MiB") \
|
|
release(uint, GPU_STAGING_BUFFER_SIZE, 512, \
|
|
"Size of the GPU staging buffer in KiB") \
|
|
release(bool, GPU_DUMP_BLIT_KERNELS, false, \
|
|
"Dump the kernels for blit manager") \
|
|
release(uint, GPU_BLIT_ENGINE_TYPE, 0x0, \
|
|
"Blit engine type: 0 - Default, 1 - Host, 2 - CAL, 3 - Kernel") \
|
|
release(bool, GPU_FLUSH_ON_EXECUTION, false, \
|
|
"Submit commands to HW on every operation. 0 - Disable, 1 - Enable") \
|
|
release(bool, GPU_USE_SYNC_OBJECTS, true, \
|
|
"If enabled, use sync objects instead of polling") \
|
|
release(bool, ENABLE_CAL_SHUTDOWN, false, \
|
|
"Enable explicit CAL shutdown (for PM4 capture)") \
|
|
release(bool, CL_KHR_FP64, true, \
|
|
"Enable/Disable support for double precision") \
|
|
release(uint, GPU_OPEN_VIDEO, 0, \
|
|
"Non-zero value allows to report Open Video extension on GPU") \
|
|
release(cstring, AMD_OCL_BUILD_OPTIONS, 0, \
|
|
"Set clBuildProgram() and clCompileProgram()'s options (override)") \
|
|
release(cstring, AMD_OCL_BUILD_OPTIONS_APPEND, 0, \
|
|
"Append clBuildProgram() and clCompileProgram()'s options") \
|
|
release(cstring, AMD_OCL_LINK_OPTIONS, 0, \
|
|
"Set clLinkProgram()'s options (override)") \
|
|
release(cstring, AMD_OCL_LINK_OPTIONS_APPEND, 0, \
|
|
"Append clLinkProgram()'s options") \
|
|
release(cstring, AMD_OCL_SC_LIB, 0, \
|
|
"Set shader compiler shared library name or path") \
|
|
debug(bool, AMD_OCL_SUPPRESS_MESSAGE_BOX, false, \
|
|
"Suppress the error dialog on Windows") \
|
|
debug(bool, OCL_STRESS_BINARY_IMAGE, false, \
|
|
"Exercise the binary image producer and consumer") \
|
|
release(cstring, GPU_PRE_RA_SCHED, "default", \
|
|
"Allows setting of alternate pre-RA-sched") \
|
|
release(size_t, GPU_PINNED_XFER_SIZE, 16, \
|
|
"The pinned buffer size for pinning in read/write transfers") \
|
|
release(size_t, GPU_PINNED_MIN_XFER_SIZE, 512, \
|
|
"The minimal buffer size for pinned read/write transfers in KBytes") \
|
|
release(size_t, GPU_RESOURCE_CACHE_SIZE, 64, \
|
|
"The resource cache size in MB") \
|
|
release(uint, GPU_ASYNC_MEM_COPY, 0, \
|
|
"Enables async memory transfers with DRM engine") \
|
|
release(bool, GPU_FORCE_64BIT_PTR, 0, \
|
|
"Forces 64 bit pointers on GPU") \
|
|
release(bool, GPU_FORCE_OCL20_32BIT, 0, \
|
|
"Forces 32 bit apps to take CLANG\HSAIL path") \
|
|
release(bool, GPU_RAW_TIMESTAMP, 0, \
|
|
"Reports GPU raw timestamps in GPU timeline") \
|
|
release(bool, CPU_IMAGE_SUPPORT, true, \
|
|
"Turn on image support on the CPU device") \
|
|
release(bool, GPU_PARTIAL_DISPATCH, true, \
|
|
"Enables partial dispatch on GPU") \
|
|
release(size_t, GPU_NUM_MEM_DEPENDENCY, 256, \
|
|
"Number of memory objects for dependency tracking") \
|
|
release(size_t, GPU_XFER_BUFFER_SIZE, 0, \
|
|
"Transfer buffer size for image copy optimization in KB") \
|
|
release(bool, GPU_IMAGE_DMA, true, \
|
|
"Enable DRM DMA for image transfers") \
|
|
release(uint, CPU_MAX_ALLOC_PERCENT, 25, \
|
|
"Maximum size of a single allocation in MiB") \
|
|
release(uint, GPU_SINGLE_ALLOC_PERCENT, 75, \
|
|
"Maximum size of a single allocation as percentage of total") \
|
|
release(uint, GPU_NUM_COMPUTE_RINGS, 2, \
|
|
"GPU number of compute rings. 0 - disabled, 1 , 2,.. - the number of compute rings") \
|
|
release(int, GPU_SELECT_COMPUTE_RINGS_ID, -1, \
|
|
"GPU select the compute rings ID -1 - disabled, 0 , 1,.. - the forced compute rings ID for submission") \
|
|
release_on_stg(bool, C1X_ATOMICS, !IS_MAINLINE, \
|
|
"Runtime will report c1x atomics support") \
|
|
release(uint, GPU_WORKLOAD_SPLIT, 22, \
|
|
"Workload split size") \
|
|
release(bool, GPU_USE_SINGLE_SCRATCH, false, \
|
|
"Use single scratch buffer per device instead of per HW ring") \
|
|
release_on_stg(cstring, GPU_TARGET_INFO_ARCH, "amdil", \
|
|
"Select the GPU TargetInfo arch (amdil|hsail)") \
|
|
release(bool, HSA_RUNTIME, 0, \
|
|
"1 = Enable HSA Runtime, any other value or absence disables it.") \
|
|
release(bool, AMD_OCL_WAIT_COMMAND, false, \
|
|
"1 = Enable a wait for every submitted command") \
|
|
debug(bool, AMD_OCL_DEBUG_LINKER, false, \
|
|
"Enable debug output in linker") \
|
|
debug(bool, GPU_SPLIT_LIB, true, \
|
|
"Enable splitting GPU 32/64 bit library") \
|
|
release(bool, GPU_STAGING_WRITE_PERSISTENT, false, \
|
|
"Enable Persistent writes") \
|
|
release(bool, DRMDMA_FOR_LNX_CF, false, \
|
|
"Enable DRMDMA for Linux CrossFire") \
|
|
release(bool, GPU_HSAIL_ENABLE, false, \
|
|
"Enable HSAIL on dGPU stack (requires CI+ HW)") \
|
|
release(bool, GPU_ASSUME_ALIASES, false, \
|
|
"Assume memory aliases in the compilation process") \
|
|
release(uint, GPU_PRINT_CHILD_KERNEL, 0, \
|
|
"Prints the specified number of the child kernels") \
|
|
release(bool, GPU_DIRECT_SRD, false, \
|
|
"Use indirect SRD access in HSAIL") \
|
|
release(bool, GPU_USE_DEVICE_QUEUE, false, \
|
|
"Use a dedicated device queue for the actual submissions") \
|
|
release(bool, GPU_ENABLE_LARGE_ALLOCATION, false, \
|
|
"Enable >4GB single allocations") \
|
|
release(bool, AMD_THREAD_TRACE_ENABLE, !IS_MAINLINE, \
|
|
"Enable thread trace extension") \
|
|
release(uint, OPENCL_VERSION, (IS_BRAHMA ? 120 : 200), \
|
|
"Force GPU opencl verison") \
|
|
release(uint, CPU_OPENCL_VERSION, 120, \
|
|
"Force CPU opencl verison") \
|
|
release(bool, ENVVAR_HSA_POLL_KERNEL_COMPLETION, false, \
|
|
"Determines if Hsa runtime should use polling scheme") \
|
|
release(bool, HSA_LOCAL_MEMORY_ENABLE, false, \
|
|
"Enable HSA device local memory usage") \
|
|
release(bool, HSA_ENABLE_ATOMICS_32B, false, \
|
|
"1 = Enable SVM atomics in 32 bits (HSA backend-only). Any other value keeps then disabled.") \
|
|
release(bool, GPU_IFH_MODE, false, \
|
|
"1 = Enable GPU IFH (infinitely fast hardware) mode. Any other value keeps setting disabled.") \
|
|
release(bool, GPU_MIPMAP, false, \
|
|
"Enables GPU mipmap extension") \
|
|
debug(bool, GPU_FORCE_SINGLE_FP_DENORM, false, \
|
|
"Forces reporting CL_FP_DENORM bit for single precision") \
|
|
debug(bool, OCL_FORCE_CPU_SVM, false, \
|
|
"force svm support for CPU") \
|
|
debug(bool, GPU_ENABLE_HW_DEBUG, false, \
|
|
"Enable HW DEBUG for GPU")
|
|
|
|
|
|
|
|
namespace amd {
|
|
|
|
//! \addtogroup Utils
|
|
// @{
|
|
|
|
struct Flag
|
|
{
|
|
enum Type
|
|
{
|
|
Tinvalid = 0,
|
|
Tbool, //!< A boolean type flag (true, false).
|
|
Tint, //!< An integer type flag (signed).
|
|
Tuint, //!< An integer type flag (unsigned).
|
|
Tsize_t, //!< A size_t type flag.
|
|
Tcstring //!< A string type flag.
|
|
};
|
|
|
|
#define DEFINE_FLAG_NAME(type, name, value, help) k##name,
|
|
enum Name
|
|
{
|
|
RUNTIME_FLAGS(DEFINE_FLAG_NAME, DEFINE_FLAG_NAME, DEFINE_FLAG_NAME)
|
|
numFlags_
|
|
};
|
|
#undef DEFINE_FLAG_NAME
|
|
|
|
#define CAN_SET(type, name, v, h) static const bool cannotSet##name = false;
|
|
#define CANNOT_SET(type, name, v, h) static const bool cannotSet##name = true;
|
|
|
|
#ifdef DEBUG
|
|
RUNTIME_FLAGS(CAN_SET, CAN_SET, CAN_SET)
|
|
#else // !DEBUG
|
|
RUNTIME_FLAGS(CANNOT_SET, CAN_SET, CANNOT_SET)
|
|
#endif // !DEBUG
|
|
|
|
#undef CAN_SET
|
|
#undef CANNOT_SET
|
|
|
|
private:
|
|
|
|
static Flag flags_[];
|
|
|
|
public:
|
|
static char* envstr_;
|
|
const char* name_;
|
|
const void* value_;
|
|
Type type_;
|
|
bool isDefault_;
|
|
|
|
public:
|
|
|
|
static bool init();
|
|
|
|
static void tearDown();
|
|
|
|
bool setValue(const char* value);
|
|
|
|
static bool isDefault(Name name) { return flags_[name].isDefault_; }
|
|
};
|
|
|
|
#define flagIsDefault(name) \
|
|
(amd::Flag::cannotSet##name || amd::Flag::isDefault(amd::Flag::k##name))
|
|
|
|
// @}
|
|
|
|
} // namespace amd
|
|
|
|
#ifdef _WIN32
|
|
# define EXPORT_FLAG extern "C" __declspec(dllexport)
|
|
#else // !_WIN32
|
|
# define EXPORT_FLAG extern "C"
|
|
#endif // !_WIN32
|
|
|
|
#define DECLARE_RELEASE_FLAG(type, name, value, help) EXPORT_FLAG type name;
|
|
#ifdef DEBUG
|
|
# define DECLARE_DEBUG_FLAG(type, name, value, help) EXPORT_FLAG type name;
|
|
#else // !DEBUG
|
|
# define DECLARE_DEBUG_FLAG(type, name, value, help) const type name = value;
|
|
#endif // !DEBUG
|
|
|
|
RUNTIME_FLAGS(DECLARE_DEBUG_FLAG, DECLARE_RELEASE_FLAG, DECLARE_DEBUG_FLAG);
|
|
|
|
#undef DECLARE_DEBUG_FLAG
|
|
#undef DECLARE_RELEASE_FLAG
|
|
|
|
#endif /*FLAGS_HPP_*/
|