Files
rocm-systems/projects/clr/rocclr/runtime/utils/flags.hpp
T
foreman 868a67346b P4 to Git Change 1058301 by weizhang@msdnweizhang-opencl on 2014/07/23 10:08:38
EPR #397491 - Enable platform atomics in stg but not in mainline.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#450 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#272 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#207 edit


[ROCm/clr commit: 5b7c1dd22c]
2014-07-23 10:21:39 -04:00

249 строки
14 KiB
C++

//
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef FLAGS_HPP_
#define FLAGS_HPP_
#define RUNTIME_FLAGS(debug,release,release_on_stg) \
\
debug(int, LOG_LEVEL, 0, \
"The default log level") \
debug(bool, BREAK_ON_LOG_WARNING, false, \
"Break each time an error is logged") \
debug(bool, BREAK_ON_LOG_ERROR, false, \
"Break each time an error is logged") \
debug(uint, DEBUG_GPU_FLAGS, 0, \
"The debug options for GPU device") \
debug(uint, GPU_MAX_COMMAND_QUEUES, 80, \
"The maximum number of concurrent Virtual GPUs") \
release(size_t, CQ_THREAD_STACK_SIZE, 256*Ki, /* @todo: that much! */ \
"The default command queue thread stack size") \
release(size_t, CPU_WORKER_THREAD_STACK_SIZE, 64*Ki, \
"The default CPU worker thread stack size") \
release(int, CPU_MAX_COMPUTE_UNITS, -1, \
"Override the number of computation units per CPU device") \
release(int, GPU_MAX_WORKGROUP_SIZE, 0, \
"Maximum number of workitems in a workgroup for GPU, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_2D_X, 0, \
"Maximum number of workitems in a 2D workgroup for GPU, x component, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_2D_Y, 0, \
"Maximum number of workitems in a 2D workgroup for GPU, y component, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_3D_X, 0, \
"Maximum number of workitems in a 3D workgroup for GPU, x component, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_3D_Y, 0, \
"Maximum number of workitems in a 3D workgroup for GPU, y component, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_3D_Z, 0, \
"Maximum number of workitems in a 3D workgroup for GPU, z component, 0 -use default") \
release(int, CPU_MAX_WORKGROUP_SIZE, 1024, \
"Maximum number of workitems in a workgroup for CPU") \
debug(bool, CPU_MEMORY_GUARD_PAGES, false, \
"Use guard pages for CPU memory") \
debug(size_t, CPU_MEMORY_GUARD_PAGE_SIZE, 64, \
"Size in KB of CPU memory guard page") \
debug(size_t, CPU_MEMORY_ALIGNMENT_SIZE, 256, \
"Size in bytes for the default alignment for guarded memory on CPU") \
debug(size_t, PARAMETERS_MIN_ALIGNMENT, 16, \
"Minimum alignment required for the abstract parameters stack") \
debug(size_t, MEMOBJ_BASE_ADDR_ALIGN, 4*Ki, \
"Alignment of the base address of any allocate memory object") \
release(cstring, GPU_DEVICE_NAME, "", \
"Select the device ordinal (will only report a single device)") \
release(cstring, GPU_DEVICE_ORDINAL, "", \
"Select the device ordinal (comma seperated list of available devices)") \
release(bool, REMOTE_ALLOC, false, \
"Use remote memory for the global heap allocation") \
release(int, GPU_INITIAL_HEAP_SIZE, 16, \
"Initial size of the GPU heap in MiB") \
release(uint, GPU_MAX_HEAP_SIZE, 100, \
"Set maximum size of the GPU heap to % of board memory") \
release(int, GPU_HEAP_GROWTH_INCREMENT, 8, \
"Amount to grow the GPU heap by in MiB") \
release(uint, GPU_STAGING_BUFFER_SIZE, 512, \
"Size of the GPU staging buffer in KiB") \
release(bool, GPU_DUMP_BLIT_KERNELS, false, \
"Dump the kernels for blit manager") \
release(uint, GPU_BLIT_ENGINE_TYPE, 0x0, \
"Blit engine type: 0 - Default, 1 - Host, 2 - CAL, 3 - Kernel") \
release(bool, GPU_FLUSH_ON_EXECUTION, false, \
"Submit commands to HW on every operation. 0 - Disable, 1 - Enable") \
release(bool, GPU_USE_SYNC_OBJECTS, true, \
"If enabled, use sync objects instead of polling") \
release(bool, ENABLE_CAL_SHUTDOWN, false, \
"Enable explicit CAL shutdown (for PM4 capture)") \
release(bool, CL_KHR_FP64, true, \
"Enable/Disable support for double precision") \
release(uint, GPU_OPEN_VIDEO, 0, \
"Non-zero value allows to report Open Video extension on GPU") \
release(cstring, AMD_OCL_BUILD_OPTIONS, 0, \
"Set clBuildProgram() and clCompileProgram()'s options (override)") \
release(cstring, AMD_OCL_BUILD_OPTIONS_APPEND, 0, \
"Append clBuildProgram() and clCompileProgram()'s options") \
release(cstring, AMD_OCL_LINK_OPTIONS, 0, \
"Set clLinkProgram()'s options (override)") \
release(cstring, AMD_OCL_LINK_OPTIONS_APPEND, 0, \
"Append clLinkProgram()'s options") \
debug(bool, AMD_OCL_SUPPRESS_MESSAGE_BOX, false, \
"Suppress the error dialog on Windows") \
debug(bool, OCL_STRESS_BINARY_IMAGE, false, \
"Exercise the binary image producer and consumer") \
release(cstring, GPU_PRE_RA_SCHED, "default", \
"Allows setting of alternate pre-RA-sched") \
release(size_t, GPU_PINNED_XFER_SIZE, 16, \
"The pinned buffer size for pinning in read/write transfers") \
release(size_t, GPU_PINNED_MIN_XFER_SIZE, 512, \
"The minimal buffer size for pinned read/write transfers in KBytes") \
release(size_t, GPU_RESOURCE_CACHE_SIZE, 64, \
"The resource cache size in MB") \
release(uint, GPU_ASYNC_MEM_COPY, 0, \
"Enables async memory transfers with DRM engine") \
release(bool, GPU_FORCE_64BIT_PTR, 0, \
"Forces 64 bit pointers on GPU") \
release(bool, GPU_FORCE_OCL20_32BIT, 0, \
"Forces 32 bit apps to take CLANG\HSAIL path") \
release(bool, GPU_PREALLOC_ADDR_SPACE, 0, \
"Preallocates 4GB address space. Valid for boards > 4GB") \
release(bool, GPU_RAW_TIMESTAMP, 0, \
"Reports GPU raw timestamps in GPU timeline") \
release(bool, CPU_IMAGE_SUPPORT, true, \
"Turn on image support on the CPU device") \
release(bool, GPU_PARTIAL_DISPATCH, true, \
"Enables partial dispatch on GPU") \
release(size_t, GPU_NUM_MEM_DEPENDENCY, 256, \
"Number of memory objects for dependency tracking") \
release(size_t, GPU_XFER_BUFFER_SIZE, 0, \
"Transfer buffer size for image copy optimization in KB") \
release(bool, GPU_IMAGE_DMA, true, \
"Enable DRM DMA for image transfers") \
release(uint, CPU_MAX_ALLOC_PERCENT, 25, \
"Maximum size of a single allocation in MiB") \
release(uint, GPU_MAX_ALLOC_PERCENT, 75, \
"Maximum size of a single allocation as percentage of total") \
release(uint, GPU_NUM_COMPUTE_RINGS, 2, \
"GPU number of compute rings. 0 - disabled, 1 , 2,.. - the number of compute rings") \
release_on_stg(bool, C1X_ATOMICS, !IS_MAINLINE, \
"Runtime will report c1x atomics support") \
release(uint, GPU_WORKLOAD_SPLIT, 22, \
"Workload split size") \
release(bool, GPU_USE_SINGLE_SCRATCH, false, \
"Use single scratch buffer per device instead of per HW ring") \
release_on_stg(cstring, GPU_TARGET_INFO_ARCH, "amdil", \
"Select the GPU TargetInfo arch (amdil|hsail)") \
release(bool, HSA_RUNTIME, 0, \
"1 = Enable HSA Runtime, any other value or absence disables it.") \
release(bool, AMD_OCL_WAIT_COMMAND, false, \
"1 = Enable a wait for every submitted command") \
debug(bool, AMD_OCL_DEBUG_LINKER, false, \
"Enable debug output in linker") \
debug(bool, GPU_SPLIT_LIB, true, \
"Enable splitting GPU 32/64 bit library") \
release(bool, GPU_STAGING_WRITE_PERSISTENT, false, \
"Enable Persistent writes") \
release(bool, DRMDMA_FOR_LNX_CF, false, \
"Enable DRMDMA for Linux CrossFire") \
release(bool, GPU_HSAIL_ENABLE, false, \
"Enable HSAIL on dGPU stack (requires CI+ HW)") \
release(bool, GPU_ASSUME_ALIASES, false, \
"Assume memory aliases in the compilation process") \
release(uint, GPU_PRINT_CHILD_KERNEL, 0, \
"Prints the specified number of the child kernels") \
release(bool, GPU_DIRECT_SRD, true, \
"Use indirect SRD access in HSAIL") \
release(bool, AMD_DEPTH_MSAA_INTEROP, false, \
"Enable depth stencil and MSAA buffer interop") \
release(bool, AMD_THREAD_TRACE_ENABLE, false, \
"Enable thread trace extension") \
release(bool, ENVVAR_HSA_POLL_KERNEL_COMPLETION, false, \
"Determines if Hsa runtime should use polling scheme") \
release(bool, HSA_LOCAL_MEMORY_ENABLE, false, \
"Enable HSA device local memory usage") \
release(bool, HSA_ENABLE_ATOMICS_32B, false, \
"1 = Enable SVM atomics in 32 bits (HSA backend-only). Any other value keeps then disabled.")
namespace amd {
//! \addtogroup Utils
// @{
struct Flag
{
enum Type
{
Tinvalid = 0,
Tbool, //!< A boolean type flag (true, false).
Tint, //!< An integer type flag (signed).
Tuint, //!< An integer type flag (unsigned).
Tsize_t, //!< A size_t type flag.
Tcstring //!< A string type flag.
};
#define DEFINE_FLAG_NAME(type, name, value, help) k##name,
enum Name
{
RUNTIME_FLAGS(DEFINE_FLAG_NAME, DEFINE_FLAG_NAME, DEFINE_FLAG_NAME)
numFlags_
};
#undef DEFINE_FLAG_NAME
#define CAN_SET(type, name, v, h) static const bool cannotSet##name = false;
#define CANNOT_SET(type, name, v, h) static const bool cannotSet##name = true;
#ifdef DEBUG
RUNTIME_FLAGS(CAN_SET, CAN_SET, CAN_SET)
#else // !DEBUG
RUNTIME_FLAGS(CANNOT_SET, CAN_SET, CANNOT_SET)
#endif // !DEBUG
#undef CAN_SET
#undef CANNOT_SET
private:
static Flag flags_[];
public:
static char* envstr_;
const char* name_;
const void* value_;
Type type_;
bool isDefault_;
public:
static bool init();
static void tearDown();
bool setValue(const char* value);
static bool isDefault(Name name) { return flags_[name].isDefault_; }
};
#define flagIsDefault(name) \
(amd::Flag::cannotSet##name || amd::Flag::isDefault(amd::Flag::k##name))
// @}
} // namespace amd
#ifdef _WIN32
# define EXPORT_FLAG extern "C" __declspec(dllexport)
#else // !_WIN32
# define EXPORT_FLAG extern "C"
#endif // !_WIN32
#define DECLARE_RELEASE_FLAG(type, name, value, help) EXPORT_FLAG type name;
#ifdef DEBUG
# define DECLARE_DEBUG_FLAG(type, name, value, help) EXPORT_FLAG type name;
#else // !DEBUG
# define DECLARE_DEBUG_FLAG(type, name, value, help) const type name = value;
#endif // !DEBUG
RUNTIME_FLAGS(DECLARE_DEBUG_FLAG, DECLARE_RELEASE_FLAG, DECLARE_DEBUG_FLAG);
#undef DECLARE_DEBUG_FLAG
#undef DECLARE_RELEASE_FLAG
#endif /*FLAGS_HPP_*/