SWDEV-556684 - Remove HSAIL support (#1183)

Este commit está contenido en:
Pengda Xie
2025-10-23 11:21:49 -07:00
cometido por GitHub
padre db949445c3
commit a4bbd73dc6
Se han modificado 53 ficheros con 410 adiciones y 5066 borrados
-41
Ver fichero
@@ -128,47 +128,6 @@ clGetKernelSubGroupInfo
clSetDefaultDeviceCommandQueue
#endif
#if !defined(WITH_LIGHTNING_COMPILER)
aclCompilerInit
aclCompilerFini
aclCompilerVersion
aclVersionSize
aclGetErrorString
aclGetArchInfo
aclGetDeviceInfo
aclGetTargetInfo
aclGetArchitecture
aclGetFamily
aclGetChip
aclBinaryInit
aclBinaryFini
aclReadFromFile
aclReadFromMem
aclWriteToFile
aclWriteToMem
aclCreateFromBinary
aclBinaryVersion
aclInsertSection
aclRemoveSection
aclExtractSection
aclInsertSymbol
aclRemoveSymbol
aclExtractSymbol
aclDbgAddArgument
aclDbgRemoveArgument
aclQueryInfo
aclCompile
aclLink
aclGetCompilerLog
aclRetrieveType
aclSetType
aclConvertType
aclDisassemble
aclInsertKernelStatistics
aclGetDeviceBinary
aclDumpBinary
#endif // !defined(WITH_LIGHTNING_COMPILER)
#if (OPENCL_MAJOR > 2) || (OPENCL_MAJOR == 2 && OPENCL_MINOR >= 1)
clCreateProgramWithIL
#endif
-3
Ver fichero
@@ -135,9 +135,6 @@ RUNTIME_ENTRY(cl_int, clGetPlatformInfo,
"cl_khr_dx9_media_sharing "
#endif //_WIN32
"cl_amd_event_callback "
#if defined(WITH_COMPILER_LIB)
"cl_amd_offline_devices "
#endif // defined(WITH_COMPILER_LIB)
;
break;
case CL_PLATFORM_ICD_SUFFIX_KHR:
+1 -14
Ver fichero
@@ -20,15 +20,9 @@
# ROCclr abstracts the usage of multiple AMD compilers and runtimes.
# It is possible to support multiple backends concurrently in the same binary.
option(ROCCLR_ENABLE_HSAIL "Enable support for HSAIL compiler" OFF)
option(ROCCLR_ENABLE_LC "Enable support for LC compiler" ON)
option(ROCCLR_ENABLE_HSA "Enable support for HSA runtime" ON)
option(ROCCLR_ENABLE_PAL "Enable support for PAL runtime" OFF)
# Stop configuration when neither compiler backend is enabled.
# FATAL_ERROR is the message() mode that aborts processing; a bare FATAL is not
# a mode keyword, so it was printed as part of the text and configuration went on.
if((NOT ROCCLR_ENABLE_HSAIL) AND (NOT ROCCLR_ENABLE_LC))
  message(FATAL_ERROR "Support for at least one compiler needs to be enabled!")
endif()
# Stop configuration when neither runtime backend (HSA or PAL) is enabled.
# FATAL_ERROR is the message() mode that aborts processing; a bare FATAL is not
# a mode keyword, so it was printed as part of the text and configuration went on.
if((NOT ROCCLR_ENABLE_HSA) AND (NOT ROCCLR_ENABLE_PAL))
  message(FATAL_ERROR "Support for at least one runtime needs to be enabled!")
endif()
@@ -68,7 +62,6 @@ target_sources(rocclr PRIVATE
${ROCCLR_SRC_DIR}/device/device.cpp
${ROCCLR_SRC_DIR}/device/devkernel.cpp
${ROCCLR_SRC_DIR}/device/devprogram.cpp
${ROCCLR_SRC_DIR}/device/hsailctx.cpp
${ROCCLR_SRC_DIR}/elf/elf.cpp
${ROCCLR_SRC_DIR}/os/alloc.cpp
${ROCCLR_SRC_DIR}/os/os_posix.cpp
@@ -142,13 +135,7 @@ if(UNIX)
target_link_libraries(rocclr PUBLIC rt)
endif()
if(ROCCLR_ENABLE_HSAIL)
include(ROCclrHSAIL)
endif()
if(ROCCLR_ENABLE_LC)
include(ROCclrLC)
endif()
include(ROCclrLC)
if(ROCCLR_ENABLE_HSA)
include(ROCclrHSA)
-21
Ver fichero
@@ -1,21 +0,0 @@
# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
target_compile_definitions(rocclr PUBLIC WITH_COMPILER_LIB HSAIL_DYN_DLL)
+1 -1
Ver fichero
@@ -37,7 +37,7 @@ if (NOT amd_comgr_FOUND)
endif()
get_target_property(_amd_comgr_lib_type amd_comgr TYPE)
target_compile_definitions(rocclr PUBLIC WITH_LIGHTNING_COMPILER USE_COMGR_LIBRARY)
target_compile_definitions(rocclr PUBLIC)
if(_amd_comgr_lib_type STREQUAL "SHARED_LIBRARY")
target_compile_definitions(rocclr PUBLIC COMGR_DYN_DLL)
endif()
@@ -1,217 +0,0 @@
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _ACL_0_8_H_
#define _ACL_0_8_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "aclTypes.h"
//!--------------------------------------------------------------------------!//
// Functions that deal with aclCompiler objects.
//!--------------------------------------------------------------------------!//
aclCompiler* ACL_API_ENTRY aclCompilerInit(aclCompilerOptions* opts,
acl_error* error_code) ACL_API_0_8;
acl_error ACL_API_ENTRY aclCompilerFini(aclCompiler* cl) ACL_API_0_8;
aclCLVersion ACL_API_ENTRY aclCompilerVersion(aclCompiler* cl, acl_error* error_code) ACL_API_0_8;
uint32_t ACL_API_ENTRY aclVersionSize(aclCLVersion num, acl_error* error_code) ACL_API_0_8;
const char* ACL_API_ENTRY aclGetErrorString(acl_error error_code) ACL_API_0_8;
//!--------------------------------------------------------------------------!//
// Functions that deal with target specific information.
//!--------------------------------------------------------------------------!//
//! Returns in the names argument, if non-NULL, a pointer to each of the arch
// names that the compiler supports. If names is NULL and arch_size is
// non-NULL, returns the number of arch entries that are required.
acl_error ACL_API_ENTRY aclGetArchInfo(const char** arch_names, size_t* arch_size) ACL_API_0_8;
//! Returns in the names argument, if non-NULL, a pointer to each device
// name that the compiler supports. If device_size is non-NULL,
// returns the number of device entries that are used.
acl_error ACL_API_ENTRY aclGetDeviceInfo(const char* arch, const char** names,
size_t* device_size) ACL_API_0_8;
//! Function that returns a correctly filled out aclTargetInfo structure based
// on the information passed into the kernel.
aclTargetInfo ACL_API_ENTRY aclGetTargetInfo(const char* arch, const char* device,
acl_error* error_code) ACL_API_0_8;
//! Function that returns a correctly filled out aclTargetInfo structure based
// on the architecture name and chip ID passed in.
aclTargetInfo ACL_API_ENTRY aclGetTargetInfoFromChipID(const char* arch, const uint32_t chip_id,
acl_error* error_code) ACL_API_0_8;
//! Function that returns a string representation of the target architecture.
const char* ACL_API_ENTRY aclGetArchitecture(const aclTargetInfo& target) ACL_API_0_8;
//! Function that returns the target chip options as a 64-bit integer value.
const uint64_t ACL_API_ENTRY aclGetChipOptions(const aclTargetInfo& target) ACL_API_0_8;
//! Function that returns a string representation of the target family.
const char* ACL_API_ENTRY aclGetFamily(const aclTargetInfo& target) ACL_API_0_8;
//! Function that returns a string representation of the target chip.
const char* ACL_API_ENTRY aclGetChip(const aclTargetInfo& target) ACL_API_0_8;
//!--------------------------------------------------------------------------!//
// Functions that deal with aclBinary objects.
//!--------------------------------------------------------------------------!//
aclBinary* ACL_API_ENTRY aclBinaryInit(size_t struct_version, const aclTargetInfo* target,
const aclBinaryOptions* options,
acl_error* error_code) ACL_API_0_8;
acl_error ACL_API_ENTRY aclBinaryFini(aclBinary* bin) ACL_API_0_8;
aclBinary* ACL_API_ENTRY aclReadFromFile(const char* str, acl_error* error_code) ACL_API_0_8;
aclBinary* ACL_API_ENTRY aclReadFromMem(const void* mem, size_t size,
acl_error* error_code) ACL_API_0_8;
acl_error ACL_API_ENTRY aclWriteToFile(aclBinary* bin, const char* str) ACL_API_0_8;
acl_error ACL_API_ENTRY aclWriteToMem(aclBinary* bin, void** mem, size_t* size) ACL_API_0_8;
aclBinary* ACL_API_ENTRY aclCreateFromBinary(const aclBinary* binary,
aclBIFVersion version) ACL_API_0_8;
aclBIFVersion ACL_API_ENTRY aclBinaryVersion(const aclBinary* binary) ACL_API_0_8;
acl_error ACL_API_ENTRY aclInsertSection(aclCompiler* cl, aclBinary* binary, const void* data,
size_t data_size, aclSections id) ACL_API_0_8;
acl_error ACL_API_ENTRY aclInsertSymbol(aclCompiler* cl, aclBinary* binary, const void* data,
size_t data_size, aclSections id,
const char* symbol) ACL_API_0_8;
const void* ACL_API_ENTRY aclExtractSection(aclCompiler* cl, const aclBinary* binary, size_t* size,
aclSections id, acl_error* error_code) ACL_API_0_8;
const void* ACL_API_ENTRY aclExtractSymbol(aclCompiler* cl, const aclBinary* binary, size_t* size,
aclSections id, const char* symbol,
acl_error* error_code) ACL_API_0_8;
acl_error ACL_API_ENTRY aclRemoveSection(aclCompiler* cl, aclBinary* binary,
aclSections id) ACL_API_0_8;
acl_error ACL_API_ENTRY aclRemoveSymbol(aclCompiler* cl, aclBinary* binary, aclSections id,
const char* symbol) ACL_API_0_8;
//!--------------------------------------------------------------------------!//
// Functions that deal with debug/metadata.
//!--------------------------------------------------------------------------!//
acl_error ACL_API_ENTRY aclQueryInfo(aclCompiler* cl, const aclBinary* binary, aclQueryType query,
const char* kernel, void* data_ptr,
size_t* ptr_size) ACL_API_0_8;
acl_error ACL_API_ENTRY aclDbgAddArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
const char* name, bool byVal) ACL_API_0_8;
acl_error ACL_API_ENTRY aclDbgRemoveArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
const char* name) ACL_API_0_8;
//!--------------------------------------------------------------------------!//
// Functions that deal with various compilation phases.
//!--------------------------------------------------------------------------!//
acl_error ACL_API_ENTRY aclCompile(aclCompiler* cl, aclBinary* bin, const char* options,
aclType from, aclType to,
aclLogFunction compile_callback) ACL_API_0_8;
acl_error ACL_API_ENTRY aclLink(aclCompiler* cl, aclBinary* src_bin, unsigned int num_libs,
aclBinary** libs, aclType link_mode, const char* options,
aclLogFunction link_callback) ACL_API_0_8;
const char* ACL_API_ENTRY aclGetCompilerLog(aclCompiler* cl) ACL_API_0_8;
const void* ACL_API_ENTRY aclRetrieveType(aclCompiler* cl, const aclBinary* bin, const char* name,
size_t* data_size, aclType type,
acl_error* error_code) ACL_API_0_8;
acl_error ACL_API_ENTRY aclSetType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type,
const void* data, size_t size) ACL_API_0_8;
acl_error ACL_API_ENTRY aclConvertType(aclCompiler* cl, aclBinary* bin, const char* name,
aclType type) ACL_API_0_8;
acl_error ACL_API_ENTRY aclDisassemble(aclCompiler* cl, aclBinary* bin, const char* kernel,
aclLogFunction disasm_callback) ACL_API_0_8;
const void* ACL_API_ENTRY aclGetDeviceBinary(aclCompiler* cl, const aclBinary* bin,
const char* kernel, size_t* size,
acl_error* error_code) ACL_API_0_8;
//!--------------------------------------------------------------------------!//
// Functions that deal with binary image.
//!--------------------------------------------------------------------------!//
bool ACL_API_ENTRY aclValidateBinaryImage(const void* binary, size_t length, unsigned) ACL_API_0_8;
//!--------------------------------------------------------------------------!//
// Functions that deal with aclJITObjectImage objects.
//!--------------------------------------------------------------------------!//
aclJITObjectImage ACL_API_ENTRY aclJITObjectImageCreate(aclCompiler* cl, const void* buffer,
size_t length, aclBinary* bin,
acl_error* error_code);
aclJITObjectImage ACL_API_ENTRY aclJITObjectImageCopy(aclCompiler* cl, const void* buffer,
size_t length, acl_error* error_code);
acl_error ACL_API_ENTRY aclJITObjectImageDestroy(aclCompiler* cl, aclJITObjectImage buffer);
acl_error ACL_API_ENTRY aclJITObjectImageFinalize(aclCompiler* cl, aclJITObjectImage image);
size_t ACL_API_ENTRY aclJITObjectImageSize(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code);
const char* ACL_API_ENTRY aclJITObjectImageData(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code);
size_t ACL_API_ENTRY aclJITObjectImageGetGlobalsSize(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code);
acl_error ACL_API_ENTRY aclJITObjectImageIterateSymbols(aclCompiler* cl, aclJITObjectImage image,
aclJITSymbolCallback callback, void* data);
#if defined(LEGACY_COMPLIB)
char* ACL_API_ENTRY aclJITObjectImageDisassembleKernel(aclCompiler* cl,
constAclJITObjectImage image,
const char* kernel, acl_error* error_code);
#endif
//!--------------------------------------------------------------------------!//
// Debug functionality
//!--------------------------------------------------------------------------!//
void aclDumpBinary(const aclBinary* bin);
//!--------------------------------------------------------------------------!//
// Functions that deal with kernel statistics.
//!--------------------------------------------------------------------------!//
void aclGetKstatsSI(const void* shader, aclKernelStats& kstats);
acl_error ACL_API_ENTRY aclInsertKernelStatistics(aclCompiler* cl, aclBinary* bin);
//! Define hardware info constants for SI and above devices
static constexpr unsigned SI_sgprs_avail = 102;
static constexpr unsigned SI_vgprs_avail = 256;
static constexpr unsigned SI_ldssize_avail = 32 * 1024;
//!--------------------------------------------------------------------------!//
// Functions that deal with memory.
// Free memory allocated by aclWriteToMem
//!--------------------------------------------------------------------------!//
acl_error ACL_API_ENTRY aclFreeMem(aclBinary* bin, void* mem);
#ifdef __cplusplus
}
#endif
#endif // _ACL_0_8_H_
@@ -1,54 +0,0 @@
/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _ACL_DEFS_0_8_H_
#define _ACL_DEFS_0_8_H_
// Decoration placed on ACL function declarations. It is only defined here when
// the including code has not already provided its own ACL_API_ENTRY.
#ifndef ACL_API_ENTRY
#if defined(_WIN32) || defined(__CYGWIN__)
// Windows and Cygwin builds: expand to the __stdcall calling convention.
#define ACL_API_ENTRY __stdcall
#else
// All other platforms: expand to nothing.
#define ACL_API_ENTRY
#endif
#endif
#ifndef ACL_API_0_8
#define ACL_API_0_8
#endif
#ifndef BIF_API_2_0
#define BIF_API_2_0
#endif
#ifndef BIF_API_2_1
#define BIF_API_2_1
#endif
#ifndef BIF_API_3_0
#define BIF_API_3_0
#endif
// MAX_HIDDEN_KERNARGS_NUM is fixed at 6 (by its name, an upper bound on hidden
// kernel arguments — TODO confirm against its users).
// Unlike the other macros in this header, a pre-existing definition is not
// tolerated: it stops compilation with #error instead of being kept.
#ifndef MAX_HIDDEN_KERNARGS_NUM
#define MAX_HIDDEN_KERNARGS_NUM 6
#else
#error "MAX_HIDDEN_KERNARGS_NUM is already defined"
#endif
#endif // _ACL_DEFS_0_8_H_
@@ -1,364 +0,0 @@
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _ACL_ENUMS_0_8_H_
#define _ACL_ENUMS_0_8_H_
//! Status/error codes for version 0.8 of the ACL interface.
// ACL_SUCCESS is 0; every other enumerator has its own explicit value, running
// contiguously from 1 through ACL_LAST_ERROR (29).
// NOTE(review): ACL_LAST_ERROR looks like an end-of-range marker rather than a
// real error condition — confirm before treating it as a reportable code.
typedef enum _acl_error_enum_0_8 {
ACL_SUCCESS = 0,
ACL_ERROR = 1,
ACL_INVALID_ARG = 2,
ACL_OUT_OF_MEM = 3,
ACL_SYS_ERROR = 4,
ACL_UNSUPPORTED = 5,
ACL_ELF_ERROR = 6,
ACL_INVALID_FILE = 7,
ACL_INVALID_COMPILER = 8,
ACL_INVALID_TARGET = 9,
ACL_INVALID_BINARY = 10,
ACL_INVALID_OPTION = 11,
ACL_INVALID_TYPE = 12,
ACL_INVALID_SECTION = 13,
ACL_INVALID_SYMBOL = 14,
ACL_INVALID_QUERY = 15,
ACL_FRONTEND_FAILURE = 16,
ACL_INVALID_BITCODE = 17,
ACL_LINKER_ERROR = 18,
ACL_OPTIMIZER_ERROR = 19,
ACL_CODEGEN_ERROR = 20,
ACL_ISAGEN_ERROR = 21,
ACL_INVALID_SOURCE = 22,
ACL_LIBRARY_ERROR = 23,
ACL_INVALID_SPIR = 24,
ACL_LWVERIFY_FAIL = 25,
ACL_HWVERIFY_FAIL = 26,
ACL_SPIRV_LOAD_FAIL = 27,
ACL_SPIRV_SAVE_FAIL = 28,
ACL_LAST_ERROR = 29
} acl_error_0_8;
//! Compiler/device capability identifiers for version 0.8.
// capError is 0; the remaining enumerators are numbered contiguously up to
// capDumpLast (13). The value of capDumpLast is also consumed by the
// FLAG_MASK_VALUE macro defined later in this header, so renumbering this enum
// changes that mask.
typedef enum _comp_device_caps_enum_0_8 {
capError = 0,
capFMA = 1,
capImageSupport = 2,
capSaveSOURCE = 3, // input source
capSaveLLVMIR = 4, // output LLVMIR from frontend
capSaveCG = 5, // output from LLVM-BE
capSaveEXE = 6, // output executable
capSaveAMDIL = 7, // Save per-kernel AMDIL
capSaveHSAIL = 8, // Save per-kernel HSAIL
capEncrypted = 9,
capSaveDISASM = 10,
capSaveAS = 11,
capSaveSPIR = 12,
capDumpLast = 13
} compDeviceCaps_0_8;
typedef enum _comp_opt_settings_enum_0_8 {
optO0 = 0, // No optimization setting.
optO1 = 1,
optO2 = 2,
optO3 = 3,
optO4 = 4,
optOs = 5,
optError = 6, // Invalid optimization set
optLast = 7
} compOptSettings_0_8;
// Bit-flag helper macros; presumably used to store compDeviceCaps values as
// bits — TODO confirm against the users of FLAG_ARRAY_SIZE.
// NOTE(review): FLAG_SHIFT_VALUE is not referenced by the other macros here, and
// FLAG_MASK_VALUE is derived from capDumpLast (13), giving 0x1FFF, rather than
// from FLAG_SHIFT_VALUE (which would give 0x1F) — confirm this is intended.
#define FLAG_SHIFT_VALUE 5
#define FLAG_MASK_VALUE ((1 << capDumpLast) - 1)
// Expands to 1 shifted left by (A masked with FLAG_MASK_VALUE).
#define FLAG_BITLOC(A) (1 << ((A) & FLAG_MASK_VALUE))
#define FLAG_ARRAY_SIZE 4
//! An enumeration that defines the possible valid device types that
// can be compiled for.
typedef enum _acl_dev_type_enum_0_8 {
aclError = 0, // aclDevType of 0 is an error.
aclX86 = 1, // Targeting a 32bit X86 CPU device.
aclAMDIL = 2, // Targeting an AMDIL GPU device.
aclHSAIL = 3, // Targeting an HSAIL GPU device.
aclX64 = 4, // Targeting a 64bit X86 CPU device.
aclHSAIL64 = 5, // Targeting a 64bit HSAIL GPU device.
aclAMDIL64 = 6, // Targeting a 64bit AMDIL GPU device
aclLast = 7
} aclDevType_0_8;
//! Enum that represents the versions of the compiler
typedef enum _acl_cl_version_enum_0_8 {
ACL_VERSION_ERROR = 0,
ACL_VERSION_0_7 = 1,
ACL_VERSION_0_8 = 2,
ACL_VERSION_0_8_1 = 3,
ACL_VERSION_0_9 = 4,
ACL_VERSION_1_0 = 5,
ACL_VERSION_LAST = 6
} aclCLVersion_0_8;
//! Enum of the various aclTypes that are supported
typedef enum _acl_type_enum_0_8 {
ACL_TYPE_DEFAULT = 0,
ACL_TYPE_OPENCL = 1,
ACL_TYPE_LLVMIR_TEXT = 2,
ACL_TYPE_LLVMIR_BINARY = 3,
ACL_TYPE_SPIR_TEXT = 4,
ACL_TYPE_SPIR_BINARY = 5,
ACL_TYPE_AMDIL_TEXT = 6,
ACL_TYPE_AMDIL_BINARY = 7,
ACL_TYPE_HSAIL_TEXT = 8,
ACL_TYPE_HSAIL_BINARY = 9,
ACL_TYPE_X86_TEXT = 10,
ACL_TYPE_X86_BINARY = 11,
ACL_TYPE_CG = 12,
ACL_TYPE_SOURCE = 13,
ACL_TYPE_ISA = 14,
ACL_TYPE_HEADER = 15,
ACL_TYPE_RSLLVMIR_BINARY = 16,
ACL_TYPE_SPIRV_BINARY = 17,
ACL_TYPE_ASM_TEXT = 18,
ACL_TYPE_LAST = 19
} aclType_0_8;
//! Enum of the various loader types that are supported.
typedef enum _acl_loader_type_enum_0_8 {
ACL_LOADER_COMPLIB = 0,
ACL_LOADER_FRONTEND = 1,
ACL_LOADER_LINKER = 2,
ACL_LOADER_OPTIMIZER = 3,
ACL_LOADER_CODEGEN = 4,
ACL_LOADER_BACKEND = 5,
ACL_LOADER_SC = 6,
ACL_LOADER_LAST = 7
} aclLoaderType_0_8;
// Enumeration of the supported BIF (binary format) versions.
// aclBIFVersionLatest is an alias, not a new value: it equals aclBIFVersion31
// (4), so two enumerators share that value. Numbering then resumes with
// aclBIFVersionCAL = 5 and aclBIFVersionLast = 6.
typedef enum _bif_version_enum_0_8 {
aclBIFVersionError = 0, // Error
aclBIFVersion20 = 1, // Version 2.0 of the OpenCL BIF
aclBIFVersion21 = 2, // Version 2.1 of the OpenCL BIF
aclBIFVersion30 = 3, // Version 3.0 of the OpenCL BIF
aclBIFVersion31 = 4, // Version 3.1 of the OpenCL BIF
aclBIFVersionLatest = aclBIFVersion31, // Most recent version of the BIF
aclBIFVersionCAL = 5,
aclBIFVersionLast = 6
} aclBIFVersion_0_8;
// Enumeration for the various platform types
typedef enum _bif_platform_enum_0_8 {
aclPlatformCAL = 0, // For BIF 2.0 backward compatibility
aclPlatformCPU = 1, // For BIF 2.0 backward compatibility
aclPlatformCompLib = 2,
aclPlatformLast = 3
} aclPlatform_0_8;
// Enumeration for the various bif sections
typedef enum _bif_sections_enum_0_8 {
aclLLVMIR = 0,
aclSOURCE = 1,
aclILTEXT = 2, // For BIF 2.0 backward compatibility
aclASTEXT = 3, // For BIF 2.0 backward compatibility
aclCAL = 4, // For BIF 2.0 backward compatibility
aclDLL = 5, // For BIF 2.0 backward compatibility
aclSTRTAB = 6,
aclSYMTAB = 7,
aclRODATA = 8,
aclSHSTRTAB = 9,
aclNOTES = 10,
aclCOMMENT = 11,
aclILDEBUG = 12, // For BIF 2.0 backward compatibility
aclDEBUG_INFO = 13,
aclDEBUG_ABBREV = 14,
aclDEBUG_LINE = 15,
aclDEBUG_PUBNAMES = 16,
aclDEBUG_PUBTYPES = 17,
aclDEBUG_LOC = 18,
aclDEBUG_ARANGES = 19,
aclDEBUG_RANGES = 20,
aclDEBUG_MACINFO = 21,
aclDEBUG_STR = 22,
aclDEBUG_FRAME = 23,
aclJITBINARY = 24, // For BIF 2.0 backward compatibility
aclCODEGEN = 25,
aclTEXT = 26,
aclINTERNAL = 27,
aclSPIR = 28,
aclHEADER = 29,
aclBRIG = 30,
aclBRIGxxx1 = 31,
aclBRIGxxx2 = 32,
aclBRIGxxx3 = 33,
aclHSADEBUG = 34,
aclKSTATS = 35, // For storing kernel statistics
aclSPIRV = 36,
aclLAST = 37
} aclSections_0_8;
//! An enumeration that defines what are valid queries for aclQueryInfo.
typedef enum _rt_query_types_enum_0_8 {
RT_ABI_VERSION = 0,
RT_DEVICE_NAME = 1,
RT_MEM_SIZES = 2,
RT_GPU_FUNC_CAPS = 3,
RT_GPU_FUNC_ID = 4,
RT_GPU_DEFAULT_ID = 5,
RT_WORK_GROUP_SIZE = 6,
RT_WORK_REGION_SIZE = 7,
RT_ARGUMENT_ARRAY = 8,
RT_GPU_PRINTF_ARRAY = 9,
RT_CPU_BARRIER_NAMES = 10,
RT_DEVICE_ENQUEUE = 11,
RT_KERNEL_INDEX = 12,
RT_KERNEL_NAME = 13,
RT_KERNEL_NAMES = 14,
RT_CONTAINS_LLVMIR = 15,
RT_CONTAINS_OPTIONS = 16,
RT_CONTAINS_BRIG = 17,
RT_CONTAINS_HSAIL = 18,
RT_CONTAINS_ISA = 19,
RT_CONTAINS_LOADER_MAP = 20,
RT_CONTAINS_SPIR = 21,
RT_NUM_KERNEL_HIDDEN_ARGS = 22,
RT_CONTAINS_SPIRV = 23,
RT_WAVES_PER_SIMD_HINT = 24,
RT_WORK_GROUP_SIZE_HINT = 25,
RT_VEC_TYPE_HINT = 26,
RT_LAST_TYPE = 27
} aclQueryType_0_8;
//! An enumeration for the various GPU capabilities
// Each capability occupies its own bit, from bit 0 (RT_COMPILER_WRITE) to
// bit 12 (RT_SWGWS).
// NOTE(review): RT_GPU_CAPS_MASK is 0xFFF, which covers bits 0-11 only, so
// RT_SWGWS (1 << 12) falls outside the mask — confirm whether that is intended.
typedef enum _rt_gpu_caps_enum_0_8 {
RT_COMPILER_WRITE = 1 << 0,
RT_DATA_SECTION = 1 << 1,
RT_WGS = 1 << 2,
RT_LIMIT_WGS = 1 << 3,
RT_PACKED_REGS = 1 << 4,
RT_64BIT_ABI = 1 << 5,
RT_PRINTF = 1 << 6,
RT_ARENA_UAV = 1 << 7,
RT_LRP_MEM = 1 << 8, // Local/Region/Private Memory
RT_INDEX_TEMPS = 1 << 9,
RT_WRS = 1 << 10,
RT_GWS = 1 << 11,
RT_SWGWS = 1 << 12,
RT_GPU_CAPS_MASK = 0xFFF
} aclGPUCaps_0_8;
//! An enumeration for the various CPU capabilities.
// Two single-bit capabilities (bits 0 and 1); RT_CPU_CAPS_MASK (0x3) is the
// bitwise OR of both.
typedef enum _rt_cpu_caps_enum_0_8 {
RT_KERNEL_BARRIER = 1 << 0,
RT_PROGRAM_BARRIER = 1 << 1,
RT_CPU_CAPS_MASK = 0x3
} aclCPUCaps_0_8;
//! An enumeration that maps Resource type to index values
typedef enum _rt_gpu_resource_enum_0_8 {
RT_RES_UAV = 0, // UAV resources
RT_RES_PRI = 1, // Private resources
RT_RES_LDS = 2, // LDS resources
RT_RES_GDS = 3, // GDS resources
RT_RES_CON = 4, // Constant resources
RT_RES_LAST = 5
} aclGPUResource_0_8;
//! An enumeration that maps memory types to index values
typedef enum _rt_gpu_mem_sizes_enum_0_8 {
RT_MEM_HW_LOCAL = 0,
RT_MEM_SW_LOCAL = 1,
RT_MEM_HW_PRIVATE = 2,
RT_MEM_SW_PRIVATE = 3,
RT_MEM_HW_REGION = 4,
RT_MEM_SW_REGION = 5,
RT_MEM_LAST = 6
} aclGPUMemSizes_0_8;
// Enumerations for the various argument types.
typedef enum _acl_arg_type_enum_0_8 {
ARG_TYPE_ERROR = 0,
ARG_TYPE_SAMPLER = 1,
ARG_TYPE_IMAGE = 2,
ARG_TYPE_COUNTER = 3,
ARG_TYPE_VALUE = 4,
ARG_TYPE_POINTER = 5,
ARG_TYPE_SEMAPHORE = 6,
ARG_TYPE_QUEUE = 7, // enum for device enqueue
ARG_TYPE_LAST = 8
} aclArgType_0_8;
// Enumerations of the valid data types for pass by value and
// pass by pointer kernel arguments.
typedef enum _acl_data_type_enum_0_8 {
DATATYPE_ERROR = 0,
DATATYPE_i1 = 1,
DATATYPE_i8 = 2,
DATATYPE_i16 = 3,
DATATYPE_i32 = 4,
DATATYPE_i64 = 5,
DATATYPE_u8 = 6,
DATATYPE_u16 = 7,
DATATYPE_u32 = 8,
DATATYPE_u64 = 9,
DATATYPE_f16 = 10,
DATATYPE_f32 = 11,
DATATYPE_f64 = 12,
DATATYPE_f80 = 13,
DATATYPE_f128 = 14,
DATATYPE_struct = 15,
DATATYPE_union = 16,
DATATYPE_event = 17,
DATATYPE_opaque = 18,
DATATYPE_unknown = 19,
DATATYPE_LAST = 20
} aclArgDataType_0_8;
// Enumerations of the valid memory types for pass by pointer
// kernel arguments
typedef enum _acl_memory_type_enum_0_8 {
PTR_MT_ERROR = 0, // Error
PTR_MT_GLOBAL = 1, // global buffer
PTR_MT_SCRATCH_EMU = 2, // SW emulated private memory
PTR_MT_LDS_EMU = 3, // SW emulated local memory
PTR_MT_UAV = 4, // uniformed access vector memory
PTR_MT_CONSTANT_EMU = 5, // SW emulated constant memory
PTR_MT_GDS_EMU = 6, // SW emulated region memory
PTR_MT_LDS = 7, // HW local memory
PTR_MT_SCRATCH = 8, // HW private memory
PTR_MT_CONSTANT = 9, // HW constant memory
PTR_MT_GDS = 10, // HW region memory
PTR_MT_UAV_SCRATCH = 11, // SI and later HW private memory
PTR_MT_UAV_CONSTANT = 12, // SI and later HW constant memory
PTR_MT_LAST = 13
} aclMemoryType_0_8;
// Enumeration that specifies the various access types for a pointer/image.
typedef enum _acl_access_type_enum_0_8 {
ACCESS_TYPE_ERROR = 0,
ACCESS_TYPE_RO = 1,
ACCESS_TYPE_WO = 2,
ACCESS_TYPE_RW = 3,
ACCESS_TYPE_LAST = 4
} aclAccessType_0_8;
// Enumeration that specifies the binary types.
// The values are distinct powers of two (1, 2, 4) rather than a contiguous
// range, and there is no zero/error or *_LAST enumerator here.
// NOTE(review): the power-of-two values suggest they may be OR-ed together as
// a bit set — confirm against callers.
typedef enum _acl_binary_image_type_enum_0_8 {
BINARY_TYPE_ELF = 1,
BINARY_TYPE_LLVM = 2,
BINARY_TYPE_SPIRV = 4,
} aclBinaryImageType_0_8;
#endif // _ACL_ENUMS_0_8_H_
@@ -1,157 +0,0 @@
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _ACL_FUNCTORS_0_8_H_
#define _ACL_FUNCTORS_0_8_H_
//! Callback for the log function function pointer that many
// API calls take to have the calling application receive
// information on what errors occur.
typedef void (*aclLogFunction_0_8)(const char* msg, size_t size);
typedef bool (*aclJITSymbolCallback)(const char*, const void*, void*);
typedef void* aclJITObjectImage;
typedef const void* constAclJITObjectImage;
typedef acl_error(ACL_API_ENTRY* InsertSec_0_8)(aclCompiler* cl, aclBinary* binary,
const void* data, size_t data_size,
aclSections id) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* InsertSym_0_8)(aclCompiler* cl, aclBinary* binary,
const void* data, size_t data_size, aclSections id,
const char* symbol) ACL_API_0_8;
typedef const void*(ACL_API_ENTRY* ExtractSec_0_8)(aclCompiler* cl, const aclBinary* binary,
size_t* size, aclSections id,
acl_error* error_code)ACL_API_0_8;
typedef const void*(ACL_API_ENTRY* ExtractSym_0_8)(aclCompiler* cl, const aclBinary* binary,
size_t* size, aclSections id, const char* symbol,
acl_error* error_code)ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* RemoveSec_0_8)(aclCompiler* cl, aclBinary* binary,
aclSections id) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* RemoveSym_0_8)(aclCompiler* cl, aclBinary* binary, aclSections id,
const char* symbol) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* QueryInfo_0_8)(aclCompiler* cl, const aclBinary* binary,
aclQueryType query, const char* kernel,
void* data_ptr, size_t* ptr_size) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* AddDbgArg_0_8)(aclCompiler* cl, aclBinary* bin, const char* kernel,
const char* name, bool byVal) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* RemoveDbgArg_0_8)(aclCompiler* cl, aclBinary* bin,
const char* kernel,
const char* name) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* Compile_0_8)(aclCompiler* cl, aclBinary* bin, const char* options,
aclType from, aclType to,
aclLogFunction_0_8 compile_callback) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* Link_0_8)(aclCompiler* cl, aclBinary* src_bin,
unsigned int num_libs, aclBinary** libs,
aclType link_mode, const char* options,
aclLogFunction_0_8 link_callback) ACL_API_0_8;
typedef const char*(ACL_API_ENTRY* CompLog_0_8)(aclCompiler* cl)ACL_API_0_8;
typedef const void*(ACL_API_ENTRY* RetrieveType_0_8)(aclCompiler* cl, const aclBinary* bin,
const char* name, size_t* data_size,
aclType type,
acl_error* error_code)ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* SetType_0_8)(aclCompiler* cl, aclBinary* bin, const char* name,
aclType type, const void* data,
size_t size) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* ConvertType_0_8)(aclCompiler* cl, aclBinary* bin, const char* name,
aclType type) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* Disassemble_0_8)(aclCompiler* cl, aclBinary* bin,
const char* kernel,
aclLogFunction_0_8 disasm_callback) ACL_API_0_8;
typedef const void*(ACL_API_ENTRY* GetDevBinary_0_8)(aclCompiler* cl, const aclBinary* bin,
const char* kernel, size_t* size,
acl_error* error_code)ACL_API_0_8;
typedef aclLoaderData*(ACL_API_ENTRY* LoaderInit_0_8)(aclCompiler* cl, aclBinary* bin,
aclLogFunction_0_8 callback,
acl_error* error);
typedef acl_error(ACL_API_ENTRY* LoaderFini_0_8)(aclLoaderData* data);
typedef aclModule*(ACL_API_ENTRY* FEToIR_0_8)(aclLoaderData* ald, const char* source,
size_t data_size, aclContext* ctx,
acl_error* error)ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* SourceToISA_0_8)(aclLoaderData* ald, const char* source,
size_t data_size) ACL_API_0_8;
typedef aclModule*(ACL_API_ENTRY* IRPhase_0_8)(aclLoaderData* data, aclModule* ir, aclContext* ctx,
acl_error* error)ACL_API_0_8;
typedef aclModule*(ACL_API_ENTRY* LinkPhase_0_8)(aclLoaderData* data, aclModule* ir,
unsigned int num_libs, aclModule** libs,
aclContext* ctx, acl_error* error)ACL_API_0_8;
typedef const void*(ACL_API_ENTRY* CGPhase_0_8)(aclLoaderData* data, aclModule* ir, aclContext* ctx,
acl_error* error)ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* DisasmISA_0_8)(aclLoaderData* data, const char* kernel,
const void* isa_code, size_t isa_size) ACL_API_0_8;
typedef acl_error(ACL_API_ENTRY* SetupLoaderObject_0_8)(aclCompiler* cl) ACL_API_0_8;
// Function-pointer types operating on aclJITObjectImage handles. Their unversioned aliases
// are the jitOI* members of the CL loader structure.

// Builds an image handle from a buffer of `length` bytes and a binary.
typedef aclJITObjectImage(ACL_API_ENTRY* JITObjectImageCreate_0_8)(
const void* buffer, size_t length, aclBinary* bin, acl_error* error_code) ACL_API_0_8;
// Builds an image handle from a buffer of `length` bytes only (no binary argument).
typedef aclJITObjectImage(ACL_API_ENTRY* JITObjectImageCopy_0_8)(const void* buffer, size_t length,
acl_error* error_code) ACL_API_0_8;
// Takes an image handle and returns an acl_error code.
typedef acl_error(ACL_API_ENTRY* JITObjectImageDestroy_0_8)(aclJITObjectImage image) ACL_API_0_8;
// Returns a size_t for the image; error_code is an out-parameter.
typedef size_t(ACL_API_ENTRY* JITObjectImageSize_0_8)(aclJITObjectImage image,
acl_error* error_code) ACL_API_0_8;
// Returns a read-only character pointer for the image; error_code is an out-parameter.
typedef const char*(ACL_API_ENTRY* JITObjectImageData_0_8)(aclJITObjectImage image,
acl_error* error_code)ACL_API_0_8;
// Takes an image handle and returns an acl_error code.
typedef acl_error(ACL_API_ENTRY* JITObjectImageFinalize_0_8)(aclJITObjectImage image) ACL_API_0_8;
// Returns a size_t for the image (stored as the loader's jitOIGlobalSize entry).
typedef size_t(ACL_API_ENTRY* JITObjectImageGetGlobalsSize_0_8)(aclJITObjectImage image,
acl_error* error_code) ACL_API_0_8;
// Callback type handed to JITObjectImageIterateSymbols_0_8. The three parameters are unnamed.
// NOTE(review): declared without ACL_API_ENTRY / ACL_API_0_8, unlike the other typedefs here.
typedef bool (*JITSymbolCallback_0_8)(const char*, const void*, void*);
// Takes an image, a JITSymbolCallback_0_8 and an opaque `data` pointer.
typedef acl_error(ACL_API_ENTRY* JITObjectImageIterateSymbols_0_8)(
aclJITObjectImage image, JITSymbolCallback_0_8 jit_callback, void* data) ACL_API_0_8;
// Returns a mutable char* for the named kernel of a const image handle.
// NOTE(review): ownership of the returned buffer is not visible from this header.
typedef char*(ACL_API_ENTRY* JITObjectImageDisassembleKernel_0_8)(constAclJITObjectImage image,
const char* kernel,
acl_error* error_code)ACL_API_0_8;
// Allocation / deallocation hook types; their unversioned aliases are the `alloc` and
// `dealloc` members of the 0.8.1 binary-options, compiler-options and compiler structures.
// Both are declared without ACL_API_ENTRY.
typedef void* (*AllocFunc_0_8)(size_t size)ACL_API_0_8;
typedef void (*FreeFunc_0_8)(void* ptr) ACL_API_0_8;
#endif // _ACL_FUNCTORS_0_8_H_
@@ -1,365 +0,0 @@
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _ACL_STRUCTS_0_8_H_
#define _ACL_STRUCTS_0_8_H_
//! Common leading member of the versioned ACL structures: holds the size of the structure
// itself, which is used for version checking.
#define ACL_STRUCT_HEADER size_t struct_size
//! A structure that holds information on the various types of arguments.
// The format in memory of this structure is
// -------------
// | aclArgData |
// -------------
// |->argStr |
// -------------
// |->typeStr |
// -------------
// The `arg` union carries the per-kind payload; `type` (declared after the union) is
// presumably the discriminator that selects the valid union member - confirm with users.
// Member order defines the binary layout and must not be changed.
typedef struct _acl_md_arg_type_0_8 {
ACL_STRUCT_HEADER;
size_t argNameSize; // presumably the size of argStr - confirm
size_t typeStrSize; // presumably the size of typeStr - confirm
const char* argStr;
const char* typeStr;
union {
struct { // Struct for sampler arguments
unsigned ID;
unsigned isKernelDefined;
unsigned value;
} sampler;
struct { // Struct for image arguments
unsigned resID;
unsigned cbNum;
unsigned cbOffset;
aclAccessType type;
bool is2D;
bool is1D;
bool isArray;
bool isBuffer;
} image;
struct { // struct for atomic counter arguments
unsigned is32bit;
unsigned resID;
unsigned cbNum;
unsigned cbOffset;
} counter;
struct { // struct for semaphore arguments
unsigned resID;
unsigned cbNum;
unsigned cbOffset;
} sema;
struct { // struct for pass by value arguments
unsigned numElements;
unsigned cbNum;
unsigned cbOffset;
aclArgDataType data;
} value;
struct { // struct for pass by pointer arguments
unsigned numElements;
unsigned cbNum;
unsigned cbOffset;
unsigned bufNum;
unsigned align;
aclArgDataType data;
aclMemoryType memory;
aclAccessType type;
bool isVolatile;
bool isRestrict;
bool isPipe;
} pointer;
struct { // Struct for queue arguments
unsigned numElements;
unsigned cbNum;
unsigned cbOffset;
aclArgDataType data;
aclMemoryType memory;
} queue;
} arg;
aclArgType type; // Kind of argument described by this record.
bool isConst;
} aclArgData_0_8;
//! A structure that holds information for printf.
// The format in memory of this structure is
// --------------
// | aclPrintfFmt|
// --------------
// |->argSizes |
// --------------
// |->fmtStr |
// --------------
typedef struct _acl_md_printf_fmt_0_8 {
ACL_STRUCT_HEADER;
unsigned ID;
size_t numSizes; // presumably the number of entries in argSizes - confirm
size_t fmtStrSize; // presumably the size of fmtStr - confirm
uint32_t* argSizes;
const char* fmtStr;
} aclPrintfFmt_0_8;
//! A structure that holds the metadata in the RODATA section.
// The RT_* names in the trailing comments are the tags each field corresponds to.
// Member order defines the binary layout and must not be changed.
typedef struct _acl_metadata_0_8 {
ACL_STRUCT_HEADER; // This holds the size of the structure itself for versioning.
size_t data_size; // This holds the size of all the memory allocated for this structure.
uint32_t major, minor, revision; // RT_ABI_VERSION
uint32_t gpuCaps; // RT_GPU_FUNC_CAPS
uint32_t funcID; // RT_GPU_FUNC_ID
uint32_t gpuRes[5]; // RT_GPU_DEFAULT_ID
size_t wgs[3]; // RT_WORK_GROUP_SIZE
uint32_t wrs[3]; // RT_WORK_REGION_SIZE
size_t kernelNameSize; // presumably the size of kernelName - confirm
size_t deviceNameSize; // presumably the size of deviceName - confirm
size_t mem[6]; // RT_MEM_SIZES
size_t numArgs; // presumably the number of entries in args - confirm
size_t numPrintf; // presumably the number of entries in printf - confirm
aclArgData_0_8* args; // RT_ARGUMENT_ARRAY
aclPrintfFmt_0_8* printf; // RT_GPU_PRINTF_ARRAY
const char* kernelName; // RT_KERNEL_NAME
const char* deviceName; // RT_DEVICE_NAME
bool enqueue_kernel; // RT_DEVICE_ENQUEUE
uint32_t kernel_index; // RT_KERNEL_INDEX
uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS
size_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT
size_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT
size_t vecTypeHintSize; // presumably the size of vth - confirm
const char* vth; // RT_VEC_TYPE_HINT
} aclMetadata_0_8;
//! A structure that holds information on the capabilities of the bif device.
typedef struct _acl_device_caps_rec_0_8 {
ACL_STRUCT_HEADER;
uint32_t flags[4]; // Four 32-bit words of capability flags.
uint32_t encryptCode;
} aclDevCaps_0_8;
//! Structure that holds information on the target that the source is
// being compiled for.
typedef struct _acl_target_info_rec_0_8 {
ACL_STRUCT_HEADER;
aclDevType arch_id; // An identifier for the architecture.
uint32_t chip_id; // An identifier for the chip.
} aclTargetInfo_0_8;
//! Version 0.8 of the binary options structure.
// The 0.8.1 variant has the same leading members and appends alloc/dealloc hooks.
typedef struct _acl_binary_opts_rec_0_8 {
ACL_STRUCT_HEADER;
uint32_t elfclass;
uint32_t bitness;
const char* temp_file;
uint32_t kernelArgAlign;
} aclBinaryOptions_0_8;
//! Version 0.8.1 of the binary options structure.
// This version adds the alloc/dealloc functions after the 0.8 members.
typedef struct _acl_binary_opts_rec_0_8_1 {
ACL_STRUCT_HEADER;
uint32_t elfclass;
uint32_t bitness;
const char* temp_file;
uint32_t kernelArgAlign;
AllocFunc_0_8 alloc; // Allocation hook.
FreeFunc_0_8 dealloc; // Deallocation hook.
} aclBinaryOptions_0_8_1;
//! Structure that holds the OpenCL binary information (0.8 layout, using the 0.8
// version of the binary options).
typedef struct _acl_bif_rec_0_8 {
ACL_STRUCT_HEADER;
aclTargetInfo_0_8 target; // Information about the target device (embedded by value).
aclBIF* bin; // Pointer to the BIF handle.
aclOptions* options; // Pointer to acl options.
aclBinaryOptions_0_8 binOpts; // The binary options (embedded by value, not a pointer).
aclDevCaps_0_8 caps; // Capabilities of the BIF.
} aclBinary_0_8;
//! Version of the aclBinary that uses the 0_8_1 version of the aclBinaryOptions.
// Because binOpts is embedded by value and is larger here, `caps` sits at a different
// offset than in aclBinary_0_8.
typedef struct _acl_bif_rec_0_8_1 {
ACL_STRUCT_HEADER;
aclTargetInfo_0_8 target; // Information about the target device (embedded by value).
aclBIF* bin; // Pointer to the BIF handle.
aclOptions* options; // Pointer to acl options.
aclBinaryOptions_0_8_1 binOpts; // The binary options (embedded by value, not a pointer).
aclDevCaps_0_8 caps; // Capabilities of the BIF.
} aclBinary_0_8_1;
//! Members shared by every loader structure: the size header, a built-in flag, the
// library name and handle, and the init/fini entry points.
// NOTE(review): the expansion already ends with ';', so writing `ACL_LOADER_COMMON;`
// produces an extra empty declaration. Left unchanged in case other users omit the ';'.
#define ACL_LOADER_COMMON \
ACL_STRUCT_HEADER; \
bool isBuiltin; \
const char* libName; \
void* handle; \
LoaderInit init; \
LoaderFini fini;
//! Struct that maps to the common structure between all loaders.
// It contains only the ACL_LOADER_COMMON members, which every other loader structure
// also declares first.
typedef struct _acl_common_loader_rec_0_8 {
ACL_LOADER_COMMON;
} aclCommonLoader_0_8;
//! Loader structure for the compiler-level API: the common loader members followed by
// a table of function pointers.
// NOTE(review): some members use the versioned typedef names (RetrieveType_0_8, ...) and
// others the unversioned aliases (Compile, Link, ...); the aliases resolve to the _0_8
// types, so the member types are the same either way.
typedef struct _acl_cl_loader_rec_0_8 {
ACL_LOADER_COMMON;
Compile compile;
Link link;
CompLog getLog;
RetrieveType_0_8 retrieveType;
SetType_0_8 setType;
ConvertType_0_8 convertType;
Disassemble disassemble;
GetDevBinary_0_8 devBinary;
// Section insert / extract / remove entries.
InsertSec insSec;
ExtractSec extSec;
RemoveSec remSec;
// Symbol insert / extract / remove entries.
InsertSym insSym;
ExtractSym extSym;
RemoveSym remSym;
QueryInfo getInfo;
AddDbgArg addDbg;
RemoveDbgArg removeDbg;
SetupLoaderObject setupLoaderObject;
// JIT object image entries.
JITObjectImageCreate jitOICreate;
JITObjectImageCopy jitOICopy;
JITObjectImageDestroy jitOIDestroy;
JITObjectImageSize jitOISize;
JITObjectImageData jitOIData;
JITObjectImageFinalize jitOIFinalize;
JITObjectImageGetGlobalsSize jitOIGlobalSize;
JITObjectImageIterateSymbols jitOIIterateSymbols;
JITObjectImageDisassembleKernel jitOIDisassembleKernel;
} aclCLLoader_0_8;
//! Structure that holds the required functions
// that sc exports for the SCDLL infrastructure.
// The inline /*...*/ markers name the types these fields stand in for (SC_UINT32 and
// SC_EXPORT_FUNCTIONS*), which are not declared in this header.
typedef struct _acl_sc_loader_rec_0_8 {
ACL_LOADER_COMMON;
uint32_t /*SC_UINT32*/ sc_interface_version;
void /**SC_EXPORT_FUNCTIONS**/* scef;
// Any version specific fields go here.
} aclSCLoader_0_8;
//! Loader structure for the front end: common loader members plus the front-end entry points.
typedef struct _acl_fe_loader_rec_0_8 {
ACL_LOADER_COMMON;
FEToIR toIR; // Used for Source to aclModule containing LLVMIR
FEToIR toModule; // Used to convert raw SPIR/LLVM-IR to aclModule
SourceToISA toISA; // Used for Source to ISA
} aclFELoader_0_8;
//! Loader structure for the optimizer: common loader members plus one IR phase entry.
typedef struct _acl_opt_loader_rec_0_8 {
ACL_LOADER_COMMON;
IRPhase optimize; // Used for IR to IR transformation
} aclOptLoader_0_8;
//! Loader structure for the linker: common loader members plus the link entry and the
// two IR conversion entries.
typedef struct _acl_link_loader_rec_0_8 {
ACL_LOADER_COMMON;
LinkPhase link; // Used for Linking in IR modules
IRPhase toLLVMIR; // Used for converting SPIR to LLVMIR
IRPhase toSPIR; // Used for converting LLVMIR to SPIR
} aclLinkLoader_0_8;
//! Loader structure for the code generator: common loader members plus the codegen entry.
typedef struct _acl_cg_loader_rec_0_8 {
ACL_LOADER_COMMON;
CGPhase codegen; // Used for converting from LLVMIR to target ASM.
} aclCGLoader_0_8;
//! Loader structure for the back end: common loader members plus the finalize, assemble
// and disassemble entries.
typedef struct _acl_be_loader_rec_0_8 {
ACL_LOADER_COMMON;
SourceToISA finalize; // Used for converting from target source to target ISA.
SourceToISA assemble; // Used for converting from target text to target binary.
DisasmISA disassemble; // Used for converting from target binary to target ISA.
} aclBELoader_0_8;
//! Version 0.8 of the compiler options: one string per loader library. The 0.8.1 variant
// describes scLib as a "name or path" to the shared library; presumably the other *Lib
// strings are used the same way - confirm.
typedef struct _acl_compiler_opts_rec_0_8 {
ACL_STRUCT_HEADER; // Size of the structure for version checking.
const char* clLib;
const char* feLib;
const char* optLib;
const char* linkLib;
const char* cgLib;
const char* beLib;
const char* scLib;
} aclCompilerOptions_0_8;
//! Version 0.8.1 of the compiler options: the 0.8 members followed by the alloc/dealloc
// hooks.
typedef struct _acl_compiler_opts_rec_0_8_1 {
ACL_STRUCT_HEADER; // Size of the structure for version checking.
const char* clLib;
const char* feLib;
const char* optLib;
const char* linkLib;
const char* cgLib;
const char* beLib;
const char* scLib; // Name or path to the shader compiler shared library
AllocFunc alloc; // Allocation hook.
FreeFunc dealloc; // Deallocation hook.
} aclCompilerOptions_0_8_1;
//! Structure that holds the OpenCL compiler and various loaders (0.8 layout).
// The loader structures are embedded by value; only opts, llvm_shutdown, buildLog and
// apiData are pointers.
typedef struct _acl_compiler_rec_0_8 {
ACL_STRUCT_HEADER; // Size of structure for version checking.
aclCLLoader clAPI; // The compiler API loader.
aclFELoader feAPI; // The FE Loader API.
aclOptLoader optAPI; // The Opt Loader API.
aclLinkLoader linkAPI; // The Link Loader API.
aclCGLoader cgAPI; // The CG Loader API.
aclBELoader beAPI; // The BE Loader API.
aclSCLoader scAPI; // The SC Loader API.
aclCompilerOptions* opts; // The options structure for the compiler.
void* llvm_shutdown; // Pointer to the llvm shutdown object.
char* buildLog; // Pointer to the current build log.
unsigned logSize; // Size of the current build log.
aclLoaderData* apiData; // pointer to data store for the compiler API loader.
} aclCompilerHandle_0_8;
//! Structure that holds the OpenCL compiler and various loaders (0.8.1 layout).
// Differs from the 0.8 layout by the alloc/dealloc hooks inserted between the loaders
// and `opts`, so every member from `opts` onwards sits at a different offset.
typedef struct _acl_compiler_rec_0_8_1 {
ACL_STRUCT_HEADER; // Size of structure for version checking.
aclCLLoader clAPI; // The compiler API loader.
aclFELoader feAPI; // The FE Loader API.
aclOptLoader optAPI; // The Opt Loader API.
aclLinkLoader linkAPI; // The Link Loader API.
aclCGLoader cgAPI; // The CG Loader API.
aclBELoader beAPI; // The BE Loader API.
aclSCLoader scAPI; // The SC Loader API.
AllocFunc alloc; // Allocation hook.
FreeFunc dealloc; // Deallocation hook.
aclCompilerOptions* opts; // The options structure for the compiler.
void* llvm_shutdown; // Pointer to the llvm shutdown object.
char* buildLog; // Pointer to the current build log.
unsigned logSize; // Size of the current build log.
aclLoaderData* apiData; // pointer to data store for the compiler API loader.
} aclCompilerHandle_0_8_1;
//! Structure to hold kernel statistics obtained from a kernel.
// Unlike the other structures in this header it has no ACL_STRUCT_HEADER size member,
// and its typedef name carries no version suffix although the struct tag does.
// All members are plain unsigned counters; units are not stated in this header.
typedef struct _acl_kernel_stats_0_8_1 {
unsigned int scratchRegs;
unsigned int scratchSize;
unsigned int availablevgprs;
unsigned int availablesgprs;
unsigned int usedvgprs;
unsigned int usedsgprs;
unsigned int availableldssize;
unsigned int usedldssize;
unsigned int availablestacksize;
unsigned int usedstacksize;
unsigned int wavefrontsize;
unsigned int wavefrontpersimd;
unsigned int threadsperworkgroup;
unsigned int reqdworkgroup_x;
unsigned int reqdworkgroup_y;
unsigned int reqdworkgroup_z;
} aclKernelStats;
#endif // _ACL_STRUCTS_0_8_H_
@@ -1,117 +0,0 @@
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _ACL_API_TYPES_0_8_H_
#define _ACL_API_TYPES_0_8_H_
#include "aclDefs.h"
#include <stdint.h>
#include <stddef.h>
// Typedefs that always point to the most recent versions of the objects.
// These are forward declarations by struct tag: the structure bodies come from
// aclStructs.h, which is included at the end of this header.
typedef struct _acl_md_arg_type_0_8 aclArgData;
typedef struct _acl_md_printf_fmt_0_8 aclPrintfFmt;
typedef struct _acl_metadata_0_8 aclMetadata;
typedef struct _acl_device_caps_rec_0_8 aclDevCaps;
typedef struct _acl_target_info_rec_0_8 aclTargetInfo;
// The binary, binary-options, compiler and compiler-options aliases select the 0.8.1 layouts.
typedef struct _acl_bif_rec_0_8_1 aclBinary;
typedef struct _acl_binary_opts_rec_0_8_1 aclBinaryOptions;
typedef struct _acl_compiler_rec_0_8_1 aclCompiler;
typedef struct _acl_compiler_opts_rec_0_8_1 aclCompilerOptions;
// NOTE: the opaque handle types below are themselves pointer typedefs, so a declaration
// such as `aclOptions* options` is a pointer to a pointer.
typedef struct _acl_options_0_8* aclOptions; // Opaque pointer to amd::Options
typedef struct _acl_binary_0_8* aclBIF; // Opaque pointer to bifbase
typedef struct _acl_common_loader_rec_0_8 aclCommonLoader;
typedef struct _acl_cl_loader_rec_0_8 aclCLLoader;
typedef struct _acl_sc_loader_rec_0_8 aclSCLoader;
typedef struct _acl_fe_loader_rec_0_8 aclFELoader;
typedef struct _acl_link_loader_rec_0_8 aclLinkLoader;
typedef struct _acl_opt_loader_rec_0_8 aclOptLoader;
typedef struct _acl_cg_loader_rec_0_8 aclCGLoader;
typedef struct _acl_be_loader_rec_0_8 aclBELoader;
typedef struct _acl_llvm_module_0_8* aclModule; // Opaque pointer to llvm::Module
typedef struct _acl_llvm_context_0_8* aclContext; // Opaque pointer to llvm::Context
typedef struct _acl_loader_data_0_8* aclLoaderData; // Opaque pointer to loader data
#include "aclEnums.h"
// Typedefs for enumerations.
// Each alias gives an unversioned name to a _0_8 enumeration tag; they are declared
// after aclEnums.h is included and before aclFunctors.h, which uses several of them.
typedef enum _acl_error_enum_0_8 acl_error;
typedef enum _comp_device_caps_enum_0_8 compDeviceCaps;
typedef enum _comp_opt_settings_enum_0_8 compOptSettings;
typedef enum _acl_dev_type_enum_0_8 aclDevType;
typedef enum _acl_cl_version_enum_0_8 aclCLVersion;
typedef enum _acl_type_enum_0_8 aclType;
typedef enum _rt_query_types_enum_0_8 aclQueryType;
typedef enum _rt_gpu_caps_enum_0_8 aclGPUCaps;
typedef enum _rt_gpu_resource_enum_0_8 aclGPUResource;
typedef enum _rt_gpu_mem_sizes_enum_0_8 aclGPUMemSizes;
typedef enum _acl_arg_type_enum_0_8 aclArgType;
typedef enum _acl_data_type_enum_0_8 aclArgDataType;
typedef enum _acl_memory_type_enum_0_8 aclMemoryType;
typedef enum _acl_access_type_enum_0_8 aclAccessType;
typedef enum _bif_version_enum_0_8 aclBIFVersion;
typedef enum _bif_platform_enum_0_8 aclPlatform;
typedef enum _bif_sections_enum_0_8 aclSections;
typedef enum _acl_loader_type_enum_0_8 aclLoaderType;
typedef enum _acl_binary_image_type_enum_0_8 aclBinaryImageType;
#include "aclFunctors.h"
// Typedefs for function pointers.
// Each alias gives an unversioned name to the matching _0_8 function-pointer type; the
// structure definitions included after this block use these unversioned names.
typedef aclLogFunction_0_8 aclLogFunction;
// Section and symbol manipulation.
typedef InsertSec_0_8 InsertSec;
typedef RemoveSec_0_8 RemoveSec;
typedef ExtractSec_0_8 ExtractSec;
typedef InsertSym_0_8 InsertSym;
typedef RemoveSym_0_8 RemoveSym;
typedef ExtractSym_0_8 ExtractSym;
// Compiler-level API.
typedef QueryInfo_0_8 QueryInfo;
typedef Compile_0_8 Compile;
typedef Link_0_8 Link;
typedef AddDbgArg_0_8 AddDbgArg;
typedef RemoveDbgArg_0_8 RemoveDbgArg;
typedef SetupLoaderObject_0_8 SetupLoaderObject;
typedef CompLog_0_8 CompLog;
typedef RetrieveType_0_8 RetrieveType;
typedef SetType_0_8 SetType;
typedef ConvertType_0_8 ConvertType;
typedef Disassemble_0_8 Disassemble;
typedef GetDevBinary_0_8 GetDevBinary;
// Loader life cycle and compilation phases.
typedef LoaderInit_0_8 LoaderInit;
typedef LoaderFini_0_8 LoaderFini;
typedef FEToIR_0_8 FEToIR;
typedef SourceToISA_0_8 SourceToISA;
typedef IRPhase_0_8 IRPhase;
typedef LinkPhase_0_8 LinkPhase;
typedef CGPhase_0_8 CGPhase;
typedef DisasmISA_0_8 DisasmISA;
// Allocation hooks.
typedef AllocFunc_0_8 AllocFunc;
typedef FreeFunc_0_8 FreeFunc;
// JIT object image API.
typedef JITObjectImageCreate_0_8 JITObjectImageCreate;
typedef JITObjectImageCopy_0_8 JITObjectImageCopy;
typedef JITObjectImageDestroy_0_8 JITObjectImageDestroy;
typedef JITObjectImageSize_0_8 JITObjectImageSize;
typedef JITObjectImageData_0_8 JITObjectImageData;
typedef JITObjectImageFinalize_0_8 JITObjectImageFinalize;
typedef JITObjectImageGetGlobalsSize_0_8 JITObjectImageGetGlobalsSize;
typedef JITSymbolCallback_0_8 JITSymbolCallback;
typedef JITObjectImageIterateSymbols_0_8 JITObjectImageIterateSymbols;
typedef JITObjectImageDisassembleKernel_0_8 JITObjectImageDisassembleKernel;
#include "aclStructs.h"
#endif // _ACL_API_TYPES_0_8_H_
@@ -1,29 +0,0 @@
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _COMPLIB_SPIRV_UTILS_H
#define _COMPLIB_SPIRV_UTILS_H
#include <cstddef>
// Both helpers inspect a buffer of `length` bytes starting at `image` and return a bool.
// NOTE(review): the implementations are not visible from this header; judging by the names,
// validateSPIRV presumably checks that the buffer is a valid SPIR-V module and isSPIRVMagic
// presumably only tests for the SPIR-V magic number - confirm against the definitions.
bool validateSPIRV(const void* image, size_t length);
bool isSPIRVMagic(const void* image, size_t length);
#endif
@@ -1,230 +0,0 @@
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _CL_UTILS_BIF_SECTION_LABELS_HPP_
#define _CL_UTILS_BIF_SECTION_LABELS_HPP_
#ifdef __cplusplus
extern "C" {
#endif
// Index constants for a two-element prefix/postfix pair
// (presumably the str[2] member of oclBIFSymbolStruct - confirm with users).
namespace bif {
constexpr unsigned int PRE = 0u;
constexpr unsigned int POST = 1u;
}  // namespace bif
//! Identifiers of the symbols that can be stored in a BIF binary.
// Values are implicit and follow declaration order (symOpenclCompilerOptions == 0), so
// entries must not be reordered or removed.
// NOTE: symLast (26) is not the final enumerator: symKernelStats (27) and
// symBRIGLoaderMap (28) are declared after it. The symbol tables have no symLast entry,
// so for those two identifiers the table index differs from the enumerator value.
typedef enum {
symOpenclCompilerOptions,
symAMDILCompilerOptions,
symHSACompilerOptions,
symOpenclLinkerOptions,
symOpenclMeta,
symOpenclKernel,
symOpenclStub,
symOpenclGlobal,
symISAMeta,
symISABinary,
symAMDILText,
symAMDILBinary,
symHSAILText,
symBRIG,
symAMDILFMeta,
symISAText,
symBRIGxxx1,
symBRIGxxx2,
symBRIGxxx3,
symX86Barrier,
symAMDILHeader,
symDebugInfo,
symDebugilText,
symDebugilBinary,
symAsmText,
symDLL,
symLast,
symKernelStats,
symBRIGLoaderMap
} oclBIFSymbolID;
//! One row of a BIF symbol table: a symbol identifier, the prefix/postfix strings of
// its name, and the sections it is stored in.
struct oclBIFSymbolStruct {
oclBIFSymbolID id;
// pre/post fix of the symbol string
// (presumably indexed with bif::PRE and bif::POST - confirm with users)
const char* str[2];
// the BIF section that the symbol is stored in for GPU/CPU
// (presumably sections[0] is GPU and sections[1] is CPU, matching the comment order)
aclSections sections[2];
};
// TODO: analyze the changes since 30 and remove symbols that are no longer used,
// for example, symISAMeta, update convert functions, check backward compatibility.
// These are the symbols that are defined by the BIF 3.1 spec.
// NOTE(review): the table appears entry-for-entry identical to BIF30, and the
// per-entry comments still say "BIF 3.0".
// The leading number in each comment is the array index; it matches the enumerator value
// for entries 0-25 only (there is no symLast entry).
static constexpr oclBIFSymbolStruct BIF31[28] = {
// 0: BIF 3.0 compiler options, .comment section via library support.
{symOpenclCompilerOptions, {"__OpenCL_", "compiler_options"}, {aclCOMMENT, aclCOMMENT}},
// 1: BIF 3.0 AMDIL compile options, .comment section via -fbin-amdil.
{symAMDILCompilerOptions, {"__AMDIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
// 2: BIF 3.0 HSAIL compile options, .comment section via -fbin-hsail.
{symHSACompilerOptions, {"__HSAIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
// 3: BIF 3.0 linker options, .comment section via library support.
{symOpenclLinkerOptions, {"__OpenCL_", "linker_options"}, {aclCOMMENT, aclCOMMENT}},
// 4: BIF 3.0 per kernel metadata, .cg section via -fbin-cg for CPU,
// .rodata section via -fbin-exe for GPU
{symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclCODEGEN}},
// 5: BIF 3.0 per kernel text(x86 only), .cg section via -fbin-cg.
{symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclCODEGEN}},
// 6: BIF 3.0 per kernel stub(x86 only), .cg section via -fbin-cg.
{symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclCODEGEN}},
// 7: BIF 3.0 per constant buffer data, .rodata section via -fbin-exe.
{symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclRODATA}},
// 8: BIF 3.0 per kernel ISA metadata, .rodata section via -fbin-exe.
{symISAMeta, {"__ISA_", "_metadata"}, {aclRODATA, aclLAST}},
// 9: BIF 3.0 per kernel ISA, .text section via -fbin-exe.
{symISABinary, {"__ISA_", "_binary"}, {aclTEXT, aclLAST}},
// 10: BIF 3.0 per kernel AMDIL source, .internal section via -fbin-amdil.
{symAMDILText, {"__AMDIL_", "_text"}, {aclINTERNAL, aclLAST}},
// 11: BIF 3.0 per kernel AMDIL binary, .internal section via -fbin-amdil.
{symAMDILBinary, {"__AMDIL_", "_binary"}, {aclINTERNAL, aclLAST}},
// 12: BIF 3.0 per kernel HSAIL source, .internal section via -fbin-hsail.
{symHSAILText, {"__HSAIL_", "_text"}, {aclCODEGEN, aclLAST}},
// 13: BIF 3.0 per kernel HSAIL binary, .internal section via -fbin-hsail.
{symBRIG, {"__BRIG__", ""}, {aclBRIG, aclLAST}},
// 14: BIF 3.0 per function metadata, .internal section via -fbin-amdil.
{symAMDILFMeta, {"__AMDIL_", "_fmetadata"}, {aclINTERNAL, aclLAST}},
// 15: BIF 3.0 per kernel ISA text, .internal section via disassembly.
{symISAText, {"__ISA_", "_text"}, {aclINTERNAL, aclLAST}},
// 16: BIF 3.0 BRIG operands declarations, .brig section via -fbin-brig.
{symBRIGxxx1, {"", ""}, {aclLAST, aclLAST}},
// 17: Unused after changes in HSAIL PRM
{symBRIGxxx2, {"", ""}, {aclLAST, aclLAST}},
// 18: BIF 3.0 BRIG strtab declarations, .brig section via -fbin-brig.
{symBRIGxxx3, {"", ""}, {aclLAST, aclLAST}},
// 19: BIF 3.0 per kernel barrier metadata, only valid for X86.
{symX86Barrier, {"__X86_", "_barrier"}, {aclLAST, aclLAST}},
// 20: BIF 3.0 per kernel header, .internal section via -fbin-amdil.(Legacy from bif2.x)
{symAMDILHeader, {"__AMDIL_", "_header"}, {aclINTERNAL, aclLAST}},
// 21: BIF 3.0 HSA BRIG or ISA debug info
{symDebugInfo, {"__debug_brig__", "__debug_isa__"}, {aclHSADEBUG, aclLAST}},
// 22: BIF 3.0 debugil text, .internal section via -g
{symDebugilText, {"__debugil_text", ""}, {aclINTERNAL, aclLAST}},
// 23: BIF 3.0 debugil binary, .internal section, can be converted from
// __debugil_text
{symDebugilBinary, {"__debugil_binary", ""}, {aclINTERNAL, aclLAST}},
// 24: assembly text; no name prefix/postfix, second section slot only.
{symAsmText, {"", ""}, {aclLAST, aclCODEGEN}},
// 25: DLL; no name prefix/postfix, second section slot only.
{symDLL, {"", ""}, {aclLAST, aclTEXT}},
// 26: BIF 3.0 HSAIL kernel statistics
{symKernelStats, {"__HSAIL_", "_kernel_statistics"}, {aclKSTATS, aclLAST}},
// 27: BIF 3.0 BRIG loader map
{symBRIGLoaderMap, {"__Loader_Map", ""}, {aclCODEGEN, aclLAST}},
}; // BIF31
// These are the symbols that are defined by the BIF 3.0 spec.
// This is the table searched by findBIF30SymStruct(). The leading number in each comment
// is the array index; it matches the enumerator value for entries 0-25 only (there is no
// symLast entry).
static constexpr oclBIFSymbolStruct BIF30[28] = {
// 0: BIF 3.0 compiler options, .comment section via library support.
{symOpenclCompilerOptions, {"__OpenCL_", "compiler_options"}, {aclCOMMENT, aclCOMMENT}},
// 1: BIF 3.0 AMDIL compile options, .comment section via -fbin-amdil.
{symAMDILCompilerOptions, {"__AMDIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
// 2: BIF 3.0 HSAIL compile options, .comment section via -fbin-hsail.
{symHSACompilerOptions, {"__HSAIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
// 3: BIF 3.0 linker options, .comment section via library support.
{symOpenclLinkerOptions, {"__OpenCL_", "linker_options"}, {aclCOMMENT, aclCOMMENT}},
// 4: BIF 3.0 per kernel metadata, .cg section via -fbin-cg for CPU,
// .rodata section via -fbin-exe for GPU
{symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclCODEGEN}},
// 5: BIF 3.0 per kernel text(x86 only), .cg section via -fbin-cg.
{symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclCODEGEN}},
// 6: BIF 3.0 per kernel stub(x86 only), .cg section via -fbin-cg.
{symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclCODEGEN}},
// 7: BIF 3.0 per constant buffer data, .rodata section via -fbin-exe.
{symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclRODATA}},
// 8: BIF 3.0 per kernel ISA metadata, .rodata section via -fbin-exe.
{symISAMeta, {"__ISA_", "_metadata"}, {aclRODATA, aclLAST}},
// 9: BIF 3.0 per kernel ISA, .text section via -fbin-exe.
{symISABinary, {"__ISA_", "_binary"}, {aclTEXT, aclLAST}},
// 10: BIF 3.0 per kernel AMDIL source, .internal section via -fbin-amdil.
{symAMDILText, {"__AMDIL_", "_text"}, {aclINTERNAL, aclLAST}},
// 11: BIF 3.0 per kernel AMDIL binary, .internal section via -fbin-amdil.
{symAMDILBinary, {"__AMDIL_", "_binary"}, {aclINTERNAL, aclLAST}},
// 12: BIF 3.0 per kernel HSAIL source, .internal section via -fbin-hsail.
{symHSAILText, {"__HSAIL_", "_text"}, {aclCODEGEN, aclLAST}},
// 13: BIF 3.0 per kernel HSAIL binary, .internal section via -fbin-hsail.
{symBRIG, {"__BRIG__", ""}, {aclBRIG, aclLAST}},
// 14: BIF 3.0 per function metadata, .internal section via -fbin-amdil.
{symAMDILFMeta, {"__AMDIL_", "_fmetadata"}, {aclINTERNAL, aclLAST}},
// 15: BIF 3.0 per kernel ISA text, .internal section via disassembly.
{symISAText, {"__ISA_", "_text"}, {aclINTERNAL, aclLAST}},
// 16: BIF 3.0 BRIG operands declarations, .brig section via -fbin-brig.
{symBRIGxxx1, {"", ""}, {aclLAST, aclLAST}},
// 17: Unused after changes in HSAIL PRM
{symBRIGxxx2, {"", ""}, {aclLAST, aclLAST}},
// 18: BIF 3.0 BRIG strtab declarations, .brig section via -fbin-brig.
{symBRIGxxx3, {"", ""}, {aclLAST, aclLAST}},
// 19: BIF 3.0 per kernel barrier metadata, only valid for X86.
{symX86Barrier, {"__X86_", "_barrier"}, {aclLAST, aclLAST}},
// 20: BIF 3.0 per kernel header, .internal section via -fbin-amdil.(Legacy from bif2.x)
{symAMDILHeader, {"__AMDIL_", "_header"}, {aclINTERNAL, aclLAST}},
// 21: BIF 3.0 HSA BRIG or ISA debug info
{symDebugInfo, {"__debug_brig__", "__debug_isa__"}, {aclHSADEBUG, aclLAST}},
// 22: BIF 3.0 debugil text, .internal section via -g
{symDebugilText, {"__debugil_text", ""}, {aclINTERNAL, aclLAST}},
// 23: BIF 3.0 debugil binary, .internal section, can be converted from
// __debugil_text
{symDebugilBinary, {"__debugil_binary", ""}, {aclINTERNAL, aclLAST}},
// 24: assembly text; no name prefix/postfix, second section slot only.
{symAsmText, {"", ""}, {aclLAST, aclCODEGEN}},
// 25: DLL; no name prefix/postfix, second section slot only.
{symDLL, {"", ""}, {aclLAST, aclTEXT}},
// 26: BIF 3.0 HSAIL kernel statistics
{symKernelStats, {"__HSAIL_", "_kernel_statistics"}, {aclKSTATS, aclLAST}},
// 27: BIF 3.0 BRIG loader map
{symBRIGLoaderMap, {"__Loader_Map", ""}, {aclCODEGEN, aclLAST}},
}; // BIF30
// These are the symbols (and the sections they live in) that are defined by the BIF 2.0 spec.
// Unlike the BIF 3.x tables, this table holds only 13 entries and is not ordered by
// enumerator value, so entries must be located by searching on `id`, never by indexing.
// Note that symOpenclKernel and symISABinary share the same "__OpenCL_" / "_kernel"
// name parts and differ only in their section slots.
static constexpr oclBIFSymbolStruct BIF20[13] = {
{symOpenclCompilerOptions, {"__OpenCL_compile_options", ""}, {aclCOMMENT, aclCOMMENT}},
{symOpenclLinkerOptions, {"__OpenCL_linker_options", ""}, {aclCOMMENT, aclCOMMENT}},
{symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclDLL}},
{symISABinary, {"__OpenCL_", "_kernel"}, {aclCAL, aclLAST}},
{symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclDLL}},
{symAMDILHeader, {"__OpenCL_", "_header"}, {aclRODATA, aclLAST}},
{symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclLAST}},
{symAMDILText, {"__OpenCL_", "_amdil"}, {aclILTEXT, aclLAST}},
{symAMDILFMeta, {"__OpenCL_", "_fmetadata"}, {aclRODATA, aclLAST}},
{symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclDLL}},
{symDebugilText, {"", ""}, {aclILDEBUG, aclLAST}},
{symAsmText, {"", ""}, {aclLAST, aclASTEXT}},
{symDLL, {"", ""}, {aclLAST, aclDLL}},
}; // BIF20
//! Linearly searches a symbol table for the entry whose id equals \p id.
// \param symbols   First element of the table to search; may be null.
// \param nSymbols  Number of elements in \p symbols.
// \param id        Symbol identifier to look for.
// \return Pointer to the first matching entry, or nullptr when the table is null or
//         contains no entry with that id.
// The search is by id rather than by index because table position and enumerator value
// do not always coincide (e.g. BIF20 is not ordered by enumerator).
inline const oclBIFSymbolStruct* findBIFSymbolStruct(const oclBIFSymbolStruct* symbols,
                                                     size_t nSymbols, oclBIFSymbolID id) {
  // A null table has nothing to match; avoid dereferencing it when nSymbols > 0.
  if (symbols == nullptr) {
    return nullptr;
  }
  for (size_t i = 0; i < nSymbols; ++i) {
    if (symbols[i].id == id) {
      return &symbols[i];
    }
  }
  return nullptr;
}
//! Looks up \p id in the BIF 3.0 symbol table (BIF30).
// \return Pointer to the matching BIF30 entry, or a null pointer when BIF30 has no
//         entry for \p id.
inline const oclBIFSymbolStruct* findBIF30SymStruct(oclBIFSymbolID id) {
  // Element count is derived from the array itself so it tracks the table size.
  return findBIFSymbolStruct(BIF30, sizeof(BIF30) / sizeof(BIF30[0]), id);
}
#ifdef __cplusplus
}
#endif
#endif // _CL_UTILS_BIF_SECTION_LABELS_HPP_
@@ -1,383 +0,0 @@
/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef _CL_LIB_UTILS_0_8_H_
#define _CL_LIB_UTILS_0_8_H_
#include "acl.h"
#include <string>
#include <sstream>
#include <iterator>
#include <cstdlib>
#include <cassert>
#include <cstring>
#include "library.hpp"
#include "utils/bif_section_labels.hpp"
#include "utils/options.hpp"
// NOTE(review): a using-directive at header scope makes the names in namespace bif
// visible unqualified in every file that includes this header. It is kept as-is
// because existing includers may rely on the unqualified names.
using namespace bif;
// Flag helpers for the option structure of an aclDevCaps. Each takes the capabilities
// structure (the parameter is named `elf` although its type is aclDevCaps*) and the
// compDeviceCaps value that selects the flag. The implementations are not in this header.

// Utility function to set a flag in the option structure
// of the aclDevCaps.
void setFlag(aclDevCaps* elf, compDeviceCaps option);
// Utility function to flip a flag in the option structure
// of the aclDevCaps.
void flipFlag(aclDevCaps* elf, compDeviceCaps option);
// Utility function to clear a flag in the option structure
// of the aclDevCaps.
void clearFlag(aclDevCaps* elf, compDeviceCaps option);
// Utility function to check that a flag in the option structure
// of the aclDevCaps is set.
bool checkFlag(aclDevCaps* elf, compDeviceCaps option);
// Utility function to initialize an elf's device capabilities
void initElfDeviceCaps(aclBinary* elf);
// Append the string to the aclCompiler log string.
void appendLogToCL(aclCompiler* cl, const std::string& logStr);
const char* getDeviceName(const aclTargetInfo& target);
// Select the correct library from the target information.
amd::LibrarySelector getLibraryType(const aclTargetInfo* target);
// get family_enum from the target information.
unsigned getFamilyEnum(const aclTargetInfo* target);
// get chip_enum from the target information.
unsigned getChipEnum(const aclTargetInfo* target);
// get isa type name (compute capability) from the target information.
const std::string& getIsaTypeName(const aclTargetInfo* target);
// get isa type (compute capability) from the target information.
int getIsaType(const aclTargetInfo* target);
// get Feature String for target.
std::string getFeatureString(const aclTargetInfo& target, amd::option::Options* OptionsObj);
// Create a copy of an ELF and duplicate all sections/symbols
aclBinary* createELFCopy(aclBinary* src);
// Create a BIF2.1 elf from a BIF 2.0 elf
aclBinary* convertBIF20ToBIF21(aclBinary* src);
// Create a BIF3.0 elf from a BIF 2.0 elf
aclBinary* convertBIF20ToBIF30(aclBinary* src);
// Create a BIF3.1 elf from a BIF 2.0 elf
aclBinary* convertBIF20ToBIF31(aclBinary* src);
// Create a BIF2.0 elf from a BIF 2.1 elf
aclBinary* convertBIF21ToBIF20(aclBinary* src);
// Create a BIF3.0 elf from a BIF 2.1 elf
aclBinary* convertBIF21ToBIF30(aclBinary* src);
// Create a BIF3.1 elf from a BIF 2.1 elf
aclBinary* convertBIF21ToBIF31(aclBinary* src);
// Create a BIF2.0 elf from a BIF 3.0 elf
aclBinary* convertBIF30ToBIF20(aclBinary* src);
// Create a BIF2.1 elf from a BIF 3.0 elf
aclBinary* convertBIF30ToBIF21(aclBinary* src);
// Create a BIF3.1 elf from a BIF 3.0 elf
aclBinary* convertBIF30ToBIF31(aclBinary* src);
// Create a BIF2.0 elf from a BIF 3.1 elf
aclBinary* convertBIF31ToBIF20(aclBinary* src);
// Create a BIF2.1 elf from a BIF 3.1 elf
aclBinary* convertBIF31ToBIF21(aclBinary* src);
// Create a BIF3.0 elf from a BIF 3.1 elf
aclBinary* convertBIF31ToBIF30(aclBinary* src);
// get a pointer to the aclBIF irrespective of the
// binary version.
aclBIF* aclutGetBIF(aclBinary*);
// Get a pointer to the aclOptions irrespective of
// the binary version.
aclOptions* aclutGetOptions(aclBinary*);
// Get a pointer to the aclBinaryOptions struct
// irrespective of the binary version.
aclBinaryOptions* aclutGetBinOpts(aclBinary*);
// Get a pointer to the target info struct
// irrespective of the binary version.
aclTargetInfo* aclutGetTargetInfo(aclBinary*);
// Get a pointer to the device caps
// irrespective of the binary version.
aclDevCaps* aclutGetCaps(aclBinary*);
// Copy two binary option structures irrespective
// of the binary version and uses defaults when
// things don't match up.
void aclutCopyBinOpts(aclBinaryOptions* dst, const aclBinaryOptions* src, bool is64bit);
// Retrieve kernel statistics from binary
// and insert to elf as symbol
acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*);
// Returns target chip name.
std::string aclutGetCodegenName(const aclTargetInfo& tgtInfo);
// Helper function that returns the
// allocation function from the binary.
AllocFunc aclutAlloc(const aclBinary* bin);
// Helper function that returns the
// de-allocation function from the binary.
FreeFunc aclutFree(const aclBinary* bin);
// Helper function that returns the
// allocation function from the compiler.
AllocFunc aclutAlloc(const aclCompiler* bin);
// Helper function that returns the
// de-allocation function from the compiler.
FreeFunc aclutFree(const aclCompiler* bin);
// Helper function that returns the
// allocation function from the compiler options.
AllocFunc aclutAlloc(const aclCompilerOptions* bin);
// Helper function that returns the
// de-allocation function from the compiler options.
FreeFunc aclutFree(const aclCompilerOptions* bin);
// Splits a NUL-terminated string into its whitespace-separated tokens.
//
// @param str  Text to split; may be NULL, in which case no tokens are produced
//             (constructing a std::string from a null pointer is undefined).
// @return     The tokens in order of appearance; empty when `str` is NULL,
//             empty, or contains only whitespace.
inline std::vector<std::string> splitSpaceSeparatedString(char* str) {
  std::vector<std::string> tokens;
  if (str == NULL) {
    return tokens;
  }
  std::string text(str);
  std::stringstream stream(text);
  // operator>> skips leading whitespace and stops at the next whitespace run.
  for (std::string token; stream >> token;) {
    tokens.push_back(token);
  }
  return tokens;
}
// Helper function that returns OpenCL mangled kernel name.
inline std::string aclutOpenclMangledKernelName(const std::string& kernel_name) {
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclKernel);
assert(sym && "symbol not found");
return std::string("&") + sym->str[PRE] + kernel_name + sym->str[POST];
}
// Helper function that returns OpenCL mangled kernel metadata symbol name.
inline std::string aclutOpenclMangledKernelMetadataName(const std::string& kernel_name) {
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
assert(sym && "symbol not found");
return sym->str[PRE] + aclutOpenclMangledKernelName(kernel_name) + sym->str[POST];
}
#ifdef WITH_TARGET_HSAIL
// Helper function that updates metadata for all the kernels in binary;
// the updated attribute is the number of hidden kernel arguments.
//
// cl  - compiler handle passed through to the acl* calls.
// bin - binary whose per-kernel metadata symbols are rewritten.
// num - value stored into aclMetadata::numHiddenKernelArgs for every kernel.
//
// Returns ACL_SUCCESS without touching the binary when num equals
// MAX_HIDDEN_KERNARGS_NUM; otherwise returns the first non-success code from
// an acl* call, ACL_ELF_ERROR when a kernel's metadata symbol is missing or
// empty, or the status of the last acl* call on completion.
inline acl_error aclutUpdateMetadataWithHiddenKernargsNum(aclCompiler* cl, aclBinary* bin,
uint32_t num) {
if (num == MAX_HIDDEN_KERNARGS_NUM) {
return ACL_SUCCESS;
}
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
assert(sym && "symbol not found");
// Metadata symbols are looked up in the first section listed for symOpenclMeta.
aclSections secID = sym->sections[0];
// First query passes a NULL buffer to obtain the size of the kernel name list.
size_t kernelNamesSize = 0;
acl_error error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
if (error_code != ACL_SUCCESS) {
return error_code;
}
// Second query fills the buffer with the names themselves.
// NOTE(review): the split below treats the buffer as a NUL-terminated string;
// confirm aclQueryInfo terminates it (and that a reported size of 0 is handled).
char* kernelNames = new char[kernelNamesSize];
error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
if (error_code != ACL_SUCCESS) {
delete[] kernelNames;
return error_code;
}
// The names are split on whitespace; the raw buffer is no longer needed afterwards.
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
delete[] kernelNames;
size_t roSize = 0;
for (auto it = vKernels.begin(); it != vKernels.end(); ++it) {
std::string symbol = aclutOpenclMangledKernelMetadataName(*it);
// aclExtractSymbol returns a const pointer; constness is cast away so the
// metadata can be patched in place.
void* roSec =
const_cast<void*>(aclExtractSymbol(cl, bin, &roSize, secID, symbol.c_str(), &error_code));
if (error_code != ACL_SUCCESS) {
return error_code;
}
if (!roSec || roSize == 0) {
error_code = ACL_ELF_ERROR;
return error_code;
}
aclMetadata* md = reinterpret_cast<aclMetadata*>(roSec);
md->numHiddenKernelArgs = num;
// Replace the symbol: remove the old one, then insert the patched bytes.
// NOTE(review): `md` is still read by aclInsertSymbol after aclRemoveSymbol;
// verify the extracted storage stays valid across the removal.
error_code = aclRemoveSymbol(cl, bin, secID, symbol.c_str());
if (error_code != ACL_SUCCESS) {
return error_code;
}
error_code = aclInsertSymbol(cl, bin, md, roSize, secID, symbol.c_str());
if (error_code != ACL_SUCCESS) {
return error_code;
}
}
return error_code;
}
#endif
struct _target_mappings_rec;
typedef _target_mappings_rec TargetMapping;
// Returns the TargetMapping for the specific target device.
const TargetMapping& getTargetMapping(const aclTargetInfo& target);
// True when the target's arch_id is one of the 64-bit ids
// (aclX64, aclAMDIL64 or aclHSAIL64).
inline bool is64BitTarget(const aclTargetInfo& target) {
  const auto arch = target.arch_id;
  if (arch == aclX64) {
    return true;
  }
  if (arch == aclAMDIL64) {
    return true;
  }
  return arch == aclHSAIL64;
}
// True when the target's arch_id is aclX64 or aclX86.
inline bool isCpuTarget(const aclTargetInfo& target) {
  const auto arch = target.arch_id;
  if (arch == aclX64) {
    return true;
  }
  return arch == aclX86;
}
// True when the target's arch_id is any AMDIL or HSAIL id (32- or 64-bit).
inline bool isGpuTarget(const aclTargetInfo& target) {
  const auto arch = target.arch_id;
  const bool amdil = (arch == aclAMDIL) || (arch == aclAMDIL64);
  const bool hsail = (arch == aclHSAIL) || (arch == aclHSAIL64);
  return amdil || hsail;
}
// True when the target's arch_id is aclAMDIL or aclAMDIL64.
inline bool isAMDILTarget(const aclTargetInfo& target) {
  const auto arch = target.arch_id;
  if (arch == aclAMDIL) {
    return true;
  }
  return arch == aclAMDIL64;
}
// True when the target's arch_id is aclHSAIL or aclHSAIL64.
inline bool isHSAILTarget(const aclTargetInfo& target) {
  const auto arch = target.arch_id;
  if (arch == aclHSAIL) {
    return true;
  }
  return arch == aclHSAIL64;
}
const std::string& getLegacyLibName();
// A target counts as valid when both its arch_id and chip_id are non-zero.
inline bool isValidTarget(const aclTargetInfo& target) {
  if (!target.arch_id) {
    return false;
  }
  return target.chip_id ? true : false;
}
bool isChipSupported(const aclTargetInfo& target);
// Identifiers for the shader-compiler slots.
// NOTE(review): SC_AMDIL and SC_HSAIL deliberately or accidentally share the
// value 0, which makes SC_LAST equal to 1 - confirm this aliasing is intended.
enum scId {
SC_AMDIL = 0,
SC_HSAIL = 0,  // same numeric value as SC_AMDIL
SC_LAST,       // one past the last id (evaluates to 1)
};
// Allocates `size` bytes aligned to `alignment`.
// Windows forwards to _aligned_malloc(); elsewhere posix_memalign() is used
// and NULL is returned whenever it reports an error.
// Release the result with alignedFree().
inline void* alignedMalloc(size_t size, size_t alignment) {
#if !defined(_WIN32)
  void* block = NULL;
  const int status = ::posix_memalign(&block, alignment, size);
  return (status == 0) ? block : NULL;
#else
  return ::_aligned_malloc(size, alignment);
#endif
}
// Releases memory obtained from alignedMalloc():
// _aligned_free() on Windows, plain free() everywhere else.
inline void alignedFree(void* ptr) {
#if !defined(_WIN32)
  free(ptr);
#else
  ::_aligned_free(ptr);
#endif
}
#if defined(_WIN32)
// Rewrites `filename` in place as an extended-length path ("\\?\" + absolute
// path) when its absolute form is at least _MAX_PATH characters long.
// The name is left untouched when it is empty, when the absolute path is
// shorter than _MAX_PATH, or when the absolute path cannot be resolved.
// Characters are widened/narrowed one-to-one, without any code-page conversion.
inline void convertLongAbsFilePathIfNeeded(std::string& filename) {
  if (filename.empty()) {
    return;
  }
  std::wstring ws(filename.begin(), filename.end());
  wchar_t abs_path[_MAX_ENV];
  // _wfullpath returns NULL on failure; abs_path must not be read in that case.
  if (_wfullpath(abs_path, ws.c_str(), _MAX_ENV) == NULL) {
    return;
  }
  std::wstring ws_abs(abs_path);
  if (ws_abs.size() >= _MAX_PATH) {
    std::string s(ws_abs.begin(), ws_abs.end());
    filename = "\\\\?\\" + s;
  }
}
#endif
// Reads an entire file into a freshly malloc()'ed, NUL-terminated buffer.
//
// @param source_filename  Path of the file to read (taken by value because the
//                         Windows build may rewrite it as a long-path name).
// @param size             Receives the number of bytes read, excluding the
//                         terminating NUL; left unmodified on failure.
// @return  The buffer, to be released with free(), or NULL when the file cannot
//          be opened, its size cannot be determined, memory cannot be
//          allocated, or fewer bytes than expected are read.
inline char* readFile(std::string source_filename, size_t& size) {
#if defined(_WIN32)
  convertLongAbsFilePathIfNeeded(source_filename);
#endif
  FILE* fp = ::fopen(source_filename.c_str(), "rb");
  if (!fp) {
    return NULL;
  }
  // obtain file size; ftell() reports failure as -1, which must not be used
  // as a length.
  if (::fseek(fp, 0, SEEK_END) != 0) {
    ::fclose(fp);
    return NULL;
  }
  const long endPos = ::ftell(fp);
  if (endPos < 0) {
    ::fclose(fp);
    return NULL;
  }
  ::rewind(fp);
  const size_t length = static_cast<size_t>(endPos);
  // One extra byte for the terminating NUL.
  char* ptr = static_cast<char*>(::malloc(length + 1));
  if (ptr == NULL) {
    ::fclose(fp);
    return NULL;
  }
  if (length != ::fread(ptr, 1, length, fp)) {
    ::free(ptr);
    ::fclose(fp);
    return NULL;
  }
  ptr[length] = '\0';
  size = length;
  ::fclose(fp);
  return ptr;
}
// Writes `size` bytes from `source` to the named file, replacing its contents.
//
// NOTE: the result follows the process-exit-code convention, not the usual
// bool one: EXIT_SUCCESS (converts to false) means the file was written,
// EXIT_FAILURE (converts to true) means it was not. Callers rely on this, so
// the convention is preserved.
//
// @param source_filename  Destination path (taken by value because the Windows
//                         build may rewrite it as a long-path name).
// @param source           Bytes to write; not read when `size` is 0.
// @param size             Number of bytes to write; 0 produces an empty file.
// @return  false on success; true when the file cannot be opened, the data
//          cannot be written, or closing (flushing) the file fails.
inline bool writeFile(std::string source_filename, const char* source, size_t size) {
#if defined(_WIN32)
  convertLongAbsFilePathIfNeeded(source_filename);
#endif
  FILE* fp = ::fopen(source_filename.c_str(), "wb");
  if (!fp) {
    return EXIT_FAILURE;
  }
  // fwrite() returns 0 for a zero-sized element even though nothing went
  // wrong, so an empty payload must not be routed through it.
  const bool written = (size == 0) || (::fwrite(source, size, 1, fp) == 1);
  // Buffered data is flushed by fclose(); a failure there means data was lost.
  const bool closed = (::fclose(fp) == 0);
  return (written && closed) ? EXIT_SUCCESS : EXIT_FAILURE;
}
#if !defined(BCMAG)
#define BCMAG "BC"
#define SBCMAG 2
#endif
// Helper predicate: true when p is non-NULL and its first SBCMAG characters
// equal the bit code signature BCMAG.
// TODO: Move it into Compiler Lib back in new 1_0 API
inline static bool isBcMagic(const char* p) {
  return (p != NULL) && (strncmp(p, BCMAG, SBCMAG) == 0);
}
void dump(aclBinary* bin);
#endif // _CL_LIB_UTILS_0_8_H_
@@ -593,7 +593,7 @@ int getOptionDesc(std::string& options, size_t StartPos, bool IsShortForm, Optio
}
bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value, bool IsPrefixOption,
bool IsOffFlag, bool IsLC) {
bool IsOffFlag) {
OptionVariables* ovars = Opts.oVariables;
OptionDescriptor* od = &OptDescTable[OptDescTableIx];
@@ -733,9 +733,7 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value,
Opts.clcOptions.append(" -D__FAST_RELAXED_MATH__=1");
Opts.clangOptions.push_back("-D__FAST_RELAXED_MATH__=1");
if (IsLC) { // w/a for SWDEV-116690
Opts.clangOptions.push_back("-cl-fast-relaxed-math");
}
Opts.clangOptions.push_back("-cl-fast-relaxed-math");
// fall-through to handle UnsafeMathOpt
case OID_UnsafeMathOpt:
@@ -861,10 +859,8 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value,
break;
case OID_OptUseNative:
if (IsLC) {
Opts.llvmOptions.append(" -mllvm -amdgpu-use-native=");
Opts.llvmOptions.append(sval);
}
Opts.llvmOptions.append(" -mllvm -amdgpu-use-native=");
Opts.llvmOptions.append(sval);
break;
case OID_WFComma:
@@ -886,9 +882,7 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value,
Opts.clangOptions.push_back(sval);
} else if (((OptionIdentifier)OptDescTableIx) == OID_WBComma) {
Opts.llvmOptions.append(" ");
if (IsLC) {
Opts.llvmOptions.append("-mllvm ");
}
Opts.llvmOptions.append("-mllvm ");
Opts.llvmOptions.append(sval);
} else if (((OptionIdentifier)OptDescTableIx) == OID_WHComma) {
Opts.finalizerOptions.push_back(sval);
@@ -953,7 +947,7 @@ namespace amd {
namespace option {
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isLC) {
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly) {
Opts.origOptionStr = options;
OptionVariables* ovars = Opts.oVariables;
OptionDescriptor* od = OptDescTable;
@@ -1084,8 +1078,7 @@ bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, boo
if (!(OPTION_info(od) & OA_RUNTIME)) continue;
}
if (!processOption(option_ndx, Opts, value, isPrefix_option, (isPrefix_mno || isPrefix_fno),
isLC)) {
if (!processOption(option_ndx, Opts, value, isPrefix_option, (isPrefix_mno || isPrefix_fno))) {
// Keep the optionsLog set in processOption().
std::string tmpStr("Invalid option: ");
tmpStr += options.substr(bpos, (pos == std::string::npos) ? pos : pos - bpos);
@@ -323,9 +323,9 @@ class Options {
OptionDescriptor* getOptDescTable();
bool init();
bool teardown();
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isLC);
inline bool parseLinkOptions(std::string& options, Options& Opts, bool isLC) {
return parseAllOptions(options, Opts, true /*linkOptsOnly*/, isLC);
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly);
inline bool parseLinkOptions(std::string& options, Options& Opts) {
return parseAllOptions(options, Opts, true /*linkOptsOnly*/);
}
-2
Ver fichero
@@ -18,7 +18,6 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#if defined(USE_COMGR_LIBRARY)
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "comgrctx.hpp"
@@ -129,4 +128,3 @@ bool Comgr::LoadLib(bool is_versioned) {
}
} // namespace amd
#endif
-2
Ver fichero
@@ -21,7 +21,6 @@
#pragma once
#include <mutex>
#if defined(USE_COMGR_LIBRARY)
#include "top.hpp"
#include "amd_comgr/amd_comgr.h"
@@ -446,4 +445,3 @@ class Comgr : public amd::AllStatic {
};
} // namespace amd
#endif
+3 -115
Ver fichero
@@ -51,12 +51,6 @@ extern void PalDeviceUnload();
#include "blowfish/oclcrypt.hpp"
#endif
#if defined(WITH_COMPILER_LIB)
#include "utils/bif_section_labels.hpp"
#include "utils/libUtils.h"
#include "spirv/spirvUtils.h"
#endif
#include <vector>
#include <string>
#include <cstring>
@@ -641,10 +635,6 @@ bool Device::BlitProgram::create(amd::Device* device, const std::string& extraKe
// Build all kernels
std::string opt = "-cl-internal-kernel ";
if (!device->settings().useLightning_) {
opt += "-Wf,--force_disable_spir ";
}
if (!extraOptions.empty()) {
opt += extraOptions;
}
@@ -786,29 +776,10 @@ Device::~Device() {
}
bool Device::ValidateComgr() {
#if defined(USE_COMGR_LIBRARY)
// Check if Lightning compiler was requested
if (settings_->useLightning_) {
constexpr bool kComgrVersioned = false;
std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned);
// Use Lightning only if it's available
settings_->useLightning_ = amd::Comgr::IsReady();
return settings_->useLightning_;
}
#endif
return true;
}
bool Device::ValidateHsail() {
#if defined(WITH_COMPILER_LIB)
// Check if HSAIL compiler was requested
if (!settings_->useLightning_) {
std::call_once(amd::Hsail::initialized, amd::Hsail::LoadLib);
// Use Hsail only if it's available
return amd::Hsail::IsReady();
}
#endif
return true;
constexpr bool kComgrVersioned = false;
std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned);
return amd::Comgr::IsReady();
}
size_t GetMaxStackSize(const std::string& procName) {
@@ -1272,43 +1243,6 @@ bool ClBinary::setElfTarget() {
return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM);
}
#if defined(WITH_COMPILER_LIB)
std::string ClBinary::getBIFSymbol(unsigned int symbolID) const {
size_t nSymbols = 0;
// Due to PRE & POST defines in bif_section_labels.hpp conflict with
// PRE & POST struct members in sp3-si-chip-registers.h
// unable to include bif_section_labels.hpp in device.hpp
//! @todo: resolve conflict by renaming defines,
// then include bif_section_labels.hpp in device.hpp &
// use oclBIFSymbolID instead of unsigned int as a parameter
const oclBIFSymbolID symID = static_cast<oclBIFSymbolID>(symbolID);
switch (format_) {
case BIF_VERSION2: {
nSymbols = sizeof(BIF20) / sizeof(oclBIFSymbolStruct);
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID);
assert(symb && "BIF20 symbol with symbolID not found");
if (symb) {
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
}
break;
}
case BIF_VERSION3: {
nSymbols = sizeof(BIF30) / sizeof(oclBIFSymbolStruct);
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID);
assert(symb && "BIF30 symbol with symbolID not found");
if (symb) {
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
}
break;
}
default:
assert(0 && "unexpected BIF type");
return "";
}
return "";
}
#endif
void ClBinary::init(amd::option::Options* optionsObj) {
// option has higher priority than environment variable.
if ((flags_ & BinarySourceMask) != BinaryRemoveSource) {
@@ -1588,52 +1522,6 @@ bool ClBinary::loadLlvmBinary(std::string& llvmBinary,
return false;
}
// Reads the compile options stored under the symOpenclCompilerOptions symbol
// of the COMMENT section of the input ELF.
// `compileOptions` is always cleared first. Returns true when the symbol was
// found (the options may still be empty); returns false when it was not found
// or when WITH_COMPILER_LIB is not defined.
bool ClBinary::loadCompileOptions(std::string& compileOptions) const {
  compileOptions.clear();
#if defined(WITH_COMPILER_LIB)
  char* text = nullptr;
  size_t length = 0;
  const std::string symbolName = getBIFSymbol(symOpenclCompilerOptions);
  if (!elfIn_->getSymbol(amd::Elf::COMMENT, symbolName.c_str(), &text, &length)) {
    return false;
  }
  if (length > 0) {
    compileOptions.append(text, length);
  }
  return true;
#else
  return false;
#endif
}
// Reads the link options stored under the symOpenclLinkerOptions symbol of the
// COMMENT section of the input ELF.
// `linkOptions` is always cleared first. Returns true when the symbol was
// found (the options may still be empty); returns false when it was not found
// or when WITH_COMPILER_LIB is not defined.
bool ClBinary::loadLinkOptions(std::string& linkOptions) const {
  linkOptions.clear();
#if defined(WITH_COMPILER_LIB)
  char* text = nullptr;
  size_t length = 0;
  const std::string symbolName = getBIFSymbol(symOpenclLinkerOptions);
  if (!elfIn_->getSymbol(amd::Elf::COMMENT, symbolName.c_str(), &text, &length)) {
    return false;
  }
  if (length > 0) {
    linkOptions.append(text, length);
  }
  return true;
#else
  return false;
#endif
}
// Adds the compile options to the COMMENT section of the output ELF under the
// symOpenclCompilerOptions symbol. Does nothing unless WITH_COMPILER_LIB is
// defined.
void ClBinary::storeCompileOptions(const std::string& compileOptions) {
#if defined(WITH_COMPILER_LIB)
  const std::string symbolName = getBIFSymbol(symOpenclCompilerOptions);
  elfOut()->addSymbol(amd::Elf::COMMENT, symbolName.c_str(), compileOptions.c_str(),
                      compileOptions.length());
#endif
}
// Adds the link options to the COMMENT section of the output ELF under the
// symOpenclLinkerOptions symbol. Does nothing unless WITH_COMPILER_LIB is
// defined.
void ClBinary::storeLinkOptions(const std::string& linkOptions) {
#if defined(WITH_COMPILER_LIB)
  const std::string symbolName = getBIFSymbol(symOpenclLinkerOptions);
  elfOut()->addSymbol(amd::Elf::COMMENT, symbolName.c_str(), linkOptions.c_str(),
                      linkOptions.length());
#endif
}
bool ClBinary::isSPIR() const {
char* section = nullptr;
size_t sz = 0;
+2 -43
Ver fichero
@@ -34,9 +34,6 @@
#include "devprogram.hpp"
#include "devkernel.hpp"
#include "amdocl/cl_profile_amd.h"
#if defined(WITH_COMPILER_LIB)
#include "hsailctx.hpp"
#endif
#include "devsignal.hpp"
#if defined(__clang__)
@@ -692,8 +689,6 @@ class Settings : public amd::HeapObject {
// that replaces generic OS allocation routines
uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device
uint useLightning_ : 1; //!< Enable LC path for this device
uint enableWgpMode_ : 1; //!< Enable WGP mode for this device
uint enableWave32Mode_ : 1; //!< Enable Wave32 mode for this device
uint lcWavefrontSize64_ : 1; //!< Enable Wave64 mode for this device
@@ -705,7 +700,7 @@ class Settings : public amd::HeapObject {
uint gwsInitSupported_ : 1; //!< Check if GWS is supported on this machine.
uint kernel_arg_opt_ : 1; //!< Enables kernel arg optimization for blit kernels
uint kernel_arg_impl_ : 2; //!< Kernel argument implementation
uint reserved_ : 12;
uint reserved_ : 14;
};
uint value_;
};
@@ -968,7 +963,7 @@ class Memory : public amd::HeapObject {
HostMemoryRegistered = 0x00000010, //!< Host memory was registered
MemoryCpuUncached = 0x00000020, //!< Memory is uncached on CPU access(slow read)
AllowedPeerAccess = 0x00000040, //!< Memory can be accessed from peer
PersistentMap = 0x00000080 //!< Map Peristent memory
PersistentMap = 0x00000080 //!< Map Persistent memory
};
uint flags_; //!< Memory object flags
@@ -1086,22 +1081,6 @@ class ClBinary : public amd::HeapObject {
amd::Elf::ElfSections& elfSectionType //!< LLVMIR binary is in SPIR format
) const;
//! Loads compile options from OCL binary file
bool loadCompileOptions(std::string& compileOptions //!< return the compile options loaded
) const;
//! Loads link options from OCL binary file
bool loadLinkOptions(std::string& linkOptions //!< return the link options loaded
) const;
//! Store compile options into OCL binary file
void storeCompileOptions(const std::string& compileOptions //!< the compile options to be stored
);
//! Store link options into OCL binary file
void storeLinkOptions(const std::string& linkOptions //!< the link options to be stored
);
//! Check if the binary is recompilable
bool isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform);
@@ -1165,12 +1144,6 @@ class ClBinary : public amd::HeapObject {
//! Returns TRUE if binary file was allocated
bool isBinaryAllocated() const { return (flags_ & BinaryAllocated) ? true : false; }
#if defined(WITH_COMPILER_LIB)
//! Returns BIF symbol name by symbolID,
//! returns empty string if not found or if BIF version is unsupported
std::string getBIFSymbol(unsigned int symbolID) const;
#endif
protected:
const amd::Device& dev_; //!< Device object
@@ -1377,10 +1350,7 @@ class VirtualDevice : public amd::HeapObject {
mutable std::atomic<uint64_t> queued_async_handlers_ = 0; //!< Outstanding HSA async handlers
};
#if defined(USE_COMGR_LIBRARY)
extern bool getValueFromIsaMeta(const std::string& isa, const char* key, std::string& retValue);
#endif
} // namespace amd::device
namespace amd {
@@ -1615,9 +1585,6 @@ class Isa {
*/
class Device : public RuntimeObject {
protected:
#if defined(WITH_COMPILER_LIB)
typedef aclCompiler Compiler;
#endif
public:
// The structures below for MGPU launch match the device library format
@@ -1692,11 +1659,6 @@ class Device : public RuntimeObject {
);
};
#if defined(WITH_COMPILER_LIB)
virtual Compiler* compiler() const = 0;
virtual Compiler* binCompiler() const { return compiler(); }
#endif
Device();
virtual ~Device();
@@ -2115,9 +2077,6 @@ class Device : public RuntimeObject {
//! Checks if OCL runtime can use code object manager for compilation
bool ValidateComgr();
//! Checks if OCL runtime can use hsail for compilation
bool ValidateHsail();
bool IpcCreate(void* dev_ptr, size_t* mem_size, char* handle, size_t* mem_offset) const;
bool IpcAttach(const char* handle, size_t mem_size, size_t mem_offset, unsigned int flags,
-467
Ver fichero
@@ -25,20 +25,12 @@
#include "devkernel.hpp"
#include "utils/macros.hpp"
#include "utils/options.hpp"
#if defined(WITH_COMPILER_LIB)
#include "utils/bif_section_labels.hpp"
#include "utils/libUtils.h"
#endif
#include "comgrctx.hpp"
#include <map>
#include <string>
#include <sstream>
#if defined(WITH_COMPILER_LIB)
#include "hsailctx.hpp"
#endif
namespace amd::device {
// ================================================================================================
@@ -51,8 +43,6 @@ static constexpr clk_value_type_t ClkValueMapType[6][6] = {
{T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16},
};
#if defined(USE_COMGR_LIBRARY)
// ================================================================================================
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
std::string* str) {
@@ -593,7 +583,6 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
return status;
}
#endif
// ================================================================================================
Kernel::Kernel(const amd::Device& dev, const std::string& name, const Program& prog)
@@ -674,15 +663,6 @@ bool Kernel::createSignature(const parameters_t& params, uint32_t numParameters,
// ================================================================================================
Kernel::~Kernel() { delete signature_; }
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
std::string Kernel::openclMangledName(const std::string& name) {
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
assert(bifSym && "symbol not found");
return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST];
}
#endif
// ================================================================================================
void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
amd::NDRange& lclWorkSize) const {
@@ -772,300 +752,6 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
}
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Maps an ACL argument description to an amd::KernelParameterDescriptor kind.
// Arguments whose name starts with "_." are reported as hidden: *isHidden is
// set to true and the rest of the name selects the hidden kind (HiddenNone
// when the name is not recognised). For all other arguments *isHidden is left
// untouched and the kind is derived from the argument type.
static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* isHidden) {
  const char* argName = argInfo->argStr;
  if (argName[0] == '_' && argName[1] == '.') {
    *isHidden = true;

    struct HiddenArg {
      const char* suffix;
      uint32_t kind;
    };
    static const HiddenArg kHiddenArgs[] = {
        {"global_offset_0", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
        {"global_offset_1", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
        {"global_offset_2", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
        {"printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
        {"hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer},
        {"vqueue_pointer", amd::KernelParameterDescriptor::HiddenDefaultQueue},
        {"aqlwrap_pointer", amd::KernelParameterDescriptor::HiddenCompletionAction},
    };
    // Compare everything after the "_." prefix against the known hidden names.
    for (const HiddenArg& candidate : kHiddenArgs) {
      if (strcmp(argName + 2, candidate.suffix) == 0) {
        return candidate.kind;
      }
    }
    return amd::KernelParameterDescriptor::HiddenNone;
  }

  switch (argInfo->type) {
    case ARG_TYPE_POINTER:
      return amd::KernelParameterDescriptor::MemoryObject;
    case ARG_TYPE_QUEUE:
      return amd::KernelParameterDescriptor::QueueObject;
    case ARG_TYPE_IMAGE:
      return amd::KernelParameterDescriptor::ImageObject;
    case ARG_TYPE_SAMPLER:
      return amd::KernelParameterDescriptor::SamplerObject;
    case ARG_TYPE_VALUE:
      // Structs are passed as reference objects, everything else as a value.
      if (argInfo->arg.value.data == DATATYPE_struct) {
        return amd::KernelParameterDescriptor::ReferenceObject;
      }
      return amd::KernelParameterDescriptor::ValueObject;
    case ARG_TYPE_ERROR:
    default:
      return amd::KernelParameterDescriptor::HiddenNone;
  }
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Maps an ACL argument description to a clk_value_type_t.
// Queues, pointers/images and samplers map to T_QUEUE, T_POINTER and T_SAMPLER.
// For value arguments the element data type selects a row of ClkValueMapType
// and the element count (size divided by the element width) selects the
// column; counts other than 1, 2, 3, 4, 8 or 16 and unsupported data types
// give T_VOID, as does any other argument type.
static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t size = 0) {
  if (argInfo->type == ARG_TYPE_QUEUE) {
    return T_QUEUE;
  }
  if (argInfo->type == ARG_TYPE_POINTER || argInfo->type == ARG_TYPE_IMAGE) {
    return T_POINTER;
  }
  if (argInfo->type == ARG_TYPE_SAMPLER) {
    return T_SAMPLER;
  }
  if (argInfo->type != ARG_TYPE_VALUE) {
    return T_VOID;
  }

  // Row of ClkValueMapType and width in bytes of one element.
  uint row = 0;
  size_t elemBytes = 1;
  switch (argInfo->arg.value.data) {
    case DATATYPE_i8:
    case DATATYPE_u8:
      row = 0;
      elemBytes = 1;
      break;
    case DATATYPE_i16:
    case DATATYPE_u16:
      row = 1;
      elemBytes = 2;
      break;
    case DATATYPE_i32:
    case DATATYPE_u32:
      row = 2;
      elemBytes = 4;
      break;
    case DATATYPE_i64:
    case DATATYPE_u64:
      row = 3;
      elemBytes = 8;
      break;
    case DATATYPE_f16:
      // f16 shares the row used by f32 but has 2-byte elements.
      row = 4;
      elemBytes = 2;
      break;
    case DATATYPE_f32:
      row = 4;
      elemBytes = 4;
      break;
    case DATATYPE_f64:
      row = 5;
      elemBytes = 8;
      break;
    case DATATYPE_struct:
    case DATATYPE_opaque:
    case DATATYPE_ERROR:
    default:
      return T_VOID;
  }

  // Column of ClkValueMapType, chosen by the number of elements.
  uint column = 0;
  switch (static_cast<uint>(size / elemBytes)) {
    case 1:
      column = 0;
      break;
    case 2:
      column = 1;
      break;
    case 3:
      column = 2;
      break;
    case 4:
      column = 3;
      break;
    case 8:
      column = 4;
      break;
    case 16:
      column = 5;
      break;
    default:
      return T_VOID;
  }
  return ClkValueMapType[row][column];
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Returns the alignment in bytes used for an argument: pointer size for
// pointers, sizeof(cl_mem)/sizeof(cl_sampler) for images/samplers, the element
// width for scalar value types and 128 for structs. Anything else yields
// (size_t)-1.
static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
  const size_t kUnknown = static_cast<size_t>(-1);

  if (argInfo->type == ARG_TYPE_POINTER) {
    return sizeof(void*);
  }
  if (argInfo->type == ARG_TYPE_IMAGE) {
    return sizeof(cl_mem);
  }
  if (argInfo->type == ARG_TYPE_SAMPLER) {
    return sizeof(cl_sampler);
  }
  if (argInfo->type != ARG_TYPE_VALUE) {
    return kUnknown;
  }

  switch (argInfo->arg.value.data) {
    case DATATYPE_i8:
    case DATATYPE_u8:
      return 1;
    case DATATYPE_i16:
    case DATATYPE_u16:
    case DATATYPE_f16:
      return 2;
    case DATATYPE_i32:
    case DATATYPE_u32:
    case DATATYPE_f32:
      return 4;
    case DATATYPE_i64:
    case DATATYPE_u64:
    case DATATYPE_f64:
      return 8;
    case DATATYPE_struct:
      return 128;
    case DATATYPE_ERROR:
    default:
      return kUnknown;
  }
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Returns the recorded pointee alignment for pointer arguments and 1 for
// every other argument type.
static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
  if (argInfo->type != ARG_TYPE_POINTER) {
    return 1;
  }
  return argInfo->arg.pointer.align;
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// True when a pointer or image argument is marked ACCESS_TYPE_RO; false for
// every other argument type or access mode.
static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
  switch (argInfo->type) {
    case ARG_TYPE_POINTER:
      return argInfo->arg.pointer.type == ACCESS_TYPE_RO;
    case ARG_TYPE_IMAGE:
      return argInfo->arg.image.type == ACCESS_TYPE_RO;
    default:
      return false;
  }
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Returns the size in bytes of an argument: pointer size for pointers, images,
// samplers and queues; element width times element count for value arguments
// (structs count one byte per element). Unsupported types yield -1.
inline static int GetArgSizeOCL(const aclArgData* argInfo) {
  switch (argInfo->type) {
    case ARG_TYPE_POINTER:
    case ARG_TYPE_IMAGE:
    case ARG_TYPE_SAMPLER:
    case ARG_TYPE_QUEUE:
      return sizeof(void*);
    case ARG_TYPE_VALUE:
      break;
    default:
      return -1;
  }

  // Width in bytes of a single element of the value argument.
  int elemBytes = 0;
  switch (argInfo->arg.value.data) {
    case DATATYPE_i8:
    case DATATYPE_u8:
    case DATATYPE_struct:
      elemBytes = 1;
      break;
    case DATATYPE_i16:
    case DATATYPE_u16:
    case DATATYPE_f16:
      elemBytes = 2;
      break;
    case DATATYPE_i32:
    case DATATYPE_u32:
    case DATATYPE_f32:
      elemBytes = 4;
      break;
    case DATATYPE_i64:
    case DATATYPE_u64:
    case DATATYPE_f64:
      elemBytes = 8;
      break;
    case DATATYPE_ERROR:
    default:
      return -1;
  }
  return elemBytes * argInfo->arg.value.numElements;
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Derives the OpenCL address-space qualifier of an argument.
// Images and queues are global; pointers are classified by their memory type
// (an unsupported memory type is logged and treated as private); everything
// else is private.
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData* argInfo) {
  if ((argInfo->type == ARG_TYPE_IMAGE) || (argInfo->type == ARG_TYPE_QUEUE)) {
    return CL_KERNEL_ARG_ADDRESS_GLOBAL;
  }
  if (argInfo->type != ARG_TYPE_POINTER) {
    // default for all other cases
    return CL_KERNEL_ARG_ADDRESS_PRIVATE;
  }

  switch (argInfo->arg.pointer.memory) {
    case PTR_MT_CONSTANT:
    case PTR_MT_CONSTANT_EMU:
    case PTR_MT_UAV_CONSTANT:
      return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    case PTR_MT_GLOBAL:
    case PTR_MT_UAV:
    case PTR_MT_SCRATCH_EMU:
      return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    case PTR_MT_LDS:
    case PTR_MT_LDS_EMU:
      return CL_KERNEL_ARG_ADDRESS_LOCAL;
    case PTR_MT_ERROR:
    default:
      LogError("Unsupported address type");
      return CL_KERNEL_ARG_ADDRESS_PRIVATE;
  }
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
// Derives the OpenCL access qualifier of an argument. Only images carry one:
// read-only, write-only, or read-write for any other access mode. Non-image
// arguments report CL_KERNEL_ARG_ACCESS_NONE.
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgData* argInfo) {
  if (argInfo->type != ARG_TYPE_IMAGE) {
    return CL_KERNEL_ARG_ACCESS_NONE;
  }
  const auto access = argInfo->arg.image.type;
  if (access == ACCESS_TYPE_RO) {
    return CL_KERNEL_ARG_ACCESS_READ_ONLY;
  }
  if (access == ACCESS_TYPE_WO) {
    return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
  }
  return CL_KERNEL_ARG_ACCESS_READ_WRITE;
}
#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
//! Builds the OpenCL type-qualifier bitmask (const/volatile/restrict/pipe) for an argument.
//! Only pointer arguments carry qualifiers; everything else yields CL_KERNEL_ARG_TYPE_NONE.
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* argInfo) {
  if (argInfo->type != ARG_TYPE_POINTER) {
    return CL_KERNEL_ARG_TYPE_NONE;
  }

  const auto& ptr = argInfo->arg.pointer;
  const auto space = ptr.memory;
  // A pointer into any of the constant memory types is const even without an explicit flag
  const bool constantSpace = (space == PTR_MT_CONSTANT) || (space == PTR_MT_UAV_CONSTANT) ||
                             (space == PTR_MT_CONSTANT_EMU);

  cl_kernel_arg_type_qualifier quals = CL_KERNEL_ARG_TYPE_NONE;
  if (argInfo->isConst || constantSpace) {
    quals |= CL_KERNEL_ARG_TYPE_CONST;
  }
  if (ptr.isPipe) {
    quals |= CL_KERNEL_ARG_TYPE_PIPE;
  }
  if (ptr.isRestrict) {
    quals |= CL_KERNEL_ARG_TYPE_RESTRICT;
  }
  if (ptr.isVolatile) {
    quals |= CL_KERNEL_ARG_TYPE_VOLATILE;
  }
  return quals;
}
#endif
// ================================================================================================
#if defined(USE_COMGR_LIBRARY)
bool Kernel::GetAttrCodePropMetadata() {
amd_comgr_metadata_node_t kernelMetaNode;
if (!prog().getKernelMetadata(name(), &kernelMetaNode)) {
@@ -1313,89 +999,8 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
}
#endif // defined(USE_COMGR_LIBRARY)
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
//! Builds the kernel signature from the argument list reported by the ACL compiler library.
//!
//! \param aclArg         Array of ACL argument records; iteration stops at the first record
//!                       whose struct_size is 0.
//! \param argBufferSize  Starting offset for the data of by-reference arguments, which is
//!                       placed behind the regular argument buffer.
//!
//! Visible arguments are collected in declaration order, hidden arguments are collected
//! separately and appended after them, and the result is handed to createSignature() with
//! ABIVersion_1. Also fills patchReferences_ and sets the image-related bits of flags_.
void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) {
// Iterate through the arguments and insert into parameterList
device::Kernel::parameters_t params;
device::Kernel::parameters_t hiddenParams;
// NOTE(review): desc lives outside the loop, so fields that an iteration does not assign
// (e.g. name_/typeName_ for hidden arguments) keep the value from the previous argument.
amd::KernelParameterDescriptor desc;
// Running offset inside the regular argument buffer
size_t offset = 0;
// Running offset for by-reference data, starting right after the regular argument buffer
size_t offsetStruct = argBufferSize;
// NOTE(review): the counter i is never read inside the loop body.
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
size_t size = GetArgSizeOCL(aclArg);
size_t alignment = GetArgAlignmentOCL(aclArg);
bool isHidden = false;
desc.info_.oclObject_ = GetOclArgumentTypeOCL(aclArg, &isHidden);
// Allocate the hidden arguments, but abstraction layer will skip them
if (isHidden) {
offset = amd::alignUp(offset, alignment);
desc.offset_ = offset;
desc.size_ = size;
offset += size;
hiddenParams.push_back(desc);
continue;
}
desc.name_ = aclArg->argStr;
desc.typeName_ = aclArg->typeStr;
desc.type_ = GetOclTypeOCL(aclArg, size);
desc.addressQualifier_ = GetOclAddrQualOCL(aclArg);
desc.accessQualifier_ = GetOclAccessQualOCL(aclArg);
desc.typeQualifier_ = GetOclTypeQualOCL(aclArg);
// The arrayIndex_ field is used here to carry the pointee alignment
desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(aclArg);
desc.size_ = size;
// Check if HSAIL expects data by reference and allocate it behind
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) {
desc.offset_ = offsetStruct;
// Align the offset reference
offset = amd::alignUp(offset, sizeof(size_t));
// Record where the data lives (key) and which argument-buffer slot refers to it (value)
patchReferences_.insert({desc.offset_, offset});
offsetStruct += size;
// Adjust the offset of arguments
offset += sizeof(size_t);
} else {
// These objects have forced data size to uint64_t
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
offset = amd::alignUp(offset, sizeof(uint64_t));
desc.offset_ = offset;
offset += sizeof(uint64_t);
} else {
// Plain by-value argument: use the size and alignment reported by ACL
offset = amd::alignUp(offset, alignment);
desc.offset_ = offset;
offset += size;
}
}
// Update read only flag
desc.info_.readOnly_ = GetReadOnlyOCL(aclArg);
params.push_back(desc);
// Track image usage; any image that isn't read-only also enables image writes
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) {
flags_.imageEna_ = true;
if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) {
flags_.imageWriteEna_ = true;
}
}
}
// Save the number of OCL arguments
uint32_t numParams = params.size();
// Append the hidden arguments to the OCL arguments
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
createSignature(params, numParams, amd::KernelSignature::ABIVersion_1);
}
#endif
// ================================================================================================
#if defined(USE_COMGR_LIBRARY)
void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
size_t HIPPrintfInfoID = 0;
for (auto str : printfInfoStrings) {
@@ -1501,76 +1106,4 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
// ]
}
}
#endif // defined(USE_COMGR_LIBRARY)
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) {
uint index = 0, HIPIndex = 0;
for (; aclPrintf->struct_size != 0; aclPrintf++) {
if (amd::IS_HIP) {
index = HIPIndex++;
printf_.resize(HIPIndex);
} else {
index = aclPrintf->ID;
if (printf_.size() <= index) {
printf_.resize(index + 1);
}
}
PrintfInfo& info = printf_[index];
const std::string& pfmt = aclPrintf->fmtStr;
bool need_nl = true;
for (size_t pos = 0; pos < pfmt.size(); ++pos) {
char symbol = pfmt[pos];
need_nl = true;
if (symbol == '\\') {
switch (pfmt[pos + 1]) {
case 'a':
pos++;
symbol = '\a';
break;
case 'b':
pos++;
symbol = '\b';
break;
case 'f':
pos++;
symbol = '\f';
break;
case 'n':
pos++;
symbol = '\n';
need_nl = false;
break;
case 'r':
pos++;
symbol = '\r';
break;
case 'v':
pos++;
symbol = '\v';
break;
case '7':
if (pfmt[pos + 2] == '2') {
pos += 2;
symbol = '\72';
}
break;
default:
break;
}
}
info.fmtString_.push_back(symbol);
}
if (need_nl && !amd::IS_HIP) {
info.fmtString_ += "\n";
}
uint32_t* tmp_ptr = const_cast<uint32_t*>(aclPrintf->argSizes);
for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) {
info.arguments_.push_back(*tmp_ptr);
}
}
}
#endif // defined(WITH_COMPILER_LIB)
} // namespace amd::device
+1 -20
Ver fichero
@@ -20,9 +20,6 @@
#pragma once
#if defined(WITH_COMPILER_LIB)
#include "aclTypes.h"
#endif
#include "platform/context.hpp"
#include "platform/object.hpp"
#include "platform/memory.hpp"
@@ -98,7 +95,6 @@ struct KernelParameterDescriptor {
};
} // namespace amd
#if defined(USE_COMGR_LIBRARY)
//! Runtime handle structure for device enqueue
struct RuntimeHandle {
uint64_t kernel_handle; //!< Pointer to amd_kernel_code_s or kernel_descriptor_t
@@ -174,8 +170,6 @@ enum class KernelField : uint8_t {
MaxSize = 18
};
#endif // defined(USE_COMGR_LIBRARY)
namespace amd {
namespace hsa {
namespace loader {
@@ -290,10 +284,6 @@ class Kernel : public amd::HeapObject {
//! Return the build log
const std::string& buildLog() const { return buildLog_; }
#if defined(WITH_COMPILER_LIB)
static std::string openclMangledName(const std::string& name);
#endif
const std::unordered_map<size_t, size_t>& patch() const { return patchReferences_; }
//! Returns TRUE if kernel uses dynamic parallelism
@@ -356,7 +346,6 @@ class Kernel : public amd::HeapObject {
protected:
//! Initializes the abstraction layer kernel parameters
#if defined(USE_COMGR_LIBRARY)
void InitParameters(const amd_comgr_metadata_node_t kernelMD);
//! Retrieve kernel attribute and code properties metadata
@@ -372,13 +361,7 @@ class Kernel : public amd::HeapObject {
const uint32_t codeObjectVer() const { return prog().codeObjectVer(); }
//! Initializes HSAIL Printf metadata and info for LC
void InitPrintf(const std::vector<std::string>& printfInfoStrings);
#endif
#if defined(WITH_COMPILER_LIB)
void InitParameters(const aclArgData* aclArg, //!< List of ACL arguments
uint32_t argBufferSize);
//! Initializes HSAIL Printf metadata and info
void InitPrintf(const aclPrintfFmt* aclPrintf);
#endif
//! Returns program associated with this kernel
const Program& prog() const { return prog_; }
@@ -426,7 +409,5 @@ class Kernel : public amd::HeapObject {
KernelKind kind_{Normal}; //!< Kernel kind, is normal unless specified otherwise
};
#if defined(USE_COMGR_LIBRARY)
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta, std::string* str);
#endif // defined(USE_COMGR_LIBRARY)
} // namespace amd::device
La diferencia del archivo ha sido suprimido porque es demasiado grande Cargar Diff
+9 -82
Ver fichero
@@ -20,16 +20,10 @@
#pragma once
#if defined(WITH_COMPILER_LIB)
#include "aclTypes.h"
#endif
#include "platform/context.hpp"
#include "platform/object.hpp"
#include "platform/memory.hpp"
#if defined(USE_COMGR_LIBRARY)
#include "amd_comgr/amd_comgr.h"
#endif // defined(USE_COMGR_LIBRARY)
namespace amd {
namespace hsa {
@@ -120,16 +114,11 @@ class Program : public amd::HeapObject {
bool runInitFiniKernel(const std::vector<const Kernel*>& kernels) const;
#if defined(WITH_COMPILER_LIB)
static amd::Monitor buildLock_; //!< Global build lock for HSAIL which isn't thread-safe
#endif
protected:
union {
struct {
uint32_t isNull_ : 1; //!< Null program no memory allocations
uint32_t internal_ : 1; //!< Internal blit program
uint32_t isLC_ : 1; //!< LC was used for the program compilation
uint32_t hasGlobalStores_ : 1; //!< Program has writable program scope variables
uint32_t isHIP_ : 1; //!< Determine if the program is for HIP
uint32_t coLoaded_ : 1; //!< Has the code objected been loaded
@@ -143,30 +132,20 @@ class Program : public amd::HeapObject {
amd::Elf::ElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format
std::string compileOptions_; //!< compile/build options.
std::string linkOptions_; //!< link options.
//!< the option arg passed in to clCompileProgram(), clLinkProgram(),
//! or clBuildProgram(), whichever is called last
#if defined(WITH_COMPILER_LIB)
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
aclBinary* binaryElf_; //!< Binary for the new compiler library
#endif
//!< the option arg passed in to clCompileProgram(), clLinkProgram(),
//!< or clBuildProgram(), whichever is called last
std::string lastBuildOptionsArg_;
mutable std::string buildLog_; //!< build log.
int32_t buildStatus_; //!< build status.
int32_t buildError_; //!< build error
#if defined(WITH_COMPILER_LIB)
aclTargetInfo info_; //!< The info target for this binary.
#endif
size_t globalVariableTotalSize_;
amd::option::Options* programOptions_;
#if defined(USE_COMGR_LIBRARY)
amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata
uint32_t codeObjectVer_; //!< version of code object
std::map<std::string, amd_comgr_metadata_node_t> kernelMetadataMap_; //!< Map of kernel metadata
#endif
//! Sanitizer lock - lock when launching init/fini kernels
static amd::Monitor initFiniLock_;
@@ -249,20 +228,12 @@ class Program : public amd::HeapObject {
size_t globalVariableTotalSize() const { return globalVariableTotalSize_; }
#if defined(WITH_COMPILER_LIB)
//! Returns the aclBinary associated with the program
aclBinary* binaryElf() const { return static_cast<aclBinary*>(binaryElf_); }
#endif
//! Returns TRUE if the program just compiled
bool isNull() const { return isNull_; }
//! Returns TRUE if the program used internally by runtime
bool isInternal() const { return internal_; }
//! Returns TRUE if Lightning compiler was used for this program
bool isLC() const { return isLC_; }
//! Global variables are a part of the code segment
bool hasGlobalStores() const { return hasGlobalStores_; }
@@ -272,7 +243,6 @@ class Program : public amd::HeapObject {
//! Returns TRUE if the program is a trap handler for debugger support
bool isTrapHandler() const { return trapHandler_; }
#if defined(USE_COMGR_LIBRARY)
amd_comgr_metadata_node_t metadata() const { return metadata_; }
//! Get the kernel metadata
@@ -286,7 +256,6 @@ class Program : public amd::HeapObject {
}
const uint32_t codeObjectVer() const { return codeObjectVer_; }
#endif
//! Check if program is HIP based
const bool isHIP() const { return (isHIP_ == 1); }
@@ -324,18 +293,18 @@ class Program : public amd::HeapObject {
*
* \return True if we successfully compiled a GPU program
*/
virtual bool compileImpl(const std::string& sourceCode, //!< the program's source code
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
amd::option::Options* options //!< compile options's object
bool compileImpl(const std::string& sourceCode, //!< the program's source code
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
amd::option::Options* options //!< compile options's object
);
//! Link the device program.
virtual bool linkImpl(amd::option::Options* options);
bool linkImpl(amd::option::Options* options);
//! Link the device programs.
virtual bool linkImpl(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
bool createLibrary);
bool linkImpl(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
bool createLibrary);
virtual bool createBinary(amd::option::Options* options) = 0;
@@ -347,15 +316,9 @@ class Program : public amd::HeapObject {
//! Initialize Binary
virtual bool initClBinary();
virtual bool saveBinaryAndSetType(type_t type) = 0;
//! Release the Binary
void releaseClBinary();
#if defined(WITH_COMPILER_LIB)
//! return target info
virtual const aclTargetInfo& info() = 0;
#endif
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
return true;
@@ -402,44 +365,13 @@ class Program : public amd::HeapObject {
return false;
}
#if defined(USE_COMGR_LIBRARY)
bool getSymbolsFromCodeObj(std::vector<std::string>* var_names,
amd_comgr_symbol_type_t sym_type) const;
#endif
bool getUndefinedVarInfo(std::string var_name, void** var_addr, size_t* var_size);
bool defineUndefinedVars();
private:
//! Compile the device program with LC path
bool compileImplLC(const std::string& sourceCode, const std::vector<const std::string*>& headers,
const char** headerIncludeNames, amd::option::Options* options);
//! Compile the device program with HSAIL path
bool compileImplHSAIL(const std::string& sourceCode,
const std::vector<const std::string*>& headers,
const char** headerIncludeNames, amd::option::Options* options);
//! Link the device programs with LC path
bool linkImplLC(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
bool createLibrary);
//! Link the device programs with HSAIL path
bool linkImplHSAIL(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
bool createLibrary);
//! Link the device program with LC path
bool linkImplLC(amd::option::Options* options);
//! Link the device program with HSAIL path
bool linkImplHSAIL(amd::option::Options* options);
//! Load the device program with LC path
bool loadLC();
//! Load the device program with HSAIL path
bool loadHSAIL();
#if defined(USE_COMGR_LIBRARY)
//! Dump the log data object to the build log, if a log data object is present
void extractBuildLog(amd_comgr_data_set_t dataSet);
//! Dump the code object data
@@ -477,7 +409,6 @@ class Program : public amd::HeapObject {
//! Create the map for the kernel name and its metadata for fast access
bool createKernelMetadataMap(void* binary, size_t binSize);
#endif
bool trySubstObjFile(const char* SubstCfgFile, const std::string& sourceCode,
const amd::option::Options* options);
@@ -489,8 +420,6 @@ class Program : public amd::HeapObject {
Program& operator=(const Program&);
};
#if defined(USE_COMGR_LIBRARY)
class ComgrBinaryData {
public:
ComgrBinaryData() : binaryData_({0}), created_(false) {}
@@ -503,6 +432,4 @@ class ComgrBinaryData {
bool created_;
};
#endif
} // namespace amd::device
-98
Ver fichero
@@ -1,98 +0,0 @@
/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#if defined(WITH_COMPILER_LIB)
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "hsailctx.hpp"
namespace amd {
// Definitions of the static members of amd::Hsail
std::once_flag Hsail::initialized;
HsailEntryPoints Hsail::cep_;
bool Hsail::is_ready_ = false;
//! Loads the HSAIL compiler library (when built with HSAIL_DYN_DLL) and resolves every ACL
//! entry point into cep_.
//!
//! \return true once all entry points are available (is_ready_ is then set), false if the
//!         library cannot be loaded or, through GET_HSAIL_SYMBOL, if any symbol is missing.
//!
//! Without HSAIL_DYN_DLL, GET_HSAIL_SYMBOL expands to nothing and the function only sets
//! is_ready_ and returns true.
//! NOTE(review): on a failed symbol lookup the function returns without releasing
//! cep_.handle; no unload call is visible here - confirm whether that is intended.
bool Hsail::LoadLib() {
#if defined(HSAIL_DYN_DLL)
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loading HSAIL library.");
// Library file name selected per platform by LP64_SWITCH / WINDOWS_SWITCH
// (presumably by pointer width and by OS - TODO confirm the argument order of the macros)
static constexpr const char* HsailLibName =
LP64_SWITCH(WINDOWS_SWITCH("amdhsail32.dll", "libamdhsail32.so"),
WINDOWS_SWITCH("amdhsail64.dll", "libamdhsail64.so"));
cep_.handle = Os::loadLibrary(HsailLibName);
if (nullptr == cep_.handle) {
return false;
}
#endif
// Each line below resolves one entry point; a missing symbol makes LoadLib() return false
GET_HSAIL_SYMBOL(aclCompilerInit)
GET_HSAIL_SYMBOL(aclCompilerFini)
GET_HSAIL_SYMBOL(aclCompilerVersion)
GET_HSAIL_SYMBOL(aclVersionSize)
GET_HSAIL_SYMBOL(aclGetErrorString)
GET_HSAIL_SYMBOL(aclGetArchInfo)
GET_HSAIL_SYMBOL(aclGetDeviceInfo)
GET_HSAIL_SYMBOL(aclGetTargetInfo)
GET_HSAIL_SYMBOL(aclGetTargetInfoFromChipID)
GET_HSAIL_SYMBOL(aclGetArchitecture)
GET_HSAIL_SYMBOL(aclGetChipOptions)
GET_HSAIL_SYMBOL(aclGetFamily)
GET_HSAIL_SYMBOL(aclGetChip)
GET_HSAIL_SYMBOL(aclBinaryInit)
GET_HSAIL_SYMBOL(aclBinaryFini)
GET_HSAIL_SYMBOL(aclReadFromFile)
GET_HSAIL_SYMBOL(aclReadFromMem)
GET_HSAIL_SYMBOL(aclWriteToFile)
GET_HSAIL_SYMBOL(aclWriteToMem)
GET_HSAIL_SYMBOL(aclCreateFromBinary)
GET_HSAIL_SYMBOL(aclBinaryVersion)
GET_HSAIL_SYMBOL(aclInsertSection)
GET_HSAIL_SYMBOL(aclInsertSymbol)
GET_HSAIL_SYMBOL(aclExtractSection)
GET_HSAIL_SYMBOL(aclExtractSymbol)
GET_HSAIL_SYMBOL(aclRemoveSection)
GET_HSAIL_SYMBOL(aclRemoveSymbol)
GET_HSAIL_SYMBOL(aclQueryInfo)
GET_HSAIL_SYMBOL(aclDbgAddArgument)
GET_HSAIL_SYMBOL(aclDbgRemoveArgument)
GET_HSAIL_SYMBOL(aclCompile)
GET_HSAIL_SYMBOL(aclLink)
GET_HSAIL_SYMBOL(aclGetCompilerLog)
GET_HSAIL_SYMBOL(aclRetrieveType)
GET_HSAIL_SYMBOL(aclSetType)
GET_HSAIL_SYMBOL(aclConvertType)
GET_HSAIL_SYMBOL(aclDisassemble)
GET_HSAIL_SYMBOL(aclGetDeviceBinary)
GET_HSAIL_SYMBOL(aclValidateBinaryImage)
GET_HSAIL_SYMBOL(aclJITObjectImageCreate)
GET_HSAIL_SYMBOL(aclJITObjectImageCopy)
GET_HSAIL_SYMBOL(aclJITObjectImageDestroy)
GET_HSAIL_SYMBOL(aclJITObjectImageFinalize)
GET_HSAIL_SYMBOL(aclJITObjectImageSize)
GET_HSAIL_SYMBOL(aclJITObjectImageData)
GET_HSAIL_SYMBOL(aclJITObjectImageGetGlobalsSize)
GET_HSAIL_SYMBOL(aclJITObjectImageIterateSymbols)
GET_HSAIL_SYMBOL(aclDumpBinary)
GET_HSAIL_SYMBOL(aclGetKstatsSI)
GET_HSAIL_SYMBOL(aclInsertKernelStatistics)
GET_HSAIL_SYMBOL(aclFreeMem)
is_ready_ = true;
return true;
}
} // namespace amd
#endif
-394
Ver fichero
@@ -1,394 +0,0 @@
/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include <mutex>
#if defined(WITH_COMPILER_LIB)
#include "top.hpp"
#include "acl.h"
#ifndef ACL_API_ENTRY
#if defined(_WIN32) || defined(__CYGWIN__)
#define ACL_API_ENTRY __stdcall
#else
#define ACL_API_ENTRY
#endif
#endif
namespace amd {
// Function-pointer types for the ACL entry points. Each t_<name> type describes the
// signature of the ACL function <name>; GET_HSAIL_SYMBOL casts the looked-up symbol to it.
// All of them use the ACL_API_ENTRY calling convention.

// Compiler lifetime, version and error reporting
typedef aclCompiler*(ACL_API_ENTRY* t_aclCompilerInit)(aclCompilerOptions* opts,
acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclCompilerFini)(aclCompiler* cl);
typedef aclCLVersion(ACL_API_ENTRY* t_aclCompilerVersion)(aclCompiler* cl, acl_error* error_code);
typedef uint32_t(ACL_API_ENTRY* t_aclVersionSize)(aclCLVersion num, acl_error* error_code);
typedef const char*(ACL_API_ENTRY* t_aclGetErrorString)(acl_error error_code);
// Architecture, device and target queries
typedef acl_error(ACL_API_ENTRY* t_aclGetArchInfo)(const char** arch_names, size_t* arch_size);
typedef acl_error(ACL_API_ENTRY* t_aclGetDeviceInfo)(const char* arch, const char** names,
size_t* device_size);
typedef aclTargetInfo(ACL_API_ENTRY* t_aclGetTargetInfo)(const char* arch, const char* device,
acl_error* error_code);
typedef aclTargetInfo(ACL_API_ENTRY* t_aclGetTargetInfoFromChipID)(const char* arch,
const uint32_t chip_id,
acl_error* error_code);
typedef const char*(ACL_API_ENTRY* t_aclGetArchitecture)(const aclTargetInfo& target);
typedef const uint64_t(ACL_API_ENTRY* t_aclGetChipOptions)(const aclTargetInfo& target);
typedef const char*(ACL_API_ENTRY* t_aclGetFamily)(const aclTargetInfo& target);
typedef const char*(ACL_API_ENTRY* t_aclGetChip)(const aclTargetInfo& target);
// Binary creation, destruction and (de)serialization
typedef aclBinary*(ACL_API_ENTRY* t_aclBinaryInit)(size_t struct_version,
const aclTargetInfo* target,
const aclBinaryOptions* options,
acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclBinaryFini)(aclBinary* bin);
typedef aclBinary*(ACL_API_ENTRY* t_aclReadFromFile)(const char* str, acl_error* error_code);
typedef aclBinary*(ACL_API_ENTRY* t_aclReadFromMem)(const void* mem, size_t size,
acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclWriteToFile)(aclBinary* bin, const char* str);
typedef acl_error(ACL_API_ENTRY* t_aclWriteToMem)(aclBinary* bin, void** mem, size_t* size);
typedef aclBinary*(ACL_API_ENTRY* t_aclCreateFromBinary)(const aclBinary* binary,
aclBIFVersion version);
typedef aclBIFVersion(ACL_API_ENTRY* t_aclBinaryVersion)(const aclBinary* binary);
// Section and symbol manipulation inside a binary
typedef acl_error(ACL_API_ENTRY* t_aclInsertSection)(aclCompiler* cl, aclBinary* binary,
const void* data, size_t data_size,
aclSections id);
typedef acl_error(ACL_API_ENTRY* t_aclInsertSymbol)(aclCompiler* cl, aclBinary* binary,
const void* data, size_t data_size,
aclSections id, const char* symbol);
typedef const void*(ACL_API_ENTRY* t_aclExtractSection)(aclCompiler* cl, const aclBinary* binary,
size_t* size, aclSections id,
acl_error* error_code);
typedef const void*(ACL_API_ENTRY* t_aclExtractSymbol)(aclCompiler* cl, const aclBinary* binary,
size_t* size, aclSections id,
const char* symbol, acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclRemoveSection)(aclCompiler* cl, aclBinary* binary,
aclSections id);
typedef acl_error(ACL_API_ENTRY* t_aclRemoveSymbol)(aclCompiler* cl, aclBinary* binary,
aclSections id, const char* symbol);
// Kernel queries and debug-argument handling
typedef acl_error(ACL_API_ENTRY* t_aclQueryInfo)(aclCompiler* cl, const aclBinary* binary,
aclQueryType query, const char* kernel,
void* data_ptr, size_t* ptr_size);
typedef acl_error(ACL_API_ENTRY* t_aclDbgAddArgument)(aclCompiler* cl, aclBinary* binary,
const char* kernel, const char* name,
bool byVal);
typedef acl_error(ACL_API_ENTRY* t_aclDbgRemoveArgument)(aclCompiler* cl, aclBinary* binary,
const char* kernel, const char* name);
// Compile, link and compiler log
typedef acl_error(ACL_API_ENTRY* t_aclCompile)(aclCompiler* cl, aclBinary* bin, const char* options,
aclType from, aclType to,
aclLogFunction compile_callback);
typedef acl_error(ACL_API_ENTRY* t_aclLink)(aclCompiler* cl, aclBinary* src_bin,
unsigned int num_libs, aclBinary** libs,
aclType link_mode, const char* options,
aclLogFunction link_callback);
typedef const char*(ACL_API_ENTRY* t_aclGetCompilerLog)(aclCompiler* cl);
// Typed data access, conversion, disassembly and device binary retrieval
typedef const void*(ACL_API_ENTRY* t_aclRetrieveType)(aclCompiler* cl, const aclBinary* bin,
const char* name, size_t* data_size,
aclType type, acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclSetType)(aclCompiler* cl, aclBinary* bin, const char* name,
aclType type, const void* data, size_t size);
typedef acl_error(ACL_API_ENTRY* t_aclConvertType)(aclCompiler* cl, aclBinary* bin,
const char* name, aclType type);
typedef acl_error(ACL_API_ENTRY* t_aclDisassemble)(aclCompiler* cl, aclBinary* bin,
const char* kernel,
aclLogFunction disasm_callback);
typedef const void*(ACL_API_ENTRY* t_aclGetDeviceBinary)(aclCompiler* cl, const aclBinary* bin,
const char* kernel, size_t* size,
acl_error* error_code);
typedef bool(ACL_API_ENTRY* t_aclValidateBinaryImage)(const void* binary, size_t length,
unsigned type);
// JIT object image handling
typedef aclJITObjectImage(ACL_API_ENTRY* t_aclJITObjectImageCreate)(aclCompiler* cl,
const void* buffer,
size_t length, aclBinary* bin,
acl_error* error_code);
typedef aclJITObjectImage(ACL_API_ENTRY* t_aclJITObjectImageCopy)(aclCompiler* cl,
const void* buffer, size_t length,
acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageDestroy)(aclCompiler* cl,
aclJITObjectImage buffer);
typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageFinalize)(aclCompiler* cl,
aclJITObjectImage image);
typedef size_t(ACL_API_ENTRY* t_aclJITObjectImageSize)(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code);
typedef const char*(ACL_API_ENTRY* t_aclJITObjectImageData)(aclCompiler* cl,
aclJITObjectImage image,
acl_error* error_code);
typedef size_t(ACL_API_ENTRY* t_aclJITObjectImageGetGlobalsSize)(aclCompiler* cl,
aclJITObjectImage image,
acl_error* error_code);
typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageIterateSymbols)(aclCompiler* cl,
aclJITObjectImage image,
aclJITSymbolCallback callback,
void* data);
// Diagnostics, kernel statistics and memory release
typedef void(ACL_API_ENTRY* t_aclDumpBinary)(const aclBinary* bin);
typedef void(ACL_API_ENTRY* t_aclGetKstatsSI)(const void* shader, aclKernelStats& kstats);
typedef acl_error(ACL_API_ENTRY* t_aclInsertKernelStatistics)(aclCompiler* cl, aclBinary* bin);
typedef acl_error(ACL_API_ENTRY* t_aclFreeMem)(aclBinary* bin, void* mem);
//! Table of ACL entry points. Every function-pointer member is named after the ACL function
//! it refers to, which lets GET_HSAIL_SYMBOL(NAME) assign cep_.NAME and HSAIL_DYN(NAME)
//! call through it when HSAIL_DYN_DLL is defined.
struct HsailEntryPoints {
// Handle of the loaded HSAIL library; the symbols below are looked up through it
void* handle;
t_aclCompilerInit aclCompilerInit;
t_aclCompilerFini aclCompilerFini;
t_aclCompilerVersion aclCompilerVersion;
t_aclVersionSize aclVersionSize;
t_aclGetErrorString aclGetErrorString;
t_aclGetArchInfo aclGetArchInfo;
t_aclGetDeviceInfo aclGetDeviceInfo;
t_aclGetTargetInfo aclGetTargetInfo;
t_aclGetTargetInfoFromChipID aclGetTargetInfoFromChipID;
t_aclGetArchitecture aclGetArchitecture;
t_aclGetChipOptions aclGetChipOptions;
t_aclGetFamily aclGetFamily;
t_aclGetChip aclGetChip;
t_aclBinaryInit aclBinaryInit;
t_aclBinaryFini aclBinaryFini;
t_aclReadFromFile aclReadFromFile;
t_aclReadFromMem aclReadFromMem;
t_aclWriteToFile aclWriteToFile;
t_aclWriteToMem aclWriteToMem;
t_aclCreateFromBinary aclCreateFromBinary;
t_aclBinaryVersion aclBinaryVersion;
t_aclInsertSection aclInsertSection;
t_aclInsertSymbol aclInsertSymbol;
t_aclExtractSection aclExtractSection;
t_aclExtractSymbol aclExtractSymbol;
t_aclRemoveSection aclRemoveSection;
t_aclRemoveSymbol aclRemoveSymbol;
t_aclQueryInfo aclQueryInfo;
t_aclDbgAddArgument aclDbgAddArgument;
t_aclDbgRemoveArgument aclDbgRemoveArgument;
t_aclCompile aclCompile;
t_aclLink aclLink;
t_aclGetCompilerLog aclGetCompilerLog;
t_aclRetrieveType aclRetrieveType;
t_aclSetType aclSetType;
t_aclConvertType aclConvertType;
t_aclDisassemble aclDisassemble;
t_aclGetDeviceBinary aclGetDeviceBinary;
t_aclValidateBinaryImage aclValidateBinaryImage;
t_aclJITObjectImageCreate aclJITObjectImageCreate;
t_aclJITObjectImageCopy aclJITObjectImageCopy;
t_aclJITObjectImageDestroy aclJITObjectImageDestroy;
t_aclJITObjectImageFinalize aclJITObjectImageFinalize;
t_aclJITObjectImageSize aclJITObjectImageSize;
t_aclJITObjectImageData aclJITObjectImageData;
t_aclJITObjectImageGetGlobalsSize aclJITObjectImageGetGlobalsSize;
t_aclJITObjectImageIterateSymbols aclJITObjectImageIterateSymbols;
t_aclDumpBinary aclDumpBinary;
t_aclGetKstatsSI aclGetKstatsSI;
t_aclInsertKernelStatistics aclInsertKernelStatistics;
t_aclFreeMem aclFreeMem;
};
// HSAIL_DYN(NAME) selects how an ACL entry point is reached: through the cep_ function-pointer
// table when HSAIL_DYN_DLL is defined, or as a direct reference to the symbol NAME otherwise.
//
// GET_HSAIL_SYMBOL(NAME) looks NAME up in cep_.handle and stores it in cep_.NAME. It expands
// to several statements, including a `return false;` when the lookup yields nullptr, so it
// can only be used as a statement inside a function whose return type accepts `false`.
// Without HSAIL_DYN_DLL it expands to nothing.
#ifdef HSAIL_DYN_DLL
#define HSAIL_DYN(NAME) cep_.NAME
#define GET_HSAIL_SYMBOL(NAME) \
cep_.NAME = reinterpret_cast<t_##NAME>(Os::getSymbol(cep_.handle, #NAME)); \
if (nullptr == cep_.NAME) { \
return false; \
}
#else
#define HSAIL_DYN(NAME) NAME
#define GET_HSAIL_SYMBOL(NAME)
#endif
class Hsail : public amd::AllStatic {
public:
static std::once_flag initialized;
static bool LoadLib();
static bool IsReady() { return is_ready_; }
static aclCompiler* CompilerInit(aclCompilerOptions* opts, acl_error* error_code) {
return HSAIL_DYN(aclCompilerInit)(opts, error_code);
}
static acl_error CompilerFini(aclCompiler* cl) { return HSAIL_DYN(aclCompilerFini)(cl); }
static aclCLVersion CompilerVersion(aclCompiler* cl, acl_error* error_code) {
return HSAIL_DYN(aclCompilerVersion)(cl, error_code);
}
static uint32_t VersionSize(aclCLVersion num, acl_error* error_code) {
return HSAIL_DYN(aclVersionSize)(num, error_code);
}
static const char* GetErrorString(acl_error error_code) {
return HSAIL_DYN(aclGetErrorString)(error_code);
}
static acl_error GetArchInfo(const char** arch_names, size_t* arch_size) {
return HSAIL_DYN(aclGetArchInfo)(arch_names, arch_size);
}
static acl_error GetDeviceInfo(const char* arch, const char** names, size_t* device_size) {
return HSAIL_DYN(aclGetDeviceInfo)(arch, names, device_size);
}
static aclTargetInfo GetTargetInfo(const char* arch, const char* device, acl_error* error_code) {
return HSAIL_DYN(aclGetTargetInfo)(arch, device, error_code);
}
static aclTargetInfo GetTargetInfoFromChipID(const char* arch, const uint32_t chip_id,
acl_error* error_code) {
return HSAIL_DYN(aclGetTargetInfoFromChipID)(arch, chip_id, error_code);
}
static const char* GetArchitecture(const aclTargetInfo& target) {
return HSAIL_DYN(aclGetArchitecture)(target);
}
static uint64_t GetChipOptions(const aclTargetInfo& target) {
return HSAIL_DYN(aclGetChipOptions)(target);
}
static const char* GetFamily(const aclTargetInfo& target) {
return HSAIL_DYN(aclGetFamily)(target);
}
static const char* GetChip(const aclTargetInfo& target) { return HSAIL_DYN(aclGetChip)(target); }
static aclBinary* BinaryInit(size_t struct_version, const aclTargetInfo* target,
const aclBinaryOptions* options, acl_error* error_code) {
return HSAIL_DYN(aclBinaryInit)(struct_version, target, options, error_code);
}
static acl_error BinaryFini(aclBinary* bin) { return HSAIL_DYN(aclBinaryFini)(bin); }
static aclBinary* ReadFromFile(const char* str, acl_error* error_code) {
return HSAIL_DYN(aclReadFromFile)(str, error_code);
}
static aclBinary* ReadFromMem(const void* mem, size_t size, acl_error* error_code) {
return HSAIL_DYN(aclReadFromMem)(mem, size, error_code);
}
static acl_error WriteToFile(aclBinary* bin, const char* str) {
return HSAIL_DYN(aclWriteToFile)(bin, str);
}
static acl_error WriteToMem(aclBinary* bin, void** mem, size_t* size) {
return HSAIL_DYN(aclWriteToMem)(bin, mem, size);
}
static aclBinary* CreateFromBinary(const aclBinary* binary, aclBIFVersion version) {
return HSAIL_DYN(aclCreateFromBinary)(binary, version);
}
static aclBIFVersion BinaryVersion(const aclBinary* binary) {
return HSAIL_DYN(aclBinaryVersion)(binary);
}
static acl_error InsertSection(aclCompiler* cl, aclBinary* binary, const void* data,
size_t data_size, aclSections id) {
return HSAIL_DYN(aclInsertSection)(cl, binary, data, data_size, id);
}
static const acl_error InsertSymbol(aclCompiler* cl, aclBinary* binary, const void* data,
size_t data_size, aclSections id, const char* symbol) {
return HSAIL_DYN(aclInsertSymbol)(cl, binary, data, data_size, id, symbol);
}
static const void* ExtractSection(aclCompiler* cl, const aclBinary* binary, size_t* size,
aclSections id, acl_error* error_code) {
return HSAIL_DYN(aclExtractSection)(cl, binary, size, id, error_code);
}
//! Forwards to aclExtractSymbol through HSAIL_DYN and returns the pointer it yields.
//! size and error_code are out-pointers handed straight to the callee; this
//! wrapper does not dereference them.
static const void* ExtractSymbol(aclCompiler* cl, const aclBinary* binary, size_t* size,
aclSections id, const char* symbol, acl_error* error_code) {
return HSAIL_DYN(aclExtractSymbol)(cl, binary, size, id, symbol, error_code);
}
//! Forwards to aclRemoveSection through HSAIL_DYN and returns its acl_error result.
static acl_error RemoveSection(aclCompiler* cl, aclBinary* binary, aclSections id) {
return HSAIL_DYN(aclRemoveSection)(cl, binary, id);
}
//! Forwards to aclRemoveSymbol through HSAIL_DYN and returns its acl_error result.
static acl_error RemoveSymbol(aclCompiler* cl, aclBinary* binary, aclSections id,
const char* symbol) {
return HSAIL_DYN(aclRemoveSymbol)(cl, binary, id, symbol);
}
//! Forwards to aclQueryInfo through HSAIL_DYN and returns its acl_error result.
//! \param query     Kind of information requested, passed through.
//! \param kernel    Kernel name the query applies to, passed through.
//! \param data_ptr  Destination buffer, passed through.
//! \param ptr_size  Pointer to the buffer size, passed through.
//! NOTE(review): callers seen elsewhere call this twice — first with a null
//! data_ptr to obtain the size, then with a buffer; confirm that contract in acl.h.
static acl_error QueryInfo(aclCompiler* cl, const aclBinary* binary, aclQueryType query,
const char* kernel, void* data_ptr, size_t* ptr_size) {
return HSAIL_DYN(aclQueryInfo)(cl, binary, query, kernel, data_ptr, ptr_size);
}
//! Forwards to aclDbgAddArgument through HSAIL_DYN and returns its acl_error result.
static acl_error DbgAddArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
const char* name, bool byVal) {
return HSAIL_DYN(aclDbgAddArgument)(cl, binary, kernel, name, byVal);
}
//! Forwards to aclDbgRemoveArgument through HSAIL_DYN and returns its acl_error result.
static acl_error DbgRemoveArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
const char* name) {
return HSAIL_DYN(aclDbgRemoveArgument)(cl, binary, kernel, name);
}
//! Forwards to aclCompile through HSAIL_DYN and returns its acl_error result.
//! \param cl, bin           Compiler handle and binary, passed through.
//! \param options           Option string, passed through.
//! \param from, to          Source and destination aclType values, passed through.
//! \param compile_callback  Log callback, passed through; never invoked by this wrapper itself.
static acl_error Compile(aclCompiler* cl, aclBinary* bin, const char* options, aclType from,
aclType to, aclLogFunction compile_callback) {
return HSAIL_DYN(aclCompile)(cl, bin, options, from, to, compile_callback);
}
//! Forwards to aclLink through HSAIL_DYN and returns its acl_error result.
//! \param src_bin         Binary passed as the link source.
//! \param num_libs, libs  Count and array of additional binaries, passed through.
//! \param link_mode       aclType value passed through as the link mode.
//! \param options         Option string, passed through.
//! \param link_callback   Log callback, passed through; never invoked by this wrapper itself.
static acl_error Link(aclCompiler* cl, aclBinary* src_bin, unsigned int num_libs,
aclBinary** libs, aclType link_mode, const char* options,
aclLogFunction link_callback) {
return HSAIL_DYN(aclLink)(cl, src_bin, num_libs, libs, link_mode, options, link_callback);
}
//! Forwards to aclGetCompilerLog through HSAIL_DYN and returns the string pointer it yields.
static const char* GetCompilerLog(aclCompiler* cl) { return HSAIL_DYN(aclGetCompilerLog)(cl); }
//! Forwards to aclRetrieveType through HSAIL_DYN and returns the pointer it yields.
//! data_size and error_code are out-pointers handed straight to the callee;
//! this wrapper does not dereference them.
static const void* RetrieveType(aclCompiler* cl, const aclBinary* bin, const char* name,
size_t* data_size, aclType type, acl_error* error_code) {
return HSAIL_DYN(aclRetrieveType)(cl, bin, name, data_size, type, error_code);
}
//! Forwards to aclSetType through HSAIL_DYN and returns its acl_error result.
static acl_error SetType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type,
const void* data, size_t size) {
return HSAIL_DYN(aclSetType)(cl, bin, name, type, data, size);
}
//! Forwards to aclConvertType through HSAIL_DYN and returns its acl_error result.
static acl_error ConvertType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type) {
return HSAIL_DYN(aclConvertType)(cl, bin, name, type);
}
//! Forwards to aclDisassemble through HSAIL_DYN and returns its acl_error result.
//! disasm_callback is passed through; it is never invoked by this wrapper itself.
static acl_error Disassemble(aclCompiler* cl, aclBinary* bin, const char* kernel,
aclLogFunction disasm_callback) {
return HSAIL_DYN(aclDisassemble)(cl, bin, kernel, disasm_callback);
}
//! Forwards to aclGetDeviceBinary through HSAIL_DYN and returns the pointer it yields.
//! size and error_code are out-pointers handed straight to the callee; this
//! wrapper does not dereference them.
static const void* GetDeviceBinary(aclCompiler* cl, const aclBinary* bin, const char* kernel,
size_t* size, acl_error* error_code) {
return HSAIL_DYN(aclGetDeviceBinary)(cl, bin, kernel, size, error_code);
}
//! Forwards to aclValidateBinaryImage through HSAIL_DYN and returns its result.
//! When HSAIL_DYN_DLL is defined, the entry point stored in cep_ may be null;
//! in that case false is returned instead of calling through a null pointer.
//! The return type is plain bool: a top-level const on a by-value return has
//! no effect for callers and only produces an ignored-qualifier warning.
//! \param binary, length  Image pointer and byte count, passed through.
//! \param type            Passed through unchanged (meaning defined by the callee).
static bool ValidateBinaryImage(const void* binary, size_t length, unsigned type) {
#if defined(HSAIL_DYN_DLL)
  if (cep_.aclValidateBinaryImage == nullptr) {
    return false;
  }
#endif // defined(HSAIL_DYN_DLL)
  return HSAIL_DYN(aclValidateBinaryImage)(binary, length, type);
}
//! Forwards to aclJITObjectImageCreate through HSAIL_DYN and returns the image handle it yields.
//! error_code is passed through; it is not inspected by this wrapper.
static aclJITObjectImage JITObjectImageCreate(aclCompiler* cl, const void* buffer, size_t length,
aclBinary* bin, acl_error* error_code) {
return HSAIL_DYN(aclJITObjectImageCreate)(cl, buffer, length, bin, error_code);
}
//! Forwards to aclJITObjectImageCopy through HSAIL_DYN and returns the image handle it yields.
//! error_code is passed through; it is not inspected by this wrapper.
static aclJITObjectImage JITObjectImageCopy(aclCompiler* cl, const void* buffer, size_t length,
acl_error* error_code) {
return HSAIL_DYN(aclJITObjectImageCopy)(cl, buffer, length, error_code);
}
//! Forwards to aclJITObjectImageDestroy through HSAIL_DYN and returns its acl_error result.
static acl_error JITObjectImageDestroy(aclCompiler* cl, aclJITObjectImage buffer) {
return HSAIL_DYN(aclJITObjectImageDestroy)(cl, buffer);
}
//! Forwards to aclJITObjectImageFinalize through HSAIL_DYN and returns its acl_error result.
static acl_error JITObjectImageFinalize(aclCompiler* cl, aclJITObjectImage image) {
return HSAIL_DYN(aclJITObjectImageFinalize)(cl, image);
}
//! Forwards to aclJITObjectImageSize through HSAIL_DYN and returns the size_t it reports.
//! error_code is passed through; it is not inspected by this wrapper.
static size_t JITObjectImageSize(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code) {
return HSAIL_DYN(aclJITObjectImageSize)(cl, image, error_code);
}
//! Forwards to aclJITObjectImageData through HSAIL_DYN and returns the pointer it yields.
//! error_code is passed through; it is not inspected by this wrapper.
static const char* JITObjectImageData(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code) {
return HSAIL_DYN(aclJITObjectImageData)(cl, image, error_code);
}
//! Forwards to aclJITObjectImageGetGlobalsSize through HSAIL_DYN and returns the size_t it reports.
//! error_code is passed through; it is not inspected by this wrapper.
static size_t JITObjectImageGetGlobalsSize(aclCompiler* cl, aclJITObjectImage image,
acl_error* error_code) {
return HSAIL_DYN(aclJITObjectImageGetGlobalsSize)(cl, image, error_code);
}
//! Forwards to aclJITObjectImageIterateSymbols through HSAIL_DYN and returns its acl_error result.
//! callback and data are passed through; the callback is never invoked by this
//! wrapper itself.
static acl_error JITObjectImageIterateSymbols(aclCompiler* cl, aclJITObjectImage image,
aclJITSymbolCallback callback, void* data) {
return HSAIL_DYN(aclJITObjectImageIterateSymbols)(cl, image, callback, data);
}
//! Forwards to aclDumpBinary through HSAIL_DYN; nothing is returned to the caller.
static void DumpBinary(const aclBinary* bin) { HSAIL_DYN(aclDumpBinary)(bin); }
//! Forwards to aclGetKstatsSI through HSAIL_DYN; nothing is returned to the caller.
//! \param shader  Passed through unchanged.
//! \param kstats  Passed through by reference to the callee.
static void GetKstatsSI(const void* shader, aclKernelStats& kstats) {
  HSAIL_DYN(aclGetKstatsSI)(shader, kstats);
}
//! Forwards to aclInsertKernelStatistics through HSAIL_DYN and returns its acl_error result.
static acl_error InsertKernelStatistics(aclCompiler* cl, aclBinary* bin) {
return HSAIL_DYN(aclInsertKernelStatistics)(cl, bin);
}
//! Forwards to aclFreeMem through HSAIL_DYN and returns its acl_error result.
static acl_error FreeMem(aclBinary* bin, void* mem) { return HSAIL_DYN(aclFreeMem)(bin, mem); }
private:
static HsailEntryPoints cep_;
static bool is_ready_;
};
} // namespace amd
#endif
+1 -1
Ver fichero
@@ -49,7 +49,7 @@ extern void __amd_scheduler_pal(__global void*, __global void*, uint);
* The trap handler source is copied from the above URL, with the following
* modifications:
* - Add the following directive to declare the trap_entry symbol (this is
* later used by LightningProgram::GetTrapHandlerAddress to locate the load
* later used by pal::Program::GetTrapHandlerAddress to locate the load
* address of the trap handler):
*
* .globl trap_entry
@@ -27,7 +27,7 @@ namespace amd::pal {
class Device;
class VirtualGPU;
class HSAILKernel;
class Kernel;
// ================================================================================================
// RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1)
@@ -256,7 +256,7 @@ class ICaptureMgr {
public:
virtual bool Update(Pal::IPlatform* platform) = 0;
virtual void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
virtual void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
size_t z) = 0;
virtual void PostDispatch(VirtualGPU* gpu) = 0;
+31 -120
Ver fichero
@@ -37,7 +37,6 @@
#include "palPlatform.h"
#include "palDevice.h"
#include "palQueueSemaphore.h"
#include "hsailctx.hpp"
#include "vdi_common.hpp"
@@ -167,10 +166,6 @@ namespace amd::pal {
Util::GenericAllocator NullDevice::allocator_;
char* Device::platformObj_;
Pal::IPlatform* Device::platform_;
#if defined(WITH_COMPILER_LIB)
NullDevice::Compiler* NullDevice::compiler_;
#endif
AppProfile Device::appProfile_;
Pal::IDevice* gDeviceList[Pal::MaxDevices] = {};
@@ -261,25 +256,12 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve
LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId());
return false;
}
if (!settings().useLightning_) {
if ((isa.hsailName() != nullptr)) {
palName_ = isa.hsailName();
} else {
return false;
}
}
if (!ValidateComgr()) {
LogPrintfError("Code object manager initialization failed for offline PAL device %s",
isa.targetId());
return false;
}
if (!ValidateHsail()) {
LogPrintfError("HSAIL initialization failed for offline PAL device %s", isa.targetId());
return false;
}
if (!amd::Device::create(isa)) {
LogPrintfError("Unable to setup device for PAL offline device %s", isa.targetId());
return false;
@@ -293,37 +275,12 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve
info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64;
if (!settings().useLightning_) {
#if defined(WITH_COMPILER_LIB)
const char* library = getenv("HSA_COMPILER_LIBRARY");
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
library,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr};
// Initialize the compiler handle
acl_error error;
compiler_ = amd::Hsail::CompilerInit(&opts, &error);
if (error != ACL_SUCCESS) {
LogPrintfError("Error initializing the compiler for offline PAL device %s", isa.targetId());
return false;
}
#endif // defined(WITH_COMPILER_LIB)
}
return true;
}
device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) {
device::Program* program;
if (settings().useLightning_) {
program = new LightningProgram(*this, owner);
} else {
program = new HSAILProgram(*this, owner);
}
program = new pal::Program(*this, owner);
if (program == nullptr) {
LogError("Memory allocation has failed!");
@@ -471,10 +428,8 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.globalMemSize_ = std::min(4 * info_.maxMemAllocSize_, info_.globalMemSize_);
// Use 64 bit pointers
if (settings().use64BitPtr_) {
info_.addressBits_ = 64;
} else {
info_.addressBits_ = (settings().useLightning_) ? 64 : 32;
info_.addressBits_ = 64;
if (!settings().use64BitPtr_) {
// Limit total size with 3GB for 32 bit
info_.globalMemSize_ = std::min(info_.globalMemSize_, uint64_t(3 * Gi));
}
@@ -531,11 +486,10 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.platform_ = AMD_PLATFORM;
::strncpy(info_.name_, settings().useLightning_ ? isa().targetId() : palName_,
sizeof(info_.name_));
::strncpy(info_.name_, isa().targetId(), sizeof(info_.name_));
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)%s",
settings().useLightning_ ? ",LC" : ",HSAIL", isOnline() ? "" : " [Offline]");
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1,
AMD_BUILD_STRING " (PAL,LC)%s", isOnline() ? "" : " [Offline]");
info_.profile_ = "FULL_PROFILE";
info_.spirVersions_ = "";
@@ -1037,11 +991,6 @@ bool Device::create(Pal::IDevice* device) {
return false;
}
if (!ValidateHsail()) {
LogError("Hsail initialization failed!");
return false;
}
computeEnginesId_.resize(std::min(numComputeEngines(), settings().numComputeRings_));
amd::Context::Info info = {0};
@@ -1086,27 +1035,6 @@ bool Device::create(Pal::IDevice* device) {
allocedMem[i] = 0;
}
if (!settings().useLightning_) {
#if defined(WITH_COMPILER_LIB)
const char* library = getenv("HSA_COMPILER_LIBRARY");
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
library,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr};
// Initialize the compiler handle
acl_error error;
compiler_ = amd::Hsail::CompilerInit(&opts, &error);
if (error != ACL_SUCCESS) {
LogError("Error initializing the compiler");
return false;
}
#endif // defined(WITH_COMPILER_LIB)
}
// Allocate SRD manager
srdManager_ = new SrdManager(*this, std::max(HsaImageObjectSize, HsaSamplerObjectSize), 64 * Ki);
if (srdManager_ == nullptr) {
@@ -1117,7 +1045,7 @@ bool Device::create(Pal::IDevice* device) {
}
// ================================================================================================
// Master function that handles developer callbacks from PAL.
// Primary function that handles developer callbacks from PAL.
void PAL_STDCALL Device::PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
Pal::Developer::CallbackType type, void* pCbData) {
#ifdef PAL_GPUOPEN_OCL
@@ -1248,7 +1176,7 @@ bool Device::initializeHeapResources() {
// Setup trap handler if available
if (trap_handler_ != nullptr) {
auto program =
reinterpret_cast<pal::LightningProgram*>(trap_handler_->getDeviceProgram(*this));
reinterpret_cast<pal::Program*>(trap_handler_->getDeviceProgram(*this));
if (program != nullptr) {
Pal::Result result{Pal::Result::Success};
Pal::GpuMemoryRef memRef = {};
@@ -1314,12 +1242,7 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) {
}
device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) {
device::Program* program;
if (settings().useLightning_) {
program = new LightningProgram(*this, owner);
} else {
program = new HSAILProgram(*this, owner);
}
device::Program* program = new pal::Program(*this, owner);
if (program == nullptr) {
LogError("We failed memory allocation for program!");
}
@@ -1534,12 +1457,6 @@ void Device::tearDown() {
delete platformObj_;
platform_ = nullptr;
}
#if defined(WITH_COMPILER_LIB)
if (compiler_ != nullptr) {
amd::Hsail::CompilerFini(compiler_);
compiler_ = nullptr;
}
#endif // defined(WITH_COMPILER_LIB)
}
Memory* Device::getGpuMemory(amd::Memory* mem) const {
@@ -2361,7 +2278,7 @@ bool Device::validateKernel(const amd::Kernel& kernel, const device::VirtualDevi
}
}
const HSAILKernel* hsaKernel = static_cast<const HSAILKernel*>(devKernel);
const pal::Kernel* hsaKernel = static_cast<const pal::Kernel*>(devKernel);
if (hsaKernel->dynamicParallelism()) {
if (settings().useDeviceQueue_) {
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(*this);
@@ -2805,39 +2722,33 @@ bool Device::createBlitProgram() {
} else {
if (settings().oclVersion_ >= OpenCL20) {
extraBlits = iDev()->GetDispatchKernelSource();
if (settings().useLightning_) {
extraBlits.append(SchedulerSourceCode20);
} else {
extraBlits.append(SchedulerSourceCode);
}
extraBlits.append(SchedulerSourceCode20);
ocl20 = "-cl-std=CL2.0";
}
}
if (settings().useLightning_) {
const std::string TrapHandlerAsm = TrapHandlerCode;
// Create a program for trap handler
// note: It's not critical for runtime functionality to fail trap handler initialization
auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
if (asm_program != nullptr) {
std::vector<amd::Device*> devices;
devices.push_back(this);
std::string opt = "-cl-internal-kernel ";
if (auto retval =
asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval);
asm_program->release();
} else {
if (asm_program->load()) {
trap_handler_ = asm_program;
} else {
DevLogError("Could not load the trap handler \n");
asm_program->release();
}
}
const std::string TrapHandlerAsm = TrapHandlerCode;
// Create a program for trap handler
// note: It's not critical for runtime functionality to fail trap handler initialization
auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
if (asm_program != nullptr) {
std::vector<amd::Device*> devices;
devices.push_back(this);
std::string opt = "-cl-internal-kernel ";
if (auto retval =
asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval);
asm_program->release();
} else {
DevLogError("Trap handler creation failed\n");
if (asm_program->load()) {
trap_handler_ = asm_program;
} else {
DevLogError("Could not load the trap handler \n");
asm_program->release();
}
}
} else {
DevLogError("Trap handler creation failed\n");
}
blitProgram_ = new BlitProgram(context_);
-11
Ver fichero
@@ -38,7 +38,6 @@
#include "device/pal/palappprofile.hpp"
#include "device/pal/palcapturemgr.hpp"
#include "device/pal/palsignal.hpp"
#include "acl.h"
#include "memory"
#include <atomic>
@@ -58,16 +57,6 @@ namespace amd::pal {
//! A nil device object
class NullDevice : public amd::Device {
protected:
#if defined(WITH_COMPILER_LIB)
static Compiler* compiler_;
#endif
public:
#if defined(WITH_COMPILER_LIB)
Compiler* compiler() const { return compiler_; }
#endif
public:
static bool init(void);
+1 -1
Ver fichero
@@ -358,7 +358,7 @@ Pal::Result RgpCaptureMgr::CheckForTraceResults() {
// ================================================================================================
// Called after a swap chain presents. This signals a (next) frame-begin boundary and is
// used to coordinate RGP trace start/stop.
void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
size_t z) {
// Wait for the driver to be resumed in case it's been paused.
WaitForDriverResume();
+3 -3
Ver fichero
@@ -41,7 +41,7 @@ namespace amd::pal {
class Settings;
class Device;
class VirtualGPU;
class HSAILKernel;
class Kernel;
// ================================================================================================
enum class RgpSqqtBarrierReason : uint32_t {
@@ -99,7 +99,7 @@ class RgpCaptureMgr final : public ICaptureMgr {
static RgpCaptureMgr* Create(Pal::IPlatform* platform, const Device& device);
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
size_t z) override;
void PostDispatch(VirtualGPU* gpu) override;
@@ -230,7 +230,7 @@ class RgpCaptureMgr {
Pal::SubmitInfo& submitInfo) const {
return Pal::Result::Success;
}
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, size_t z) {}
void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) {}
void PostDispatch(VirtualGPU* gpu) {}
void FinishRGPTrace(VirtualGPU* gpu, bool aborted) {}
bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const {
+62 -235
Ver fichero
@@ -25,7 +25,6 @@
#include "device/pal/palsched.hpp"
#include "platform/commandqueue.hpp"
#include "utils/options.hpp"
#include "hsailctx.hpp"
#include <string>
#include <memory>
#include <fstream>
@@ -36,9 +35,9 @@
namespace amd::pal {
void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
const uint16_t numVGPRs) {
void Kernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
const uint16_t numVGPRs) {
workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint32_t);
// Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
constexpr uint32_t ScratchRegAlignment = 256;
@@ -71,7 +70,7 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
static_cast<int>(workGroupInfo_.availableLDSSize_ - workGroupInfo_.localMemSize_);
}
bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) {
bool Kernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) {
if (!sym) {
return false;
}
@@ -86,185 +85,94 @@ bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t
return true;
}
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel)
Kernel::Kernel(std::string name, pal::Program* prog, bool internalKernel)
: device::Kernel(prog->device(), name, *prog), index_(0), code_(0), codeSize_(0) {
flags_.hsa_ = true;
flags_.internalKernel_ = internalKernel;
}
HSAILKernel::~HSAILKernel() {}
Kernel::~Kernel() {}
bool HSAILKernel::postLoad() { return true; }
bool Kernel::postLoad() {
if (codeObjectVer() == 2) {
symbolName_ = name();
}
bool HSAILKernel::init() {
#if defined(WITH_COMPILER_LIB)
// Copy codeobject of this kernel from the program CPU segment
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
std::string openClKernelName = openclMangledName(name());
amd::hsa::loader::Symbol* sym = prog().getSymbol(openClKernelName.c_str(), &agent);
if (!sym) {
LogPrintfError("Error: Getting kernel ISA code symbol %s from AMD HSA Code Object failed.\n",
openClKernelName.c_str());
auto sym = prog().getSymbol(symbolName().c_str(), &agent);
if (!setKernelDescriptor(sym, &akd_)) {
return false;
}
amd_kernel_code_t* akc = &akc_;
if (!setKernelCode(sym, akc)) {
LogError("Error: setKernelCode() failed.");
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK,
reinterpret_cast<void*>(&kernelHasDynamicCallStack_))) {
return false;
}
if (!prog().isNull()) {
codeSize_ = prog().codeSegGpu().owner()->getSize();
if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE,
reinterpret_cast<void*>(&codeSize_))) {
LogError("Error: sym->GetInfo() failed.");
return false;
// handle device enqueue
if (!RuntimeHandle().empty()) {
amd::hsa::loader::Symbol* rth_symbol;
// Get the runtime handle symbol GPU address
rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent);
uint64_t symbol_address;
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
// Copy the kernel_object pointer to the runtime handle symbol GPU address
const Memory& codeSegGpu = prog().codeSegGpu();
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
uint64_t kernel_object = gpuAqlCode();
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
}
}
// Setup the the workgroup info
setWorkGroupInfo(akc->workitem_private_segment_byte_size, akc->workgroup_group_segment_byte_size,
akc->wavefront_sgpr_count, akc->workitem_vgpr_count);
workgroupGroupSegmentByteSize_ = workGroupInfo_.usedLDSSize_;
kernargSegmentByteSize_ = akc->kernarg_segment_byte_size;
// Pull out metadata from the ELF
size_t sizeOfArgList;
acl_error error =
amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfArgList);
if (error != ACL_SUCCESS) {
return false;
}
char* aclArgList = new char[sizeOfArgList];
if (nullptr == aclArgList) {
return false;
}
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
openClKernelName.c_str(), aclArgList, &sizeOfArgList);
if (error != ACL_SUCCESS) {
return false;
}
// Set the argList
InitParameters(reinterpret_cast<const aclArgData*>(aclArgList), argsBufferSize());
delete[] aclArgList;
size_t sizeOfWorkGroupSize;
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
if (error != ACL_SUCCESS) {
return false;
}
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
openClKernelName.c_str(), workGroupInfo_.compileSize_,
&sizeOfWorkGroupSize);
if (error != ACL_SUCCESS) {
return false;
}
setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
workGroupInfo_.compileSize_[2];
} else {
workGroupInfo_.size_ = device().info().preferredWorkGroupSize_;
workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_;
if (workGroupInfo_.size_ == 0) {
return false;
}
if ((workGroupInfo_.usedStackSize_ & 0x1) == 0x1) {
workGroupInfo_.scratchRegs_ =
std::max<uint32_t>(device().StackSize(), workGroupInfo_.scratchRegs_ * sizeof(uint32_t));
workGroupInfo_.scratchRegs_ = amd::alignUp(workGroupInfo_.scratchRegs_, 16) / sizeof(uint32_t);
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
}
// Pull out printf metadata from the ELF
size_t sizeOfPrintfList;
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
if (error != ACL_SUCCESS) {
// handle the printf metadata if any
std::vector<std::string> printfStr;
if (!GetPrintfStr(&printfStr)) {
return false;
}
// Make sure kernel has any printf info
if (0 != sizeOfPrintfList) {
char* aclPrintfList = new char[sizeOfPrintfList];
if (nullptr == aclPrintfList) {
return false;
}
error =
amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
openClKernelName.c_str(), aclPrintfList, &sizeOfPrintfList);
if (error != ACL_SUCCESS) {
return false;
}
// Set the PrintfList
InitPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList));
delete[] aclPrintfList;
if (!printfStr.empty()) {
InitPrintf(printfStr);
}
aclMetadata md;
md.enqueue_kernel = false;
size_t sizeOfDeviceEnqueue = sizeof(md.enqueue_kernel);
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
openClKernelName.c_str(), &md.enqueue_kernel, &sizeOfDeviceEnqueue);
if (error != ACL_SUCCESS) {
return false;
}
flags_.dynamicParallelism_ = md.enqueue_kernel;
md.kernel_index = -1;
size_t sizeOfIndex = sizeof(md.kernel_index);
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
openClKernelName.c_str(), &md.kernel_index, &sizeOfIndex);
if (error != ACL_SUCCESS) {
return false;
}
index_ = md.kernel_index;
size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(),
RT_WAVES_PER_SIMD_HINT, openClKernelName.c_str(),
&workGroupInfo_.wavesPerSimdHint_, &sizeOfWavesPerSimdHint);
if (error != ACL_SUCCESS) {
return false;
}
size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_);
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(),
RT_WORK_GROUP_SIZE_HINT, openClKernelName.c_str(),
workGroupInfo_.compileSizeHint_, &sizeOfWorkGroupSizeHint);
if (error != ACL_SUCCESS) {
return false;
}
size_t sizeOfVecTypeHint;
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
openClKernelName.c_str(), NULL, &sizeOfVecTypeHint);
if (error != ACL_SUCCESS) {
return false;
}
if (0 != sizeOfVecTypeHint) {
char* VecTypeHint = new char[sizeOfVecTypeHint + 1];
if (NULL == VecTypeHint) {
return false;
}
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
openClKernelName.c_str(), VecTypeHint, &sizeOfVecTypeHint);
if (error != ACL_SUCCESS) {
return false;
}
VecTypeHint[sizeOfVecTypeHint] = '\0';
workGroupInfo_.compileVecTypeHint_ = std::string(VecTypeHint);
delete[] VecTypeHint;
}
#endif // defined(WITH_COMPILER_LIB)
return true;
}
const HSAILProgram& HSAILKernel::prog() const {
return reinterpret_cast<const HSAILProgram&>(prog_);
bool Kernel::init() {
return GetAttrCodePropMetadata();
}
// ================================================================================================
hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
const pal::Program& Kernel::prog() const {
return reinterpret_cast<const pal::Program&>(prog_);
}
hsa_kernel_dispatch_packet_t* Kernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
const amd::NDRangeContainer& sizes,
const_address params, size_t ldsAddress,
uint64_t vmDefQueue,
@@ -496,86 +404,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
return hsaDisp;
}
// ================================================================================================
const LightningProgram& LightningKernel::prog() const {
return reinterpret_cast<const LightningProgram&>(prog_);
}
#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() { return GetAttrCodePropMetadata(); }
bool LightningKernel::postLoad() {
if (codeObjectVer() == 2) {
symbolName_ = name();
}
// Copy codeobject of this kernel from the program CPU segment
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
auto sym = prog().getSymbol(symbolName().c_str(), &agent);
if (!setKernelDescriptor(sym, &akd_)) {
return false;
}
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK,
reinterpret_cast<void*>(&kernelHasDynamicCallStack_))) {
return false;
}
if (!prog().isNull()) {
codeSize_ = prog().codeSegGpu().owner()->getSize();
// handle device enqueue
if (!RuntimeHandle().empty()) {
amd::hsa::loader::Symbol* rth_symbol;
// Get the runtime handle symbol GPU address
rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent);
uint64_t symbol_address;
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
// Copy the kernel_object pointer to the runtime handle symbol GPU address
const Memory& codeSegGpu = prog().codeSegGpu();
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
uint64_t kernel_object = gpuAqlCode();
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
}
}
// Setup the the workgroup info
setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_;
if (workGroupInfo_.size_ == 0) {
return false;
}
if ((workGroupInfo_.usedStackSize_ & 0x1) == 0x1) {
workGroupInfo_.scratchRegs_ =
std::max<uint32_t>(device().StackSize(), workGroupInfo_.scratchRegs_ * sizeof(uint32_t));
workGroupInfo_.scratchRegs_ = amd::alignUp(workGroupInfo_.scratchRegs_, 16) / sizeof(uint32_t);
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
}
// handle the printf metadata if any
std::vector<std::string> printfStr;
if (!GetPrintfStr(&printfStr)) {
return false;
}
if (!printfStr.empty()) {
InitPrintf(printfStr);
}
return true;
}
bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
bool Kernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
llvm::amdhsa::kernel_descriptor_t* akd) {
if (!sym) {
return false;
@@ -591,6 +420,4 @@ bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
return true;
}
#endif // defined(USE_COMGR_LIBRARY)
} // namespace amd::pal
+10 -27
Ver fichero
@@ -52,17 +52,16 @@ namespace amd::pal {
class VirtualGPU;
class Device;
class NullDevice;
class HSAILProgram;
class LightningProgram;
class Program;
/*! \addtogroup pal PAL Device Implementation
* @{
*/
class HSAILKernel : public device::Kernel {
class Kernel : public device::Kernel {
public:
HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel);
Kernel(std::string name, pal::Program* prog, bool internalKernel);
virtual ~HSAILKernel();
virtual ~Kernel();
//! Initializes the metadata required for this kernel,
bool init();
@@ -80,7 +79,7 @@ class HSAILKernel : public device::Kernel {
}
//! Returns HSA program associated with this kernel
const HSAILProgram& prog() const;
const pal::Program& prog() const;
//! Returns LDS size used in this kernel
uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }
@@ -119,12 +118,15 @@ class HSAILKernel : public device::Kernel {
//! Returns the kernel index in the program
uint index() const { return index_; }
//! Get the kernel descriptor and copy the code object from the program CPU segment
bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd);
private:
//! Disable copy constructor
HSAILKernel(const HSAILKernel&);
Kernel(const pal::Kernel&);
//! Disable operator=
HSAILKernel& operator=(const HSAILKernel&);
Kernel& operator=(const pal::Kernel&);
protected:
//! Get the kernel code and copy the code object from the program CPU segment
@@ -144,24 +146,5 @@ class HSAILKernel : public device::Kernel {
size_t codeSize_; //!< Size of ISA code
};
class LightningKernel : public HSAILKernel {
public:
LightningKernel(const std::string& name, HSAILProgram* prog, bool internalKernel)
: HSAILKernel(name, prog, internalKernel) {}
//! Returns Lightning program associated with this kernel
const LightningProgram& prog() const;
#if defined(USE_COMGR_LIBRARY)
//! Get the kernel descriptor and copy the code object from the program CPU segment
bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd);
//! Initializes the metadata required for this kernel
bool init();
//! Setup after code object loading
bool postLoad();
#endif
};
/*@}*/ // namespace amd::pal
} // namespace amd::pal
+116 -273
Ver fichero
@@ -20,7 +20,6 @@
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "aclTypes.h"
#include "device/pal/palprogram.hpp"
#include "device/pal/palblit.hpp"
#include "utils/options.hpp"
@@ -67,7 +66,7 @@ bool Segment::gpuAddressOffset(uint64_t offAddr, size_t* offset) {
return true;
}
bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
bool Segment::alloc(pal::Program& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
bool zero) {
if (prog.isNull()) {
LogError("[OCL] cannot create a mem object on an offline device!");
@@ -174,8 +173,9 @@ bool Segment::freeze(bool destroySysmem) {
return result;
}
HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
: Program(device, owner),
// ================================================================================================
Program::Program(Device& device, amd::Program& owner)
: device::Program(device, owner),
rawBinary_(nullptr),
kernels_(nullptr),
codeSegGpu_(nullptr),
@@ -186,10 +186,11 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
loaderContext_(this) {
assert(device.isOnline());
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
isHIP_ = (owner.language() == amd::Program::HIP);
}
HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
: Program(device, owner),
Program::Program(NullDevice& device, amd::Program& owner)
: device::Program(device, owner),
rawBinary_(nullptr),
kernels_(nullptr),
codeSegGpu_(nullptr),
@@ -201,26 +202,14 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
assert(!device.isOnline());
isNull_ = true;
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
isHIP_ = (owner.language() == amd::Program::HIP);
}
HSAILProgram::~HSAILProgram() {
Program::~Program() {
// Destroy internal static samplers
for (auto& it : staticSamplers_) {
delete it;
}
#if defined(WITH_COMPILER_LIB)
if (rawBinary_ != nullptr) {
amd::Hsail::FreeMem(binaryElf_, rawBinary_);
}
acl_error error;
// Free the elf binary
if (binaryElf_ != nullptr) {
error = amd::Hsail::BinaryFini(binaryElf_);
if (error != ACL_SUCCESS) {
LogWarning("Error while destroying the acl binary \n");
}
}
#endif // defined(WITH_COMPILER_LIB)
releaseClBinary();
if (executable_) {
loader_->DestroyExecutable(executable_);
@@ -233,15 +222,6 @@ HSAILProgram::~HSAILProgram() {
}
}
//! Splits a NUL-terminated string into its whitespace-delimited tokens.
//! Consecutive whitespace characters never produce empty tokens.
//! \param str  NUL-terminated input text
//! \return the tokens in the order they appear in \p str
inline static std::vector<std::string> splitSpaceSeparatedString(char* str) {
  std::stringstream input{std::string(str)};
  std::vector<std::string> tokens;
  // Formatted extraction skips leading whitespace and stops at the next whitespace character
  for (std::string token; input >> token;) {
    tokens.push_back(token);
  }
  return tokens;
}
inline static std::string GetUriFromMemoryAddress(const void* memory, size_t size) {
int pid = amd::Os::getProcessId();
std::ostringstream uri_stream;
@@ -250,100 +230,7 @@ inline static std::string GetUriFromMemoryAddress(const void* memory, size_t siz
return uri_stream.str();
}
//! Loads and freezes the code object through the HSA loader and creates one HSAILKernel per
//! kernel name reported by the compiler library for binaryElf_.
//! The whole body is compiled only when WITH_COMPILER_LIB is defined; otherwise the function
//! just returns true.
//! \param binary                   Code object image; its address is used as the code object handle
//! \param binSize                  Size of \p binary in bytes (used to build the URI)
//! \param useUniformWorkGroupSize  Forwarded to every created kernel
//! \param internalKernel           Forwarded to the HSAILKernel constructor
//! \return false on the first failing step (each failure appends a message to buildLog_)
bool HSAILProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
#if defined(WITH_COMPILER_LIB)
// ACL_TYPE_CG stage is not performed for offline compilation
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
if (executable_ == nullptr) {
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
return false;
}
// NOTE(review): 'size' is not referenced again in this function
size_t size = binSize;
hsa_code_object_t code_object;
// The loader receives the in-memory binary address as the code object handle
code_object.handle = reinterpret_cast<uint64_t>(binary);
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
auto uri = GetUriFromMemoryAddress(binary, binSize);
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error: AMD HSA Code Object loading failed.\n";
return false;
}
status = loader_->FreezeExecutable(executable_, nullptr);
if (status != HSA_STATUS_SUCCESS) {
buildLog_ += "Error: AMD HSA Code Object freeze failed.\n";
return false;
}
// First query passes a null buffer so that only the required size is returned
size_t kernelNamesSize = 0;
acl_error errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_,
RT_KERNEL_NAMES, nullptr, nullptr, &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
return false;
}
if (kernelNamesSize > 0) {
// Second query fills a buffer of the reported size with the kernel names
std::vector<char> kernelNames(kernelNamesSize);
errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES,
nullptr, kernelNames.data(), &kernelNamesSize);
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
return false;
}
// The names are parsed as a whitespace-separated list
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames.data());
for (const auto& it : vKernels) {
std::string kernelName(it);
HSAILKernel* aKernel = new HSAILKernel(kernelName, this, internalKernel);
// The kernel is registered with the program before init() runs
addKernel(aKernel);
if (!aKernel->init()) {
buildLog_ += "Error: Kernel initialization failed.\n";
return false;
}
aKernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
}
}
DestroySegmentCpuAccess();
#endif // defined(WITH_COMPILER_LIB)
return true;
}
//! Runs the post-load step of every kernel in the program, records the program-wide maximum
//! scratch register and VGPR usage, and allocates the kernel table when at least one kernel
//! reports dynamic parallelism.
//! The body is compiled only when WITH_COMPILER_LIB is defined; offline devices return early.
//! \return false if a kernel's postLoad() or the kernel table allocation fails, true otherwise
bool HSAILProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset,
                              std::string uri) {
#if defined(WITH_COMPILER_LIB)
  if (!device().isOnline()) {
    return true;
  }

  bool needsKernelTable = false;
  for (auto& entry : kernels()) {
    auto* hsailKernel = static_cast<HSAILKernel*>(entry.second);
    if (!hsailKernel->postLoad()) {
      return false;
    }
    needsKernelTable |= hsailKernel->dynamicParallelism();
    // Find max scratch regs used in the program. It's used for scratch buffer preallocation
    // with dynamic parallelism, since runtime doesn't know which child kernel will be called
    maxScratchRegs_ =
        std::max(static_cast<uint>(hsailKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
    maxVgprs_ = std::max(static_cast<uint>(hsailKernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
  }

  // Allocate kernel table for device enqueuing
  if (!isNull() && needsKernelTable) {
    return allocKernelTable();
  }
#endif  // defined(WITH_COMPILER_LIB)
  return true;
}
//! No binary is created here; the call always reports success and ignores \p options.
bool HSAILProgram::createBinary(amd::option::Options* options) {
  return true;
}
bool HSAILProgram::allocKernelTable() {
bool Program::allocKernelTable() {
if (isNull()) {
// Cannot create a kernel table for offline devices.
return false;
@@ -359,7 +246,7 @@ bool HSAILProgram::allocKernelTable() {
} else {
size_t* table = reinterpret_cast<size_t*>(kernels_->map(nullptr, pal::Resource::WriteOnly));
for (auto& it : kernels()) {
HSAILKernel* kernel = static_cast<HSAILKernel*>(it.second);
pal::Kernel* kernel = static_cast<pal::Kernel*>(it.second);
table[kernel->index()] = static_cast<size_t>(kernel->gpuAqlCode());
}
kernels_->unmap(nullptr);
@@ -367,41 +254,9 @@ bool HSAILProgram::allocKernelTable() {
return true;
}
//! Passes the address of the program's GPU code segment to \p gpu via addVmMemory().
void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const {
  auto& codeMemory = codeSegGpu();
  gpu.addVmMemory(&codeMemory);
}
//! Passes the address of the program's GPU code segment to \p gpu via addVmMemory().
void Program::fillResListWithKernels(VirtualGPU& gpu) const {
  auto& codeMemory = codeSegGpu();
  gpu.addVmMemory(&codeMemory);
}
#if defined(WITH_COMPILER_LIB)
//! Refreshes info_ from the compiler library and returns it.
//! The architecture string is "hsail64" when the device settings request 64-bit pointers and
//! "hsail" otherwise. A failed query is only logged; info_ is returned either way.
const aclTargetInfo& HSAILProgram::info() {
  acl_error status;
  info_ = amd::Hsail::GetTargetInfo(palNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail",
                                    device().isa().hsailName(), &status);
  const bool queryFailed = (status != ACL_SUCCESS);
  if (queryFailed) {
    LogWarning("aclGetTargetInfo failed");
  }
  return info_;
}
#endif
//! Serializes binaryElf_ into memory, installs the result as the program binary and records
//! the binary type. Any previously serialized buffer is released first.
//! The body is compiled only when WITH_COMPILER_LIB is defined.
//! \param type  Binary type to record through setType()
//! \return false when the binary cannot be written to memory, true otherwise
bool HSAILProgram::saveBinaryAndSetType(type_t type) {
#if defined(WITH_COMPILER_LIB)
  // Drop the buffer left over from an earlier serialization
  if (rawBinary_ != nullptr) {
    amd::Hsail::FreeMem(binaryElf_, rawBinary_);
    rawBinary_ = nullptr;
  }

  // Write binary to memory
  size_t binaryBytes = 0;
  const auto writeStatus = amd::Hsail::WriteToMem(binaryElf_, &rawBinary_, &binaryBytes);
  if (writeStatus != ACL_SUCCESS) {
    buildLog_ += "Failed to write binary to memory \n";
    return false;
  }

  setBinary(static_cast<char*>(rawBinary_), binaryBytes);
  // Set the type of binary
  setType(type);
#endif  // defined(WITH_COMPILER_LIB)
  return true;
}
bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) {
bool Program::defineGlobalVar(const char* name, void* dptr) {
if (!device().isOnline()) {
return false;
}
@@ -419,7 +274,7 @@ bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) {
return true;
}
bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
const char* global_name) const {
if (!device().isOnline()) {
return false;
@@ -528,6 +383,107 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p
return true;
}
//! Asks the CL binary object to create the ELF image for the current program type.
//! \param options  Build options; the BinEncrypt option variable is passed to createElfBinary()
//! \return true when the ELF image was created, false (after logging an error) otherwise
bool Program::createBinary(amd::option::Options* options) {
  const bool created = clBinary()->createElfBinary(options->oVariables->BinEncrypt, type());
  if (!created) {
    LogError("Failed to create ELF binary image!");
    return false;
  }
  return true;
}
bool Program::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
// Skip metadata look-up and kernel creation for assembly and internal kernel.
// @note: Runtime compiles only the second level trap handler from assembly
if ((owner()->language() != amd::Program::Assembly) || !internal_) {
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
return false;
}
for (const auto& kernelMeta : kernelMetadataMap_) {
auto kernelName = kernelMeta.first;
auto kernel = new pal::Kernel(kernelName, this, internalKernel);
if (kernel == nullptr) {
return false;
}
if (!kernel->init()) {
buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n";
return false;
}
addKernel(kernel);
if (codeObjectVer() < 5) {
kernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
}
}
}
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
if (executable_ == nullptr) {
LogError("Error: Executable for AMD HSA Code Object isn't created.");
return false;
}
hsa_code_object_t code_object;
code_object.handle = reinterpret_cast<uint64_t>(binary);
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
auto uri = GetUriFromMemoryAddress(binary, binSize);
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri);
if (status != HSA_STATUS_SUCCESS) {
LogError("Error: AMD HSA Code Object loading failed.");
return false;
}
if (isInternal() && (owner()->language() == amd::Program::Assembly)) {
// Don't register trap handler with the debugger, since user shouldn't see this kernel
status = executable_->Freeze(nullptr);
trapHandler_ = true;
} else {
status = loader_->FreezeExecutable(executable_, nullptr);
}
if (status != HSA_STATUS_SUCCESS) {
LogError("Error: Freezing the executable failed.");
return false;
}
return true;
}
bool Program::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
size_t foffset, std::string uri) {
// Collect the information about compiled binary, except the trap handler
if (!isNull() && (palDevice().captureMgr() != nullptr) && !isTrapHandler()) {
apiHash_ = palDevice().captureMgr()->AddElfBinary(binary, binSize, binary, binSize,
codeSegGpu_->iMem(), codeSegGpu_->offset());
}
for (auto& kit : kernels()) {
pal::Kernel* kernel = static_cast<pal::Kernel*>(kit.second);
if (!kernel->postLoad()) {
return false;
}
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
maxScratchRegs_ =
std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
maxVgprs_ = std::max(static_cast<uint>(kernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
}
DestroySegmentCpuAccess();
return true;
}
//! Returns the load address of the trap handler.
//! Looks up the "trap_entry" symbol in the program executable for this device's agent and
//! queries its kernel object address.
//! \return the address reported for "trap_entry", or 0 when there is no executable or the
//!         symbol is not found
uint64_t Program::GetTrapHandlerAddress() const {
  uint64_t address = 0;
  // executable_ may be unset (the destructor checks it before use as well); without an
  // executable there is no symbol to look up
  if (executable_ == nullptr) {
    return address;
  }
  hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
  auto trap_sym = executable_->GetSymbol("trap_entry", &agent);
  if (trap_sym != nullptr) {
    trap_sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &address);
  }
  return address;
}
// ================================================================================================
hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
const amd::Isa* isa_p = amd::Isa::findIsa(name);
return {amd::Isa::toHandle(isa_p)};
@@ -696,12 +652,10 @@ hsa_status_t PALHSALoaderContext::SamplerDestroy(hsa_agent_t agent,
if (!sampler_handle.handle) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
// Samplers will be destroyed by the pal::HSAILProgram destructor.
// Samplers will be destroyed by the pal::Program destructor.
return HSA_STATUS_SUCCESS;
}
#if defined(USE_COMGR_LIBRARY)
static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executable_symbol_t hSymbol,
void* data) {
auto symbol = amd::hsa::loader::Symbol::Object(hSymbol);
@@ -729,115 +683,4 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl
return HSA_STATUS_SUCCESS;
}
#endif // defined(USE_COMGR_LIBRARY)
//! Asks the CL binary object to create the ELF image for the current program type.
//! The creation step is compiled only when USE_COMGR_LIBRARY is defined; otherwise the call
//! simply reports success.
//! \param options  Build options; the BinEncrypt option variable is passed to createElfBinary()
//! \return false (after logging an error) when the ELF image cannot be created, true otherwise
bool LightningProgram::createBinary(amd::option::Options* options) {
#if defined(USE_COMGR_LIBRARY)
  const bool created = clBinary()->createElfBinary(options->oVariables->BinEncrypt, type());
  if (!created) {
    LogError("Failed to create ELF binary image!");
    return false;
  }
#endif  // defined(USE_COMGR_LIBRARY)
  return true;
}
// ================================================================================================
//! Creates a LightningKernel for every entry of kernelMetadataMap_ and then loads and freezes
//! the code object through the HSA loader.
//! The whole body is compiled only when USE_COMGR_LIBRARY is defined; otherwise the function
//! just returns true.
//! \param binary                   Code object image; its address is used as the code object handle
//! \param binSize                  Size of \p binary in bytes
//! \param useUniformWorkGroupSize  Forwarded to each kernel when codeObjectVer() < 5
//! \param internalKernel           Forwarded to the LightningKernel constructor
//! \return true on success, false as soon as any step fails
bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
#if defined(USE_COMGR_LIBRARY)
// Skip metadata look-up and kernel creation for assembly and internal kernel.
// @note: Runtime compiles only the second level trap handler from assembly
if ((owner()->language() != amd::Program::Assembly) || !internal_) {
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
return false;
}
for (const auto& kernelMeta : kernelMetadataMap_) {
auto kernelName = kernelMeta.first;
auto kernel = new LightningKernel(kernelName, this, internalKernel);
if (kernel == nullptr) {
return false;
}
// NOTE(review): on this failure path the kernel has not been passed to addKernel() and is
// not deleted here
if (!kernel->init()) {
buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n";
return false;
}
addKernel(kernel);
// The caller's uniform work-group setting is applied only for code object versions below 5
if (codeObjectVer() < 5) {
kernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
}
}
}
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
if (executable_ == nullptr) {
LogError("Error: Executable for AMD HSA Code Object isn't created.");
return false;
}
hsa_code_object_t code_object;
// The loader receives the in-memory binary address as the code object handle
code_object.handle = reinterpret_cast<uint64_t>(binary);
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
auto uri = GetUriFromMemoryAddress(binary, binSize);
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri);
if (status != HSA_STATUS_SUCCESS) {
LogError("Error: AMD HSA Code Object loading failed.");
return false;
}
if (isInternal() && (owner()->language() == amd::Program::Assembly)) {
// Don't register trap handler with the debugger, since user shouldn't see this kernel
status = executable_->Freeze(nullptr);
trapHandler_ = true;
} else {
status = loader_->FreezeExecutable(executable_, nullptr);
}
if (status != HSA_STATUS_SUCCESS) {
LogError("Error: Freezing the executable failed.");
return false;
}
#endif
return true;
}
// ================================================================================================
//! Registers the binary with the capture manager (when one exists and this is not the trap
//! handler), runs the post-load step of every kernel and records the program-wide maximum
//! scratch register and VGPR usage.
//! The body is compiled only when USE_COMGR_LIBRARY is defined.
//! \param binary   Code object image handed to the capture manager
//! \param binSize  Size of \p binary in bytes
//! \return false if any kernel's postLoad() fails, true otherwise
bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
                                  size_t foffset, std::string uri) {
#if defined(USE_COMGR_LIBRARY)
  // Collect the information about compiled binary, except the trap handler
  const bool captureBinary =
      !isNull() && (palDevice().captureMgr() != nullptr) && !isTrapHandler();
  if (captureBinary) {
    apiHash_ = palDevice().captureMgr()->AddElfBinary(binary, binSize, binary, binSize,
                                                      codeSegGpu_->iMem(), codeSegGpu_->offset());
  }

  for (auto& entry : kernels()) {
    auto* lcKernel = static_cast<LightningKernel*>(entry.second);
    if (!lcKernel->postLoad()) {
      return false;
    }
    // Find max scratch regs used in the program. It's used for scratch buffer preallocation
    // with dynamic parallelism, since runtime doesn't know which child kernel will be called
    maxScratchRegs_ =
        std::max(static_cast<uint>(lcKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
    maxVgprs_ = std::max(static_cast<uint>(lcKernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
  }

  DestroySegmentCpuAccess();
#endif  // defined(USE_COMGR_LIBRARY)
  return true;
}
// ================================================================================================
//! Looks up the "trap_entry" symbol in the program executable for this device's agent and
//! returns the kernel object address reported for it.
//! \return the queried address, or 0 when the symbol is not found
uint64_t LightningProgram::GetTrapHandlerAddress() const {
  hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
  uint64_t trapAddress = 0;
  if (auto trapEntry = executable_->GetSymbol("trap_entry", &agent)) {
    trapEntry->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &trapAddress);
  }
  return trapAddress;
}
} // namespace amd::pal
+17 -45
Ver fichero
@@ -44,7 +44,7 @@ namespace amd::pal {
*/
using namespace amd::hsa::loader;
class HSAILProgram;
class Program;
class Segment : public amd::HeapObject {
public:
@@ -52,7 +52,7 @@ class Segment : public amd::HeapObject {
~Segment();
//! Allocates a segment
bool alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
bool alloc(Program& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
bool zero);
//! Copies data from host to the segment
@@ -81,7 +81,7 @@ class Segment : public amd::HeapObject {
class PALHSALoaderContext final : public hsa::loader::Context {
public:
PALHSALoaderContext(HSAILProgram* program) : program_(program) {}
PALHSALoaderContext(pal::Program* program) : program_(program) {}
virtual ~PALHSALoaderContext() {}
@@ -127,26 +127,26 @@ class PALHSALoaderContext final : public hsa::loader::Context {
const hsa_ext_sampler_descriptor_t* sampler_descriptor,
hsa_ext_sampler_t* sampler_handle) override;
//! All samplers are owned by HSAILProgram and are deleted in its destructor.
//! All samplers are owned by pal program and are deleted in its destructor.
hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override;
private:
PALHSALoaderContext(const PALHSALoaderContext& c);
PALHSALoaderContext& operator=(const PALHSALoaderContext& c);
pal::HSAILProgram* program_;
pal::Program* program_;
};
//! \class HSAIL program
class HSAILProgram : public device::Program {
//! \class pal program
class Program : public device::Program {
friend class ClBinary;
public:
//! Default constructor
HSAILProgram(Device& device, amd::Program& owner);
HSAILProgram(NullDevice& device, amd::Program& owner);
Program(Device& device, amd::Program& owner);
Program(NullDevice& device, amd::Program& owner);
//! Default destructor
virtual ~HSAILProgram();
virtual ~Program();
void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); }
@@ -201,14 +201,9 @@ class HSAILProgram : public device::Program {
//! Returns API hash value of the program for RGP thread trace
uint64_t ApiHash() const { return apiHash_; }
protected:
bool saveBinaryAndSetType(type_t type);
//! Returns the load address of the trap handler
uint64_t GetTrapHandlerAddress() const;
virtual bool createBinary(amd::option::Options* options);
#if defined(WITH_COMPILER_LIB)
virtual const aclTargetInfo& info();
#endif
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) override;
@@ -216,6 +211,9 @@ class HSAILProgram : public device::Program {
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) override;
virtual bool createBinary(amd::option::Options* options) override;
protected:
//! Destroys CPU allocations in the code segment
void DestroySegmentCpuAccess() const {
if (codeSegment_ != nullptr) {
@@ -229,10 +227,10 @@ class HSAILProgram : public device::Program {
private:
//! Disable default copy constructor
HSAILProgram(const HSAILProgram&);
Program(const Program&);
//! Disable operator=
HSAILProgram& operator=(const HSAILProgram&);
Program& operator=(const Program&);
protected:
//! Allocate kernel table
@@ -256,31 +254,5 @@ class HSAILProgram : public device::Program {
PALHSALoaderContext loaderContext_; //!< Context for HSA Loader
};
//! \class Lightning Compiler Program
//! Program type derived from HSAILProgram. Both constructors set isLC_ and derive isHIP_ from
//! the owner's language; kernel creation, kernel setup and binary creation are overridden.
class LightningProgram : public HSAILProgram {
public:
//! Constructs the program for an offline (null) device
LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
isLC_ = true;
isHIP_ = (owner.language() == amd::Program::HIP);
}
//! Constructs the program for a device
LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
isLC_ = true;
isHIP_ = (owner.language() == amd::Program::HIP);
}
//! Destructor; performs no work of its own
virtual ~LightningProgram() {}
//! Returns the kernel object address of the "trap_entry" symbol, or 0 if it is not found
uint64_t GetTrapHandlerAddress() const;
protected:
//! Creates the kernels of the program and loads the code object
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) override;
//! Runs the post-load setup of the kernels created by createKernels()
virtual bool setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) override;
//! Creates the ELF binary image of the program
virtual bool createBinary(amd::option::Options* options) override;
};
/*@}*/ // namespace amd::pal
} // namespace amd::pal
+2 -21
Ver fichero
@@ -119,10 +119,8 @@ Settings::Settings() {
std::min(static_cast<uint64_t>(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
maxCmdBuffers_ = 12;
useLightning_ = amd::IS_HIP ? true : ((!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : false);
enableWgpMode_ = false;
enableWave32Mode_ = false;
hsailExplicitXnack_ = false;
lcWavefrontSize64_ = true;
enableHwP2P_ = false;
imageBufferWar_ = false;
@@ -152,7 +150,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
}
enableXNACK_ = (isa.xnack() == amd::Isa::Feature::Enabled);
hsailExplicitXnack_ = enableXNACK_;
bool useWavefront64 = false;
std::string appName = {};
@@ -192,11 +189,8 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
case Pal::AsicRevision::Navi14:
case Pal::AsicRevision::Navi12:
case Pal::AsicRevision::Navi10:
useLightning_ = GPU_ENABLE_LC;
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
if (useLightning_) {
enableWave32Mode_ = true;
}
enableWave32Mode_ = true;
if (!flagIsDefault(GPU_ENABLE_WAVE32_MODE)) {
enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE;
}
@@ -211,9 +205,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
enableHwP2P_ = true;
enableCoopGroups_ = IS_LINUX;
enableCoopMultiDeviceGroups_ = IS_LINUX;
if (useLightning_) {
singleFpDenorm_ = true;
}
singleFpDenorm_ = true;
enableExtension(ClKhrFp16);
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
// Cache line size is 64 bytes
@@ -279,11 +271,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
enableExtension(ClAmdCopyBufferP2P);
}
if (!useLightning_) {
enableExtension(ClAmdPopcnt);
enableExtension(ClAmdVec3);
enableExtension(ClAmdPrintf);
}
// Enable some platform extensions
enableExtension(ClAmdDeviceAttributeQuery);
@@ -306,12 +293,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
enableExtension(ClKhrFp64);
}
if (!useLightning_) {
// Enable AMD double precision extension
doublePrecision_ = true;
enableExtension(ClAmdFp64);
}
if (palProp.gpuMemoryProperties.busAddressableMemSize > 0) {
// Enable bus addressable memory extension
enableExtension(ClAMDBusAddressableMemory);
@@ -216,7 +216,7 @@ bool UberTraceCaptureMgr::Init(Pal::IPlatform* platform) {
}
// ================================================================================================
void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x,
void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x,
size_t y, size_t z) {
// Wait for the driver to be resumed in case it's been paused.
WaitForDriverResume();
@@ -45,7 +45,7 @@ class UberTraceCaptureMgr final : public ICaptureMgr {
bool Update(Pal::IPlatform* platform) override;
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
size_t z) override;
void PostDispatch(VirtualGPU* gpu) override;
+10 -16
Ver fichero
@@ -2346,7 +2346,7 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
}
// ================================================================================================
void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQueue) {
void VirtualGPU::PrintChildren(const pal::Kernel& hsaKernel, VirtualGPU* gpuDefQueue) {
AmdAqlWrap* wraps = (AmdAqlWrap*)(&((AmdVQueueHeader*)gpuDefQueue->virtualQueue_->data())[1]);
uint p = 0;
for (uint i = 0; i < gpuDefQueue->vqHeader_->aql_slot_num; ++i) {
@@ -2381,11 +2381,11 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
print << wraps[i].aql.grid_size_y << ", ";
print << wraps[i].aql.grid_size_z << "]\n";
HSAILKernel* child = nullptr;
pal::Kernel* child = nullptr;
for (auto it = hsaKernel.prog().kernels().begin(); it != hsaKernel.prog().kernels().end();
++it) {
if (wraps[i].aql.kernel_object == static_cast<HSAILKernel*>(it->second)->gpuAqlCode()) {
child = static_cast<HSAILKernel*>(it->second);
if (wraps[i].aql.kernel_object == static_cast<pal::Kernel*>(it->second)->gpuAqlCode()) {
child = static_cast<pal::Kernel*>(it->second);
}
}
if (child == nullptr) {
@@ -2449,7 +2449,7 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
}
// ================================================================================================
bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel,
bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const pal::Kernel& hsaKernel,
VirtualGPU** gpuDefQueue, uint64_t* vmDefQueue) {
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
if (nullptr == defQueue) {
@@ -2482,7 +2482,7 @@ bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel&
}
// ================================================================================================
void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel,
void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const pal::Kernel& hsaKernel,
VirtualGPU* gpuDefQueue, uint64_t vmDefQueue,
uint64_t vmParentWrap, GpuEvent* gpuEvent) {
uint32_t id = gpuEvent->id_;
@@ -2628,7 +2628,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
state_.anyOrder_ = anyOrder;
// Get the HSA kernel object
const HSAILKernel& hsaKernel = static_cast<const HSAILKernel&>(*(kernel.getDeviceKernel(dev())));
const pal::Kernel& hsaKernel = static_cast<const pal::Kernel&>(*(kernel.getDeviceKernel(dev())));
// If RGP capturing is enabled, then start SQTT trace
if (rgpCaptureEna()) {
@@ -2696,7 +2696,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
assert((nullptr != aqlPkt) && "Couldn't load kernel arguments");
// Dynamic call stack size is considered to calculate private segment size and scratch regs
// in LightningKernel::postLoad(). As it is not called during hipModuleLaunchKernel unlike
// in pal::Kernel::postLoad(). As it is not called during hipModuleLaunchKernel unlike
// hipLaunchKernel/hipLaunchKernelGGL, Updated value is passed to dispatch packet.
size_t privateMemSize = hsaKernel.spillSegSize();
if ((hsaKernel.workGroupInfo()->usedStackSize_ & 0x1) == 0x1) {
@@ -2725,13 +2725,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
}
dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlKd();
dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress();
if (!hsaKernel.prog().isLC() && hsaKernel.workGroupInfo()->wavesPerSimdHint_ != 0) {
constexpr uint32_t kWavesPerSimdLimit = 4;
dispatchParam.wavesPerSh =
kWavesPerSimdLimit * dev().info().cuPerShaderArray_ * dev().info().simdPerCU_;
} else {
dispatchParam.wavesPerSh = 0;
}
dispatchParam.wavesPerSh = 0;
dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false;
dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize();
dispatchParam.aqlPacketIndex = aql_index;
@@ -3584,7 +3578,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
bool srdResource = false;
amd::Memory* const* memories =
reinterpret_cast<amd::Memory* const*>(params + kernelParams.memoryObjOffset());
const HSAILKernel& hsaKernel = static_cast<const HSAILKernel&>(*(kernel.getDeviceKernel(dev())));
const pal::Kernel& hsaKernel = static_cast<const pal::Kernel&>(*(kernel.getDeviceKernel(dev())));
const amd::KernelSignature& signature = kernel.signature();
ldsAddress = hsaKernel.ldsSize();
+4 -5
Ver fichero
@@ -45,14 +45,13 @@
namespace amd::pal {
class Device;
class Kernel;
class Memory;
class CalCounterReference;
class VirtualGPU;
class Program;
class BlitManager;
class ThreadTrace;
class HSAILKernel;
class Kernel;
struct AqlPacketMgmt : public amd::EmbeddedObject {
static constexpr uint32_t kAqlPacketsListSize = 4 * Ki;
@@ -693,19 +692,19 @@ class VirtualGPU : public device::VirtualDevice {
amd::CopyMetadata copyMetadata = amd::CopyMetadata() //!< Memory copy MetaData
);
void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel
void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSAIL kernel
VirtualGPU* gpuDefQueue //!< Device queue for children execution
);
bool PreDeviceEnqueue(const amd::Kernel& kernel, //!< Parent amd kernel object
const HSAILKernel& hsaKernel, //!< Parent HSAIL object
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
VirtualGPU** gpuDefQueue, //!< [Return] GPU default queue
uint64_t* vmDefQueue //!< [Return] VM handle to the virtual queue
);
void PostDeviceEnqueue(
const amd::Kernel& kernel, //!< Parent amd kernel object
const HSAILKernel& hsaKernel, //!< Parent HSAIL object
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
VirtualGPU* gpuDefQueue, //!< GPU default queue
uint64_t vmDefQueue, //!< VM handle to the virtual queue
uint64_t vmParentWrap, //!< VM handle to the wrapped AQL packet location
+10 -31
Ver fichero
@@ -125,8 +125,7 @@ bool NullDevice::create(const amd::Isa& isa) {
info_.oclcVersion_ = "OpenCL C " OPENCL_C_VERSION_STR " ";
info_.spirVersions_ = "";
std::stringstream ss;
ss << AMD_BUILD_STRING " (HSA," << (settings().useLightning_ ? "LC" : "HSAIL");
ss << ") [Offline]";
ss << AMD_BUILD_STRING " (HSA,LC) [Offline]";
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
return true;
@@ -704,12 +703,7 @@ bool Device::create() {
// ================================================================================================
device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) {
device::Program* program;
if (settings().useLightning_) {
program = new LightningProgram(*this, owner);
} else {
program = new HSAILProgram(*this, owner);
}
device::Program* program = new roc::Program(*this, owner);
if (program == nullptr) {
LogError("Memory allocation has failed!");
@@ -722,19 +716,15 @@ bool Device::createBlitProgram() {
bool result = true;
std::string extraKernel;
#if defined(USE_COMGR_LIBRARY)
if (settings().useLightning_) {
if (amd::IS_HIP) {
if (settings().gwsInitSupported_) {
extraKernel = device::HipExtraSourceCode;
} else {
extraKernel = device::HipExtraSourceCodeNoGWS;
}
if (amd::IS_HIP) {
if (settings().gwsInitSupported_) {
extraKernel = device::HipExtraSourceCode;
} else {
extraKernel = SchedulerSourceCode;
extraKernel = device::HipExtraSourceCodeNoGWS;
}
} else {
extraKernel = SchedulerSourceCode;
}
#endif // USE_COMGR_LIBRARY
blitProgram_ = new BlitProgram(context_);
// Create blit programs
@@ -749,12 +739,7 @@ bool Device::createBlitProgram() {
}
device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) {
device::Program* program;
if (settings().useLightning_) {
program = new LightningProgram(*this, owner);
} else {
program = new HSAILProgram(*this, owner);
}
device::Program* program = new roc::Program(*this, owner);
if (program == nullptr) {
LogError("Memory allocation has failed!");
@@ -1305,9 +1290,7 @@ bool Device::populateOCLDeviceConstants() {
return false;
}
std::stringstream ss;
ss << AMD_BUILD_STRING " (HSA" << major << "." << minor << ","
<< (settings().useLightning_ ? "LC" : "HSAIL");
ss << ")";
ss << AMD_BUILD_STRING " (HSA" << major << "." << minor << ",LC)";
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
@@ -1480,10 +1463,6 @@ bool Device::populateOCLDeviceConstants() {
if (info_.iommuv2_ || isa().versionMajor() >= 8) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
} else if (!settings().useLightning_) {
if (info_.iommuv2_ || (isa().versionMajor() == 8)) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
}
}
@@ -24,7 +24,6 @@
namespace amd::roc {
#if defined(USE_COMGR_LIBRARY)
bool Kernel::init() { return GetAttrCodePropMetadata(); }
bool Kernel::postLoad() {
@@ -157,6 +156,5 @@ bool Kernel::postLoad() {
program()->rocDevice().AddKernel(*this);
return true;
}
#endif // defined(USE_COMGR_LIBRARY)
} // namespace amd::roc
+6 -49
Ver fichero
@@ -55,6 +55,7 @@ Program::~Program() {
Program::Program(roc::NullDevice& device, amd::Program& owner) : device::Program(device, owner) {
hsaExecutable_.handle = 0;
hsaCodeObjectReader_.handle = 0;
isHIP_ = (owner.language() == amd::Program::HIP);
}
bool Program::initClBinary(char* binaryIn, size_t size) {
@@ -201,58 +202,16 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
return true;
}
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner)
: roc::Program(device, owner) {}
HSAILProgram::~HSAILProgram() {}
bool HSAILProgram::saveBinaryAndSetType(type_t type) { return true; }
bool HSAILProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset,
std::string uri) {
return true;
}
LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
: roc::Program(device, owner) {
isLC_ = true;
isHIP_ = (owner.language() == amd::Program::HIP);
}
bool LightningProgram::createBinary(amd::option::Options* options) {
#if defined(USE_COMGR_LIBRARY)
bool Program::createBinary(amd::option::Options* options) {
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
LogError("Failed to create ELF binary image!");
return false;
}
#endif // defined(USE_COMGR_LIBRARY)
return true;
}
bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) {
#if defined(USE_COMGR_LIBRARY)
// Write binary to memory
if (type == TYPE_EXECUTABLE) { // handle code object binary
assert(rawBinary != nullptr && size != 0 && "must pass in the binary");
} else { // handle LLVM binary
if (llvmBinary_.empty()) {
buildLog_ += "ERROR: Tried to save empty LLVM binary \n";
return false;
}
rawBinary = (void*)llvmBinary_.data();
size = llvmBinary_.size();
}
clBinary()->saveBIFBinary((char*)rawBinary, size);
// Set the type of binary
setType(type);
#endif // defined(USE_COMGR_LIBRARY)
return true;
}
bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
bool Program::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) {
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
@@ -274,9 +233,8 @@ bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUnifo
return true;
}
bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
size_t foffset, std::string uri) {
#if defined(USE_COMGR_LIBRARY)
bool Program::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
size_t foffset, std::string uri) {
// Stop compilation if it is an offline device - HSA runtime does not
// support ISA compiled offline
if (!device().isOnline()) {
@@ -330,7 +288,6 @@ bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDes
return false;
}
}
#endif // defined(USE_COMGR_LIBRARY)
return true;
}
+5 -48
Ver fichero
@@ -29,9 +29,6 @@
//! \namespace amd::roc HSA Device Implementation
namespace amd::roc {
class HSAILProgram;
class LightningProgram;
//! \class empty program
class Program : public device::Program {
friend class ClBinary;
@@ -62,15 +59,6 @@ class Program : public device::Program {
virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
const char* global_name) const;
protected:
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
*
* \return The build error code
*/
int compileBinaryToHSAIL(amd::option::Options* options //!< options for compilation
);
virtual bool createBinary(amd::option::Options* options) = 0;
protected:
//! Disable default copy constructor
Program(const Program&) = delete;
@@ -79,48 +67,17 @@ class Program : public device::Program {
virtual bool defineGlobalVar(const char* name, void* dptr);
protected:
/* HSA executable */
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader
};
class HSAILProgram : public roc::Program {
public:
HSAILProgram(roc::NullDevice& device, amd::Program& owner);
virtual ~HSAILProgram();
protected:
bool createBinary(amd::option::Options* options) override { return true; }
virtual bool setKernels(void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
std::string uri = std::string()) override;
private:
std::string codegenOptions(amd::option::Options* options);
bool saveBinaryAndSetType(type_t type) override;
};
class LightningProgram final : public roc::Program {
public:
LightningProgram(roc::NullDevice& device, amd::Program& owner);
virtual ~LightningProgram() {}
protected:
bool createBinary(amd::option::Options* options) final;
bool saveBinaryAndSetType(type_t type) final { return true; }
private:
bool saveBinaryAndSetType(type_t type, void* rawBinary, size_t size);
bool createBinary(amd::option::Options* options) override final;
bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
bool internalKernel) override final;
bool setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc = amd::Os::FDescInit(),
size_t foffset = 0, std::string uri = std::string()) override final;
protected:
/* HSA executable */
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader
};
/*@}*/ // namespace amd::roc
+6 -14
Ver fichero
@@ -71,8 +71,6 @@ Settings::Settings() {
numDeviceEvents_ = 1024;
numWaitEvents_ = 8;
useLightning_ = (!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : true;
lcWavefrontSize64_ = true;
imageBufferWar_ = false;
@@ -116,7 +114,6 @@ bool Settings::create(bool fullProfile, const amd::Isa& isa, bool enableXNACK, b
pinnedXferSize_ = std::max(pinnedXferSize_, pinnedMinXferSize_);
}
enableXNACK_ = enableXNACK;
hsailExplicitXnack_ = enableXNACK;
// Enable extensions
enableExtension(ClKhrByteAddressableStore);
@@ -146,17 +143,12 @@ bool Settings::create(bool fullProfile, const amd::Isa& isa, bool enableXNACK, b
enableExtension(ClKhrFp16);
supportDepthsRGB_ = true;
if (useLightning_) {
enableExtension(ClAmdAssemblyProgram);
// enable subnormals for gfx900 and later
if (gfxipMajor >= 9) {
singleFpDenorm_ = true;
enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
}
} else {
// Also enable AMD double precision extension?
enableExtension(ClAmdFp64);
enableExtension(ClAmdAssemblyProgram);
// enable subnormals for gfx900 and later
if (gfxipMajor >= 9) {
singleFpDenorm_ = true;
enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
}
if ((gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10) ||
+2 -5
Ver fichero
@@ -806,11 +806,8 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) &&
"Unsupported address qualifier");
const bool readOnly =
#if defined(USE_COMGR_LIBRARY)
desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST ||
#endif // defined(USE_COMGR_LIBRARY)
(mem->getMemFlags() & CL_MEM_READ_ONLY) != 0;
const bool readOnly = (desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST) ||
((mem->getMemFlags() & CL_MEM_READ_ONLY) != 0);
if (!readOnly) {
mem->signalWrite(&dev());
+2 -2
Ver fichero
@@ -55,8 +55,8 @@ target_include_directories(elf_test
PRIVATE
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL -DWITH_LIGHTNING_COMPILER -DDEBUG)
add_definitions(-DCOMGR_DYN_DLL -DDEBUG)
target_link_libraries(elf_test PRIVATE amdrocclr_static)
#-------------------------------------elf_test--------------------------------------#
#-------------------------------------elf_test--------------------------------------#
+1 -3
Ver fichero
@@ -340,7 +340,6 @@ class Kernel : public RuntimeObject {
virtual ObjectType objectType() const { return ObjectTypeKernel; }
#if defined(USE_COMGR_LIBRARY)
// Templated find function to retrieve the right value based on string
template <typename V, typename T, size_t N>
static V FindValue(const T (&structure)[N], const std::string& name);
@@ -417,8 +416,7 @@ class Kernel : public RuntimeObject {
static const KernelFieldMapV3Type kKernelFieldMapV3[];
static const ArgValueKindV3Type kArgValueKindV3[];
static const ArgFieldMapV3Type kArgFieldMapV3[];
#endif
}; // defined(USE_COMGR_LIBRARY)
};
/*! @}
@@ -20,8 +20,6 @@
#pragma once
#if defined(USE_COMGR_LIBRARY)
// Static values initialization from class Kernel.
const amd::Kernel::ArgFieldMapType amd::Kernel::kArgFieldMap[] = {
{"Name", ArgField::Name},
@@ -189,5 +187,3 @@ cl_int amd::Kernel::FindValue(const T (&structure)[N], const std::string& name)
}
return 0;
}
#endif // defined(USE_COMGR_LIBRARY)
+6 -93
Ver fichero
@@ -23,11 +23,6 @@
#include "platform/program.hpp"
#include "platform/context.hpp"
#include "utils/options.hpp"
#if defined(WITH_COMPILER_LIB)
#include "utils/libUtils.h"
#include "utils/bif_section_labels.hpp"
#include "hsailctx.hpp"
#endif
#include <cstdlib> // for malloc
#include <cstring> // for strcmp
@@ -38,21 +33,6 @@
namespace amd {
#if defined(WITH_COMPILER_LIB)
static aclTargetInfo* aclutGetTargetInfo(aclBinary* binary) {
aclTargetInfo* tgt = NULL;
if (binary->struct_size == sizeof(aclBinary_0_8)) {
tgt = &reinterpret_cast<aclBinary_0_8*>(binary)->target;
} else if (binary->struct_size == sizeof(aclBinary_0_8_1)) {
tgt = &reinterpret_cast<aclBinary_0_8_1*>(binary)->target;
} else {
assert(!"Binary format not supported!");
tgt = &binary->target;
}
return tgt;
}
#endif
static void remove_g_option(std::string& option) {
// Remove " -g " option from application.
// People can still add -g in AMD_OCL_BUILD_OPTIONS_APPEND, if it is so desired.
@@ -114,16 +94,7 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng
amd::option::Options* options, const amd::Program* same_prog,
amd::Os::FileDesc fdesc, size_t foffset, std::string uri) {
if (image != NULL && !amd::Elf::isElfMagic((const char*)image)) {
if (device.settings().useLightning_) {
return CL_INVALID_BINARY;
}
#if defined(WITH_COMPILER_LIB)
else if (!amd::Hsail::ValidateBinaryImage(
image, length,
language_ == SPIRV ? BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM)) {
return CL_INVALID_BINARY;
}
#endif // !defined(WITH_COMPILER_LIB)
return CL_INVALID_BINARY;
}
// Check if the device is already associated with this program
@@ -138,43 +109,11 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng
return CL_SUCCESS;
}
#if defined(WITH_COMPILER_LIB)
bool emptyOptions = (options == nullptr);
#endif
amd::option::Options emptyOpts;
if (options == NULL) {
options = &emptyOpts;
}
#if defined(WITH_COMPILER_LIB)
if (image != NULL && length != 0 &&
amd::Hsail::ValidateBinaryImage(image, length, BINARY_TYPE_ELF)) {
acl_error errorCode;
aclBinary* binary = amd::Hsail::ReadFromMem(image, length, &errorCode);
if (errorCode != ACL_SUCCESS) {
return CL_INVALID_BINARY;
}
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
assert(symbol && "symbol not found");
std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
size_t symSize = 0;
const void* opts = amd::Hsail::ExtractSymbol(device.binCompiler(), binary, &symSize, aclCOMMENT,
symName.c_str(), &errorCode);
// if we have options from binary and input options was not specified
if (opts != NULL && emptyOptions) {
std::string sBinOptions = std::string((char*)opts, symSize);
if (!amd::option::parseAllOptions(sBinOptions, *options, false, false)) {
programLog_ = options->optionsLog();
LogError("Parsing compilation options from binary failed.");
return CL_INVALID_COMPILER_OPTIONS;
}
}
options->oVariables->Legacy = !device.settings().useLightning_
? isAMDILTarget(*amd::aclutGetTargetInfo(binary))
: isHSAILTarget(*amd::aclutGetTargetInfo(binary));
amd::Hsail::BinaryFini(binary);
}
#endif // defined(WITH_COMPILER_LIB)
options->oVariables->BinaryIsSpirv = language_ == SPIRV;
device::Program* program = rootDev.createProgram(*this, options);
if (program == NULL) {
@@ -272,8 +211,7 @@ int32_t Program::compile(const std::vector<Device*>& devices, size_t numHeaders,
for (const auto& it : devices) {
option::Options parsedOptions;
constexpr bool LinkOptsOnly = false;
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly,
it->settings().useLightning_)) {
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) {
programLog_ = parsedOptions.optionsLog();
LogError("Parsing compile options failed.");
return CL_INVALID_COMPILER_OPTIONS;
@@ -345,8 +283,7 @@ int32_t Program::link(const std::vector<Device*>& devices, size_t numInputs,
for (const auto& it : devices) {
option::Options parsedOptions;
constexpr bool LinkOptsOnly = true;
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly,
it->settings().useLightning_)) {
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) {
programLog_ = parsedOptions.optionsLog();
LogError("Parsing link options failed.");
return CL_INVALID_LINKER_OPTIONS;
@@ -366,29 +303,6 @@ int32_t Program::link(const std::vector<Device*>& devices, size_t numInputs,
continue;
}
inputDevPrograms[i] = findIt->second;
// Check the binary's target for the first found device program.
// TODO: Revise these binary's target checks
// and possibly remove them after switching to HSAIL by default.
#if defined(WITH_COMPILER_LIB)
device::Program::binary_t binary = inputDevPrograms[i]->binary();
if (!found && binary.first != NULL && binary.second > 0 &&
amd::Hsail::ValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) {
acl_error errorCode = ACL_SUCCESS;
void* mem = const_cast<void*>(binary.first);
aclBinary* aclBin = amd::Hsail::ReadFromMem(mem, binary.second, &errorCode);
if (errorCode != ACL_SUCCESS) {
LogWarning("Error while linking: Could not read from raw binary.");
return CL_INVALID_BINARY;
}
if (isHSAILTarget(*amd::aclutGetTargetInfo(aclBin))) {
parsedOptions.oVariables->Frontend = "clang";
parsedOptions.oVariables->Legacy = it->settings().useLightning_;
} else if (isAMDILTarget(*amd::aclutGetTargetInfo(aclBin))) {
parsedOptions.oVariables->Frontend = "edg";
}
amd::Hsail::BinaryFini(aclBin);
}
#endif // defined(WITH_COMPILER_LIB)
found = true;
}
if (inputDevPrograms.size() == 0) {
@@ -522,8 +436,7 @@ int32_t Program::build(const std::vector<Device*>& devices, const char* options,
for (const auto& it : devices) {
option::Options parsedOptions;
constexpr bool LinkOptsOnly = false;
if ((language_ != HIP) && !ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly,
it->settings().useLightning_)) {
if ((language_ != HIP) && !ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) {
programLog_ = parsedOptions.optionsLog();
LogError("Parsing compile options failed.");
return CL_INVALID_COMPILER_OPTIONS;
@@ -677,7 +590,7 @@ int Program::GetOclCVersion(const char* clVer) {
}
bool Program::ParseAllOptions(const std::string& options, option::Options& parsedOptions,
bool optionChangable, bool linkOptsOnly, bool isLC) {
bool optionChangable, bool linkOptsOnly) {
std::string allOpts = options;
if (optionChangable) {
if (linkOptsOnly) {
@@ -704,7 +617,7 @@ bool Program::ParseAllOptions(const std::string& options, option::Options& parse
}
}
}
return amd::option::parseAllOptions(allOpts, parsedOptions, linkOptsOnly, isLC);
return amd::option::parseAllOptions(allOpts, parsedOptions, linkOptsOnly);
}
bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func) {
+1 -1
Ver fichero
@@ -225,7 +225,7 @@ class Program : public RuntimeObject {
static int GetOclCVersion(const char* clVer);
bool static ParseAllOptions(const std::string& options, option::Options& parsedOptions,
bool optionChangable, bool linkOptsOnly, bool isLC);
bool optionChangable, bool linkOptsOnly);
void setVarInfoCallBack(VarInfoCallback callback) { varcallback = callback; }
-2
Ver fichero
@@ -141,8 +141,6 @@ release(uint, PAL_MALL_POLICY, 0, \
"2 = Allocations will always be put through the MALL") \
release(bool, GPU_ENABLE_WAVE32_MODE, true, \
"Enables Wave32 compilation in HW if available") \
release(bool, GPU_ENABLE_LC, true, \
"Enables LC path") \
release(bool, GPU_ENABLE_HW_P2P, false, \
"Enables HW P2P path") \
release(bool, GPU_ENABLE_COOP_GROUPS, true, \