SWDEV-556684 - Remove HSAIL support (#1183)
这个提交包含在:
@@ -128,47 +128,6 @@ clGetKernelSubGroupInfo
|
||||
clSetDefaultDeviceCommandQueue
|
||||
#endif
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
aclCompilerInit
|
||||
aclCompilerFini
|
||||
aclCompilerVersion
|
||||
aclVersionSize
|
||||
aclGetErrorString
|
||||
aclGetArchInfo
|
||||
aclGetDeviceInfo
|
||||
aclGetTargetInfo
|
||||
aclGetArchitecture
|
||||
aclGetFamily
|
||||
aclGetChip
|
||||
aclBinaryInit
|
||||
aclBinaryFini
|
||||
aclReadFromFile
|
||||
aclReadFromMem
|
||||
aclWriteToFile
|
||||
aclWriteToMem
|
||||
aclCreateFromBinary
|
||||
aclBinaryVersion
|
||||
aclInsertSection
|
||||
aclRemoveSection
|
||||
aclExtractSection
|
||||
aclInsertSymbol
|
||||
aclRemoveSymbol
|
||||
aclExtractSymbol
|
||||
aclDbgAddArgument
|
||||
aclDbgRemoveArgument
|
||||
aclQueryInfo
|
||||
aclCompile
|
||||
aclLink
|
||||
aclGetCompilerLog
|
||||
aclRetrieveType
|
||||
aclSetType
|
||||
aclConvertType
|
||||
aclDisassemble
|
||||
aclInsertKernelStatistics
|
||||
aclGetDeviceBinary
|
||||
aclDumpBinary
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
#if (OPENCL_MAJOR > 2) || (OPENCL_MAJOR == 2 && OPENCL_MINOR >= 1)
|
||||
clCreateProgramWithIL
|
||||
#endif
|
||||
|
||||
@@ -135,9 +135,6 @@ RUNTIME_ENTRY(cl_int, clGetPlatformInfo,
|
||||
"cl_khr_dx9_media_sharing "
|
||||
#endif //_WIN32
|
||||
"cl_amd_event_callback "
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
"cl_amd_offline_devices "
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
;
|
||||
break;
|
||||
case CL_PLATFORM_ICD_SUFFIX_KHR:
|
||||
|
||||
@@ -20,15 +20,9 @@
|
||||
|
||||
# ROCclr abstracts the usage of multiple AMD compilers and runtimes.
|
||||
# It is possible to support multiple backends concurrently in the same binary.
|
||||
option(ROCCLR_ENABLE_HSAIL "Enable support for HSAIL compiler" OFF)
|
||||
option(ROCCLR_ENABLE_LC "Enable support for LC compiler" ON)
|
||||
option(ROCCLR_ENABLE_HSA "Enable support for HSA runtime" ON)
|
||||
option(ROCCLR_ENABLE_PAL "Enable support for PAL runtime" OFF)
|
||||
|
||||
# At least one compiler backend must be enabled. FATAL_ERROR is the message()
# mode that actually stops configuration; a bare "FATAL" is not a mode keyword
# and would only be printed as part of an ordinary notice.
if((NOT ROCCLR_ENABLE_HSAIL) AND (NOT ROCCLR_ENABLE_LC))
|
||||
message(FATAL_ERROR "Support for at least one compiler needs to be enabled!")
|
||||
endif()
|
||||
|
||||
# At least one runtime backend must be enabled. FATAL_ERROR is the message()
# mode that actually stops configuration; a bare "FATAL" is not a mode keyword
# and would only be printed as part of an ordinary notice.
if((NOT ROCCLR_ENABLE_HSA) AND (NOT ROCCLR_ENABLE_PAL))
|
||||
message(FATAL_ERROR "Support for at least one runtime needs to be enabled!")
|
||||
endif()
|
||||
@@ -68,7 +62,6 @@ target_sources(rocclr PRIVATE
|
||||
${ROCCLR_SRC_DIR}/device/device.cpp
|
||||
${ROCCLR_SRC_DIR}/device/devkernel.cpp
|
||||
${ROCCLR_SRC_DIR}/device/devprogram.cpp
|
||||
${ROCCLR_SRC_DIR}/device/hsailctx.cpp
|
||||
${ROCCLR_SRC_DIR}/elf/elf.cpp
|
||||
${ROCCLR_SRC_DIR}/os/alloc.cpp
|
||||
${ROCCLR_SRC_DIR}/os/os_posix.cpp
|
||||
@@ -142,13 +135,7 @@ if(UNIX)
|
||||
target_link_libraries(rocclr PUBLIC rt)
|
||||
endif()
|
||||
|
||||
if(ROCCLR_ENABLE_HSAIL)
|
||||
include(ROCclrHSAIL)
|
||||
endif()
|
||||
|
||||
if(ROCCLR_ENABLE_LC)
|
||||
include(ROCclrLC)
|
||||
endif()
|
||||
include(ROCclrLC)
|
||||
|
||||
if(ROCCLR_ENABLE_HSA)
|
||||
include(ROCclrHSA)
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
target_compile_definitions(rocclr PUBLIC WITH_COMPILER_LIB HSAIL_DYN_DLL)
|
||||
@@ -37,7 +37,7 @@ if (NOT amd_comgr_FOUND)
|
||||
endif()
|
||||
|
||||
get_target_property(_amd_comgr_lib_type amd_comgr TYPE)
|
||||
target_compile_definitions(rocclr PUBLIC WITH_LIGHTNING_COMPILER USE_COMGR_LIBRARY)
|
||||
target_compile_definitions(rocclr PUBLIC)
|
||||
if(_amd_comgr_lib_type STREQUAL "SHARED_LIBRARY")
|
||||
target_compile_definitions(rocclr PUBLIC COMGR_DYN_DLL)
|
||||
endif()
|
||||
|
||||
@@ -1,217 +0,0 @@
|
||||
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _ACL_0_8_H_
|
||||
#define _ACL_0_8_H_
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "aclTypes.h"
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with aclCompiler objects.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
aclCompiler* ACL_API_ENTRY aclCompilerInit(aclCompilerOptions* opts,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
acl_error ACL_API_ENTRY aclCompilerFini(aclCompiler* cl) ACL_API_0_8;
|
||||
aclCLVersion ACL_API_ENTRY aclCompilerVersion(aclCompiler* cl, acl_error* error_code) ACL_API_0_8;
|
||||
uint32_t ACL_API_ENTRY aclVersionSize(aclCLVersion num, acl_error* error_code) ACL_API_0_8;
|
||||
const char* ACL_API_ENTRY aclGetErrorString(acl_error error_code) ACL_API_0_8;
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with target specific information.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
//! Returns in the names argument, if non-NULL, a pointer to each of the arch
|
||||
// names that the compiler supports. If names is NULL and arch_size is
|
||||
// non-NULL, returns the number of arch entries that are required.
|
||||
acl_error ACL_API_ENTRY aclGetArchInfo(const char** arch_names, size_t* arch_size) ACL_API_0_8;
|
||||
|
||||
//! Returns in the names argument, if non-NULL, a pointer to each device
|
||||
// name that the compiler supports. If device_size is non-NULL,
|
||||
// returns the number of device entries that are used.
|
||||
acl_error ACL_API_ENTRY aclGetDeviceInfo(const char* arch, const char** names,
|
||||
size_t* device_size) ACL_API_0_8;
|
||||
|
||||
//! Function that returns a correctly filled out aclTargetInfo structure based
|
||||
// on the information passed into the kernel.
|
||||
aclTargetInfo ACL_API_ENTRY aclGetTargetInfo(const char* arch, const char* device,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
//! Function that returns a correctly filled out aclTargetInfo structure based
|
||||
// on the architecture name and chip ID passed in.
|
||||
aclTargetInfo ACL_API_ENTRY aclGetTargetInfoFromChipID(const char* arch, const uint32_t chip_id,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
//! Function that returns a string representation of the target architecture.
|
||||
const char* ACL_API_ENTRY aclGetArchitecture(const aclTargetInfo& target) ACL_API_0_8;
|
||||
|
||||
//! Function that returns the target chip options as a 64-bit value.
|
||||
const uint64_t ACL_API_ENTRY aclGetChipOptions(const aclTargetInfo& target) ACL_API_0_8;
|
||||
|
||||
//! Function that returns a string representation of the target family.
|
||||
const char* ACL_API_ENTRY aclGetFamily(const aclTargetInfo& target) ACL_API_0_8;
|
||||
|
||||
//! Function that returns a string representation of the target chip.
|
||||
const char* ACL_API_ENTRY aclGetChip(const aclTargetInfo& target) ACL_API_0_8;
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with aclBinary objects.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
aclBinary* ACL_API_ENTRY aclBinaryInit(size_t struct_version, const aclTargetInfo* target,
|
||||
const aclBinaryOptions* options,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclBinaryFini(aclBinary* bin) ACL_API_0_8;
|
||||
|
||||
aclBinary* ACL_API_ENTRY aclReadFromFile(const char* str, acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
aclBinary* ACL_API_ENTRY aclReadFromMem(const void* mem, size_t size,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclWriteToFile(aclBinary* bin, const char* str) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclWriteToMem(aclBinary* bin, void** mem, size_t* size) ACL_API_0_8;
|
||||
|
||||
aclBinary* ACL_API_ENTRY aclCreateFromBinary(const aclBinary* binary,
|
||||
aclBIFVersion version) ACL_API_0_8;
|
||||
|
||||
aclBIFVersion ACL_API_ENTRY aclBinaryVersion(const aclBinary* binary) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclInsertSection(aclCompiler* cl, aclBinary* binary, const void* data,
|
||||
size_t data_size, aclSections id) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclInsertSymbol(aclCompiler* cl, aclBinary* binary, const void* data,
|
||||
size_t data_size, aclSections id,
|
||||
const char* symbol) ACL_API_0_8;
|
||||
|
||||
const void* ACL_API_ENTRY aclExtractSection(aclCompiler* cl, const aclBinary* binary, size_t* size,
|
||||
aclSections id, acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
const void* ACL_API_ENTRY aclExtractSymbol(aclCompiler* cl, const aclBinary* binary, size_t* size,
|
||||
aclSections id, const char* symbol,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclRemoveSection(aclCompiler* cl, aclBinary* binary,
|
||||
aclSections id) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclRemoveSymbol(aclCompiler* cl, aclBinary* binary, aclSections id,
|
||||
const char* symbol) ACL_API_0_8;
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with debug/metadata.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
acl_error ACL_API_ENTRY aclQueryInfo(aclCompiler* cl, const aclBinary* binary, aclQueryType query,
|
||||
const char* kernel, void* data_ptr,
|
||||
size_t* ptr_size) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclDbgAddArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
|
||||
const char* name, bool byVal) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclDbgRemoveArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
|
||||
const char* name) ACL_API_0_8;
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with various compilation phases.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
acl_error ACL_API_ENTRY aclCompile(aclCompiler* cl, aclBinary* bin, const char* options,
|
||||
aclType from, aclType to,
|
||||
aclLogFunction compile_callback) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclLink(aclCompiler* cl, aclBinary* src_bin, unsigned int num_libs,
|
||||
aclBinary** libs, aclType link_mode, const char* options,
|
||||
aclLogFunction link_callback) ACL_API_0_8;
|
||||
|
||||
const char* ACL_API_ENTRY aclGetCompilerLog(aclCompiler* cl) ACL_API_0_8;
|
||||
|
||||
const void* ACL_API_ENTRY aclRetrieveType(aclCompiler* cl, const aclBinary* bin, const char* name,
|
||||
size_t* data_size, aclType type,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclSetType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type,
|
||||
const void* data, size_t size) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclConvertType(aclCompiler* cl, aclBinary* bin, const char* name,
|
||||
aclType type) ACL_API_0_8;
|
||||
|
||||
acl_error ACL_API_ENTRY aclDisassemble(aclCompiler* cl, aclBinary* bin, const char* kernel,
|
||||
aclLogFunction disasm_callback) ACL_API_0_8;
|
||||
|
||||
const void* ACL_API_ENTRY aclGetDeviceBinary(aclCompiler* cl, const aclBinary* bin,
|
||||
const char* kernel, size_t* size,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with binary image.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
bool ACL_API_ENTRY aclValidateBinaryImage(const void* binary, size_t length, unsigned) ACL_API_0_8;
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with aclJITObjectImage objects.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
aclJITObjectImage ACL_API_ENTRY aclJITObjectImageCreate(aclCompiler* cl, const void* buffer,
|
||||
size_t length, aclBinary* bin,
|
||||
acl_error* error_code);
|
||||
|
||||
aclJITObjectImage ACL_API_ENTRY aclJITObjectImageCopy(aclCompiler* cl, const void* buffer,
|
||||
size_t length, acl_error* error_code);
|
||||
|
||||
acl_error ACL_API_ENTRY aclJITObjectImageDestroy(aclCompiler* cl, aclJITObjectImage buffer);
|
||||
|
||||
acl_error ACL_API_ENTRY aclJITObjectImageFinalize(aclCompiler* cl, aclJITObjectImage image);
|
||||
|
||||
size_t ACL_API_ENTRY aclJITObjectImageSize(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code);
|
||||
|
||||
const char* ACL_API_ENTRY aclJITObjectImageData(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code);
|
||||
|
||||
size_t ACL_API_ENTRY aclJITObjectImageGetGlobalsSize(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code);
|
||||
|
||||
acl_error ACL_API_ENTRY aclJITObjectImageIterateSymbols(aclCompiler* cl, aclJITObjectImage image,
|
||||
aclJITSymbolCallback callback, void* data);
|
||||
|
||||
#if defined(LEGACY_COMPLIB)
|
||||
char* ACL_API_ENTRY aclJITObjectImageDisassembleKernel(aclCompiler* cl,
|
||||
constAclJITObjectImage image,
|
||||
const char* kernel, acl_error* error_code);
|
||||
#endif
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Debug functionality
|
||||
//!--------------------------------------------------------------------------!//
|
||||
void aclDumpBinary(const aclBinary* bin);
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with kernel statistics.
|
||||
//!--------------------------------------------------------------------------!//
|
||||
void aclGetKstatsSI(const void* shader, aclKernelStats& kstats);
|
||||
acl_error ACL_API_ENTRY aclInsertKernelStatistics(aclCompiler* cl, aclBinary* bin);
|
||||
//! Define hardware info constants for SI and above devices
|
||||
static constexpr unsigned SI_sgprs_avail = 102;
|
||||
static constexpr unsigned SI_vgprs_avail = 256;
|
||||
static constexpr unsigned SI_ldssize_avail = 32 * 1024;
|
||||
|
||||
//!--------------------------------------------------------------------------!//
|
||||
// Functions that deal with memory.
|
||||
// Free memory allocated by aclWriteToMem
|
||||
//!--------------------------------------------------------------------------!//
|
||||
acl_error ACL_API_ENTRY aclFreeMem(aclBinary* bin, void* mem);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // _ACL_0_8_H_
|
||||
@@ -1,54 +0,0 @@
|
||||
/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _ACL_DEFS_0_8_H_
|
||||
#define _ACL_DEFS_0_8_H_
|
||||
|
||||
#ifndef ACL_API_ENTRY
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define ACL_API_ENTRY __stdcall
|
||||
#else
|
||||
#define ACL_API_ENTRY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ACL_API_0_8
|
||||
#define ACL_API_0_8
|
||||
#endif
|
||||
|
||||
#ifndef BIF_API_2_0
|
||||
#define BIF_API_2_0
|
||||
#endif
|
||||
|
||||
#ifndef BIF_API_2_1
|
||||
#define BIF_API_2_1
|
||||
#endif
|
||||
|
||||
#ifndef BIF_API_3_0
|
||||
#define BIF_API_3_0
|
||||
#endif
|
||||
|
||||
#ifndef MAX_HIDDEN_KERNARGS_NUM
|
||||
#define MAX_HIDDEN_KERNARGS_NUM 6
|
||||
#else
|
||||
#error "MAX_HIDDEN_KERNARGS_NUM is already defined"
|
||||
#endif
|
||||
|
||||
#endif // _ACL_DEFS_0_8_H_
|
||||
@@ -1,364 +0,0 @@
|
||||
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _ACL_ENUMS_0_8_H_
|
||||
#define _ACL_ENUMS_0_8_H_
|
||||
|
||||
typedef enum _acl_error_enum_0_8 {
|
||||
ACL_SUCCESS = 0,
|
||||
ACL_ERROR = 1,
|
||||
ACL_INVALID_ARG = 2,
|
||||
ACL_OUT_OF_MEM = 3,
|
||||
ACL_SYS_ERROR = 4,
|
||||
ACL_UNSUPPORTED = 5,
|
||||
ACL_ELF_ERROR = 6,
|
||||
ACL_INVALID_FILE = 7,
|
||||
ACL_INVALID_COMPILER = 8,
|
||||
ACL_INVALID_TARGET = 9,
|
||||
ACL_INVALID_BINARY = 10,
|
||||
ACL_INVALID_OPTION = 11,
|
||||
ACL_INVALID_TYPE = 12,
|
||||
ACL_INVALID_SECTION = 13,
|
||||
ACL_INVALID_SYMBOL = 14,
|
||||
ACL_INVALID_QUERY = 15,
|
||||
ACL_FRONTEND_FAILURE = 16,
|
||||
ACL_INVALID_BITCODE = 17,
|
||||
ACL_LINKER_ERROR = 18,
|
||||
ACL_OPTIMIZER_ERROR = 19,
|
||||
ACL_CODEGEN_ERROR = 20,
|
||||
ACL_ISAGEN_ERROR = 21,
|
||||
ACL_INVALID_SOURCE = 22,
|
||||
ACL_LIBRARY_ERROR = 23,
|
||||
ACL_INVALID_SPIR = 24,
|
||||
ACL_LWVERIFY_FAIL = 25,
|
||||
ACL_HWVERIFY_FAIL = 26,
|
||||
ACL_SPIRV_LOAD_FAIL = 27,
|
||||
ACL_SPIRV_SAVE_FAIL = 28,
|
||||
ACL_LAST_ERROR = 29
|
||||
} acl_error_0_8;
|
||||
|
||||
typedef enum _comp_device_caps_enum_0_8 {
|
||||
capError = 0,
|
||||
capFMA = 1,
|
||||
capImageSupport = 2,
|
||||
capSaveSOURCE = 3, // input source
|
||||
capSaveLLVMIR = 4, // output LLVMIR from frontend
|
||||
capSaveCG = 5, // output from LLVM-BE
|
||||
capSaveEXE = 6, // output executable
|
||||
capSaveAMDIL = 7, // Save per-kernel AMDIL
|
||||
capSaveHSAIL = 8, // Save per-kernel HSAIL
|
||||
capEncrypted = 9,
|
||||
capSaveDISASM = 10,
|
||||
capSaveAS = 11,
|
||||
capSaveSPIR = 12,
|
||||
capDumpLast = 13
|
||||
} compDeviceCaps_0_8;
|
||||
|
||||
typedef enum _comp_opt_settings_enum_0_8 {
|
||||
optO0 = 0, // No optimization setting.
|
||||
optO1 = 1,
|
||||
optO2 = 2,
|
||||
optO3 = 3,
|
||||
optO4 = 4,
|
||||
optOs = 5,
|
||||
optError = 6, // Invalid optimization set
|
||||
optLast = 7
|
||||
} compOptSettings_0_8;
|
||||
|
||||
#define FLAG_SHIFT_VALUE 5
|
||||
#define FLAG_MASK_VALUE ((1 << capDumpLast) - 1)
|
||||
#define FLAG_BITLOC(A) (1 << ((A) & FLAG_MASK_VALUE))
|
||||
#define FLAG_ARRAY_SIZE 4
|
||||
|
||||
//! An enumeration that defines the possible valid device types that
|
||||
// can be compiled for.
|
||||
typedef enum _acl_dev_type_enum_0_8 {
|
||||
aclError = 0, // aclDevType of 0 is an error.
|
||||
aclX86 = 1, // Targeting a 32bit X86 CPU device.
|
||||
aclAMDIL = 2, // Targeting an AMDIL GPU device.
|
||||
aclHSAIL = 3, // Targeting an HSAIL GPU device.
|
||||
aclX64 = 4, // Targeting a 64bit X86 CPU device.
|
||||
aclHSAIL64 = 5, // Targeting a 64bit HSAIL GPU device.
|
||||
aclAMDIL64 = 6, // Targeting a 64bit AMDIL GPU device
|
||||
aclLast = 7
|
||||
} aclDevType_0_8;
|
||||
|
||||
//! Enum that represents the versions of the compiler
|
||||
typedef enum _acl_cl_version_enum_0_8 {
|
||||
ACL_VERSION_ERROR = 0,
|
||||
ACL_VERSION_0_7 = 1,
|
||||
ACL_VERSION_0_8 = 2,
|
||||
ACL_VERSION_0_8_1 = 3,
|
||||
ACL_VERSION_0_9 = 4,
|
||||
ACL_VERSION_1_0 = 5,
|
||||
ACL_VERSION_LAST = 6
|
||||
} aclCLVersion_0_8;
|
||||
|
||||
//! Enum of the various aclTypes that are supported
|
||||
typedef enum _acl_type_enum_0_8 {
|
||||
ACL_TYPE_DEFAULT = 0,
|
||||
ACL_TYPE_OPENCL = 1,
|
||||
ACL_TYPE_LLVMIR_TEXT = 2,
|
||||
ACL_TYPE_LLVMIR_BINARY = 3,
|
||||
ACL_TYPE_SPIR_TEXT = 4,
|
||||
ACL_TYPE_SPIR_BINARY = 5,
|
||||
ACL_TYPE_AMDIL_TEXT = 6,
|
||||
ACL_TYPE_AMDIL_BINARY = 7,
|
||||
ACL_TYPE_HSAIL_TEXT = 8,
|
||||
ACL_TYPE_HSAIL_BINARY = 9,
|
||||
ACL_TYPE_X86_TEXT = 10,
|
||||
ACL_TYPE_X86_BINARY = 11,
|
||||
ACL_TYPE_CG = 12,
|
||||
ACL_TYPE_SOURCE = 13,
|
||||
ACL_TYPE_ISA = 14,
|
||||
ACL_TYPE_HEADER = 15,
|
||||
ACL_TYPE_RSLLVMIR_BINARY = 16,
|
||||
ACL_TYPE_SPIRV_BINARY = 17,
|
||||
ACL_TYPE_ASM_TEXT = 18,
|
||||
ACL_TYPE_LAST = 19
|
||||
} aclType_0_8;
|
||||
|
||||
//! Enum of the various loader types that are supported.
|
||||
typedef enum _acl_loader_type_enum_0_8 {
|
||||
ACL_LOADER_COMPLIB = 0,
|
||||
ACL_LOADER_FRONTEND = 1,
|
||||
ACL_LOADER_LINKER = 2,
|
||||
ACL_LOADER_OPTIMIZER = 3,
|
||||
ACL_LOADER_CODEGEN = 4,
|
||||
ACL_LOADER_BACKEND = 5,
|
||||
ACL_LOADER_SC = 6,
|
||||
ACL_LOADER_LAST = 7
|
||||
} aclLoaderType_0_8;
|
||||
|
||||
// Enumeration for the various BIF versions
|
||||
typedef enum _bif_version_enum_0_8 {
|
||||
aclBIFVersionError = 0, // Error
|
||||
aclBIFVersion20 = 1, // Version 2.0 of the OpenCL BIF
|
||||
aclBIFVersion21 = 2, // Version 2.1 of the OpenCL BIF
|
||||
aclBIFVersion30 = 3, // Version 3.0 of the OpenCL BIF
|
||||
aclBIFVersion31 = 4, // Version 3.1 of the OpenCL BIF
|
||||
aclBIFVersionLatest = aclBIFVersion31, // Most recent version of the BIF
|
||||
aclBIFVersionCAL = 5,
|
||||
aclBIFVersionLast = 6
|
||||
} aclBIFVersion_0_8;
|
||||
|
||||
// Enumeration for the various platform types
|
||||
typedef enum _bif_platform_enum_0_8 {
|
||||
aclPlatformCAL = 0, // For BIF 2.0 backward compatibility
|
||||
aclPlatformCPU = 1, // For BIF 2.0 backward compatibility
|
||||
aclPlatformCompLib = 2,
|
||||
aclPlatformLast = 3
|
||||
} aclPlatform_0_8;
|
||||
|
||||
// Enumeration for the various bif sections
|
||||
typedef enum _bif_sections_enum_0_8 {
|
||||
aclLLVMIR = 0,
|
||||
aclSOURCE = 1,
|
||||
aclILTEXT = 2, // For BIF 2.0 backward compatibility
|
||||
aclASTEXT = 3, // For BIF 2.0 backward compatibility
|
||||
aclCAL = 4, // For BIF 2.0 backward compatibility
|
||||
aclDLL = 5, // For BIF 2.0 backward compatibility
|
||||
aclSTRTAB = 6,
|
||||
aclSYMTAB = 7,
|
||||
aclRODATA = 8,
|
||||
aclSHSTRTAB = 9,
|
||||
aclNOTES = 10,
|
||||
aclCOMMENT = 11,
|
||||
aclILDEBUG = 12, // For BIF 2.0 backward compatibility
|
||||
aclDEBUG_INFO = 13,
|
||||
aclDEBUG_ABBREV = 14,
|
||||
aclDEBUG_LINE = 15,
|
||||
aclDEBUG_PUBNAMES = 16,
|
||||
aclDEBUG_PUBTYPES = 17,
|
||||
aclDEBUG_LOC = 18,
|
||||
aclDEBUG_ARANGES = 19,
|
||||
aclDEBUG_RANGES = 20,
|
||||
aclDEBUG_MACINFO = 21,
|
||||
aclDEBUG_STR = 22,
|
||||
aclDEBUG_FRAME = 23,
|
||||
aclJITBINARY = 24, // For BIF 2.0 backward compatibility
|
||||
aclCODEGEN = 25,
|
||||
aclTEXT = 26,
|
||||
aclINTERNAL = 27,
|
||||
aclSPIR = 28,
|
||||
aclHEADER = 29,
|
||||
aclBRIG = 30,
|
||||
aclBRIGxxx1 = 31,
|
||||
aclBRIGxxx2 = 32,
|
||||
aclBRIGxxx3 = 33,
|
||||
aclHSADEBUG = 34,
|
||||
aclKSTATS = 35, // For storing kernel statistics
|
||||
aclSPIRV = 36,
|
||||
aclLAST = 37
|
||||
} aclSections_0_8;
|
||||
|
||||
//! An enumeration that defines what are valid queries for aclQueryInfo.
|
||||
typedef enum _rt_query_types_enum_0_8 {
|
||||
RT_ABI_VERSION = 0,
|
||||
RT_DEVICE_NAME = 1,
|
||||
RT_MEM_SIZES = 2,
|
||||
RT_GPU_FUNC_CAPS = 3,
|
||||
RT_GPU_FUNC_ID = 4,
|
||||
RT_GPU_DEFAULT_ID = 5,
|
||||
RT_WORK_GROUP_SIZE = 6,
|
||||
RT_WORK_REGION_SIZE = 7,
|
||||
RT_ARGUMENT_ARRAY = 8,
|
||||
RT_GPU_PRINTF_ARRAY = 9,
|
||||
RT_CPU_BARRIER_NAMES = 10,
|
||||
RT_DEVICE_ENQUEUE = 11,
|
||||
RT_KERNEL_INDEX = 12,
|
||||
RT_KERNEL_NAME = 13,
|
||||
RT_KERNEL_NAMES = 14,
|
||||
RT_CONTAINS_LLVMIR = 15,
|
||||
RT_CONTAINS_OPTIONS = 16,
|
||||
RT_CONTAINS_BRIG = 17,
|
||||
RT_CONTAINS_HSAIL = 18,
|
||||
RT_CONTAINS_ISA = 19,
|
||||
RT_CONTAINS_LOADER_MAP = 20,
|
||||
RT_CONTAINS_SPIR = 21,
|
||||
RT_NUM_KERNEL_HIDDEN_ARGS = 22,
|
||||
RT_CONTAINS_SPIRV = 23,
|
||||
RT_WAVES_PER_SIMD_HINT = 24,
|
||||
RT_WORK_GROUP_SIZE_HINT = 25,
|
||||
RT_VEC_TYPE_HINT = 26,
|
||||
RT_LAST_TYPE = 27
|
||||
} aclQueryType_0_8;
|
||||
|
||||
//! An enumeration for the various GPU capabilities
|
||||
typedef enum _rt_gpu_caps_enum_0_8 {
|
||||
RT_COMPILER_WRITE = 1 << 0,
|
||||
RT_DATA_SECTION = 1 << 1,
|
||||
RT_WGS = 1 << 2,
|
||||
RT_LIMIT_WGS = 1 << 3,
|
||||
RT_PACKED_REGS = 1 << 4,
|
||||
RT_64BIT_ABI = 1 << 5,
|
||||
RT_PRINTF = 1 << 6,
|
||||
RT_ARENA_UAV = 1 << 7,
|
||||
RT_LRP_MEM = 1 << 8, // Local/Region/Private Memory
|
||||
RT_INDEX_TEMPS = 1 << 9,
|
||||
RT_WRS = 1 << 10,
|
||||
RT_GWS = 1 << 11,
|
||||
RT_SWGWS = 1 << 12,
|
||||
RT_GPU_CAPS_MASK = 0xFFF
|
||||
} aclGPUCaps_0_8;
|
||||
|
||||
//! An enumeration for the various CPU capabilities.
|
||||
typedef enum _rt_cpu_caps_enum_0_8 {
|
||||
RT_KERNEL_BARRIER = 1 << 0,
|
||||
RT_PROGRAM_BARRIER = 1 << 1,
|
||||
RT_CPU_CAPS_MASK = 0x3
|
||||
} aclCPUCaps_0_8;
|
||||
|
||||
//! An enumeration that maps Resource type to index values
|
||||
typedef enum _rt_gpu_resource_enum_0_8 {
|
||||
RT_RES_UAV = 0, // UAV resources
|
||||
RT_RES_PRI = 1, // Private resources
|
||||
RT_RES_LDS = 2, // LDS resources
|
||||
RT_RES_GDS = 3, // GDS resources
|
||||
RT_RES_CON = 4, // Constant resources
|
||||
RT_RES_LAST = 5
|
||||
} aclGPUResource_0_8;
|
||||
|
||||
//! An enumeration that maps memory types to index values
|
||||
typedef enum _rt_gpu_mem_sizes_enum_0_8 {
|
||||
RT_MEM_HW_LOCAL = 0,
|
||||
RT_MEM_SW_LOCAL = 1,
|
||||
RT_MEM_HW_PRIVATE = 2,
|
||||
RT_MEM_SW_PRIVATE = 3,
|
||||
RT_MEM_HW_REGION = 4,
|
||||
RT_MEM_SW_REGION = 5,
|
||||
RT_MEM_LAST = 6
|
||||
} aclGPUMemSizes_0_8;
|
||||
|
||||
// Enumerations for the various argument types.
|
||||
typedef enum _acl_arg_type_enum_0_8 {
|
||||
ARG_TYPE_ERROR = 0,
|
||||
ARG_TYPE_SAMPLER = 1,
|
||||
ARG_TYPE_IMAGE = 2,
|
||||
ARG_TYPE_COUNTER = 3,
|
||||
ARG_TYPE_VALUE = 4,
|
||||
ARG_TYPE_POINTER = 5,
|
||||
ARG_TYPE_SEMAPHORE = 6,
|
||||
ARG_TYPE_QUEUE = 7, // enum for device enqueue
|
||||
ARG_TYPE_LAST = 8
|
||||
} aclArgType_0_8;
|
||||
|
||||
// Enumerations of the valid data types for pass by value and
|
||||
// pass by pointer kernel arguments.
|
||||
typedef enum _acl_data_type_enum_0_8 {
|
||||
DATATYPE_ERROR = 0,
|
||||
DATATYPE_i1 = 1,
|
||||
DATATYPE_i8 = 2,
|
||||
DATATYPE_i16 = 3,
|
||||
DATATYPE_i32 = 4,
|
||||
DATATYPE_i64 = 5,
|
||||
DATATYPE_u8 = 6,
|
||||
DATATYPE_u16 = 7,
|
||||
DATATYPE_u32 = 8,
|
||||
DATATYPE_u64 = 9,
|
||||
DATATYPE_f16 = 10,
|
||||
DATATYPE_f32 = 11,
|
||||
DATATYPE_f64 = 12,
|
||||
DATATYPE_f80 = 13,
|
||||
DATATYPE_f128 = 14,
|
||||
DATATYPE_struct = 15,
|
||||
DATATYPE_union = 16,
|
||||
DATATYPE_event = 17,
|
||||
DATATYPE_opaque = 18,
|
||||
DATATYPE_unknown = 19,
|
||||
DATATYPE_LAST = 20
|
||||
} aclArgDataType_0_8;
|
||||
|
||||
// Enumerations of the valid memory types for pass by pointer
|
||||
// kernel arguments
|
||||
typedef enum _acl_memory_type_enum_0_8 {
|
||||
PTR_MT_ERROR = 0, // Error
|
||||
PTR_MT_GLOBAL = 1, // global buffer
|
||||
PTR_MT_SCRATCH_EMU = 2, // SW emulated private memory
|
||||
PTR_MT_LDS_EMU = 3, // SW emulated local memory
|
||||
PTR_MT_UAV = 4, // uniformed access vector memory
|
||||
PTR_MT_CONSTANT_EMU = 5, // SW emulated constant memory
|
||||
PTR_MT_GDS_EMU = 6, // SW emulated region memory
|
||||
PTR_MT_LDS = 7, // HW local memory
|
||||
PTR_MT_SCRATCH = 8, // HW private memory
|
||||
PTR_MT_CONSTANT = 9, // HW constant memory
|
||||
PTR_MT_GDS = 10, // HW region memory
|
||||
PTR_MT_UAV_SCRATCH = 11, // SI and later HW private memory
|
||||
PTR_MT_UAV_CONSTANT = 12, // SI and later HW constant memory
|
||||
PTR_MT_LAST = 13
|
||||
} aclMemoryType_0_8;
|
||||
|
||||
// Enumeration that specifies the various access types for a pointer/image.
|
||||
typedef enum _acl_access_type_enum_0_8 {
|
||||
ACCESS_TYPE_ERROR = 0,
|
||||
ACCESS_TYPE_RO = 1,
|
||||
ACCESS_TYPE_WO = 2,
|
||||
ACCESS_TYPE_RW = 3,
|
||||
ACCESS_TYPE_LAST = 4
|
||||
} aclAccessType_0_8;
|
||||
|
||||
// Enumeration that specifies the binary types.
|
||||
typedef enum _acl_binary_image_type_enum_0_8 {
|
||||
BINARY_TYPE_ELF = 1,
|
||||
BINARY_TYPE_LLVM = 2,
|
||||
BINARY_TYPE_SPIRV = 4,
|
||||
} aclBinaryImageType_0_8;
|
||||
|
||||
#endif // _ACL_ENUMS_0_8_H_
|
||||
@@ -1,157 +0,0 @@
|
||||
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _ACL_FUNCTORS_0_8_H_
|
||||
#define _ACL_FUNCTORS_0_8_H_
|
||||
|
||||
//! Callback type for the log function pointer that many API calls take so
//  that the calling application can receive information on what errors occur.
//  The callback is handed a message pointer and a size.
typedef void (*aclLogFunction_0_8)(const char* msg, size_t size);

// Symbol callback type; it has the same signature as JITSymbolCallback_0_8.
typedef bool (*aclJITSymbolCallback)(const char*, const void*, void*);

// Untyped handles for a JIT object image (mutable and const flavours).
typedef void* aclJITObjectImage;
typedef const void* constAclJITObjectImage;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* InsertSec_0_8)(aclCompiler* cl, aclBinary* binary,
|
||||
const void* data, size_t data_size,
|
||||
aclSections id) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* InsertSym_0_8)(aclCompiler* cl, aclBinary* binary,
|
||||
const void* data, size_t data_size, aclSections id,
|
||||
const char* symbol) ACL_API_0_8;
|
||||
|
||||
typedef const void*(ACL_API_ENTRY* ExtractSec_0_8)(aclCompiler* cl, const aclBinary* binary,
|
||||
size_t* size, aclSections id,
|
||||
acl_error* error_code)ACL_API_0_8;
|
||||
|
||||
typedef const void*(ACL_API_ENTRY* ExtractSym_0_8)(aclCompiler* cl, const aclBinary* binary,
|
||||
size_t* size, aclSections id, const char* symbol,
|
||||
acl_error* error_code)ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* RemoveSec_0_8)(aclCompiler* cl, aclBinary* binary,
|
||||
aclSections id) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* RemoveSym_0_8)(aclCompiler* cl, aclBinary* binary, aclSections id,
|
||||
const char* symbol) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* QueryInfo_0_8)(aclCompiler* cl, const aclBinary* binary,
|
||||
aclQueryType query, const char* kernel,
|
||||
void* data_ptr, size_t* ptr_size) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* AddDbgArg_0_8)(aclCompiler* cl, aclBinary* bin, const char* kernel,
|
||||
const char* name, bool byVal) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* RemoveDbgArg_0_8)(aclCompiler* cl, aclBinary* bin,
|
||||
const char* kernel,
|
||||
const char* name) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* Compile_0_8)(aclCompiler* cl, aclBinary* bin, const char* options,
|
||||
aclType from, aclType to,
|
||||
aclLogFunction_0_8 compile_callback) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* Link_0_8)(aclCompiler* cl, aclBinary* src_bin,
|
||||
unsigned int num_libs, aclBinary** libs,
|
||||
aclType link_mode, const char* options,
|
||||
aclLogFunction_0_8 link_callback) ACL_API_0_8;
|
||||
|
||||
typedef const char*(ACL_API_ENTRY* CompLog_0_8)(aclCompiler* cl)ACL_API_0_8;
|
||||
|
||||
typedef const void*(ACL_API_ENTRY* RetrieveType_0_8)(aclCompiler* cl, const aclBinary* bin,
|
||||
const char* name, size_t* data_size,
|
||||
aclType type,
|
||||
acl_error* error_code)ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* SetType_0_8)(aclCompiler* cl, aclBinary* bin, const char* name,
|
||||
aclType type, const void* data,
|
||||
size_t size) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* ConvertType_0_8)(aclCompiler* cl, aclBinary* bin, const char* name,
|
||||
aclType type) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* Disassemble_0_8)(aclCompiler* cl, aclBinary* bin,
|
||||
const char* kernel,
|
||||
aclLogFunction_0_8 disasm_callback) ACL_API_0_8;
|
||||
|
||||
typedef const void*(ACL_API_ENTRY* GetDevBinary_0_8)(aclCompiler* cl, const aclBinary* bin,
|
||||
const char* kernel, size_t* size,
|
||||
acl_error* error_code)ACL_API_0_8;
|
||||
|
||||
typedef aclLoaderData*(ACL_API_ENTRY* LoaderInit_0_8)(aclCompiler* cl, aclBinary* bin,
|
||||
aclLogFunction_0_8 callback,
|
||||
acl_error* error);
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* LoaderFini_0_8)(aclLoaderData* data);
|
||||
|
||||
typedef aclModule*(ACL_API_ENTRY* FEToIR_0_8)(aclLoaderData* ald, const char* source,
|
||||
size_t data_size, aclContext* ctx,
|
||||
acl_error* error)ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* SourceToISA_0_8)(aclLoaderData* ald, const char* source,
|
||||
size_t data_size) ACL_API_0_8;
|
||||
|
||||
typedef aclModule*(ACL_API_ENTRY* IRPhase_0_8)(aclLoaderData* data, aclModule* ir, aclContext* ctx,
|
||||
acl_error* error)ACL_API_0_8;
|
||||
|
||||
typedef aclModule*(ACL_API_ENTRY* LinkPhase_0_8)(aclLoaderData* data, aclModule* ir,
|
||||
unsigned int num_libs, aclModule** libs,
|
||||
aclContext* ctx, acl_error* error)ACL_API_0_8;
|
||||
|
||||
typedef const void*(ACL_API_ENTRY* CGPhase_0_8)(aclLoaderData* data, aclModule* ir, aclContext* ctx,
|
||||
acl_error* error)ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* DisasmISA_0_8)(aclLoaderData* data, const char* kernel,
|
||||
const void* isa_code, size_t isa_size) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* SetupLoaderObject_0_8)(aclCompiler* cl) ACL_API_0_8;
|
||||
|
||||
typedef aclJITObjectImage(ACL_API_ENTRY* JITObjectImageCreate_0_8)(
|
||||
const void* buffer, size_t length, aclBinary* bin, acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
typedef aclJITObjectImage(ACL_API_ENTRY* JITObjectImageCopy_0_8)(const void* buffer, size_t length,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* JITObjectImageDestroy_0_8)(aclJITObjectImage image) ACL_API_0_8;
|
||||
|
||||
typedef size_t(ACL_API_ENTRY* JITObjectImageSize_0_8)(aclJITObjectImage image,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
typedef const char*(ACL_API_ENTRY* JITObjectImageData_0_8)(aclJITObjectImage image,
|
||||
acl_error* error_code)ACL_API_0_8;
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* JITObjectImageFinalize_0_8)(aclJITObjectImage image) ACL_API_0_8;
|
||||
|
||||
typedef size_t(ACL_API_ENTRY* JITObjectImageGetGlobalsSize_0_8)(aclJITObjectImage image,
|
||||
acl_error* error_code) ACL_API_0_8;
|
||||
|
||||
typedef bool (*JITSymbolCallback_0_8)(const char*, const void*, void*);
|
||||
|
||||
typedef acl_error(ACL_API_ENTRY* JITObjectImageIterateSymbols_0_8)(
|
||||
aclJITObjectImage image, JITSymbolCallback_0_8 jit_callback, void* data) ACL_API_0_8;
|
||||
|
||||
typedef char*(ACL_API_ENTRY* JITObjectImageDisassembleKernel_0_8)(constAclJITObjectImage image,
|
||||
const char* kernel,
|
||||
acl_error* error_code)ACL_API_0_8;
|
||||
|
||||
// Allocation hook type: takes a size and returns a void*.
typedef void* (*AllocFunc_0_8)(size_t size)ACL_API_0_8;

// Deallocation hook type: takes a void* and returns nothing.
typedef void (*FreeFunc_0_8)(void* ptr) ACL_API_0_8;
|
||||
|
||||
#endif // _ACL_FUNCTORS_0_8_H_
|
||||
@@ -1,365 +0,0 @@
|
||||
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _ACL_STRUCTS_0_8_H_
|
||||
#define _ACL_STRUCTS_0_8_H_
|
||||
// First member of the ACL structures that use it: struct_size holds the size
// of the structure itself. The macro has no trailing semicolon, so uses are
// written as `ACL_STRUCT_HEADER;`.
#define ACL_STRUCT_HEADER size_t struct_size
|
||||
//! A structure that holds information on the various types of arguments
|
||||
// The format in memory of this structure is
|
||||
// -------------
|
||||
// | aclArgData |
|
||||
// -------------
|
||||
// |->argStr |
|
||||
// -------------
|
||||
// |->typeStr |
|
||||
// -------------
|
||||
typedef struct _acl_md_arg_type_0_8 {
|
||||
ACL_STRUCT_HEADER;
|
||||
size_t argNameSize;
|
||||
size_t typeStrSize;
|
||||
const char* argStr;
|
||||
const char* typeStr;
|
||||
union {
|
||||
struct { // Struct for sampler arguments
|
||||
unsigned ID;
|
||||
unsigned isKernelDefined;
|
||||
unsigned value;
|
||||
} sampler;
|
||||
struct { // Struct for image arguments
|
||||
unsigned resID;
|
||||
unsigned cbNum;
|
||||
unsigned cbOffset;
|
||||
aclAccessType type;
|
||||
bool is2D;
|
||||
bool is1D;
|
||||
bool isArray;
|
||||
bool isBuffer;
|
||||
} image;
|
||||
struct { // struct for atomic counter arguments
|
||||
unsigned is32bit;
|
||||
unsigned resID;
|
||||
unsigned cbNum;
|
||||
unsigned cbOffset;
|
||||
} counter;
|
||||
struct { // struct for semaphore arguments
|
||||
unsigned resID;
|
||||
unsigned cbNum;
|
||||
unsigned cbOffset;
|
||||
} sema;
|
||||
struct { // struct for pass by value arguments
|
||||
unsigned numElements;
|
||||
unsigned cbNum;
|
||||
unsigned cbOffset;
|
||||
aclArgDataType data;
|
||||
} value;
|
||||
struct { // struct for pass by pointer arguments
|
||||
unsigned numElements;
|
||||
unsigned cbNum;
|
||||
unsigned cbOffset;
|
||||
unsigned bufNum;
|
||||
unsigned align;
|
||||
aclArgDataType data;
|
||||
aclMemoryType memory;
|
||||
aclAccessType type;
|
||||
bool isVolatile;
|
||||
bool isRestrict;
|
||||
bool isPipe;
|
||||
} pointer;
|
||||
struct { // Struct for queue arguments
|
||||
unsigned numElements;
|
||||
unsigned cbNum;
|
||||
unsigned cbOffset;
|
||||
aclArgDataType data;
|
||||
aclMemoryType memory;
|
||||
} queue;
|
||||
} arg;
|
||||
aclArgType type;
|
||||
bool isConst;
|
||||
} aclArgData_0_8;
|
||||
|
||||
//! A structure that holds information for printf
|
||||
// The format in memory of this structure is
|
||||
// --------------
|
||||
// | aclPrintfFmt|
|
||||
// --------------
|
||||
// |->argSizes |
|
||||
// --------------
|
||||
// |->fmrStr |
|
||||
// --------------
|
||||
|
||||
typedef struct _acl_md_printf_fmt_0_8 {
|
||||
ACL_STRUCT_HEADER;
|
||||
unsigned ID;
|
||||
size_t numSizes;
|
||||
size_t fmtStrSize;
|
||||
uint32_t* argSizes;
|
||||
const char* fmtStr;
|
||||
} aclPrintfFmt_0_8;
|
||||
|
||||
//! A structure that holds the metadata in the RODATA section.
|
||||
typedef struct _acl_metadata_0_8 {
|
||||
ACL_STRUCT_HEADER; // This holds the size of the structure itself for versioning.
|
||||
size_t data_size; // This holds the size of all the memory allocated for this structure.
|
||||
uint32_t major, minor, revision; // RT_ABI_VERSION
|
||||
uint32_t gpuCaps; // RT_GPU_FUNC_CAPS
|
||||
uint32_t funcID; // RT_GPU_FUNC_ID
|
||||
uint32_t gpuRes[5]; // RT_GPU_DEFAULT_ID
|
||||
size_t wgs[3]; // RT_WORK_GROUP_SIZE
|
||||
uint32_t wrs[3]; // RT_WORK_REGION_SIZE
|
||||
size_t kernelNameSize;
|
||||
size_t deviceNameSize;
|
||||
size_t mem[6]; // RT_MEM_SIZES
|
||||
size_t numArgs;
|
||||
size_t numPrintf;
|
||||
|
||||
aclArgData_0_8* args; // RT_ARGUMENT_ARRAY
|
||||
aclPrintfFmt_0_8* printf; // RT_GPU_PRINTF_ARRAY
|
||||
const char* kernelName; // RT_KERNEL_NAME
|
||||
const char* deviceName; // RT_DEVICE_NAME
|
||||
bool enqueue_kernel; // RT_DEVICE_ENQUEUE
|
||||
uint32_t kernel_index; // RT_KERNEL_INDEX
|
||||
uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS
|
||||
size_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT
|
||||
size_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT
|
||||
size_t vecTypeHintSize;
|
||||
const char* vth; // RT_VEC_TYPE_HINT
|
||||
} aclMetadata_0_8;
|
||||
|
||||
//! An structure that holds information on the capabilities of the bif device.
|
||||
typedef struct _acl_device_caps_rec_0_8 {
|
||||
ACL_STRUCT_HEADER;
|
||||
uint32_t flags[4];
|
||||
uint32_t encryptCode;
|
||||
} aclDevCaps_0_8;
|
||||
|
||||
//! Structure that holds information on the target that the source is
|
||||
// being compiled for.
|
||||
typedef struct _acl_target_info_rec_0_8 {
|
||||
ACL_STRUCT_HEADER;
|
||||
aclDevType arch_id; // An identifier for the architecture.
|
||||
uint32_t chip_id; // A identifier for the chip.
|
||||
} aclTargetInfo_0_8;
|
||||
|
||||
// Structure for the version 0.8 of the structure.
|
||||
typedef struct _acl_binary_opts_rec_0_8 {
|
||||
ACL_STRUCT_HEADER;
|
||||
uint32_t elfclass;
|
||||
uint32_t bitness;
|
||||
const char* temp_file;
|
||||
uint32_t kernelArgAlign;
|
||||
} aclBinaryOptions_0_8;
|
||||
|
||||
// Structure for the version 0.8.1 of the structure.
|
||||
// This versions addes in alloc/dealloc functions.
|
||||
typedef struct _acl_binary_opts_rec_0_8_1 {
|
||||
ACL_STRUCT_HEADER;
|
||||
uint32_t elfclass;
|
||||
uint32_t bitness;
|
||||
const char* temp_file;
|
||||
uint32_t kernelArgAlign;
|
||||
AllocFunc_0_8 alloc;
|
||||
FreeFunc_0_8 dealloc;
|
||||
} aclBinaryOptions_0_8_1;
|
||||
|
||||
//! Structure that holds the OpenCL binary information.
|
||||
typedef struct _acl_bif_rec_0_8 {
|
||||
ACL_STRUCT_HEADER;
|
||||
aclTargetInfo_0_8 target; // Information about the target device.
|
||||
aclBIF* bin; // Pointer to the acl.
|
||||
aclOptions* options; // Pointer to acl options.
|
||||
aclBinaryOptions_0_8 binOpts; // Pointer to the binary options.
|
||||
aclDevCaps_0_8 caps; // Capabilities of the BIF.
|
||||
} aclBinary_0_8;
|
||||
|
||||
//! Version of the aclBinary that uses the 0_8_1 version of the aclBinaryOptions.
|
||||
typedef struct _acl_bif_rec_0_8_1 {
|
||||
ACL_STRUCT_HEADER;
|
||||
aclTargetInfo_0_8 target; // Information about the target device.
|
||||
aclBIF* bin; // Pointer to the acl.
|
||||
aclOptions* options; // Pointer to acl options.
|
||||
aclBinaryOptions_0_8_1 binOpts; // Pointer to the binary options.
|
||||
aclDevCaps_0_8 caps; // Capabilities of the BIF.
|
||||
} aclBinary_0_8_1;
|
||||
|
||||
// Members shared by every loader structure; each loader struct starts with
// this macro. Unlike ACL_STRUCT_HEADER, the expansion already ends in a
// semicolon, so `ACL_LOADER_COMMON;` leaves one extra (harmless) `;` behind.
// The trailing semicolon is kept so that existing uses keep compiling.
#define ACL_LOADER_COMMON \
  ACL_STRUCT_HEADER;      \
  bool isBuiltin;         \
  const char* libName;    \
  void* handle;           \
  LoaderInit init;        \
  LoaderFini fini;
|
||||
|
||||
// Struct that maps to the common structure between all loaders.
|
||||
typedef struct _acl_common_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
} aclCommonLoader_0_8;
|
||||
|
||||
typedef struct _acl_cl_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
Compile compile;
|
||||
Link link;
|
||||
CompLog getLog;
|
||||
RetrieveType_0_8 retrieveType;
|
||||
SetType_0_8 setType;
|
||||
ConvertType_0_8 convertType;
|
||||
Disassemble disassemble;
|
||||
GetDevBinary_0_8 devBinary;
|
||||
InsertSec insSec;
|
||||
ExtractSec extSec;
|
||||
RemoveSec remSec;
|
||||
InsertSym insSym;
|
||||
ExtractSym extSym;
|
||||
RemoveSym remSym;
|
||||
QueryInfo getInfo;
|
||||
AddDbgArg addDbg;
|
||||
RemoveDbgArg removeDbg;
|
||||
SetupLoaderObject setupLoaderObject;
|
||||
JITObjectImageCreate jitOICreate;
|
||||
JITObjectImageCopy jitOICopy;
|
||||
JITObjectImageDestroy jitOIDestroy;
|
||||
JITObjectImageSize jitOISize;
|
||||
JITObjectImageData jitOIData;
|
||||
JITObjectImageFinalize jitOIFinalize;
|
||||
JITObjectImageGetGlobalsSize jitOIGlobalSize;
|
||||
JITObjectImageIterateSymbols jitOIIterateSymbols;
|
||||
JITObjectImageDisassembleKernel jitOIDisassembleKernel;
|
||||
} aclCLLoader_0_8;
|
||||
|
||||
//! Structure that holds the required functions
|
||||
// that sc exports for the SCDLL infrastructure.
|
||||
typedef struct _acl_sc_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
uint32_t /*SC_UINT32*/ sc_interface_version;
|
||||
void /**SC_EXPORT_FUNCTIONS**/* scef;
|
||||
// Any version specific fields go here.
|
||||
} aclSCLoader_0_8;
|
||||
|
||||
typedef struct _acl_fe_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
FEToIR toIR; // Used for Source to aclModule containing LLVMIR
|
||||
FEToIR toModule; // Used to convert raw SPIR/LLVM-IR to aclModule
|
||||
SourceToISA toISA; // Used for Source to ISA
|
||||
} aclFELoader_0_8;
|
||||
|
||||
typedef struct _acl_opt_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
IRPhase optimize; // Used for IR to IR transformation
|
||||
} aclOptLoader_0_8;
|
||||
|
||||
typedef struct _acl_link_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
LinkPhase link; // Used for Linking in IR modules
|
||||
IRPhase toLLVMIR; // Used for converting SPIR to LLVMIR
|
||||
IRPhase toSPIR; // Used for converting LLVMIR to SPIR
|
||||
} aclLinkLoader_0_8;
|
||||
|
||||
typedef struct _acl_cg_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
CGPhase codegen; // Used for converting from LLVMIR to target ASM.
|
||||
} aclCGLoader_0_8;
|
||||
|
||||
typedef struct _acl_be_loader_rec_0_8 {
|
||||
ACL_LOADER_COMMON;
|
||||
SourceToISA finalize; // Used for converting from target source to target ISA.
|
||||
SourceToISA assemble; // Used for converting from target text to target binary.
|
||||
DisasmISA disassemble; // Used for converting from target binary to target ISA.
|
||||
} aclBELoader_0_8;
|
||||
|
||||
typedef struct _acl_compiler_opts_rec_0_8 {
|
||||
ACL_STRUCT_HEADER; // Size of the structure for version checking.
|
||||
const char* clLib;
|
||||
const char* feLib;
|
||||
const char* optLib;
|
||||
const char* linkLib;
|
||||
const char* cgLib;
|
||||
const char* beLib;
|
||||
const char* scLib;
|
||||
} aclCompilerOptions_0_8;
|
||||
|
||||
typedef struct _acl_compiler_opts_rec_0_8_1 {
|
||||
ACL_STRUCT_HEADER; // Size of the structure for version checking.
|
||||
const char* clLib;
|
||||
const char* feLib;
|
||||
const char* optLib;
|
||||
const char* linkLib;
|
||||
const char* cgLib;
|
||||
const char* beLib;
|
||||
const char* scLib; // Name or path to the shader compiler shared library
|
||||
AllocFunc alloc;
|
||||
FreeFunc dealloc;
|
||||
} aclCompilerOptions_0_8_1;
|
||||
|
||||
//! Structure that holds the OpenCL compiler and various loaders.
|
||||
typedef struct _acl_compiler_rec_0_8 {
|
||||
ACL_STRUCT_HEADER; // Size of structure for version checking.
|
||||
aclCLLoader clAPI; // Pointer to the compiler API.
|
||||
aclFELoader feAPI; // Pointer to the FE Loader API.
|
||||
aclOptLoader optAPI; // Pointer to the Opt Loader API.
|
||||
aclLinkLoader linkAPI; // Pointer to the Link Loader API.
|
||||
aclCGLoader cgAPI; // Pointer to the CG Loader API.
|
||||
aclBELoader beAPI; // Pointer to the BE Loader API.
|
||||
aclSCLoader scAPI; // Pointer to the SC Loader API.
|
||||
aclCompilerOptions* opts; // The options structure for the compiler.
|
||||
void* llvm_shutdown; // Pointer to the llvm shutdown object.
|
||||
char* buildLog; // Pointer to the current build log.
|
||||
unsigned logSize; // Size of the current build log.
|
||||
aclLoaderData* apiData; // pointer to data store for the compiler API loader.
|
||||
} aclCompilerHandle_0_8;
|
||||
|
||||
//! Structure that holds the OpenCL compiler and various loaders.
|
||||
typedef struct _acl_compiler_rec_0_8_1 {
|
||||
ACL_STRUCT_HEADER;
|
||||
aclCLLoader clAPI; // Pointer to the compiler API.
|
||||
aclFELoader feAPI; // Pointer to the FE Loader API.
|
||||
aclOptLoader optAPI; // Pointer to the Opt Loader API.
|
||||
aclLinkLoader linkAPI; // Pointer to the Link Loader API.
|
||||
aclCGLoader cgAPI; // Pointer to the CG Loader API.
|
||||
aclBELoader beAPI; // Pointer to the BE Loader API.
|
||||
aclSCLoader scAPI; // Pointer to the SC Loader API.
|
||||
AllocFunc alloc;
|
||||
FreeFunc dealloc;
|
||||
aclCompilerOptions* opts; // The options structure for the compiler.
|
||||
void* llvm_shutdown; // Pointer to the llvm shutdown object.
|
||||
char* buildLog; // Pointer to the current build log.
|
||||
unsigned logSize; // Size of the current build log.
|
||||
aclLoaderData* apiData; // pointer to data store for the compiler API loader.
|
||||
} aclCompilerHandle_0_8_1;
|
||||
|
||||
//! Structure to hold kernel statistics obtained from kernel.
// Sixteen unsigned int counters and no struct_size header. The struct tag is
// versioned (_0_8_1) while the typedef name aclKernelStats is not.
typedef struct _acl_kernel_stats_0_8_1 {
  unsigned int scratchRegs;
  unsigned int scratchSize;
  unsigned int availablevgprs;
  unsigned int availablesgprs;
  unsigned int usedvgprs;
  unsigned int usedsgprs;
  unsigned int availableldssize;
  unsigned int usedldssize;
  unsigned int availablestacksize;
  unsigned int usedstacksize;
  unsigned int wavefrontsize;
  unsigned int wavefrontpersimd;
  unsigned int threadsperworkgroup;
  unsigned int reqdworkgroup_x;
  unsigned int reqdworkgroup_y;
  unsigned int reqdworkgroup_z;
} aclKernelStats;
|
||||
|
||||
#endif // _ACL_STRUCTS_0_8_H_
|
||||
@@ -1,117 +0,0 @@
|
||||
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _ACL_API_TYPES_0_8_H_
|
||||
#define _ACL_API_TYPES_0_8_H_
|
||||
#include "aclDefs.h"
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Typedefs that always point to the most recent versions of the objects.
// aclBinary, aclBinaryOptions, aclCompiler and aclCompilerOptions map to the
// 0_8_1 records; every other name maps to a 0_8 record. aclOptions, aclBIF,
// aclModule, aclContext and aclLoaderData are pointer typedefs to structs
// that are only forward-declared here.
typedef struct _acl_md_arg_type_0_8 aclArgData;
typedef struct _acl_md_printf_fmt_0_8 aclPrintfFmt;
typedef struct _acl_metadata_0_8 aclMetadata;
typedef struct _acl_device_caps_rec_0_8 aclDevCaps;
typedef struct _acl_target_info_rec_0_8 aclTargetInfo;
typedef struct _acl_bif_rec_0_8_1 aclBinary;
typedef struct _acl_binary_opts_rec_0_8_1 aclBinaryOptions;
typedef struct _acl_compiler_rec_0_8_1 aclCompiler;
typedef struct _acl_compiler_opts_rec_0_8_1 aclCompilerOptions;
typedef struct _acl_options_0_8* aclOptions;  // Opaque pointer to amd::Options
typedef struct _acl_binary_0_8* aclBIF;       // Opaque pointer to bifbase
typedef struct _acl_common_loader_rec_0_8 aclCommonLoader;
typedef struct _acl_cl_loader_rec_0_8 aclCLLoader;
typedef struct _acl_sc_loader_rec_0_8 aclSCLoader;
typedef struct _acl_fe_loader_rec_0_8 aclFELoader;
typedef struct _acl_link_loader_rec_0_8 aclLinkLoader;
typedef struct _acl_opt_loader_rec_0_8 aclOptLoader;
typedef struct _acl_cg_loader_rec_0_8 aclCGLoader;
typedef struct _acl_be_loader_rec_0_8 aclBELoader;
typedef struct _acl_llvm_module_0_8* aclModule;      // Opaque pointer to llvm::Module
typedef struct _acl_llvm_context_0_8* aclContext;    // Opaque pointer to llvm::Context
typedef struct _acl_loader_data_0_8* aclLoaderData;  // Opaque pointer to loader data
|
||||
|
||||
#include "aclEnums.h"
|
||||
// Typedefs for enumerations
|
||||
typedef enum _acl_error_enum_0_8 acl_error;
|
||||
typedef enum _comp_device_caps_enum_0_8 compDeviceCaps;
|
||||
typedef enum _comp_opt_settings_enum_0_8 compOptSettings;
|
||||
typedef enum _acl_dev_type_enum_0_8 aclDevType;
|
||||
typedef enum _acl_cl_version_enum_0_8 aclCLVersion;
|
||||
typedef enum _acl_type_enum_0_8 aclType;
|
||||
typedef enum _rt_query_types_enum_0_8 aclQueryType;
|
||||
typedef enum _rt_gpu_caps_enum_0_8 aclGPUCaps;
|
||||
typedef enum _rt_gpu_resource_enum_0_8 aclGPUResource;
|
||||
typedef enum _rt_gpu_mem_sizes_enum_0_8 aclGPUMemSizes;
|
||||
typedef enum _acl_arg_type_enum_0_8 aclArgType;
|
||||
typedef enum _acl_data_type_enum_0_8 aclArgDataType;
|
||||
typedef enum _acl_memory_type_enum_0_8 aclMemoryType;
|
||||
typedef enum _acl_access_type_enum_0_8 aclAccessType;
|
||||
typedef enum _bif_version_enum_0_8 aclBIFVersion;
|
||||
typedef enum _bif_platform_enum_0_8 aclPlatform;
|
||||
typedef enum _bif_sections_enum_0_8 aclSections;
|
||||
typedef enum _acl_loader_type_enum_0_8 aclLoaderType;
|
||||
typedef enum _acl_binary_image_type_enum_0_8 aclBinaryImageType;
|
||||
|
||||
#include "aclFunctors.h"
|
||||
// Typedefs for function pointers
|
||||
typedef aclLogFunction_0_8 aclLogFunction;
|
||||
typedef InsertSec_0_8 InsertSec;
|
||||
typedef RemoveSec_0_8 RemoveSec;
|
||||
typedef ExtractSec_0_8 ExtractSec;
|
||||
typedef InsertSym_0_8 InsertSym;
|
||||
typedef RemoveSym_0_8 RemoveSym;
|
||||
typedef ExtractSym_0_8 ExtractSym;
|
||||
typedef QueryInfo_0_8 QueryInfo;
|
||||
typedef Compile_0_8 Compile;
|
||||
typedef Link_0_8 Link;
|
||||
typedef AddDbgArg_0_8 AddDbgArg;
|
||||
typedef RemoveDbgArg_0_8 RemoveDbgArg;
|
||||
typedef SetupLoaderObject_0_8 SetupLoaderObject;
|
||||
typedef CompLog_0_8 CompLog;
|
||||
typedef RetrieveType_0_8 RetrieveType;
|
||||
typedef SetType_0_8 SetType;
|
||||
typedef ConvertType_0_8 ConvertType;
|
||||
typedef Disassemble_0_8 Disassemble;
|
||||
typedef GetDevBinary_0_8 GetDevBinary;
|
||||
typedef LoaderInit_0_8 LoaderInit;
|
||||
typedef LoaderFini_0_8 LoaderFini;
|
||||
typedef FEToIR_0_8 FEToIR;
|
||||
typedef SourceToISA_0_8 SourceToISA;
|
||||
typedef IRPhase_0_8 IRPhase;
|
||||
typedef LinkPhase_0_8 LinkPhase;
|
||||
typedef CGPhase_0_8 CGPhase;
|
||||
typedef DisasmISA_0_8 DisasmISA;
|
||||
typedef AllocFunc_0_8 AllocFunc;
|
||||
typedef FreeFunc_0_8 FreeFunc;
|
||||
typedef JITObjectImageCreate_0_8 JITObjectImageCreate;
|
||||
typedef JITObjectImageCopy_0_8 JITObjectImageCopy;
|
||||
typedef JITObjectImageDestroy_0_8 JITObjectImageDestroy;
|
||||
typedef JITObjectImageSize_0_8 JITObjectImageSize;
|
||||
typedef JITObjectImageData_0_8 JITObjectImageData;
|
||||
typedef JITObjectImageFinalize_0_8 JITObjectImageFinalize;
|
||||
typedef JITObjectImageGetGlobalsSize_0_8 JITObjectImageGetGlobalsSize;
|
||||
typedef JITSymbolCallback_0_8 JITSymbolCallback;
|
||||
typedef JITObjectImageIterateSymbols_0_8 JITObjectImageIterateSymbols;
|
||||
typedef JITObjectImageDisassembleKernel_0_8 JITObjectImageDisassembleKernel;
|
||||
|
||||
#include "aclStructs.h"
|
||||
|
||||
#endif  // _ACL_API_TYPES_0_8_H_
|
||||
@@ -1,29 +0,0 @@
|
||||
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _COMPLIB_SPIRV_UTILS_H
|
||||
#define _COMPLIB_SPIRV_UTILS_H
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
// SPIR-V helpers; both take a pointer to an image and its length.
// NOTE(review): judging by the names, validateSPIRV presumably validates the
// whole image and isSPIRVMagic presumably checks only the leading magic
// number; confirm against the definitions.
// std::size_t is spelled out because <cstddef> only guarantees the name in
// namespace std.
bool validateSPIRV(const void* image, std::size_t length);
bool isSPIRVMagic(const void* image, std::size_t length);
|
||||
|
||||
#endif
|
||||
@@ -1,230 +0,0 @@
|
||||
/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _CL_UTILS_BIF_SECTION_LABELS_HPP_
|
||||
#define _CL_UTILS_BIF_SECTION_LABELS_HPP_
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
// Index constants with values 0 and 1.
// NOTE(review): presumably these index the two-element `str` (prefix /
// postfix) array of oclBIFSymbolStruct; confirm against the users.
namespace bif {
constexpr unsigned PRE = 0;
constexpr unsigned POST = 1;
}  // namespace bif
|
||||
|
||||
// Identifiers for the BIF symbols. Values are implicit and start at 0, so
// the order of the enumerators is significant. symLast (26) is NOT the final
// enumerator: symKernelStats (27) and symBRIGLoaderMap (28) follow it.
typedef enum {
  symOpenclCompilerOptions,  // 0
  symAMDILCompilerOptions,   // 1
  symHSACompilerOptions,     // 2
  symOpenclLinkerOptions,    // 3
  symOpenclMeta,             // 4
  symOpenclKernel,           // 5
  symOpenclStub,             // 6
  symOpenclGlobal,           // 7
  symISAMeta,                // 8
  symISABinary,              // 9
  symAMDILText,              // 10
  symAMDILBinary,            // 11
  symHSAILText,              // 12
  symBRIG,                   // 13
  symAMDILFMeta,             // 14
  symISAText,                // 15
  symBRIGxxx1,               // 16
  symBRIGxxx2,               // 17
  symBRIGxxx3,               // 18
  symX86Barrier,             // 19
  symAMDILHeader,            // 20
  symDebugInfo,              // 21
  symDebugilText,            // 22
  symDebugilBinary,          // 23
  symAsmText,                // 24
  symDLL,                    // 25
  symLast,                   // 26
  symKernelStats,            // 27
  symBRIGLoaderMap           // 28
} oclBIFSymbolID;
|
||||
|
||||
struct oclBIFSymbolStruct {
|
||||
oclBIFSymbolID id;
|
||||
// pre/post fix of the symbol string
|
||||
const char* str[2];
|
||||
// the BIF section that the symbol is stored for GPU/CPU
|
||||
aclSections sections[2];
|
||||
};
|
||||
|
||||
// TODO: analyze the changes since 30 and remove symbols that are no longer used,
|
||||
// for example, symISAMeta, update convert functions, check backward compatibility.
|
||||
// These are the symbols that are defined by the BIF 3.1 spec
|
||||
static constexpr oclBIFSymbolStruct BIF31[28] = {
|
||||
// 0: BIF 3.0 compiler options, .comment section via library support.
|
||||
{symOpenclCompilerOptions, {"__OpenCL_", "compiler_options"}, {aclCOMMENT, aclCOMMENT}},
|
||||
// 1: BIF 3.0 AMDIL compile options, .comment section via -fbin-amdil.
|
||||
{symAMDILCompilerOptions, {"__AMDIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
|
||||
// 2: BIF 3.0 HSAIL compile options, .comment section via -fbin-hsail.
|
||||
{symHSACompilerOptions, {"__HSAIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
|
||||
// 3: BIF 3.0 linker options, .comment section via library support.
|
||||
{symOpenclLinkerOptions, {"__OpenCL_", "linker_options"}, {aclCOMMENT, aclCOMMENT}},
|
||||
// 4: BIF 3.0 per kernel metadata, .cg section via -fbin-cg for CPU,
|
||||
// .rodata section via -fbin-exe for GPU
|
||||
{symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclCODEGEN}},
|
||||
// 5: BIF 3.0 per kernel text(x86 only), .cg section via -fbin-cg.
|
||||
{symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclCODEGEN}},
|
||||
// 6: BIF 3.0 per kernel stub(x86 only), .cg section via -fbin-cg.
|
||||
{symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclCODEGEN}},
|
||||
// 7: BIF 3.0 per constant buffer data, .rodata section via -fbin-exe.
|
||||
{symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclRODATA}},
|
||||
// 8: BIF 3.0 per kernel ISA metadata, .rodata section via -fbin-exe.
|
||||
{symISAMeta, {"__ISA_", "_metadata"}, {aclRODATA, aclLAST}},
|
||||
// 9: BIF 3.0 per kernel ISA, .text section via -fbin-exe.
|
||||
{symISABinary, {"__ISA_", "_binary"}, {aclTEXT, aclLAST}},
|
||||
// 10: BIF 3.0 per kernel AMDIL source, .internal section via -fbin-amdil.
|
||||
{symAMDILText, {"__AMDIL_", "_text"}, {aclINTERNAL, aclLAST}},
|
||||
// 11: BIF 3.0 per kernel AMDIL binary, .internal section via -fbin-amdil.
|
||||
{symAMDILBinary, {"__AMDIL_", "_binary"}, {aclINTERNAL, aclLAST}},
|
||||
// 12: BIF 3.0 per kernel HSAIL source, .internal section via -fbin-hsail.
|
||||
{symHSAILText, {"__HSAIL_", "_text"}, {aclCODEGEN, aclLAST}},
|
||||
// 13: BIF 3.0 per kernel HSAIL binary, .internal section via -fbin-hsail.
|
||||
{symBRIG, {"__BRIG__", ""}, {aclBRIG, aclLAST}},
|
||||
// 14: BIF 3.0 per function metadata, .internal section via -fbin-amdil.
|
||||
{symAMDILFMeta, {"__AMDIL_", "_fmetadata"}, {aclINTERNAL, aclLAST}},
|
||||
// 15: BIF 3.0 per kernel ISA text, .internal section via disassembly.
|
||||
{symISAText, {"__ISA_", "_text"}, {aclINTERNAL, aclLAST}},
|
||||
// 16: BIF 3.0 BRIG operands declarations, .brig section via -fbin-brig.
|
||||
{symBRIGxxx1, {"", ""}, {aclLAST, aclLAST}},
|
||||
// 17: Unused after changes in HSAIL PRM
|
||||
{symBRIGxxx2, {"", ""}, {aclLAST, aclLAST}},
|
||||
// 18: BIF 3.0 BRIG strtab declarations, .brig section via -fbin-brig.
|
||||
{symBRIGxxx3, {"", ""}, {aclLAST, aclLAST}},
|
||||
// 19: BIF 3.0 per kernel barrier metadata, only valid for X86.
|
||||
{symX86Barrier, {"__X86_", "_barrier"}, {aclLAST, aclLAST}},
|
||||
// 20: BIF 3.0 per kernel header, .internal section via -fbin-amdil.(Legacy from bif2.x)
|
||||
{symAMDILHeader, {"__AMDIL_", "_header"}, {aclINTERNAL, aclLAST}},
|
||||
// 21: BIF 3.0 HSA BRIG or ISA debug info
|
||||
{symDebugInfo, {"__debug_brig__", "__debug_isa__"}, {aclHSADEBUG, aclLAST}},
|
||||
// 22: BIF 3.0 debugil text, .internal section via -g
|
||||
{symDebugilText, {"__debugil_text", ""}, {aclINTERNAL, aclLAST}},
|
||||
// 23: BIF 3.0 debugil binary, .internal section, can be converted from
|
||||
// __debugil_text
|
||||
{symDebugilBinary, {"__debugil_binary", ""}, {aclINTERNAL, aclLAST}},
|
||||
{symAsmText, {"", ""}, {aclLAST, aclCODEGEN}},
|
||||
{symDLL, {"", ""}, {aclLAST, aclTEXT}},
|
||||
// 26: BIF 3.0 HSAIL kernel statistics
|
||||
{symKernelStats, {"__HSAIL_", "_kernel_statistics"}, {aclKSTATS, aclLAST}},
|
||||
// 27: BIF 3.0 BRIG loader map
|
||||
{symBRIGLoaderMap, {"__Loader_Map", ""}, {aclCODEGEN, aclLAST}},
|
||||
}; // BIF31
|
||||
|
||||
// These are the symbols that are defined by the BIF 3.0 spec
|
||||
static constexpr oclBIFSymbolStruct BIF30[28] = {
|
||||
// 0: BIF 3.0 compiler options, .comment section via library support.
|
||||
{symOpenclCompilerOptions, {"__OpenCL_", "compiler_options"}, {aclCOMMENT, aclCOMMENT}},
|
||||
// 1: BIF 3.0 AMDIL compile options, .comment section via -fbin-amdil.
|
||||
{symAMDILCompilerOptions, {"__AMDIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
|
||||
// 2: BIF 3.0 HSAIL compile options, .comment section via -fbin-hsail.
|
||||
{symHSACompilerOptions, {"__HSAIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}},
|
||||
// 3: BIF 3.0 linker options, .comment section via library support.
|
||||
{symOpenclLinkerOptions, {"__OpenCL_", "linker_options"}, {aclCOMMENT, aclCOMMENT}},
|
||||
// 4: BIF 3.0 per kernel metadata, .cg section via -fbin-cg for CPU,
|
||||
// .rodata section via -fbin-exe for GPU
|
||||
{symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclCODEGEN}},
|
||||
// 5: BIF 3.0 per kernel text(x86 only), .cg section via -fbin-cg.
|
||||
{symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclCODEGEN}},
|
||||
// 6: BIF 3.0 per kernel stub(x86 only), .cg section via -fbin-cg.
|
||||
{symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclCODEGEN}},
|
||||
// 7: BIF 3.0 per constant buffer data, .rodata section via -fbin-exe.
|
||||
{symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclRODATA}},
|
||||
// 8: BIF 3.0 per kernel ISA metadata, .rodata section via -fbin-exe.
|
||||
{symISAMeta, {"__ISA_", "_metadata"}, {aclRODATA, aclLAST}},
|
||||
// 9: BIF 3.0 per kernel ISA, .text section via -fbin-exe.
|
||||
{symISABinary, {"__ISA_", "_binary"}, {aclTEXT, aclLAST}},
|
||||
// 10: BIF 3.0 per kernel AMDIL source, .internal section via -fbin-amdil.
|
||||
{symAMDILText, {"__AMDIL_", "_text"}, {aclINTERNAL, aclLAST}},
|
||||
// 11: BIF 3.0 per kernel AMDIL binary, .internal section via -fbin-amdil.
|
||||
{symAMDILBinary, {"__AMDIL_", "_binary"}, {aclINTERNAL, aclLAST}},
|
||||
// 12: BIF 3.0 per kernel HSAIL source, .internal section via -fbin-hsail.
|
||||
{symHSAILText, {"__HSAIL_", "_text"}, {aclCODEGEN, aclLAST}},
|
||||
// 13: BIF 3.0 per kernel HSAIL binary, .internal section via -fbin-hsail.
|
||||
{symBRIG, {"__BRIG__", ""}, {aclBRIG, aclLAST}},
|
||||
// 14: BIF 3.0 per function metadata, .internal section via -fbin-amdil.
|
||||
{symAMDILFMeta, {"__AMDIL_", "_fmetadata"}, {aclINTERNAL, aclLAST}},
|
||||
// 15: BIF 3.0 per kernel ISA text, .internal section via disassembly.
|
||||
{symISAText, {"__ISA_", "_text"}, {aclINTERNAL, aclLAST}},
|
||||
// 16: BIF 3.0 BRIG operands declarations, .brig section via -fbin-brig.
|
||||
{symBRIGxxx1, {"", ""}, {aclLAST, aclLAST}},
|
||||
// 17: Unused after changes in HSAIL PRM
|
||||
{symBRIGxxx2, {"", ""}, {aclLAST, aclLAST}},
|
||||
// 18: BIF 3.0 BRIG strtab declarations, .brig section via -fbin-brig.
|
||||
{symBRIGxxx3, {"", ""}, {aclLAST, aclLAST}},
|
||||
// 19: BIF 3.0 per kernel barrier metadata, only valid for X86.
|
||||
{symX86Barrier, {"__X86_", "_barrier"}, {aclLAST, aclLAST}},
|
||||
// 20: BIF 3.0 per kernel header, .internal section via -fbin-amdil.(Legacy from bif2.x)
|
||||
{symAMDILHeader, {"__AMDIL_", "_header"}, {aclINTERNAL, aclLAST}},
|
||||
// 21: BIF 3.0 HSA BRIG or ISA debug info
|
||||
{symDebugInfo, {"__debug_brig__", "__debug_isa__"}, {aclHSADEBUG, aclLAST}},
|
||||
// 22: BIF 3.0 debugil text, .internal section via -g
|
||||
{symDebugilText, {"__debugil_text", ""}, {aclINTERNAL, aclLAST}},
|
||||
// 23: BIF 3.0 debugil binary, .internal section, can be converted from
|
||||
// __debugil_text
|
||||
{symDebugilBinary, {"__debugil_binary", ""}, {aclINTERNAL, aclLAST}},
|
||||
{symAsmText, {"", ""}, {aclLAST, aclCODEGEN}},
|
||||
{symDLL, {"", ""}, {aclLAST, aclTEXT}},
|
||||
// 26: BIF 3.0 HSAIL kernel statistics
|
||||
{symKernelStats, {"__HSAIL_", "_kernel_statistics"}, {aclKSTATS, aclLAST}},
|
||||
// 27: BIF 3.0 BRIG loader map
|
||||
{symBRIGLoaderMap, {"__Loader_Map", ""}, {aclCODEGEN, aclLAST}},
|
||||
}; // BIF30
|
||||
|
||||
|
||||
// These are the sections that are defined by the BIF 2.0 spec
|
||||
static constexpr oclBIFSymbolStruct BIF20[13] = {
|
||||
{symOpenclCompilerOptions, {"__OpenCL_compile_options", ""}, {aclCOMMENT, aclCOMMENT}},
|
||||
{symOpenclLinkerOptions, {"__OpenCL_linker_options", ""}, {aclCOMMENT, aclCOMMENT}},
|
||||
{symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclDLL}},
|
||||
{symISABinary, {"__OpenCL_", "_kernel"}, {aclCAL, aclLAST}},
|
||||
{symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclDLL}},
|
||||
{symAMDILHeader, {"__OpenCL_", "_header"}, {aclRODATA, aclLAST}},
|
||||
{symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclLAST}},
|
||||
{symAMDILText, {"__OpenCL_", "_amdil"}, {aclILTEXT, aclLAST}},
|
||||
{symAMDILFMeta, {"__OpenCL_", "_fmetadata"}, {aclRODATA, aclLAST}},
|
||||
{symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclDLL}},
|
||||
{symDebugilText, {"", ""}, {aclILDEBUG, aclLAST}},
|
||||
{symAsmText, {"", ""}, {aclLAST, aclASTEXT}},
|
||||
{symDLL, {"", ""}, {aclLAST, aclDLL}},
|
||||
}; // BIF20
|
||||
|
||||
|
||||
// Linear search of a symbol table.
//   symbols  - first row of the table
//   nSymbols - number of rows in the table
//   id       - symbol id to look for
// Returns a pointer to the first row whose id matches, or a null pointer when
// no row matches.
inline const oclBIFSymbolStruct* findBIFSymbolStruct(const oclBIFSymbolStruct* symbols,
                                                     size_t nSymbols, oclBIFSymbolID id) {
  const oclBIFSymbolStruct* const tableEnd = symbols + nSymbols;
  for (const oclBIFSymbolStruct* row = symbols; row != tableEnd; ++row) {
    if (row->id == id) {
      return row;
    }
  }
  return nullptr;
}
|
||||
|
||||
// Looks `id` up in the BIF30 table; returns the matching row or a null
// pointer when the table has no row for that id.
inline const oclBIFSymbolStruct* findBIF30SymStruct(oclBIFSymbolID id) {
  return findBIFSymbolStruct(BIF30, sizeof(BIF30) / sizeof(BIF30[0]), id);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // _CL_UTILS_BIF_SECTION_LABELS_HPP_
|
||||
@@ -1,383 +0,0 @@
|
||||
/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _CL_LIB_UTILS_0_8_H_
|
||||
#define _CL_LIB_UTILS_0_8_H_
|
||||
#include "acl.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iterator>
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include "library.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "utils/options.hpp"
|
||||
using namespace bif;
|
||||
|
||||
// Utility function to set a flag in option structure
|
||||
// of the aclDevCaps.
|
||||
void setFlag(aclDevCaps* elf, compDeviceCaps option);
|
||||
|
||||
// Utility function to flip a flag in option structure
|
||||
// of the aclDevCaps.
|
||||
void flipFlag(aclDevCaps* elf, compDeviceCaps option);
|
||||
|
||||
// Utility function to clear a flag in option structure
|
||||
// of the aclDevCaps.
|
||||
void clearFlag(aclDevCaps* elf, compDeviceCaps option);
|
||||
|
||||
// Utility function to check that a flag in option structure
|
||||
// of the aclDevCaps is set.
|
||||
bool checkFlag(aclDevCaps* elf, compDeviceCaps option);
|
||||
|
||||
// Utility function to initialize an ELF's device capabilities
|
||||
void initElfDeviceCaps(aclBinary* elf);
|
||||
|
||||
// Append the string to the aclCompiler log string.
|
||||
void appendLogToCL(aclCompiler* cl, const std::string& logStr);
|
||||
|
||||
const char* getDeviceName(const aclTargetInfo& target);
|
||||
|
||||
// Select the correct library from the target information.
|
||||
amd::LibrarySelector getLibraryType(const aclTargetInfo* target);
|
||||
|
||||
// get family_enum from the target information.
|
||||
unsigned getFamilyEnum(const aclTargetInfo* target);
|
||||
|
||||
// get chip_enum from the target information.
|
||||
unsigned getChipEnum(const aclTargetInfo* target);
|
||||
|
||||
// get isa type name (compute capability) from the target information.
|
||||
const std::string& getIsaTypeName(const aclTargetInfo* target);
|
||||
|
||||
// get isa type (compute capability) from the target information.
|
||||
int getIsaType(const aclTargetInfo* target);
|
||||
|
||||
// get Feature String for target.
|
||||
std::string getFeatureString(const aclTargetInfo& target, amd::option::Options* OptionsObj);
|
||||
|
||||
// Create a copy of an ELF and duplicate all sections/symbols
|
||||
aclBinary* createELFCopy(aclBinary* src);
|
||||
|
||||
// Create a BIF2.1 elf from a BIF 2.0 elf
|
||||
aclBinary* convertBIF20ToBIF21(aclBinary* src);
|
||||
|
||||
// Create a BIF3.0 elf from a BIF 2.0 elf
|
||||
aclBinary* convertBIF20ToBIF30(aclBinary* src);
|
||||
|
||||
// Create a BIF3.1 elf from a BIF 2.0 elf
|
||||
aclBinary* convertBIF20ToBIF31(aclBinary* src);
|
||||
|
||||
// Create a BIF2.0 elf from a BIF 2.1 elf
|
||||
aclBinary* convertBIF21ToBIF20(aclBinary* src);
|
||||
|
||||
// Create a BIF3.0 elf from a BIF 2.1 elf
|
||||
aclBinary* convertBIF21ToBIF30(aclBinary* src);
|
||||
|
||||
// Create a BIF3.1 elf from a BIF 2.1 elf
|
||||
aclBinary* convertBIF21ToBIF31(aclBinary* src);
|
||||
|
||||
// Create a BIF2.0 elf from a BIF 3.0 elf
|
||||
aclBinary* convertBIF30ToBIF20(aclBinary* src);
|
||||
|
||||
// Create a BIF2.1 elf from a BIF 3.0 elf
|
||||
aclBinary* convertBIF30ToBIF21(aclBinary* src);
|
||||
|
||||
// Create a BIF3.1 elf from a BIF 3.0 elf
|
||||
aclBinary* convertBIF30ToBIF31(aclBinary* src);
|
||||
|
||||
// Create a BIF2.0 elf from a BIF 3.1 elf
|
||||
aclBinary* convertBIF31ToBIF20(aclBinary* src);
|
||||
|
||||
// Create a BIF2.1 elf from a BIF 3.1 elf
|
||||
aclBinary* convertBIF31ToBIF21(aclBinary* src);
|
||||
|
||||
// Create a BIF3.0 elf from a BIF 3.1 elf
|
||||
aclBinary* convertBIF31ToBIF30(aclBinary* src);
|
||||
|
||||
// get a pointer to the aclBIF irrespective of the
|
||||
// binary version.
|
||||
aclBIF* aclutGetBIF(aclBinary*);
|
||||
|
||||
// Get a pointer to the aclOptions irrespective of
|
||||
// the binary version.
|
||||
aclOptions* aclutGetOptions(aclBinary*);
|
||||
|
||||
// Get a pointer to the aclBinaryOptions struct
|
||||
// irrespective of the binary version.
|
||||
aclBinaryOptions* aclutGetBinOpts(aclBinary*);
|
||||
|
||||
// Get a pointer to the target info struct
|
||||
// irrespective of the binary version.
|
||||
aclTargetInfo* aclutGetTargetInfo(aclBinary*);
|
||||
|
||||
// Get a pointer to the device caps
|
||||
// irrespective of the binary version.
|
||||
aclDevCaps* aclutGetCaps(aclBinary*);
|
||||
|
||||
// Copy two binary option structures irrespective
|
||||
// of the binary version and uses defaults when
|
||||
// things don't match up.
|
||||
void aclutCopyBinOpts(aclBinaryOptions* dst, const aclBinaryOptions* src, bool is64bit);
|
||||
|
||||
// Retrieve kernel statistics from binary
|
||||
// and insert to elf as symbol
|
||||
acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*);
|
||||
|
||||
// Returns target chip name.
|
||||
std::string aclutGetCodegenName(const aclTargetInfo& tgtInfo);
|
||||
|
||||
// Helper function that returns the
|
||||
// allocation function from the binary.
|
||||
AllocFunc aclutAlloc(const aclBinary* bin);
|
||||
|
||||
// Helper function that returns the
|
||||
// de-allocation function from the binary.
|
||||
FreeFunc aclutFree(const aclBinary* bin);
|
||||
|
||||
|
||||
// Helper function that returns the
|
||||
// allocation function from the compiler.
|
||||
AllocFunc aclutAlloc(const aclCompiler* bin);
|
||||
|
||||
// Helper function that returns the
|
||||
// de-allocation function from the compiler.
|
||||
FreeFunc aclutFree(const aclCompiler* bin);
|
||||
|
||||
// Helper function that returns the
|
||||
// allocation function from the compiler options.
|
||||
AllocFunc aclutAlloc(const aclCompilerOptions* bin);
|
||||
|
||||
// Helper function that returns the
|
||||
// de-allocation function from the compiler options.
|
||||
FreeFunc aclutFree(const aclCompilerOptions* bin);
|
||||
|
||||
// Splits a NUL-terminated C string into its whitespace-delimited tokens.
//   str - text to split; may be null.
// Returns the tokens in order of appearance. Runs of whitespace (spaces, tabs,
// newlines) act as a single delimiter, so no empty tokens are produced. A null
// `str` yields an empty vector instead of constructing std::string from a null
// pointer, which is undefined behaviour.
inline std::vector<std::string> splitSpaceSeparatedString(char* str) {
  if (str == nullptr) {
    return {};
  }
  std::istringstream input{std::string(str)};
  // Stream extraction of std::string skips leading whitespace and stops at the
  // next whitespace character, which is exactly the tokenisation needed here.
  return std::vector<std::string>(std::istream_iterator<std::string>(input),
                                  std::istream_iterator<std::string>());
}
|
||||
|
||||
// Helper function that returns OpenCL mangled kernel name.
|
||||
inline std::string aclutOpenclMangledKernelName(const std::string& kernel_name) {
|
||||
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclKernel);
|
||||
assert(sym && "symbol not found");
|
||||
return std::string("&") + sym->str[PRE] + kernel_name + sym->str[POST];
|
||||
}
|
||||
|
||||
// Helper function that returns OpenCL mangled kernel metadata symbol name.
|
||||
inline std::string aclutOpenclMangledKernelMetadataName(const std::string& kernel_name) {
|
||||
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
|
||||
assert(sym && "symbol not found");
|
||||
return sym->str[PRE] + aclutOpenclMangledKernelName(kernel_name) + sym->str[POST];
|
||||
}
|
||||
|
||||
#ifdef WITH_TARGET_HSAIL
|
||||
// Helper function that updates metadata for all the kernels in binary;
|
||||
// the updated attribute is the number of hidden kernel arguments.
|
||||
inline acl_error aclutUpdateMetadataWithHiddenKernargsNum(aclCompiler* cl, aclBinary* bin,
|
||||
uint32_t num) {
|
||||
if (num == MAX_HIDDEN_KERNARGS_NUM) {
|
||||
return ACL_SUCCESS;
|
||||
}
|
||||
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta);
|
||||
assert(sym && "symbol not found");
|
||||
aclSections secID = sym->sections[0];
|
||||
size_t kernelNamesSize = 0;
|
||||
acl_error error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize);
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
char* kernelNames = new char[kernelNamesSize];
|
||||
error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize);
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
delete[] kernelNames;
|
||||
return error_code;
|
||||
}
|
||||
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
|
||||
delete[] kernelNames;
|
||||
size_t roSize = 0;
|
||||
for (auto it = vKernels.begin(); it != vKernels.end(); ++it) {
|
||||
std::string symbol = aclutOpenclMangledKernelMetadataName(*it);
|
||||
void* roSec =
|
||||
const_cast<void*>(aclExtractSymbol(cl, bin, &roSize, secID, symbol.c_str(), &error_code));
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
if (!roSec || roSize == 0) {
|
||||
error_code = ACL_ELF_ERROR;
|
||||
return error_code;
|
||||
}
|
||||
aclMetadata* md = reinterpret_cast<aclMetadata*>(roSec);
|
||||
md->numHiddenKernelArgs = num;
|
||||
error_code = aclRemoveSymbol(cl, bin, secID, symbol.c_str());
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
error_code = aclInsertSymbol(cl, bin, md, roSize, secID, symbol.c_str());
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
return error_code;
|
||||
}
|
||||
}
|
||||
return error_code;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct _target_mappings_rec;
|
||||
typedef _target_mappings_rec TargetMapping;
|
||||
|
||||
// Returns the TargetMapping for the specific target device.
|
||||
const TargetMapping& getTargetMapping(const aclTargetInfo& target);
|
||||
|
||||
inline bool is64BitTarget(const aclTargetInfo& target) {
|
||||
return (target.arch_id == aclX64 || target.arch_id == aclAMDIL64 || target.arch_id == aclHSAIL64);
|
||||
}
|
||||
|
||||
inline bool isCpuTarget(const aclTargetInfo& target) {
|
||||
return (target.arch_id == aclX64 || target.arch_id == aclX86);
|
||||
}
|
||||
|
||||
inline bool isGpuTarget(const aclTargetInfo& target) {
|
||||
return (target.arch_id == aclAMDIL || target.arch_id == aclAMDIL64 ||
|
||||
target.arch_id == aclHSAIL || target.arch_id == aclHSAIL64);
|
||||
}
|
||||
|
||||
inline bool isAMDILTarget(const aclTargetInfo& target) {
|
||||
return (target.arch_id == aclAMDIL || target.arch_id == aclAMDIL64);
|
||||
}
|
||||
|
||||
inline bool isHSAILTarget(const aclTargetInfo& target) {
|
||||
return (target.arch_id == aclHSAIL || target.arch_id == aclHSAIL64);
|
||||
}
|
||||
|
||||
const std::string& getLegacyLibName();
|
||||
|
||||
inline bool isValidTarget(const aclTargetInfo& target) {
|
||||
return (target.arch_id && target.chip_id);
|
||||
}
|
||||
|
||||
bool isChipSupported(const aclTargetInfo& target);
|
||||
|
||||
// Ids used to index per-SC data. SC_AMDIL and SC_HSAIL deliberately share the
// value 0, so SC_LAST (the entry count) is 1.
enum scId {
  SC_AMDIL = 0,
  SC_HSAIL = 0,
  SC_LAST = 1,
};
|
||||
|
||||
// Allocates `size` bytes whose address is a multiple of `alignment`.
// Uses _aligned_malloc on Windows and posix_memalign elsewhere. Returns NULL
// when the allocation fails (on the posix_memalign path this includes an
// alignment that call rejects). Release the block with alignedFree().
inline void* alignedMalloc(size_t size, size_t alignment) {
#if defined(_WIN32)
  return ::_aligned_malloc(size, alignment);
#else
  void* block = NULL;
  const int rc = ::posix_memalign(&block, alignment, size);
  return (rc == 0) ? block : NULL;
#endif
}
|
||||
|
||||
// Releases a block obtained from alignedMalloc(): _aligned_free on Windows,
// free everywhere else.
inline void alignedFree(void* ptr) {
#if defined(_WIN32)
  ::_aligned_free(ptr);
#else
  std::free(ptr);
#endif
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
inline void convertLongAbsFilePathIfNeeded(std::string& filename) {
|
||||
if (filename.empty()) {
|
||||
return;
|
||||
}
|
||||
std::wstring ws(filename.begin(), filename.end());
|
||||
wchar_t abs_path[_MAX_ENV];
|
||||
_wfullpath(abs_path, ws.c_str(), _MAX_ENV);
|
||||
std::wstring ws_abs = std::wstring(abs_path);
|
||||
if (ws_abs.size() >= _MAX_PATH) {
|
||||
std::string s(ws_abs.begin(), ws_abs.end());
|
||||
filename = "\\\\?\\" + s;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reads a whole file into memory.
//   source_filename - path of the file; on Windows it is first passed through
//                     convertLongAbsFilePathIfNeeded()
//   size            - receives the number of bytes read; only written on success
// Returns a malloc()-allocated buffer holding the file contents followed by a
// terminating '\0' (release it with free()), or NULL when the file cannot be
// opened, its size cannot be determined, memory cannot be allocated, or the
// read comes up short.
inline char* readFile(std::string source_filename, size_t& size) {
#if defined(_WIN32)
  convertLongAbsFilePathIfNeeded(source_filename);
#endif
  FILE* fp = ::fopen(source_filename.c_str(), "rb");
  if (!fp) {
    return NULL;
  }

  // Obtain the file size. ftell() reports failure as -1, so keep the result
  // signed until it has been checked.
  long fileLength = -1;
  if (::fseek(fp, 0, SEEK_END) == 0) {
    fileLength = ::ftell(fp);
  }
  if (fileLength < 0 || ::fseek(fp, 0, SEEK_SET) != 0) {
    ::fclose(fp);
    return NULL;
  }
  const size_t length = static_cast<size_t>(fileLength);

  // One extra byte for the terminating NUL.
  char* ptr = reinterpret_cast<char*>(::malloc(length + 1));
  if (ptr == NULL) {
    ::fclose(fp);
    return NULL;
  }
  if (length != ::fread(ptr, 1, length, fp)) {
    ::free(ptr);
    ::fclose(fp);
    return NULL;
  }
  ptr[length] = '\0';
  size = length;
  ::fclose(fp);
  return ptr;
}
|
||||
|
||||
// Writes `size` bytes from `source` to the file `source_filename`, replacing
// any existing content. On Windows the name is first passed through
// convertLongAbsFilePathIfNeeded().
// NOTE: the result follows the process exit-code convention, not the usual
// bool convention: it is EXIT_SUCCESS (converts to false) when the data was
// written and EXIT_FAILURE (converts to true) when the file could not be
// opened or fwrite() reported zero items written. Because the data is written
// as a single item of `size` bytes, size == 0 is reported as a failure too.
inline bool writeFile(std::string source_filename, const char* source, size_t size) {
#if defined(_WIN32)
  convertLongAbsFilePathIfNeeded(source_filename);
#endif
  bool status = EXIT_FAILURE;
  FILE* out = ::fopen(source_filename.c_str(), "wb");
  if (out) {
    if (::fwrite(source, size, 1, out)) {
      status = EXIT_SUCCESS;
    }
    ::fclose(out);
  }
  return status;
}
|
||||
|
||||
#if !defined(BCMAG)
#define BCMAG "BC"
#define SBCMAG 2
#endif
// Returns true when `p` is non-null and its first SBCMAG characters equal the
// bit code signature BCMAG.
// TODO: Move it into Compiler Lib back in new 1_0 API
inline static bool isBcMagic(const char* p) {
  return p != NULL && strncmp(p, BCMAG, SBCMAG) == 0;
}
|
||||
|
||||
void dump(aclBinary* bin);
|
||||
|
||||
#endif // _CL_LIB_UTILS_0_8_H_
|
||||
@@ -593,7 +593,7 @@ int getOptionDesc(std::string& options, size_t StartPos, bool IsShortForm, Optio
|
||||
}
|
||||
|
||||
bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value, bool IsPrefixOption,
|
||||
bool IsOffFlag, bool IsLC) {
|
||||
bool IsOffFlag) {
|
||||
OptionVariables* ovars = Opts.oVariables;
|
||||
OptionDescriptor* od = &OptDescTable[OptDescTableIx];
|
||||
|
||||
@@ -733,9 +733,7 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value,
|
||||
|
||||
Opts.clcOptions.append(" -D__FAST_RELAXED_MATH__=1");
|
||||
Opts.clangOptions.push_back("-D__FAST_RELAXED_MATH__=1");
|
||||
if (IsLC) { // w/a for SWDEV-116690
|
||||
Opts.clangOptions.push_back("-cl-fast-relaxed-math");
|
||||
}
|
||||
Opts.clangOptions.push_back("-cl-fast-relaxed-math");
|
||||
|
||||
// fall-through to handle UnsafeMathOpt
|
||||
case OID_UnsafeMathOpt:
|
||||
@@ -861,10 +859,8 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value,
|
||||
break;
|
||||
|
||||
case OID_OptUseNative:
|
||||
if (IsLC) {
|
||||
Opts.llvmOptions.append(" -mllvm -amdgpu-use-native=");
|
||||
Opts.llvmOptions.append(sval);
|
||||
}
|
||||
Opts.llvmOptions.append(" -mllvm -amdgpu-use-native=");
|
||||
Opts.llvmOptions.append(sval);
|
||||
break;
|
||||
|
||||
case OID_WFComma:
|
||||
@@ -886,9 +882,7 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value,
|
||||
Opts.clangOptions.push_back(sval);
|
||||
} else if (((OptionIdentifier)OptDescTableIx) == OID_WBComma) {
|
||||
Opts.llvmOptions.append(" ");
|
||||
if (IsLC) {
|
||||
Opts.llvmOptions.append("-mllvm ");
|
||||
}
|
||||
Opts.llvmOptions.append("-mllvm ");
|
||||
Opts.llvmOptions.append(sval);
|
||||
} else if (((OptionIdentifier)OptDescTableIx) == OID_WHComma) {
|
||||
Opts.finalizerOptions.push_back(sval);
|
||||
@@ -953,7 +947,7 @@ namespace amd {
|
||||
|
||||
namespace option {
|
||||
|
||||
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isLC) {
|
||||
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly) {
|
||||
Opts.origOptionStr = options;
|
||||
OptionVariables* ovars = Opts.oVariables;
|
||||
OptionDescriptor* od = OptDescTable;
|
||||
@@ -1084,8 +1078,7 @@ bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, boo
|
||||
if (!(OPTION_info(od) & OA_RUNTIME)) continue;
|
||||
}
|
||||
|
||||
if (!processOption(option_ndx, Opts, value, isPrefix_option, (isPrefix_mno || isPrefix_fno),
|
||||
isLC)) {
|
||||
if (!processOption(option_ndx, Opts, value, isPrefix_option, (isPrefix_mno || isPrefix_fno))) {
|
||||
// Keep the optionsLog set in processOption().
|
||||
std::string tmpStr("Invalid option: ");
|
||||
tmpStr += options.substr(bpos, (pos == std::string::npos) ? pos : pos - bpos);
|
||||
|
||||
@@ -323,9 +323,9 @@ class Options {
|
||||
OptionDescriptor* getOptDescTable();
|
||||
bool init();
|
||||
bool teardown();
|
||||
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isLC);
|
||||
inline bool parseLinkOptions(std::string& options, Options& Opts, bool isLC) {
|
||||
return parseAllOptions(options, Opts, true /*linkOptsOnly*/, isLC);
|
||||
bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly);
|
||||
inline bool parseLinkOptions(std::string& options, Options& Opts) {
|
||||
return parseAllOptions(options, Opts, true /*linkOptsOnly*/);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "os/os.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "comgrctx.hpp"
|
||||
@@ -129,4 +128,3 @@ bool Comgr::LoadLib(bool is_versioned) {
|
||||
}
|
||||
|
||||
} // namespace amd
|
||||
#endif
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "top.hpp"
|
||||
#include "amd_comgr/amd_comgr.h"
|
||||
|
||||
@@ -446,4 +445,3 @@ class Comgr : public amd::AllStatic {
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
#endif
|
||||
|
||||
@@ -51,12 +51,6 @@ extern void PalDeviceUnload();
|
||||
#include "blowfish/oclcrypt.hpp"
|
||||
#endif
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "utils/libUtils.h"
|
||||
#include "spirv/spirvUtils.h"
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
@@ -641,10 +635,6 @@ bool Device::BlitProgram::create(amd::Device* device, const std::string& extraKe
|
||||
|
||||
// Build all kernels
|
||||
std::string opt = "-cl-internal-kernel ";
|
||||
if (!device->settings().useLightning_) {
|
||||
opt += "-Wf,--force_disable_spir ";
|
||||
}
|
||||
|
||||
if (!extraOptions.empty()) {
|
||||
opt += extraOptions;
|
||||
}
|
||||
@@ -786,29 +776,10 @@ Device::~Device() {
|
||||
}
|
||||
|
||||
bool Device::ValidateComgr() {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// Check if Lightning compiler was requested
|
||||
if (settings_->useLightning_) {
|
||||
constexpr bool kComgrVersioned = false;
|
||||
std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned);
|
||||
// Use Lightning only if it's available
|
||||
settings_->useLightning_ = amd::Comgr::IsReady();
|
||||
return settings_->useLightning_;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Device::ValidateHsail() {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
// Check if HSAIL compiler was requested
|
||||
if (!settings_->useLightning_) {
|
||||
std::call_once(amd::Hsail::initialized, amd::Hsail::LoadLib);
|
||||
// Use Hsail only if it's available
|
||||
return amd::Hsail::IsReady();
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
constexpr bool kComgrVersioned = false;
|
||||
std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned);
|
||||
return amd::Comgr::IsReady();
|
||||
}
|
||||
|
||||
size_t GetMaxStackSize(const std::string& procName) {
|
||||
@@ -1272,43 +1243,6 @@ bool ClBinary::setElfTarget() {
|
||||
return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM);
|
||||
}
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
std::string ClBinary::getBIFSymbol(unsigned int symbolID) const {
|
||||
size_t nSymbols = 0;
|
||||
// Due to PRE & POST defines in bif_section_labels.hpp conflict with
|
||||
// PRE & POST struct members in sp3-si-chip-registers.h
|
||||
// unable to include bif_section_labels.hpp in device.hpp
|
||||
//! @todo: resolve conflict by renaming defines,
|
||||
// then include bif_section_labels.hpp in device.hpp &
|
||||
// use oclBIFSymbolID instead of unsigned int as a parameter
|
||||
const oclBIFSymbolID symID = static_cast<oclBIFSymbolID>(symbolID);
|
||||
switch (format_) {
|
||||
case BIF_VERSION2: {
|
||||
nSymbols = sizeof(BIF20) / sizeof(oclBIFSymbolStruct);
|
||||
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID);
|
||||
assert(symb && "BIF20 symbol with symbolID not found");
|
||||
if (symb) {
|
||||
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BIF_VERSION3: {
|
||||
nSymbols = sizeof(BIF30) / sizeof(oclBIFSymbolStruct);
|
||||
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID);
|
||||
assert(symb && "BIF30 symbol with symbolID not found");
|
||||
if (symb) {
|
||||
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(0 && "unexpected BIF type");
|
||||
return "";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
#endif
|
||||
|
||||
void ClBinary::init(amd::option::Options* optionsObj) {
|
||||
// option has higher priority than environment variable.
|
||||
if ((flags_ & BinarySourceMask) != BinaryRemoveSource) {
|
||||
@@ -1588,52 +1522,6 @@ bool ClBinary::loadLlvmBinary(std::string& llvmBinary,
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Loads the compile options stored in the COMMENT section of the input ELF.
//! \p compileOptions is always cleared first. Returns true when the options symbol was
//! found (even if it is empty); returns false when the lookup fails or when the runtime is
//! built without WITH_COMPILER_LIB.
bool ClBinary::loadCompileOptions(std::string& compileOptions) const {
  char* symbolData = nullptr;
  size_t symbolSize;
  compileOptions.clear();
#if defined(WITH_COMPILER_LIB)
  const std::string symbolName = getBIFSymbol(symOpenclCompilerOptions);
  const bool found =
      elfIn_->getSymbol(amd::Elf::COMMENT, symbolName.c_str(), &symbolData, &symbolSize);
  if (found) {
    // An empty symbol still counts as a successful load.
    if (symbolSize > 0) {
      compileOptions.append(symbolData, symbolSize);
    }
    return true;
  }
#endif
  return false;
}
|
||||
|
||||
//! Loads the link options stored in the COMMENT section of the input ELF.
//! \p linkOptions is always cleared first. Returns true when the options symbol was found
//! (even if it is empty); returns false when the lookup fails or when the runtime is built
//! without WITH_COMPILER_LIB.
bool ClBinary::loadLinkOptions(std::string& linkOptions) const {
  char* symbolData = nullptr;
  size_t symbolSize;
  linkOptions.clear();
#if defined(WITH_COMPILER_LIB)
  const std::string symbolName = getBIFSymbol(symOpenclLinkerOptions);
  const bool found =
      elfIn_->getSymbol(amd::Elf::COMMENT, symbolName.c_str(), &symbolData, &symbolSize);
  if (found) {
    // An empty symbol still counts as a successful load.
    if (symbolSize > 0) {
      linkOptions.append(symbolData, symbolSize);
    }
    return true;
  }
#endif
  return false;
}
|
||||
|
||||
//! Stores \p compileOptions as the compiler-options symbol in the COMMENT section of the
//! output ELF. Does nothing when the runtime is built without WITH_COMPILER_LIB.
void ClBinary::storeCompileOptions(const std::string& compileOptions) {
#if defined(WITH_COMPILER_LIB)
  const std::string symbolName = getBIFSymbol(symOpenclCompilerOptions);
  elfOut()->addSymbol(amd::Elf::COMMENT, symbolName.c_str(), compileOptions.c_str(),
                      compileOptions.length());
#endif
}
|
||||
|
||||
//! Stores \p linkOptions as the linker-options symbol in the COMMENT section of the output
//! ELF. Does nothing when the runtime is built without WITH_COMPILER_LIB.
void ClBinary::storeLinkOptions(const std::string& linkOptions) {
#if defined(WITH_COMPILER_LIB)
  const std::string symbolName = getBIFSymbol(symOpenclLinkerOptions);
  elfOut()->addSymbol(amd::Elf::COMMENT, symbolName.c_str(), linkOptions.c_str(),
                      linkOptions.length());
#endif
}
|
||||
|
||||
bool ClBinary::isSPIR() const {
|
||||
char* section = nullptr;
|
||||
size_t sz = 0;
|
||||
|
||||
@@ -34,9 +34,6 @@
|
||||
#include "devprogram.hpp"
|
||||
#include "devkernel.hpp"
|
||||
#include "amdocl/cl_profile_amd.h"
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "hsailctx.hpp"
|
||||
#endif
|
||||
#include "devsignal.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
@@ -692,8 +689,6 @@ class Settings : public amd::HeapObject {
|
||||
// that replaces generic OS allocation routines
|
||||
uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format
|
||||
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
|
||||
uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device
|
||||
uint useLightning_ : 1; //!< Enable LC path for this device
|
||||
uint enableWgpMode_ : 1; //!< Enable WGP mode for this device
|
||||
uint enableWave32Mode_ : 1; //!< Enable Wave32 mode for this device
|
||||
uint lcWavefrontSize64_ : 1; //!< Enable Wave64 mode for this device
|
||||
@@ -705,7 +700,7 @@ class Settings : public amd::HeapObject {
|
||||
uint gwsInitSupported_ : 1; //!< Check if GWS is supported on this machine.
|
||||
uint kernel_arg_opt_ : 1; //!< Enables kernel arg optimization for blit kernels
|
||||
uint kernel_arg_impl_ : 2; //!< Kernel argument implementation
|
||||
uint reserved_ : 12;
|
||||
uint reserved_ : 14;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
@@ -968,7 +963,7 @@ class Memory : public amd::HeapObject {
|
||||
HostMemoryRegistered = 0x00000010, //!< Host memory was registered
|
||||
MemoryCpuUncached = 0x00000020, //!< Memory is uncached on CPU access(slow read)
|
||||
AllowedPeerAccess = 0x00000040, //!< Memory can be accessed from peer
|
||||
PersistentMap = 0x00000080 //!< Map Peristent memory
|
||||
PersistentMap = 0x00000080 //!< Map Persistent memory
|
||||
};
|
||||
uint flags_; //!< Memory object flags
|
||||
|
||||
@@ -1086,22 +1081,6 @@ class ClBinary : public amd::HeapObject {
|
||||
amd::Elf::ElfSections& elfSectionType //!< LLVMIR binary is in SPIR format
|
||||
) const;
|
||||
|
||||
//! Loads compile options from OCL binary file
|
||||
bool loadCompileOptions(std::string& compileOptions //!< return the compile options loaded
|
||||
) const;
|
||||
|
||||
//! Loads link options from OCL binary file
|
||||
bool loadLinkOptions(std::string& linkOptions //!< return the link options loaded
|
||||
) const;
|
||||
|
||||
//! Store compile options into OCL binary file
|
||||
void storeCompileOptions(const std::string& compileOptions //!< the compile options to be stored
|
||||
);
|
||||
|
||||
//! Store link options into OCL binary file
|
||||
void storeLinkOptions(const std::string& linkOptions //!< the link options to be stored
|
||||
);
|
||||
|
||||
//! Check if the binary is recompilable
|
||||
bool isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform);
|
||||
|
||||
@@ -1165,12 +1144,6 @@ class ClBinary : public amd::HeapObject {
|
||||
//! Returns TRUE if binary file was allocated
|
||||
bool isBinaryAllocated() const { return (flags_ & BinaryAllocated) != 0; }
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! Returns BIF symbol name by symbolID,
|
||||
//! returns empty string if not found or if BIF version is unsupported
|
||||
std::string getBIFSymbol(unsigned int symbolID) const;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
const amd::Device& dev_; //!< Device object
|
||||
|
||||
@@ -1377,10 +1350,7 @@ class VirtualDevice : public amd::HeapObject {
|
||||
mutable std::atomic<uint64_t> queued_async_handlers_ = 0; //!< Outstanding HSA async handlers
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
extern bool getValueFromIsaMeta(const std::string& isa, const char* key, std::string& retValue);
|
||||
#endif
|
||||
|
||||
} // namespace amd::device
|
||||
|
||||
namespace amd {
|
||||
@@ -1615,9 +1585,6 @@ class Isa {
|
||||
*/
|
||||
class Device : public RuntimeObject {
|
||||
protected:
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
typedef aclCompiler Compiler;
|
||||
#endif
|
||||
|
||||
public:
|
||||
// The structures below for MGPU launch match the device library format
|
||||
@@ -1692,11 +1659,6 @@ class Device : public RuntimeObject {
|
||||
);
|
||||
};
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
virtual Compiler* compiler() const = 0;
|
||||
virtual Compiler* binCompiler() const { return compiler(); }
|
||||
#endif
|
||||
|
||||
Device();
|
||||
virtual ~Device();
|
||||
|
||||
@@ -2115,9 +2077,6 @@ class Device : public RuntimeObject {
|
||||
//! Checks if OCL runtime can use code object manager for compilation
|
||||
bool ValidateComgr();
|
||||
|
||||
//! Checks if OCL runtime can use hsail for compilation
|
||||
bool ValidateHsail();
|
||||
|
||||
bool IpcCreate(void* dev_ptr, size_t* mem_size, char* handle, size_t* mem_offset) const;
|
||||
|
||||
bool IpcAttach(const char* handle, size_t mem_size, size_t mem_offset, unsigned int flags,
|
||||
|
||||
@@ -25,20 +25,12 @@
|
||||
#include "devkernel.hpp"
|
||||
#include "utils/macros.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "utils/libUtils.h"
|
||||
#endif
|
||||
#include "comgrctx.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "hsailctx.hpp"
|
||||
#endif
|
||||
|
||||
namespace amd::device {
|
||||
|
||||
// ================================================================================================
|
||||
@@ -51,8 +43,6 @@ static constexpr clk_value_type_t ClkValueMapType[6][6] = {
|
||||
{T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16},
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
|
||||
// ================================================================================================
|
||||
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
|
||||
std::string* str) {
|
||||
@@ -593,7 +583,6 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
|
||||
|
||||
return status;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
Kernel::Kernel(const amd::Device& dev, const std::string& name, const Program& prog)
|
||||
@@ -674,15 +663,6 @@ bool Kernel::createSignature(const parameters_t& params, uint32_t numParameters,
|
||||
// ================================================================================================
|
||||
Kernel::~Kernel() { delete signature_; }
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
std::string Kernel::openclMangledName(const std::string& name) {
|
||||
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
|
||||
assert(bifSym && "symbol not found");
|
||||
return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST];
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
amd::NDRange& lclWorkSize) const {
|
||||
@@ -772,300 +752,6 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* isHidden) {
|
||||
if (argInfo->argStr[0] == '_' && argInfo->argStr[1] == '.') {
|
||||
*isHidden = true;
|
||||
if (strcmp(&argInfo->argStr[2], "global_offset_0") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenGlobalOffsetX;
|
||||
} else if (strcmp(&argInfo->argStr[2], "global_offset_1") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenGlobalOffsetY;
|
||||
} else if (strcmp(&argInfo->argStr[2], "global_offset_2") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenGlobalOffsetZ;
|
||||
} else if (strcmp(&argInfo->argStr[2], "printf_buffer") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenPrintfBuffer;
|
||||
} else if (strcmp(&argInfo->argStr[2], "hostcall_buffer") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenHostcallBuffer;
|
||||
} else if (strcmp(&argInfo->argStr[2], "vqueue_pointer") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenDefaultQueue;
|
||||
} else if (strcmp(&argInfo->argStr[2], "aqlwrap_pointer") == 0) {
|
||||
return amd::KernelParameterDescriptor::HiddenCompletionAction;
|
||||
}
|
||||
return amd::KernelParameterDescriptor::HiddenNone;
|
||||
}
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER:
|
||||
return amd::KernelParameterDescriptor::MemoryObject;
|
||||
case ARG_TYPE_QUEUE:
|
||||
return amd::KernelParameterDescriptor::QueueObject;
|
||||
case ARG_TYPE_VALUE:
|
||||
return (argInfo->arg.value.data == DATATYPE_struct)
|
||||
? amd::KernelParameterDescriptor::ReferenceObject
|
||||
: amd::KernelParameterDescriptor::ValueObject;
|
||||
case ARG_TYPE_IMAGE:
|
||||
return amd::KernelParameterDescriptor::ImageObject;
|
||||
case ARG_TYPE_SAMPLER:
|
||||
return amd::KernelParameterDescriptor::SamplerObject;
|
||||
case ARG_TYPE_ERROR:
|
||||
default:
|
||||
return amd::KernelParameterDescriptor::HiddenNone;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! Translates a compiler-reported argument into a clk_value_type_t.
//! Queues, pointers/images and samplers map to T_QUEUE, T_POINTER and T_SAMPLER. For value
//! arguments the scalar data type picks a row of ClkValueMapType and \p size (the argument
//! size in bytes) divided by the element size picks the vector width column; widths other
//! than 1, 2, 3, 4, 8 or 16 and all unsupported data types yield T_VOID.
static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t size = 0) {
  switch (argInfo->type) {
    case ARG_TYPE_QUEUE:
      return T_QUEUE;
    case ARG_TYPE_POINTER:
    case ARG_TYPE_IMAGE:
      return T_POINTER;
    case ARG_TYPE_SAMPLER:
      return T_SAMPLER;
    case ARG_TYPE_VALUE:
      break;  // Resolved below from the element type and count
    default:
      return T_VOID;
  }

  uint row = 0;          // Row of ClkValueMapType for the scalar type
  size_t elemBytes = 1;  // Size of one element in bytes
  switch (argInfo->arg.value.data) {
    case DATATYPE_i8:
    case DATATYPE_u8:
      row = 0;
      elemBytes = 1;
      break;
    case DATATYPE_i16:
    case DATATYPE_u16:
      row = 1;
      elemBytes = 2;
      break;
    case DATATYPE_i32:
    case DATATYPE_u32:
      row = 2;
      elemBytes = 4;
      break;
    case DATATYPE_i64:
    case DATATYPE_u64:
      row = 3;
      elemBytes = 8;
      break;
    case DATATYPE_f16:
      // f16 shares the table row used for f32
      row = 4;
      elemBytes = 2;
      break;
    case DATATYPE_f32:
      row = 4;
      elemBytes = 4;
      break;
    case DATATYPE_f64:
      row = 5;
      elemBytes = 8;
      break;
    case DATATYPE_struct:
    case DATATYPE_opaque:
    case DATATYPE_ERROR:
    default:
      return T_VOID;
  }

  const uint lanes = static_cast<uint>(size / elemBytes);
  static constexpr uint kVectorWidths[6] = {1, 2, 3, 4, 8, 16};
  for (uint column = 0; column < 6; ++column) {
    if (kVectorWidths[column] == lanes) {
      return ClkValueMapType[row][column];
    }
  }
  return T_VOID;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER:
|
||||
return sizeof(void*);
|
||||
case ARG_TYPE_VALUE:
|
||||
switch (argInfo->arg.value.data) {
|
||||
case DATATYPE_i8:
|
||||
case DATATYPE_u8:
|
||||
return 1;
|
||||
case DATATYPE_u16:
|
||||
case DATATYPE_i16:
|
||||
case DATATYPE_f16:
|
||||
return 2;
|
||||
case DATATYPE_u32:
|
||||
case DATATYPE_i32:
|
||||
case DATATYPE_f32:
|
||||
return 4;
|
||||
case DATATYPE_i64:
|
||||
case DATATYPE_u64:
|
||||
case DATATYPE_f64:
|
||||
return 8;
|
||||
case DATATYPE_struct:
|
||||
return 128;
|
||||
case DATATYPE_ERROR:
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
case ARG_TYPE_IMAGE:
|
||||
return sizeof(cl_mem);
|
||||
case ARG_TYPE_SAMPLER:
|
||||
return sizeof(cl_sampler);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
|
||||
if (argInfo->type == ARG_TYPE_POINTER) {
|
||||
return argInfo->arg.pointer.align;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
|
||||
if (argInfo->type == ARG_TYPE_POINTER) {
|
||||
return (argInfo->arg.pointer.type == ACCESS_TYPE_RO) ? true : false;
|
||||
} else if (argInfo->type == ARG_TYPE_IMAGE) {
|
||||
return (argInfo->arg.image.type == ACCESS_TYPE_RO) ? true : false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
inline static int GetArgSizeOCL(const aclArgData* argInfo) {
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER:
|
||||
return sizeof(void*);
|
||||
case ARG_TYPE_VALUE:
|
||||
switch (argInfo->arg.value.data) {
|
||||
case DATATYPE_i8:
|
||||
case DATATYPE_u8:
|
||||
case DATATYPE_struct:
|
||||
return 1 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_u16:
|
||||
case DATATYPE_i16:
|
||||
case DATATYPE_f16:
|
||||
return 2 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_u32:
|
||||
case DATATYPE_i32:
|
||||
case DATATYPE_f32:
|
||||
return 4 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_i64:
|
||||
case DATATYPE_u64:
|
||||
case DATATYPE_f64:
|
||||
return 8 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_ERROR:
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
case ARG_TYPE_IMAGE:
|
||||
case ARG_TYPE_SAMPLER:
|
||||
case ARG_TYPE_QUEUE:
|
||||
return sizeof(void*);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! Returns the OpenCL address qualifier of a kernel argument. Images and queues are
//! global; pointers follow their memory type (an unsupported memory type is logged and
//! reported as private); all remaining argument types are private.
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData* argInfo) {
  const auto argType = argInfo->type;
  if ((argType == ARG_TYPE_IMAGE) || (argType == ARG_TYPE_QUEUE)) {
    return CL_KERNEL_ARG_ADDRESS_GLOBAL;
  }
  if (argType != ARG_TYPE_POINTER) {
    // default for all other cases
    return CL_KERNEL_ARG_ADDRESS_PRIVATE;
  }

  switch (argInfo->arg.pointer.memory) {
    case PTR_MT_CONSTANT:
    case PTR_MT_CONSTANT_EMU:
    case PTR_MT_UAV_CONSTANT:
      return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    case PTR_MT_GLOBAL:
    case PTR_MT_UAV:
    case PTR_MT_SCRATCH_EMU:
      return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    case PTR_MT_LDS:
    case PTR_MT_LDS_EMU:
      return CL_KERNEL_ARG_ADDRESS_LOCAL;
    case PTR_MT_ERROR:
    default:
      LogError("Unsupported address type");
      return CL_KERNEL_ARG_ADDRESS_PRIVATE;
  }
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! Returns the OpenCL access qualifier of a kernel argument. Only images carry one:
//! read-only, write-only, or read-write for any other image access type. Non-image
//! arguments report CL_KERNEL_ARG_ACCESS_NONE.
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgData* argInfo) {
  if (argInfo->type != ARG_TYPE_IMAGE) {
    return CL_KERNEL_ARG_ACCESS_NONE;
  }

  const auto imageAccess = argInfo->arg.image.type;
  if (imageAccess == ACCESS_TYPE_RO) {
    return CL_KERNEL_ARG_ACCESS_READ_ONLY;
  }
  if (imageAccess == ACCESS_TYPE_WO) {
    return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
  }
  return CL_KERNEL_ARG_ACCESS_READ_WRITE;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! Returns the OpenCL type qualifier mask of a kernel argument. Only pointers carry
//! qualifiers: volatile, restrict and pipe follow the pointer flags, and const is set when
//! the argument is marked const or the pointer targets one of the constant memory types.
//! Non-pointer arguments report CL_KERNEL_ARG_TYPE_NONE.
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* argInfo) {
  cl_kernel_arg_type_qualifier qualifiers = CL_KERNEL_ARG_TYPE_NONE;
  if (argInfo->type != ARG_TYPE_POINTER) {
    return qualifiers;
  }

  if (argInfo->arg.pointer.isVolatile) {
    qualifiers |= CL_KERNEL_ARG_TYPE_VOLATILE;
  }
  if (argInfo->arg.pointer.isRestrict) {
    qualifiers |= CL_KERNEL_ARG_TYPE_RESTRICT;
  }
  if (argInfo->arg.pointer.isPipe) {
    qualifiers |= CL_KERNEL_ARG_TYPE_PIPE;
  }

  const auto memoryType = argInfo->arg.pointer.memory;
  const bool inConstantMemory = (memoryType == PTR_MT_CONSTANT) ||
                                (memoryType == PTR_MT_UAV_CONSTANT) ||
                                (memoryType == PTR_MT_CONSTANT_EMU);
  if (argInfo->isConst || inConstantMemory) {
    qualifiers |= CL_KERNEL_ARG_TYPE_CONST;
  }
  return qualifiers;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Kernel::GetAttrCodePropMetadata() {
|
||||
amd_comgr_metadata_node_t kernelMetaNode;
|
||||
if (!prog().getKernelMetadata(name(), &kernelMetaNode)) {
|
||||
@@ -1313,89 +999,8 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
|
||||
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
|
||||
createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) {
|
||||
// Iterate through the arguments and insert into parameterList
|
||||
device::Kernel::parameters_t params;
|
||||
device::Kernel::parameters_t hiddenParams;
|
||||
amd::KernelParameterDescriptor desc;
|
||||
size_t offset = 0;
|
||||
size_t offsetStruct = argBufferSize;
|
||||
|
||||
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
|
||||
size_t size = GetArgSizeOCL(aclArg);
|
||||
size_t alignment = GetArgAlignmentOCL(aclArg);
|
||||
bool isHidden = false;
|
||||
desc.info_.oclObject_ = GetOclArgumentTypeOCL(aclArg, &isHidden);
|
||||
|
||||
// Allocate the hidden arguments, but abstraction layer will skip them
|
||||
if (isHidden) {
|
||||
offset = amd::alignUp(offset, alignment);
|
||||
desc.offset_ = offset;
|
||||
desc.size_ = size;
|
||||
offset += size;
|
||||
hiddenParams.push_back(desc);
|
||||
continue;
|
||||
}
|
||||
|
||||
desc.name_ = aclArg->argStr;
|
||||
desc.typeName_ = aclArg->typeStr;
|
||||
desc.type_ = GetOclTypeOCL(aclArg, size);
|
||||
|
||||
desc.addressQualifier_ = GetOclAddrQualOCL(aclArg);
|
||||
desc.accessQualifier_ = GetOclAccessQualOCL(aclArg);
|
||||
desc.typeQualifier_ = GetOclTypeQualOCL(aclArg);
|
||||
desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(aclArg);
|
||||
desc.size_ = size;
|
||||
|
||||
// Check if HSAIL expects data by reference and allocate it behind
|
||||
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) {
|
||||
desc.offset_ = offsetStruct;
|
||||
// Align the offset reference
|
||||
offset = amd::alignUp(offset, sizeof(size_t));
|
||||
patchReferences_.insert({desc.offset_, offset});
|
||||
offsetStruct += size;
|
||||
// Adjust the offset of arguments
|
||||
offset += sizeof(size_t);
|
||||
} else {
|
||||
// These objects have forced data size to uint64_t
|
||||
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
|
||||
offset = amd::alignUp(offset, sizeof(uint64_t));
|
||||
desc.offset_ = offset;
|
||||
offset += sizeof(uint64_t);
|
||||
} else {
|
||||
offset = amd::alignUp(offset, alignment);
|
||||
desc.offset_ = offset;
|
||||
offset += size;
|
||||
}
|
||||
}
|
||||
// Update read only flag
|
||||
desc.info_.readOnly_ = GetReadOnlyOCL(aclArg);
|
||||
|
||||
params.push_back(desc);
|
||||
|
||||
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) {
|
||||
flags_.imageEna_ = true;
|
||||
if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) {
|
||||
flags_.imageWriteEna_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Save the number of OCL arguments
|
||||
uint32_t numParams = params.size();
|
||||
// Append the hidden arguments to the OCL arguments
|
||||
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
|
||||
createSignature(params, numParams, amd::KernelSignature::ABIVersion_1);
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
size_t HIPPrintfInfoID = 0;
|
||||
for (auto str : printfInfoStrings) {
|
||||
@@ -1501,76 +1106,4 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
// ]
|
||||
}
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) {
|
||||
uint index = 0, HIPIndex = 0;
|
||||
for (; aclPrintf->struct_size != 0; aclPrintf++) {
|
||||
if (amd::IS_HIP) {
|
||||
index = HIPIndex++;
|
||||
printf_.resize(HIPIndex);
|
||||
} else {
|
||||
index = aclPrintf->ID;
|
||||
if (printf_.size() <= index) {
|
||||
printf_.resize(index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
PrintfInfo& info = printf_[index];
|
||||
const std::string& pfmt = aclPrintf->fmtStr;
|
||||
bool need_nl = true;
|
||||
for (size_t pos = 0; pos < pfmt.size(); ++pos) {
|
||||
char symbol = pfmt[pos];
|
||||
need_nl = true;
|
||||
if (symbol == '\\') {
|
||||
switch (pfmt[pos + 1]) {
|
||||
case 'a':
|
||||
pos++;
|
||||
symbol = '\a';
|
||||
break;
|
||||
case 'b':
|
||||
pos++;
|
||||
symbol = '\b';
|
||||
break;
|
||||
case 'f':
|
||||
pos++;
|
||||
symbol = '\f';
|
||||
break;
|
||||
case 'n':
|
||||
pos++;
|
||||
symbol = '\n';
|
||||
need_nl = false;
|
||||
break;
|
||||
case 'r':
|
||||
pos++;
|
||||
symbol = '\r';
|
||||
break;
|
||||
case 'v':
|
||||
pos++;
|
||||
symbol = '\v';
|
||||
break;
|
||||
case '7':
|
||||
if (pfmt[pos + 2] == '2') {
|
||||
pos += 2;
|
||||
symbol = '\72';
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
info.fmtString_.push_back(symbol);
|
||||
}
|
||||
if (need_nl && !amd::IS_HIP) {
|
||||
info.fmtString_ += "\n";
|
||||
}
|
||||
uint32_t* tmp_ptr = const_cast<uint32_t*>(aclPrintf->argSizes);
|
||||
for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) {
|
||||
info.arguments_.push_back(*tmp_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
} // namespace amd::device
|
||||
|
||||
@@ -20,9 +20,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "aclTypes.h"
|
||||
#endif
|
||||
#include "platform/context.hpp"
|
||||
#include "platform/object.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
@@ -98,7 +95,6 @@ struct KernelParameterDescriptor {
|
||||
};
|
||||
} // namespace amd
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Runtime handle structure for device enqueue
|
||||
struct RuntimeHandle {
|
||||
uint64_t kernel_handle; //!< Pointer to amd_kernel_code_s or kernel_descriptor_t
|
||||
@@ -174,8 +170,6 @@ enum class KernelField : uint8_t {
|
||||
MaxSize = 18
|
||||
};
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
@@ -290,10 +284,6 @@ class Kernel : public amd::HeapObject {
|
||||
//! Return the build log
|
||||
const std::string& buildLog() const { return buildLog_; }
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static std::string openclMangledName(const std::string& name);
|
||||
#endif
|
||||
|
||||
const std::unordered_map<size_t, size_t>& patch() const { return patchReferences_; }
|
||||
|
||||
//! Returns TRUE if kernel uses dynamic parallelism
|
||||
@@ -356,7 +346,6 @@ class Kernel : public amd::HeapObject {
|
||||
|
||||
protected:
|
||||
//! Initializes the abstraction layer kernel parameters
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
void InitParameters(const amd_comgr_metadata_node_t kernelMD);
|
||||
|
||||
//! Retrieve kernel attribute and code properties metadata
|
||||
@@ -372,13 +361,7 @@ class Kernel : public amd::HeapObject {
|
||||
const uint32_t codeObjectVer() const { return prog().codeObjectVer(); }
|
||||
//! Initializes HSAIL Printf metadata and info for LC
|
||||
void InitPrintf(const std::vector<std::string>& printfInfoStrings);
|
||||
#endif
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
void InitParameters(const aclArgData* aclArg, //!< List of ACL arguments
|
||||
uint32_t argBufferSize);
|
||||
//! Initializes HSAIL Printf metadata and info
|
||||
void InitPrintf(const aclPrintfFmt* aclPrintf);
|
||||
#endif
|
||||
|
||||
//! Returns program associated with this kernel
|
||||
const Program& prog() const { return prog_; }
|
||||
|
||||
@@ -426,7 +409,5 @@ class Kernel : public amd::HeapObject {
|
||||
KernelKind kind_{Normal}; //!< Kernel kind, is normal unless specified otherwise
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta, std::string* str);
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
} // namespace amd::device
|
||||
|
||||
文件差异内容过多而无法显示
加载差异
@@ -20,16 +20,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "aclTypes.h"
|
||||
#endif
|
||||
#include "platform/context.hpp"
|
||||
#include "platform/object.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "amd_comgr/amd_comgr.h"
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
@@ -120,16 +114,11 @@ class Program : public amd::HeapObject {
|
||||
|
||||
bool runInitFiniKernel(const std::vector<const Kernel*>& kernels) const;
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static amd::Monitor buildLock_; //!< Global build lock for HSAIL which isn't thread-safe
|
||||
#endif
|
||||
|
||||
protected:
|
||||
union {
|
||||
struct {
|
||||
uint32_t isNull_ : 1; //!< Null program no memory allocations
|
||||
uint32_t internal_ : 1; //!< Internal blit program
|
||||
uint32_t isLC_ : 1; //!< LC was used for the program compilation
|
||||
uint32_t hasGlobalStores_ : 1; //!< Program has writable program scope variables
|
||||
uint32_t isHIP_ : 1; //!< Determine if the program is for HIP
|
||||
uint32_t coLoaded_ : 1; //!< Has the code objected been loaded
|
||||
@@ -143,30 +132,20 @@ class Program : public amd::HeapObject {
|
||||
amd::Elf::ElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format
|
||||
std::string compileOptions_; //!< compile/build options.
|
||||
std::string linkOptions_; //!< link options.
|
||||
//!< the option arg passed in to clCompileProgram(), clLinkProgram(),
|
||||
//! or clBuildProgram(), whichever is called last
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
|
||||
aclBinary* binaryElf_; //!< Binary for the new compiler library
|
||||
#endif
|
||||
|
||||
//!< the option arg passed in to clCompileProgram(), clLinkProgram(),
|
||||
//!< or clBuildProgram(), whichever is called last
|
||||
std::string lastBuildOptionsArg_;
|
||||
mutable std::string buildLog_; //!< build log.
|
||||
int32_t buildStatus_; //!< build status.
|
||||
int32_t buildError_; //!< build error
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
aclTargetInfo info_; //!< The info target for this binary.
|
||||
#endif
|
||||
size_t globalVariableTotalSize_;
|
||||
amd::option::Options* programOptions_;
|
||||
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata
|
||||
uint32_t codeObjectVer_; //!< version of code object
|
||||
std::map<std::string, amd_comgr_metadata_node_t> kernelMetadataMap_; //!< Map of kernel metadata
|
||||
#endif
|
||||
//! Sanitizer lock - lock when launching init/fini kernels
|
||||
static amd::Monitor initFiniLock_;
|
||||
|
||||
@@ -249,20 +228,12 @@ class Program : public amd::HeapObject {
|
||||
|
||||
size_t globalVariableTotalSize() const { return globalVariableTotalSize_; }
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! Returns the aclBinary associated with the program
|
||||
aclBinary* binaryElf() const { return binaryElf_; }
|
||||
#endif
|
||||
|
||||
//! Returns TRUE if the program just compiled
|
||||
bool isNull() const { return isNull_; }
|
||||
|
||||
//! Returns TRUE if the program used internally by runtime
|
||||
bool isInternal() const { return internal_; }
|
||||
|
||||
//! Returns TRUE if Lightning compiler was used for this program
|
||||
bool isLC() const { return isLC_; }
|
||||
|
||||
//! Global variables are a part of the code segment
|
||||
bool hasGlobalStores() const { return hasGlobalStores_; }
|
||||
|
||||
@@ -272,7 +243,6 @@ class Program : public amd::HeapObject {
|
||||
//! Returns TRUE if the program is a trap handler for debugger support
|
||||
bool isTrapHandler() const { return trapHandler_; }
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
amd_comgr_metadata_node_t metadata() const { return metadata_; }
|
||||
|
||||
//! Get the kernel metadata
|
||||
@@ -286,7 +256,6 @@ class Program : public amd::HeapObject {
|
||||
}
|
||||
|
||||
const uint32_t codeObjectVer() const { return codeObjectVer_; }
|
||||
#endif
|
||||
|
||||
//! Check if program is HIP based
|
||||
const bool isHIP() const { return (isHIP_ == 1); }
|
||||
@@ -324,18 +293,18 @@ class Program : public amd::HeapObject {
|
||||
*
|
||||
* \return True if we successfully compiled a GPU program
|
||||
*/
|
||||
virtual bool compileImpl(const std::string& sourceCode, //!< the program's source code
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options //!< compile options's object
|
||||
bool compileImpl(const std::string& sourceCode, //!< the program's source code
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options //!< compile options object
|
||||
);
|
||||
|
||||
//! Link the device program.
|
||||
virtual bool linkImpl(amd::option::Options* options);
|
||||
bool linkImpl(amd::option::Options* options);
|
||||
|
||||
//! Link the device programs.
|
||||
virtual bool linkImpl(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
bool linkImpl(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options) = 0;
|
||||
|
||||
@@ -347,15 +316,9 @@ class Program : public amd::HeapObject {
|
||||
//! Initialize Binary
|
||||
virtual bool initClBinary();
|
||||
|
||||
virtual bool saveBinaryAndSetType(type_t type) = 0;
|
||||
|
||||
//! Release the Binary
|
||||
void releaseClBinary();
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
//! return target info
|
||||
virtual const aclTargetInfo& info() = 0;
|
||||
#endif
|
||||
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) {
|
||||
return true;
|
||||
@@ -402,44 +365,13 @@ class Program : public amd::HeapObject {
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool getSymbolsFromCodeObj(std::vector<std::string>* var_names,
|
||||
amd_comgr_symbol_type_t sym_type) const;
|
||||
#endif
|
||||
bool getUndefinedVarInfo(std::string var_name, void** var_addr, size_t* var_size);
|
||||
bool defineUndefinedVars();
|
||||
|
||||
private:
|
||||
//! Compile the device program with LC path
|
||||
bool compileImplLC(const std::string& sourceCode, const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames, amd::option::Options* options);
|
||||
|
||||
//! Compile the device program with HSAIL path
|
||||
bool compileImplHSAIL(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames, amd::option::Options* options);
|
||||
|
||||
//! Link the device programs with LC path
|
||||
bool linkImplLC(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
|
||||
//! Link the device programs with HSAIL path
|
||||
bool linkImplHSAIL(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
|
||||
//! Link the device program with LC path
|
||||
bool linkImplLC(amd::option::Options* options);
|
||||
|
||||
//! Link the device program with HSAIL path
|
||||
bool linkImplHSAIL(amd::option::Options* options);
|
||||
|
||||
//! Load the device program with LC path
|
||||
bool loadLC();
|
||||
|
||||
//! Load the device program with HSAIL path
|
||||
bool loadHSAIL();
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Dump the log data object to the build log, if a log data object is present
|
||||
void extractBuildLog(amd_comgr_data_set_t dataSet);
|
||||
//! Dump the code object data
|
||||
@@ -477,7 +409,6 @@ class Program : public amd::HeapObject {
|
||||
|
||||
//! Create the map for the kernel name and its metadata for fast access
|
||||
bool createKernelMetadataMap(void* binary, size_t binSize);
|
||||
#endif
|
||||
|
||||
bool trySubstObjFile(const char* SubstCfgFile, const std::string& sourceCode,
|
||||
const amd::option::Options* options);
|
||||
@@ -489,8 +420,6 @@ class Program : public amd::HeapObject {
|
||||
Program& operator=(const Program&);
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
|
||||
class ComgrBinaryData {
|
||||
public:
|
||||
ComgrBinaryData() : binaryData_({0}), created_(false) {}
|
||||
@@ -503,6 +432,4 @@ class ComgrBinaryData {
|
||||
bool created_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace amd::device
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "os/os.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "hsailctx.hpp"
|
||||
|
||||
namespace amd {
|
||||
std::once_flag Hsail::initialized;
|
||||
HsailEntryPoints Hsail::cep_;
|
||||
bool Hsail::is_ready_ = false;
|
||||
|
||||
//! Resolves the HSAIL compiler library (acl*) entry points into cep_.
//!
//! When HSAIL_DYN_DLL is defined the library is opened with Os::loadLibrary() and every
//! GET_HSAIL_SYMBOL(NAME) line looks NAME up in that handle, returning false from this
//! function if the lookup yields nullptr. Without HSAIL_DYN_DLL the GET_HSAIL_SYMBOL
//! lines expand to nothing and only is_ready_ is updated.
//!
//! \return false if the library cannot be loaded or a symbol is missing; true otherwise,
//!         after is_ready_ has been set.
//!
//! NOTE(review): on a failed symbol lookup cep_.handle is left loaded and is_ready_ stays
//! false; no unload call is visible here - confirm whether that is intentional.
bool Hsail::LoadLib() {
|
||||
#if defined(HSAIL_DYN_DLL)
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loading HSAIL library.");
|
||||
// Library file name; presumably LP64_SWITCH picks the 32- vs 64-bit name and
// WINDOWS_SWITCH the .dll vs .so name - confirm against the macro definitions.
static constexpr const char* HsailLibName =
|
||||
LP64_SWITCH(WINDOWS_SWITCH("amdhsail32.dll", "libamdhsail32.so"),
|
||||
WINDOWS_SWITCH("amdhsail64.dll", "libamdhsail64.so"));
|
||||
cep_.handle = Os::loadLibrary(HsailLibName);
|
||||
if (nullptr == cep_.handle) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
// One lookup per HsailEntryPoints member; each may return false from LoadLib().
GET_HSAIL_SYMBOL(aclCompilerInit)
|
||||
GET_HSAIL_SYMBOL(aclCompilerFini)
|
||||
GET_HSAIL_SYMBOL(aclCompilerVersion)
|
||||
GET_HSAIL_SYMBOL(aclVersionSize)
|
||||
GET_HSAIL_SYMBOL(aclGetErrorString)
|
||||
GET_HSAIL_SYMBOL(aclGetArchInfo)
|
||||
GET_HSAIL_SYMBOL(aclGetDeviceInfo)
|
||||
GET_HSAIL_SYMBOL(aclGetTargetInfo)
|
||||
GET_HSAIL_SYMBOL(aclGetTargetInfoFromChipID)
|
||||
GET_HSAIL_SYMBOL(aclGetArchitecture)
|
||||
GET_HSAIL_SYMBOL(aclGetChipOptions)
|
||||
GET_HSAIL_SYMBOL(aclGetFamily)
|
||||
GET_HSAIL_SYMBOL(aclGetChip)
|
||||
GET_HSAIL_SYMBOL(aclBinaryInit)
|
||||
GET_HSAIL_SYMBOL(aclBinaryFini)
|
||||
GET_HSAIL_SYMBOL(aclReadFromFile)
|
||||
GET_HSAIL_SYMBOL(aclReadFromMem)
|
||||
GET_HSAIL_SYMBOL(aclWriteToFile)
|
||||
GET_HSAIL_SYMBOL(aclWriteToMem)
|
||||
GET_HSAIL_SYMBOL(aclCreateFromBinary)
|
||||
GET_HSAIL_SYMBOL(aclBinaryVersion)
|
||||
GET_HSAIL_SYMBOL(aclInsertSection)
|
||||
GET_HSAIL_SYMBOL(aclInsertSymbol)
|
||||
GET_HSAIL_SYMBOL(aclExtractSection)
|
||||
GET_HSAIL_SYMBOL(aclExtractSymbol)
|
||||
GET_HSAIL_SYMBOL(aclRemoveSection)
|
||||
GET_HSAIL_SYMBOL(aclRemoveSymbol)
|
||||
GET_HSAIL_SYMBOL(aclQueryInfo)
|
||||
GET_HSAIL_SYMBOL(aclDbgAddArgument)
|
||||
GET_HSAIL_SYMBOL(aclDbgRemoveArgument)
|
||||
GET_HSAIL_SYMBOL(aclCompile)
|
||||
GET_HSAIL_SYMBOL(aclLink)
|
||||
GET_HSAIL_SYMBOL(aclGetCompilerLog)
|
||||
GET_HSAIL_SYMBOL(aclRetrieveType)
|
||||
GET_HSAIL_SYMBOL(aclSetType)
|
||||
GET_HSAIL_SYMBOL(aclConvertType)
|
||||
GET_HSAIL_SYMBOL(aclDisassemble)
|
||||
GET_HSAIL_SYMBOL(aclGetDeviceBinary)
|
||||
GET_HSAIL_SYMBOL(aclValidateBinaryImage)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageCreate)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageCopy)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageDestroy)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageFinalize)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageSize)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageData)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageGetGlobalsSize)
|
||||
GET_HSAIL_SYMBOL(aclJITObjectImageIterateSymbols)
|
||||
GET_HSAIL_SYMBOL(aclDumpBinary)
|
||||
GET_HSAIL_SYMBOL(aclGetKstatsSI)
|
||||
GET_HSAIL_SYMBOL(aclInsertKernelStatistics)
|
||||
GET_HSAIL_SYMBOL(aclFreeMem)
|
||||
// Reached only when no lookup above bailed out; IsReady() reports this flag.
is_ready_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace amd
|
||||
#endif
|
||||
@@ -1,394 +0,0 @@
|
||||
/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "top.hpp"
|
||||
#include "acl.h"
|
||||
|
||||
#ifndef ACL_API_ENTRY
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define ACL_API_ENTRY __stdcall
|
||||
#else
|
||||
#define ACL_API_ENTRY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace amd {
|
||||
typedef aclCompiler*(ACL_API_ENTRY* t_aclCompilerInit)(aclCompilerOptions* opts,
|
||||
acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclCompilerFini)(aclCompiler* cl);
|
||||
typedef aclCLVersion(ACL_API_ENTRY* t_aclCompilerVersion)(aclCompiler* cl, acl_error* error_code);
|
||||
typedef uint32_t(ACL_API_ENTRY* t_aclVersionSize)(aclCLVersion num, acl_error* error_code);
|
||||
typedef const char*(ACL_API_ENTRY* t_aclGetErrorString)(acl_error error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclGetArchInfo)(const char** arch_names, size_t* arch_size);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclGetDeviceInfo)(const char* arch, const char** names,
|
||||
size_t* device_size);
|
||||
typedef aclTargetInfo(ACL_API_ENTRY* t_aclGetTargetInfo)(const char* arch, const char* device,
|
||||
acl_error* error_code);
|
||||
typedef aclTargetInfo(ACL_API_ENTRY* t_aclGetTargetInfoFromChipID)(const char* arch,
|
||||
const uint32_t chip_id,
|
||||
acl_error* error_code);
|
||||
typedef const char*(ACL_API_ENTRY* t_aclGetArchitecture)(const aclTargetInfo& target);
|
||||
typedef const uint64_t(ACL_API_ENTRY* t_aclGetChipOptions)(const aclTargetInfo& target);
|
||||
typedef const char*(ACL_API_ENTRY* t_aclGetFamily)(const aclTargetInfo& target);
|
||||
typedef const char*(ACL_API_ENTRY* t_aclGetChip)(const aclTargetInfo& target);
|
||||
typedef aclBinary*(ACL_API_ENTRY* t_aclBinaryInit)(size_t struct_version,
|
||||
const aclTargetInfo* target,
|
||||
const aclBinaryOptions* options,
|
||||
acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclBinaryFini)(aclBinary* bin);
|
||||
typedef aclBinary*(ACL_API_ENTRY* t_aclReadFromFile)(const char* str, acl_error* error_code);
|
||||
typedef aclBinary*(ACL_API_ENTRY* t_aclReadFromMem)(const void* mem, size_t size,
|
||||
acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclWriteToFile)(aclBinary* bin, const char* str);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclWriteToMem)(aclBinary* bin, void** mem, size_t* size);
|
||||
typedef aclBinary*(ACL_API_ENTRY* t_aclCreateFromBinary)(const aclBinary* binary,
|
||||
aclBIFVersion version);
|
||||
typedef aclBIFVersion(ACL_API_ENTRY* t_aclBinaryVersion)(const aclBinary* binary);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclInsertSection)(aclCompiler* cl, aclBinary* binary,
|
||||
const void* data, size_t data_size,
|
||||
aclSections id);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclInsertSymbol)(aclCompiler* cl, aclBinary* binary,
|
||||
const void* data, size_t data_size,
|
||||
aclSections id, const char* symbol);
|
||||
typedef const void*(ACL_API_ENTRY* t_aclExtractSection)(aclCompiler* cl, const aclBinary* binary,
|
||||
size_t* size, aclSections id,
|
||||
acl_error* error_code);
|
||||
typedef const void*(ACL_API_ENTRY* t_aclExtractSymbol)(aclCompiler* cl, const aclBinary* binary,
|
||||
size_t* size, aclSections id,
|
||||
const char* symbol, acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclRemoveSection)(aclCompiler* cl, aclBinary* binary,
|
||||
aclSections id);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclRemoveSymbol)(aclCompiler* cl, aclBinary* binary,
|
||||
aclSections id, const char* symbol);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclQueryInfo)(aclCompiler* cl, const aclBinary* binary,
|
||||
aclQueryType query, const char* kernel,
|
||||
void* data_ptr, size_t* ptr_size);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclDbgAddArgument)(aclCompiler* cl, aclBinary* binary,
|
||||
const char* kernel, const char* name,
|
||||
bool byVal);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclDbgRemoveArgument)(aclCompiler* cl, aclBinary* binary,
|
||||
const char* kernel, const char* name);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclCompile)(aclCompiler* cl, aclBinary* bin, const char* options,
|
||||
aclType from, aclType to,
|
||||
aclLogFunction compile_callback);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclLink)(aclCompiler* cl, aclBinary* src_bin,
|
||||
unsigned int num_libs, aclBinary** libs,
|
||||
aclType link_mode, const char* options,
|
||||
aclLogFunction link_callback);
|
||||
typedef const char*(ACL_API_ENTRY* t_aclGetCompilerLog)(aclCompiler* cl);
|
||||
typedef const void*(ACL_API_ENTRY* t_aclRetrieveType)(aclCompiler* cl, const aclBinary* bin,
|
||||
const char* name, size_t* data_size,
|
||||
aclType type, acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclSetType)(aclCompiler* cl, aclBinary* bin, const char* name,
|
||||
aclType type, const void* data, size_t size);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclConvertType)(aclCompiler* cl, aclBinary* bin,
|
||||
const char* name, aclType type);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclDisassemble)(aclCompiler* cl, aclBinary* bin,
|
||||
const char* kernel,
|
||||
aclLogFunction disasm_callback);
|
||||
typedef const void*(ACL_API_ENTRY* t_aclGetDeviceBinary)(aclCompiler* cl, const aclBinary* bin,
|
||||
const char* kernel, size_t* size,
|
||||
acl_error* error_code);
|
||||
typedef bool(ACL_API_ENTRY* t_aclValidateBinaryImage)(const void* binary, size_t length,
|
||||
unsigned type);
|
||||
typedef aclJITObjectImage(ACL_API_ENTRY* t_aclJITObjectImageCreate)(aclCompiler* cl,
|
||||
const void* buffer,
|
||||
size_t length, aclBinary* bin,
|
||||
acl_error* error_code);
|
||||
typedef aclJITObjectImage(ACL_API_ENTRY* t_aclJITObjectImageCopy)(aclCompiler* cl,
|
||||
const void* buffer, size_t length,
|
||||
acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageDestroy)(aclCompiler* cl,
|
||||
aclJITObjectImage buffer);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageFinalize)(aclCompiler* cl,
|
||||
aclJITObjectImage image);
|
||||
typedef size_t(ACL_API_ENTRY* t_aclJITObjectImageSize)(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code);
|
||||
typedef const char*(ACL_API_ENTRY* t_aclJITObjectImageData)(aclCompiler* cl,
|
||||
aclJITObjectImage image,
|
||||
acl_error* error_code);
|
||||
typedef size_t(ACL_API_ENTRY* t_aclJITObjectImageGetGlobalsSize)(aclCompiler* cl,
|
||||
aclJITObjectImage image,
|
||||
acl_error* error_code);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageIterateSymbols)(aclCompiler* cl,
|
||||
aclJITObjectImage image,
|
||||
aclJITSymbolCallback callback,
|
||||
void* data);
|
||||
typedef void(ACL_API_ENTRY* t_aclDumpBinary)(const aclBinary* bin);
|
||||
typedef void(ACL_API_ENTRY* t_aclGetKstatsSI)(const void* shader, aclKernelStats& kstats);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclInsertKernelStatistics)(aclCompiler* cl, aclBinary* bin);
|
||||
typedef acl_error(ACL_API_ENTRY* t_aclFreeMem)(aclBinary* bin, void* mem);
|
||||
|
||||
struct HsailEntryPoints {
|
||||
void* handle;
|
||||
t_aclCompilerInit aclCompilerInit;
|
||||
t_aclCompilerFini aclCompilerFini;
|
||||
t_aclCompilerVersion aclCompilerVersion;
|
||||
t_aclVersionSize aclVersionSize;
|
||||
t_aclGetErrorString aclGetErrorString;
|
||||
t_aclGetArchInfo aclGetArchInfo;
|
||||
t_aclGetDeviceInfo aclGetDeviceInfo;
|
||||
t_aclGetTargetInfo aclGetTargetInfo;
|
||||
t_aclGetTargetInfoFromChipID aclGetTargetInfoFromChipID;
|
||||
t_aclGetArchitecture aclGetArchitecture;
|
||||
t_aclGetChipOptions aclGetChipOptions;
|
||||
t_aclGetFamily aclGetFamily;
|
||||
t_aclGetChip aclGetChip;
|
||||
t_aclBinaryInit aclBinaryInit;
|
||||
t_aclBinaryFini aclBinaryFini;
|
||||
t_aclReadFromFile aclReadFromFile;
|
||||
t_aclReadFromMem aclReadFromMem;
|
||||
t_aclWriteToFile aclWriteToFile;
|
||||
t_aclWriteToMem aclWriteToMem;
|
||||
t_aclCreateFromBinary aclCreateFromBinary;
|
||||
t_aclBinaryVersion aclBinaryVersion;
|
||||
t_aclInsertSection aclInsertSection;
|
||||
t_aclInsertSymbol aclInsertSymbol;
|
||||
t_aclExtractSection aclExtractSection;
|
||||
t_aclExtractSymbol aclExtractSymbol;
|
||||
t_aclRemoveSection aclRemoveSection;
|
||||
t_aclRemoveSymbol aclRemoveSymbol;
|
||||
t_aclQueryInfo aclQueryInfo;
|
||||
t_aclDbgAddArgument aclDbgAddArgument;
|
||||
t_aclDbgRemoveArgument aclDbgRemoveArgument;
|
||||
t_aclCompile aclCompile;
|
||||
t_aclLink aclLink;
|
||||
t_aclGetCompilerLog aclGetCompilerLog;
|
||||
t_aclRetrieveType aclRetrieveType;
|
||||
t_aclSetType aclSetType;
|
||||
t_aclConvertType aclConvertType;
|
||||
t_aclDisassemble aclDisassemble;
|
||||
t_aclGetDeviceBinary aclGetDeviceBinary;
|
||||
t_aclValidateBinaryImage aclValidateBinaryImage;
|
||||
t_aclJITObjectImageCreate aclJITObjectImageCreate;
|
||||
t_aclJITObjectImageCopy aclJITObjectImageCopy;
|
||||
t_aclJITObjectImageDestroy aclJITObjectImageDestroy;
|
||||
t_aclJITObjectImageFinalize aclJITObjectImageFinalize;
|
||||
t_aclJITObjectImageSize aclJITObjectImageSize;
|
||||
t_aclJITObjectImageData aclJITObjectImageData;
|
||||
t_aclJITObjectImageGetGlobalsSize aclJITObjectImageGetGlobalsSize;
|
||||
t_aclJITObjectImageIterateSymbols aclJITObjectImageIterateSymbols;
|
||||
t_aclDumpBinary aclDumpBinary;
|
||||
t_aclGetKstatsSI aclGetKstatsSI;
|
||||
t_aclInsertKernelStatistics aclInsertKernelStatistics;
|
||||
t_aclFreeMem aclFreeMem;
|
||||
};
|
||||
|
||||
// HSAIL_DYN(NAME) names the callable for an acl* entry point; GET_HSAIL_SYMBOL(NAME)
// resolves it. Both depend on whether the HSAIL library is loaded at run time.
//
// Dynamic build (HSAIL_DYN_DLL): calls go through the function pointers stored in cep_,
// and GET_HSAIL_SYMBOL fills cep_.NAME via Os::getSymbol() on cep_.handle.
// NOTE: the macro contains a `return false;`, so it may only be used inside a function
// returning bool (or something constructible from false).
#ifdef HSAIL_DYN_DLL
|
||||
#define HSAIL_DYN(NAME) cep_.NAME
|
||||
#define GET_HSAIL_SYMBOL(NAME) \
|
||||
cep_.NAME = reinterpret_cast<t_##NAME>(Os::getSymbol(cep_.handle, #NAME)); \
|
||||
if (nullptr == cep_.NAME) { \
|
||||
return false; \
|
||||
}
|
||||
#else
|
||||
// Non-dynamic build: HSAIL_DYN(NAME) is the acl* function itself and there is nothing
// to look up, so GET_HSAIL_SYMBOL expands to nothing.
#define HSAIL_DYN(NAME) NAME
|
||||
#define GET_HSAIL_SYMBOL(NAME)
|
||||
#endif
|
||||
|
||||
class Hsail : public amd::AllStatic {
|
||||
public:
|
||||
static std::once_flag initialized;
|
||||
|
||||
static bool LoadLib();
|
||||
|
||||
static bool IsReady() { return is_ready_; }
|
||||
|
||||
static aclCompiler* CompilerInit(aclCompilerOptions* opts, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclCompilerInit)(opts, error_code);
|
||||
}
|
||||
static acl_error CompilerFini(aclCompiler* cl) { return HSAIL_DYN(aclCompilerFini)(cl); }
|
||||
static aclCLVersion CompilerVersion(aclCompiler* cl, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclCompilerVersion)(cl, error_code);
|
||||
}
|
||||
static uint32_t VersionSize(aclCLVersion num, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclVersionSize)(num, error_code);
|
||||
}
|
||||
static const char* GetErrorString(acl_error error_code) {
|
||||
return HSAIL_DYN(aclGetErrorString)(error_code);
|
||||
}
|
||||
static acl_error GetArchInfo(const char** arch_names, size_t* arch_size) {
|
||||
return HSAIL_DYN(aclGetArchInfo)(arch_names, arch_size);
|
||||
}
|
||||
static acl_error GetDeviceInfo(const char* arch, const char** names, size_t* device_size) {
|
||||
return HSAIL_DYN(aclGetDeviceInfo)(arch, names, device_size);
|
||||
}
|
||||
static aclTargetInfo GetTargetInfo(const char* arch, const char* device, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclGetTargetInfo)(arch, device, error_code);
|
||||
}
|
||||
static aclTargetInfo GetTargetInfoFromChipID(const char* arch, const uint32_t chip_id,
|
||||
acl_error* error_code) {
|
||||
return HSAIL_DYN(aclGetTargetInfoFromChipID)(arch, chip_id, error_code);
|
||||
}
|
||||
static const char* GetArchitecture(const aclTargetInfo& target) {
|
||||
return HSAIL_DYN(aclGetArchitecture)(target);
|
||||
}
|
||||
static uint64_t GetChipOptions(const aclTargetInfo& target) {
|
||||
return HSAIL_DYN(aclGetChipOptions)(target);
|
||||
}
|
||||
static const char* GetFamily(const aclTargetInfo& target) {
|
||||
return HSAIL_DYN(aclGetFamily)(target);
|
||||
}
|
||||
static const char* GetChip(const aclTargetInfo& target) { return HSAIL_DYN(aclGetChip)(target); }
|
||||
static aclBinary* BinaryInit(size_t struct_version, const aclTargetInfo* target,
|
||||
const aclBinaryOptions* options, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclBinaryInit)(struct_version, target, options, error_code);
|
||||
}
|
||||
static acl_error BinaryFini(aclBinary* bin) { return HSAIL_DYN(aclBinaryFini)(bin); }
|
||||
static aclBinary* ReadFromFile(const char* str, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclReadFromFile)(str, error_code);
|
||||
}
|
||||
static aclBinary* ReadFromMem(const void* mem, size_t size, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclReadFromMem)(mem, size, error_code);
|
||||
}
|
||||
static acl_error WriteToFile(aclBinary* bin, const char* str) {
|
||||
return HSAIL_DYN(aclWriteToFile)(bin, str);
|
||||
}
|
||||
static acl_error WriteToMem(aclBinary* bin, void** mem, size_t* size) {
|
||||
return HSAIL_DYN(aclWriteToMem)(bin, mem, size);
|
||||
}
|
||||
static aclBinary* CreateFromBinary(const aclBinary* binary, aclBIFVersion version) {
|
||||
return HSAIL_DYN(aclCreateFromBinary)(binary, version);
|
||||
}
|
||||
static aclBIFVersion BinaryVersion(const aclBinary* binary) {
|
||||
return HSAIL_DYN(aclBinaryVersion)(binary);
|
||||
}
|
||||
static acl_error InsertSection(aclCompiler* cl, aclBinary* binary, const void* data,
|
||||
size_t data_size, aclSections id) {
|
||||
return HSAIL_DYN(aclInsertSection)(cl, binary, data, data_size, id);
|
||||
}
|
||||
static const acl_error InsertSymbol(aclCompiler* cl, aclBinary* binary, const void* data,
|
||||
size_t data_size, aclSections id, const char* symbol) {
|
||||
return HSAIL_DYN(aclInsertSymbol)(cl, binary, data, data_size, id, symbol);
|
||||
}
|
||||
static const void* ExtractSection(aclCompiler* cl, const aclBinary* binary, size_t* size,
|
||||
aclSections id, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclExtractSection)(cl, binary, size, id, error_code);
|
||||
}
|
||||
static const void* ExtractSymbol(aclCompiler* cl, const aclBinary* binary, size_t* size,
|
||||
aclSections id, const char* symbol, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclExtractSymbol)(cl, binary, size, id, symbol, error_code);
|
||||
}
|
||||
static acl_error RemoveSection(aclCompiler* cl, aclBinary* binary, aclSections id) {
|
||||
return HSAIL_DYN(aclRemoveSection)(cl, binary, id);
|
||||
}
|
||||
static acl_error RemoveSymbol(aclCompiler* cl, aclBinary* binary, aclSections id,
|
||||
const char* symbol) {
|
||||
return HSAIL_DYN(aclRemoveSymbol)(cl, binary, id, symbol);
|
||||
}
|
||||
static acl_error QueryInfo(aclCompiler* cl, const aclBinary* binary, aclQueryType query,
|
||||
const char* kernel, void* data_ptr, size_t* ptr_size) {
|
||||
return HSAIL_DYN(aclQueryInfo)(cl, binary, query, kernel, data_ptr, ptr_size);
|
||||
}
|
||||
static acl_error DbgAddArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
|
||||
const char* name, bool byVal) {
|
||||
return HSAIL_DYN(aclDbgAddArgument)(cl, binary, kernel, name, byVal);
|
||||
}
|
||||
static acl_error DbgRemoveArgument(aclCompiler* cl, aclBinary* binary, const char* kernel,
|
||||
const char* name) {
|
||||
return HSAIL_DYN(aclDbgRemoveArgument)(cl, binary, kernel, name);
|
||||
}
|
||||
static acl_error Compile(aclCompiler* cl, aclBinary* bin, const char* options, aclType from,
|
||||
aclType to, aclLogFunction compile_callback) {
|
||||
return HSAIL_DYN(aclCompile)(cl, bin, options, from, to, compile_callback);
|
||||
}
|
||||
static acl_error Link(aclCompiler* cl, aclBinary* src_bin, unsigned int num_libs,
|
||||
aclBinary** libs, aclType link_mode, const char* options,
|
||||
aclLogFunction link_callback) {
|
||||
return HSAIL_DYN(aclLink)(cl, src_bin, num_libs, libs, link_mode, options, link_callback);
|
||||
}
|
||||
static const char* GetCompilerLog(aclCompiler* cl) { return HSAIL_DYN(aclGetCompilerLog)(cl); }
|
||||
static const void* RetrieveType(aclCompiler* cl, const aclBinary* bin, const char* name,
|
||||
size_t* data_size, aclType type, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclRetrieveType)(cl, bin, name, data_size, type, error_code);
|
||||
}
|
||||
static acl_error SetType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type,
|
||||
const void* data, size_t size) {
|
||||
return HSAIL_DYN(aclSetType)(cl, bin, name, type, data, size);
|
||||
}
|
||||
static acl_error ConvertType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type) {
|
||||
return HSAIL_DYN(aclConvertType)(cl, bin, name, type);
|
||||
}
|
||||
static acl_error Disassemble(aclCompiler* cl, aclBinary* bin, const char* kernel,
|
||||
aclLogFunction disasm_callback) {
|
||||
return HSAIL_DYN(aclDisassemble)(cl, bin, kernel, disasm_callback);
|
||||
}
|
||||
static const void* GetDeviceBinary(aclCompiler* cl, const aclBinary* bin, const char* kernel,
|
||||
size_t* size, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclGetDeviceBinary)(cl, bin, kernel, size, error_code);
|
||||
}
|
||||
//! Forwards to aclValidateBinaryImage(binary, length, type).
//! In a dynamically loaded build (HSAIL_DYN_DLL) this returns false when the entry point
//! pointer in cep_ is still null instead of calling through it.
//! NOTE(review): presumably this guard exists because the function can be reached before,
//! or without, a successful LoadLib() - confirm against the callers.
static const bool ValidateBinaryImage(const void* binary, size_t length, unsigned type) {
|
||||
#if defined(HSAIL_DYN_DLL)
|
||||
// Entry point not resolved: report the image as not valid.
if (cep_.aclValidateBinaryImage == nullptr) {
|
||||
return false;
|
||||
}
|
||||
#endif // defined(HSAIL_DYN_DLL)
|
||||
return HSAIL_DYN(aclValidateBinaryImage)(binary, length, type);
|
||||
}
|
||||
static aclJITObjectImage JITObjectImageCreate(aclCompiler* cl, const void* buffer, size_t length,
|
||||
aclBinary* bin, acl_error* error_code) {
|
||||
return HSAIL_DYN(aclJITObjectImageCreate)(cl, buffer, length, bin, error_code);
|
||||
}
|
||||
static aclJITObjectImage JITObjectImageCopy(aclCompiler* cl, const void* buffer, size_t length,
|
||||
acl_error* error_code) {
|
||||
return HSAIL_DYN(aclJITObjectImageCopy)(cl, buffer, length, error_code);
|
||||
}
|
||||
static acl_error JITObjectImageDestroy(aclCompiler* cl, aclJITObjectImage buffer) {
|
||||
return HSAIL_DYN(aclJITObjectImageDestroy)(cl, buffer);
|
||||
}
|
||||
static acl_error JITObjectImageFinalize(aclCompiler* cl, aclJITObjectImage image) {
|
||||
return HSAIL_DYN(aclJITObjectImageFinalize)(cl, image);
|
||||
}
|
||||
static size_t JITObjectImageSize(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code) {
|
||||
return HSAIL_DYN(aclJITObjectImageSize)(cl, image, error_code);
|
||||
}
|
||||
static const char* JITObjectImageData(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code) {
|
||||
return HSAIL_DYN(aclJITObjectImageData)(cl, image, error_code);
|
||||
}
|
||||
static size_t JITObjectImageGetGlobalsSize(aclCompiler* cl, aclJITObjectImage image,
|
||||
acl_error* error_code) {
|
||||
return HSAIL_DYN(aclJITObjectImageGetGlobalsSize)(cl, image, error_code);
|
||||
}
|
||||
static acl_error JITObjectImageIterateSymbols(aclCompiler* cl, aclJITObjectImage image,
|
||||
aclJITSymbolCallback callback, void* data) {
|
||||
return HSAIL_DYN(aclJITObjectImageIterateSymbols)(cl, image, callback, data);
|
||||
}
|
||||
static void DumpBinary(const aclBinary* bin) { HSAIL_DYN(aclDumpBinary)(bin); }
|
||||
static void GetKstatsSI(const void* shader, aclKernelStats& kstats) {
|
||||
return HSAIL_DYN(aclGetKstatsSI)(shader, kstats);
|
||||
}
|
||||
static acl_error InsertKernelStatistics(aclCompiler* cl, aclBinary* bin) {
|
||||
return HSAIL_DYN(aclInsertKernelStatistics)(cl, bin);
|
||||
}
|
||||
static acl_error FreeMem(aclBinary* bin, void* mem) { return HSAIL_DYN(aclFreeMem)(bin, mem); }
|
||||
|
||||
private:
|
||||
static HsailEntryPoints cep_;
|
||||
static bool is_ready_;
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
#endif
|
||||
@@ -49,7 +49,7 @@ extern void __amd_scheduler_pal(__global void*, __global void*, uint);
|
||||
* The trap handler source is copied from the above URL, with the following
|
||||
* modifications:
|
||||
* - Add the following directive to declare the trap_entry symbol (this is
|
||||
* later used by LightningProgram::GetTrapHandlerAddress to locate the load
|
||||
* later used by pal::Program::GetTrapHandlerAddress to locate the load
|
||||
* address of the trap handler):
|
||||
*
|
||||
* .globl trap_entry
|
||||
|
||||
@@ -27,7 +27,7 @@ namespace amd::pal {
|
||||
|
||||
class Device;
|
||||
class VirtualGPU;
|
||||
class HSAILKernel;
|
||||
class Kernel;
|
||||
|
||||
// ================================================================================================
|
||||
// RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1)
|
||||
@@ -256,7 +256,7 @@ class ICaptureMgr {
|
||||
public:
|
||||
virtual bool Update(Pal::IPlatform* platform) = 0;
|
||||
|
||||
virtual void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
|
||||
virtual void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
|
||||
size_t z) = 0;
|
||||
virtual void PostDispatch(VirtualGPU* gpu) = 0;
|
||||
|
||||
|
||||
@@ -37,7 +37,6 @@
|
||||
#include "palPlatform.h"
|
||||
#include "palDevice.h"
|
||||
#include "palQueueSemaphore.h"
|
||||
#include "hsailctx.hpp"
|
||||
|
||||
#include "vdi_common.hpp"
|
||||
|
||||
@@ -167,10 +166,6 @@ namespace amd::pal {
|
||||
Util::GenericAllocator NullDevice::allocator_;
|
||||
char* Device::platformObj_;
|
||||
Pal::IPlatform* Device::platform_;
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
NullDevice::Compiler* NullDevice::compiler_;
|
||||
#endif
|
||||
AppProfile Device::appProfile_;
|
||||
|
||||
Pal::IDevice* gDeviceList[Pal::MaxDevices] = {};
|
||||
@@ -261,25 +256,12 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve
|
||||
LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
if (!settings().useLightning_) {
|
||||
if ((isa.hsailName() != nullptr)) {
|
||||
palName_ = isa.hsailName();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ValidateComgr()) {
|
||||
LogPrintfError("Code object manager initialization failed for offline PAL device %s",
|
||||
isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ValidateHsail()) {
|
||||
LogPrintfError("HSAIL initialization failed for offline PAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd::Device::create(isa)) {
|
||||
LogPrintfError("Unable to setup device for PAL offline device %s", isa.targetId());
|
||||
return false;
|
||||
@@ -293,37 +275,12 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve
|
||||
|
||||
info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64;
|
||||
|
||||
if (!settings().useLightning_) {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
const char* library = getenv("HSA_COMPILER_LIBRARY");
|
||||
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
|
||||
library,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr};
|
||||
// Initialize the compiler handle
|
||||
acl_error error;
|
||||
compiler_ = amd::Hsail::CompilerInit(&opts, &error);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogPrintfError("Error initializing the compiler for offline PAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) {
|
||||
device::Program* program;
|
||||
if (settings().useLightning_) {
|
||||
program = new LightningProgram(*this, owner);
|
||||
} else {
|
||||
program = new HSAILProgram(*this, owner);
|
||||
}
|
||||
program = new pal::Program(*this, owner);
|
||||
|
||||
if (program == nullptr) {
|
||||
LogError("Memory allocation has failed!");
|
||||
@@ -471,10 +428,8 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
info_.globalMemSize_ = std::min(4 * info_.maxMemAllocSize_, info_.globalMemSize_);
|
||||
|
||||
// Use 64 bit pointers
|
||||
if (settings().use64BitPtr_) {
|
||||
info_.addressBits_ = 64;
|
||||
} else {
|
||||
info_.addressBits_ = (settings().useLightning_) ? 64 : 32;
|
||||
info_.addressBits_ = 64;
|
||||
if (!settings().use64BitPtr_) {
|
||||
// Limit total size to 3GB for 32-bit
|
||||
info_.globalMemSize_ = std::min(info_.globalMemSize_, uint64_t(3 * Gi));
|
||||
}
|
||||
@@ -531,11 +486,10 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
|
||||
info_.platform_ = AMD_PLATFORM;
|
||||
|
||||
::strncpy(info_.name_, settings().useLightning_ ? isa().targetId() : palName_,
|
||||
sizeof(info_.name_));
|
||||
::strncpy(info_.name_, isa().targetId(), sizeof(info_.name_));
|
||||
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
|
||||
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)%s",
|
||||
settings().useLightning_ ? ",LC" : ",HSAIL", isOnline() ? "" : " [Offline]");
|
||||
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1,
|
||||
AMD_BUILD_STRING " (PAL,LC)%s", isOnline() ? "" : " [Offline]");
|
||||
|
||||
info_.profile_ = "FULL_PROFILE";
|
||||
info_.spirVersions_ = "";
|
||||
@@ -1037,11 +991,6 @@ bool Device::create(Pal::IDevice* device) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ValidateHsail()) {
|
||||
LogError("Hsail initialization failed!");
|
||||
return false;
|
||||
}
|
||||
|
||||
computeEnginesId_.resize(std::min(numComputeEngines(), settings().numComputeRings_));
|
||||
|
||||
amd::Context::Info info = {0};
|
||||
@@ -1086,27 +1035,6 @@ bool Device::create(Pal::IDevice* device) {
|
||||
allocedMem[i] = 0;
|
||||
}
|
||||
|
||||
if (!settings().useLightning_) {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
const char* library = getenv("HSA_COMPILER_LIBRARY");
|
||||
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
|
||||
library,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr};
|
||||
// Initialize the compiler handle
|
||||
acl_error error;
|
||||
compiler_ = amd::Hsail::CompilerInit(&opts, &error);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Error initializing the compiler");
|
||||
return false;
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
}
|
||||
|
||||
// Allocate SRD manager
|
||||
srdManager_ = new SrdManager(*this, std::max(HsaImageObjectSize, HsaSamplerObjectSize), 64 * Ki);
|
||||
if (srdManager_ == nullptr) {
|
||||
@@ -1117,7 +1045,7 @@ bool Device::create(Pal::IDevice* device) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Master function that handles developer callbacks from PAL.
|
||||
// Primary function that handles developer callbacks from PAL.
|
||||
void PAL_STDCALL Device::PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
|
||||
Pal::Developer::CallbackType type, void* pCbData) {
|
||||
#ifdef PAL_GPUOPEN_OCL
|
||||
@@ -1248,7 +1176,7 @@ bool Device::initializeHeapResources() {
|
||||
// Setup trap handler if available
|
||||
if (trap_handler_ != nullptr) {
|
||||
auto program =
|
||||
reinterpret_cast<pal::LightningProgram*>(trap_handler_->getDeviceProgram(*this));
|
||||
reinterpret_cast<pal::Program*>(trap_handler_->getDeviceProgram(*this));
|
||||
if (program != nullptr) {
|
||||
Pal::Result result{Pal::Result::Success};
|
||||
Pal::GpuMemoryRef memRef = {};
|
||||
@@ -1314,12 +1242,7 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) {
|
||||
}
|
||||
|
||||
device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) {
|
||||
device::Program* program;
|
||||
if (settings().useLightning_) {
|
||||
program = new LightningProgram(*this, owner);
|
||||
} else {
|
||||
program = new HSAILProgram(*this, owner);
|
||||
}
|
||||
device::Program* program = new pal::Program(*this, owner);
|
||||
if (program == nullptr) {
|
||||
LogError("We failed memory allocation for program!");
|
||||
}
|
||||
@@ -1534,12 +1457,6 @@ void Device::tearDown() {
|
||||
delete platformObj_;
|
||||
platform_ = nullptr;
|
||||
}
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
if (compiler_ != nullptr) {
|
||||
amd::Hsail::CompilerFini(compiler_);
|
||||
compiler_ = nullptr;
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
}
|
||||
|
||||
Memory* Device::getGpuMemory(amd::Memory* mem) const {
|
||||
@@ -2361,7 +2278,7 @@ bool Device::validateKernel(const amd::Kernel& kernel, const device::VirtualDevi
|
||||
}
|
||||
}
|
||||
|
||||
const HSAILKernel* hsaKernel = static_cast<const HSAILKernel*>(devKernel);
|
||||
const pal::Kernel* hsaKernel = static_cast<const pal::Kernel*>(devKernel);
|
||||
if (hsaKernel->dynamicParallelism()) {
|
||||
if (settings().useDeviceQueue_) {
|
||||
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(*this);
|
||||
@@ -2805,39 +2722,33 @@ bool Device::createBlitProgram() {
|
||||
} else {
|
||||
if (settings().oclVersion_ >= OpenCL20) {
|
||||
extraBlits = iDev()->GetDispatchKernelSource();
|
||||
if (settings().useLightning_) {
|
||||
extraBlits.append(SchedulerSourceCode20);
|
||||
} else {
|
||||
extraBlits.append(SchedulerSourceCode);
|
||||
}
|
||||
extraBlits.append(SchedulerSourceCode20);
|
||||
ocl20 = "-cl-std=CL2.0";
|
||||
}
|
||||
}
|
||||
|
||||
if (settings().useLightning_) {
|
||||
const std::string TrapHandlerAsm = TrapHandlerCode;
|
||||
// Create a program for trap handler
|
||||
// Note: a trap handler initialization failure is not critical for runtime functionality
|
||||
auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
|
||||
if (asm_program != nullptr) {
|
||||
std::vector<amd::Device*> devices;
|
||||
devices.push_back(this);
|
||||
std::string opt = "-cl-internal-kernel ";
|
||||
if (auto retval =
|
||||
asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
|
||||
DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval);
|
||||
asm_program->release();
|
||||
} else {
|
||||
if (asm_program->load()) {
|
||||
trap_handler_ = asm_program;
|
||||
} else {
|
||||
DevLogError("Could not load the trap handler \n");
|
||||
asm_program->release();
|
||||
}
|
||||
}
|
||||
const std::string TrapHandlerAsm = TrapHandlerCode;
|
||||
// Create a program for trap handler
|
||||
// Note: a trap handler initialization failure is not critical for runtime functionality
|
||||
auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly);
|
||||
if (asm_program != nullptr) {
|
||||
std::vector<amd::Device*> devices;
|
||||
devices.push_back(this);
|
||||
std::string opt = "-cl-internal-kernel ";
|
||||
if (auto retval =
|
||||
asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) {
|
||||
DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval);
|
||||
asm_program->release();
|
||||
} else {
|
||||
DevLogError("Trap handler creation failed\n");
|
||||
if (asm_program->load()) {
|
||||
trap_handler_ = asm_program;
|
||||
} else {
|
||||
DevLogError("Could not load the trap handler \n");
|
||||
asm_program->release();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DevLogError("Trap handler creation failed\n");
|
||||
}
|
||||
|
||||
blitProgram_ = new BlitProgram(context_);
|
||||
|
||||
@@ -38,7 +38,6 @@
|
||||
#include "device/pal/palappprofile.hpp"
|
||||
#include "device/pal/palcapturemgr.hpp"
|
||||
#include "device/pal/palsignal.hpp"
|
||||
#include "acl.h"
|
||||
#include "memory"
|
||||
|
||||
#include <atomic>
|
||||
@@ -58,16 +57,6 @@ namespace amd::pal {
|
||||
|
||||
//! A nil device object
|
||||
class NullDevice : public amd::Device {
|
||||
protected:
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static Compiler* compiler_;
|
||||
#endif
|
||||
|
||||
public:
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
Compiler* compiler() const { return compiler_; }
|
||||
#endif
|
||||
|
||||
public:
|
||||
static bool init(void);
|
||||
|
||||
|
||||
@@ -358,7 +358,7 @@ Pal::Result RgpCaptureMgr::CheckForTraceResults() {
|
||||
// ================================================================================================
|
||||
// Called after a swap chain presents. This signals a (next) frame-begin boundary and is
|
||||
// used to coordinate RGP trace start/stop.
|
||||
void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
|
||||
void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
|
||||
size_t z) {
|
||||
// Wait for the driver to be resumed in case it's been paused.
|
||||
WaitForDriverResume();
|
||||
|
||||
@@ -41,7 +41,7 @@ namespace amd::pal {
|
||||
class Settings;
|
||||
class Device;
|
||||
class VirtualGPU;
|
||||
class HSAILKernel;
|
||||
class Kernel;
|
||||
|
||||
// ================================================================================================
|
||||
enum class RgpSqqtBarrierReason : uint32_t {
|
||||
@@ -99,7 +99,7 @@ class RgpCaptureMgr final : public ICaptureMgr {
|
||||
|
||||
static RgpCaptureMgr* Create(Pal::IPlatform* platform, const Device& device);
|
||||
|
||||
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
|
||||
void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
|
||||
size_t z) override;
|
||||
|
||||
void PostDispatch(VirtualGPU* gpu) override;
|
||||
@@ -230,7 +230,7 @@ class RgpCaptureMgr {
|
||||
Pal::SubmitInfo& submitInfo) const {
|
||||
return Pal::Result::Success;
|
||||
}
|
||||
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, size_t z) {}
|
||||
void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) {}
|
||||
void PostDispatch(VirtualGPU* gpu) {}
|
||||
void FinishRGPTrace(VirtualGPU* gpu, bool aborted) {}
|
||||
bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const {
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include "device/pal/palsched.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include "hsailctx.hpp"
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <fstream>
|
||||
@@ -36,9 +35,9 @@
|
||||
|
||||
namespace amd::pal {
|
||||
|
||||
void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
|
||||
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
|
||||
const uint16_t numVGPRs) {
|
||||
void Kernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
|
||||
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
|
||||
const uint16_t numVGPRs) {
|
||||
workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint32_t);
|
||||
// Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
|
||||
constexpr uint32_t ScratchRegAlignment = 256;
|
||||
@@ -71,7 +70,7 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
|
||||
static_cast<int>(workGroupInfo_.availableLDSSize_ - workGroupInfo_.localMemSize_);
|
||||
}
|
||||
|
||||
bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) {
|
||||
bool Kernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) {
|
||||
if (!sym) {
|
||||
return false;
|
||||
}
|
||||
@@ -86,185 +85,94 @@ bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t
|
||||
return true;
|
||||
}
|
||||
|
||||
HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel)
|
||||
Kernel::Kernel(std::string name, pal::Program* prog, bool internalKernel)
|
||||
: device::Kernel(prog->device(), name, *prog), index_(0), code_(0), codeSize_(0) {
|
||||
flags_.hsa_ = true;
|
||||
flags_.internalKernel_ = internalKernel;
|
||||
}
|
||||
|
||||
HSAILKernel::~HSAILKernel() {}
|
||||
Kernel::~Kernel() {}
|
||||
|
||||
bool HSAILKernel::postLoad() { return true; }
|
||||
bool Kernel::postLoad() {
|
||||
if (codeObjectVer() == 2) {
|
||||
symbolName_ = name();
|
||||
}
|
||||
|
||||
bool HSAILKernel::init() {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
// Copy codeobject of this kernel from the program CPU segment
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
std::string openClKernelName = openclMangledName(name());
|
||||
amd::hsa::loader::Symbol* sym = prog().getSymbol(openClKernelName.c_str(), &agent);
|
||||
if (!sym) {
|
||||
LogPrintfError("Error: Getting kernel ISA code symbol %s from AMD HSA Code Object failed.\n",
|
||||
openClKernelName.c_str());
|
||||
|
||||
auto sym = prog().getSymbol(symbolName().c_str(), &agent);
|
||||
|
||||
if (!setKernelDescriptor(sym, &akd_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
amd_kernel_code_t* akc = &akc_;
|
||||
|
||||
if (!setKernelCode(sym, akc)) {
|
||||
LogError("Error: setKernelCode() failed.");
|
||||
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK,
|
||||
reinterpret_cast<void*>(&kernelHasDynamicCallStack_))) {
|
||||
return false;
|
||||
}
|
||||
if (!prog().isNull()) {
|
||||
codeSize_ = prog().codeSegGpu().owner()->getSize();
|
||||
|
||||
if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE,
|
||||
reinterpret_cast<void*>(&codeSize_))) {
|
||||
LogError("Error: sym->GetInfo() failed.");
|
||||
return false;
|
||||
// handle device enqueue
|
||||
if (!RuntimeHandle().empty()) {
|
||||
amd::hsa::loader::Symbol* rth_symbol;
|
||||
|
||||
// Get the runtime handle symbol GPU address
|
||||
rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent);
|
||||
uint64_t symbol_address;
|
||||
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
|
||||
|
||||
// Copy the kernel_object pointer to the runtime handle symbol GPU address
|
||||
const Memory& codeSegGpu = prog().codeSegGpu();
|
||||
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
|
||||
uint64_t kernel_object = gpuAqlCode();
|
||||
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
|
||||
|
||||
const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
|
||||
|
||||
codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Set up the workgroup info
|
||||
setWorkGroupInfo(akc->workitem_private_segment_byte_size, akc->workgroup_group_segment_byte_size,
|
||||
akc->wavefront_sgpr_count, akc->workitem_vgpr_count);
|
||||
|
||||
workgroupGroupSegmentByteSize_ = workGroupInfo_.usedLDSSize_;
|
||||
kernargSegmentByteSize_ = akc->kernarg_segment_byte_size;
|
||||
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
acl_error error =
|
||||
amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfArgList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
char* aclArgList = new char[sizeOfArgList];
|
||||
if (nullptr == aclArgList) {
|
||||
return false;
|
||||
}
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(), aclArgList, &sizeOfArgList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
// Set the argList
|
||||
InitParameters(reinterpret_cast<const aclArgData*>(aclArgList), argsBufferSize());
|
||||
delete[] aclArgList;
|
||||
|
||||
size_t sizeOfWorkGroupSize;
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(), workGroupInfo_.compileSize_,
|
||||
&sizeOfWorkGroupSize);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
|
||||
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
|
||||
// Find total workgroup size
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
|
||||
workGroupInfo_.compileSize_[2];
|
||||
} else {
|
||||
workGroupInfo_.size_ = device().info().preferredWorkGroupSize_;
|
||||
workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_;
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
}
|
||||
if ((workGroupInfo_.usedStackSize_ & 0x1) == 0x1) {
|
||||
workGroupInfo_.scratchRegs_ =
|
||||
std::max<uint32_t>(device().StackSize(), workGroupInfo_.scratchRegs_ * sizeof(uint32_t));
|
||||
workGroupInfo_.scratchRegs_ = amd::alignUp(workGroupInfo_.scratchRegs_, 16) / sizeof(uint32_t);
|
||||
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
// Pull out printf metadata from the ELF
|
||||
size_t sizeOfPrintfList;
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
openClKernelName.c_str(), nullptr, &sizeOfPrintfList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
// handle the printf metadata if any
|
||||
std::vector<std::string> printfStr;
|
||||
if (!GetPrintfStr(&printfStr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make sure kernel has any printf info
|
||||
if (0 != sizeOfPrintfList) {
|
||||
char* aclPrintfList = new char[sizeOfPrintfList];
|
||||
if (nullptr == aclPrintfList) {
|
||||
return false;
|
||||
}
|
||||
error =
|
||||
amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
openClKernelName.c_str(), aclPrintfList, &sizeOfPrintfList);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the PrintfList
|
||||
InitPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList));
|
||||
delete[] aclPrintfList;
|
||||
if (!printfStr.empty()) {
|
||||
InitPrintf(printfStr);
|
||||
}
|
||||
|
||||
aclMetadata md;
|
||||
md.enqueue_kernel = false;
|
||||
size_t sizeOfDeviceEnqueue = sizeof(md.enqueue_kernel);
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE,
|
||||
openClKernelName.c_str(), &md.enqueue_kernel, &sizeOfDeviceEnqueue);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
flags_.dynamicParallelism_ = md.enqueue_kernel;
|
||||
|
||||
md.kernel_index = -1;
|
||||
size_t sizeOfIndex = sizeof(md.kernel_index);
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_KERNEL_INDEX,
|
||||
openClKernelName.c_str(), &md.kernel_index, &sizeOfIndex);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
index_ = md.kernel_index;
|
||||
|
||||
size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(),
|
||||
RT_WAVES_PER_SIMD_HINT, openClKernelName.c_str(),
|
||||
&workGroupInfo_.wavesPerSimdHint_, &sizeOfWavesPerSimdHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_);
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(),
|
||||
RT_WORK_GROUP_SIZE_HINT, openClKernelName.c_str(),
|
||||
workGroupInfo_.compileSizeHint_, &sizeOfWorkGroupSizeHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t sizeOfVecTypeHint;
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
|
||||
openClKernelName.c_str(), NULL, &sizeOfVecTypeHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (0 != sizeOfVecTypeHint) {
|
||||
char* VecTypeHint = new char[sizeOfVecTypeHint + 1];
|
||||
if (NULL == VecTypeHint) {
|
||||
return false;
|
||||
}
|
||||
error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT,
|
||||
openClKernelName.c_str(), VecTypeHint, &sizeOfVecTypeHint);
|
||||
if (error != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
VecTypeHint[sizeOfVecTypeHint] = '\0';
|
||||
workGroupInfo_.compileVecTypeHint_ = std::string(VecTypeHint);
|
||||
delete[] VecTypeHint;
|
||||
}
|
||||
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
return true;
|
||||
}
|
||||
|
||||
const HSAILProgram& HSAILKernel::prog() const {
|
||||
return reinterpret_cast<const HSAILProgram&>(prog_);
|
||||
bool Kernel::init() {
|
||||
return GetAttrCodePropMetadata();
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
|
||||
const pal::Program& Kernel::prog() const {
|
||||
return reinterpret_cast<const pal::Program&>(prog_);
|
||||
}
|
||||
|
||||
hsa_kernel_dispatch_packet_t* Kernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
|
||||
const amd::NDRangeContainer& sizes,
|
||||
const_address params, size_t ldsAddress,
|
||||
uint64_t vmDefQueue,
|
||||
@@ -496,86 +404,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
|
||||
return hsaDisp;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
const LightningProgram& LightningKernel::prog() const {
|
||||
return reinterpret_cast<const LightningProgram&>(prog_);
|
||||
}
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool LightningKernel::init() { return GetAttrCodePropMetadata(); }
|
||||
|
||||
bool LightningKernel::postLoad() {
|
||||
if (codeObjectVer() == 2) {
|
||||
symbolName_ = name();
|
||||
}
|
||||
|
||||
// Copy codeobject of this kernel from the program CPU segment
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
|
||||
auto sym = prog().getSymbol(symbolName().c_str(), &agent);
|
||||
|
||||
if (!setKernelDescriptor(sym, &akd_)) {
|
||||
return false;
|
||||
}
|
||||
if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK,
|
||||
reinterpret_cast<void*>(&kernelHasDynamicCallStack_))) {
|
||||
return false;
|
||||
}
|
||||
if (!prog().isNull()) {
|
||||
codeSize_ = prog().codeSegGpu().owner()->getSize();
|
||||
|
||||
// handle device enqueue
|
||||
if (!RuntimeHandle().empty()) {
|
||||
amd::hsa::loader::Symbol* rth_symbol;
|
||||
|
||||
// Get the runtime handle symbol GPU address
|
||||
rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent);
|
||||
uint64_t symbol_address;
|
||||
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
|
||||
|
||||
// Copy the kernel_object pointer to the runtime handle symbol GPU address
|
||||
const Memory& codeSegGpu = prog().codeSegGpu();
|
||||
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
|
||||
uint64_t kernel_object = gpuAqlCode();
|
||||
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
|
||||
|
||||
const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
|
||||
|
||||
codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Set up the workgroup info
|
||||
setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
|
||||
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_;
|
||||
workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_;
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
}
|
||||
if ((workGroupInfo_.usedStackSize_ & 0x1) == 0x1) {
|
||||
workGroupInfo_.scratchRegs_ =
|
||||
std::max<uint32_t>(device().StackSize(), workGroupInfo_.scratchRegs_ * sizeof(uint32_t));
|
||||
workGroupInfo_.scratchRegs_ = amd::alignUp(workGroupInfo_.scratchRegs_, 16) / sizeof(uint32_t);
|
||||
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
// handle the printf metadata if any
|
||||
std::vector<std::string> printfStr;
|
||||
if (!GetPrintfStr(&printfStr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!printfStr.empty()) {
|
||||
InitPrintf(printfStr);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
|
||||
bool Kernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
|
||||
llvm::amdhsa::kernel_descriptor_t* akd) {
|
||||
if (!sym) {
|
||||
return false;
|
||||
@@ -591,6 +420,4 @@ bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym,
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
} // namespace amd::pal
|
||||
|
||||
@@ -52,17 +52,16 @@ namespace amd::pal {
|
||||
class VirtualGPU;
|
||||
class Device;
|
||||
class NullDevice;
|
||||
class HSAILProgram;
|
||||
class LightningProgram;
|
||||
class Program;
|
||||
|
||||
/*! \addtogroup pal PAL Device Implementation
|
||||
* @{
|
||||
*/
|
||||
class HSAILKernel : public device::Kernel {
|
||||
class Kernel : public device::Kernel {
|
||||
public:
|
||||
HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel);
|
||||
Kernel(std::string name, pal::Program* prog, bool internalKernel);
|
||||
|
||||
virtual ~HSAILKernel();
|
||||
virtual ~Kernel();
|
||||
|
||||
//! Initializes the metadata required for this kernel.
|
||||
bool init();
|
||||
@@ -80,7 +79,7 @@ class HSAILKernel : public device::Kernel {
|
||||
}
|
||||
|
||||
//! Returns HSA program associated with this kernel
|
||||
const HSAILProgram& prog() const;
|
||||
const pal::Program& prog() const;
|
||||
|
||||
//! Returns LDS size used in this kernel
|
||||
uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }
|
||||
@@ -119,12 +118,15 @@ class HSAILKernel : public device::Kernel {
|
||||
//! Returns the kernel index in the program
|
||||
uint index() const { return index_; }
|
||||
|
||||
//! Get the kernel descriptor and copy the code object from the program CPU segment
|
||||
bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd);
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
HSAILKernel(const HSAILKernel&);
|
||||
Kernel(const pal::Kernel&);
|
||||
|
||||
//! Disable operator=
|
||||
HSAILKernel& operator=(const HSAILKernel&);
|
||||
Kernel& operator=(const pal::Kernel&);
|
||||
|
||||
protected:
|
||||
//! Get the kernel code and copy the code object from the program CPU segment
|
||||
@@ -144,24 +146,5 @@ class HSAILKernel : public device::Kernel {
|
||||
size_t codeSize_; //!< Size of ISA code
|
||||
};
|
||||
|
||||
class LightningKernel : public HSAILKernel {
|
||||
public:
|
||||
LightningKernel(const std::string& name, HSAILProgram* prog, bool internalKernel)
|
||||
: HSAILKernel(name, prog, internalKernel) {}
|
||||
|
||||
//! Returns Lightning program associated with this kernel
|
||||
const LightningProgram& prog() const;
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Get the kernel descriptor and copy the code object from the program CPU segment
|
||||
bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd);
|
||||
//! Initializes the metadata required for this kernel
|
||||
bool init();
|
||||
|
||||
//! Setup after code object loading
|
||||
bool postLoad();
|
||||
#endif
|
||||
};
|
||||
|
||||
/*@}*/ // end of group pal
|
||||
} // namespace amd::pal
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
|
||||
#include "os/os.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "aclTypes.h"
|
||||
#include "device/pal/palprogram.hpp"
|
||||
#include "device/pal/palblit.hpp"
|
||||
#include "utils/options.hpp"
|
||||
@@ -67,7 +66,7 @@ bool Segment::gpuAddressOffset(uint64_t offAddr, size_t* offset) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
|
||||
bool Segment::alloc(pal::Program& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
|
||||
bool zero) {
|
||||
if (prog.isNull()) {
|
||||
LogError("[OCL] cannot create a mem object on an offline device!");
|
||||
@@ -174,8 +173,9 @@ bool Segment::freeze(bool destroySysmem) {
|
||||
return result;
|
||||
}
|
||||
|
||||
HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
|
||||
: Program(device, owner),
|
||||
// ================================================================================================
|
||||
Program::Program(Device& device, amd::Program& owner)
|
||||
: device::Program(device, owner),
|
||||
rawBinary_(nullptr),
|
||||
kernels_(nullptr),
|
||||
codeSegGpu_(nullptr),
|
||||
@@ -186,10 +186,11 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
|
||||
loaderContext_(this) {
|
||||
assert(device.isOnline());
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
}
|
||||
|
||||
HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
|
||||
: Program(device, owner),
|
||||
Program::Program(NullDevice& device, amd::Program& owner)
|
||||
: device::Program(device, owner),
|
||||
rawBinary_(nullptr),
|
||||
kernels_(nullptr),
|
||||
codeSegGpu_(nullptr),
|
||||
@@ -201,26 +202,14 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
|
||||
assert(!device.isOnline());
|
||||
isNull_ = true;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
}
|
||||
|
||||
HSAILProgram::~HSAILProgram() {
|
||||
Program::~Program() {
|
||||
// Destroy internal static samplers
|
||||
for (auto& it : staticSamplers_) {
|
||||
delete it;
|
||||
}
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
if (rawBinary_ != nullptr) {
|
||||
amd::Hsail::FreeMem(binaryElf_, rawBinary_);
|
||||
}
|
||||
acl_error error;
|
||||
// Free the elf binary
|
||||
if (binaryElf_ != nullptr) {
|
||||
error = amd::Hsail::BinaryFini(binaryElf_);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogWarning("Error while destroying the acl binary \n");
|
||||
}
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
releaseClBinary();
|
||||
if (executable_) {
|
||||
loader_->DestroyExecutable(executable_);
|
||||
@@ -233,15 +222,6 @@ HSAILProgram::~HSAILProgram() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline static std::vector<std::string> splitSpaceSeparatedString(char* str) {
|
||||
std::string s(str);
|
||||
std::stringstream ss(s);
|
||||
std::istream_iterator<std::string> beg(ss), end;
|
||||
std::vector<std::string> vec(beg, end);
|
||||
return vec;
|
||||
}
|
||||
|
||||
inline static std::string GetUriFromMemoryAddress(const void* memory, size_t size) {
|
||||
int pid = amd::Os::getProcessId();
|
||||
std::ostringstream uri_stream;
|
||||
@@ -250,100 +230,7 @@ inline static std::string GetUriFromMemoryAddress(const void* memory, size_t siz
|
||||
return uri_stream.str();
|
||||
}
|
||||
|
||||
bool HSAILProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
// ACL_TYPE_CG stage is not performed for offline compilation
|
||||
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
|
||||
if (executable_ == nullptr) {
|
||||
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
|
||||
return false;
|
||||
}
|
||||
size_t size = binSize;
|
||||
hsa_code_object_t code_object;
|
||||
code_object.handle = reinterpret_cast<uint64_t>(binary);
|
||||
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
auto uri = GetUriFromMemoryAddress(binary, binSize);
|
||||
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog_ += "Error: AMD HSA Code Object loading failed.\n";
|
||||
return false;
|
||||
}
|
||||
status = loader_->FreezeExecutable(executable_, nullptr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog_ += "Error: AMD HSA Code Object freeze failed.\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t kernelNamesSize = 0;
|
||||
acl_error errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_,
|
||||
RT_KERNEL_NAMES, nullptr, nullptr, &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
|
||||
return false;
|
||||
}
|
||||
if (kernelNamesSize > 0) {
|
||||
std::vector<char> kernelNames(kernelNamesSize);
|
||||
errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES,
|
||||
nullptr, kernelNames.data(), &kernelNamesSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
|
||||
return false;
|
||||
}
|
||||
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames.data());
|
||||
for (const auto& it : vKernels) {
|
||||
std::string kernelName(it);
|
||||
|
||||
HSAILKernel* aKernel = new HSAILKernel(kernelName, this, internalKernel);
|
||||
addKernel(aKernel);
|
||||
|
||||
if (!aKernel->init()) {
|
||||
buildLog_ += "Error: Kernel initialization failed.\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
aKernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
|
||||
}
|
||||
}
|
||||
|
||||
DestroySegmentCpuAccess();
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HSAILProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset,
|
||||
std::string uri) {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
if (!device().isOnline()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool dynamicParallelism = false;
|
||||
for (auto& kit : kernels()) {
|
||||
HSAILKernel* aKernel = static_cast<HSAILKernel*>(kit.second);
|
||||
if (!aKernel->postLoad()) {
|
||||
return false;
|
||||
}
|
||||
dynamicParallelism |= aKernel->dynamicParallelism();
|
||||
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
|
||||
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
|
||||
maxScratchRegs_ =
|
||||
std::max(static_cast<uint>(aKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
|
||||
maxVgprs_ = std::max(static_cast<uint>(aKernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
|
||||
}
|
||||
|
||||
// Allocate kernel table for device enqueuing
|
||||
if (!isNull() && dynamicParallelism && !allocKernelTable()) {
|
||||
return false;
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HSAILProgram::createBinary(amd::option::Options* options) { return true; }
|
||||
|
||||
bool HSAILProgram::allocKernelTable() {
|
||||
bool Program::allocKernelTable() {
|
||||
if (isNull()) {
|
||||
// Cannot create a kernel table for offline devices.
|
||||
return false;
|
||||
@@ -359,7 +246,7 @@ bool HSAILProgram::allocKernelTable() {
|
||||
} else {
|
||||
size_t* table = reinterpret_cast<size_t*>(kernels_->map(nullptr, pal::Resource::WriteOnly));
|
||||
for (auto& it : kernels()) {
|
||||
HSAILKernel* kernel = static_cast<HSAILKernel*>(it.second);
|
||||
pal::Kernel* kernel = static_cast<pal::Kernel*>(it.second);
|
||||
table[kernel->index()] = static_cast<size_t>(kernel->gpuAqlCode());
|
||||
}
|
||||
kernels_->unmap(nullptr);
|
||||
@@ -367,41 +254,9 @@ bool HSAILProgram::allocKernelTable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); }
|
||||
void Program::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); }
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
const aclTargetInfo& HSAILProgram::info() {
|
||||
acl_error err;
|
||||
info_ = amd::Hsail::GetTargetInfo(palNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail",
|
||||
device().isa().hsailName(), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
}
|
||||
return info_;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool HSAILProgram::saveBinaryAndSetType(type_t type) {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
// Write binary to memory
|
||||
if (rawBinary_ != nullptr) {
|
||||
// Free memory containing rawBinary
|
||||
amd::Hsail::FreeMem(binaryElf_, rawBinary_);
|
||||
rawBinary_ = nullptr;
|
||||
}
|
||||
size_t size = 0;
|
||||
if (amd::Hsail::WriteToMem(binaryElf_, &rawBinary_, &size) != ACL_SUCCESS) {
|
||||
buildLog_ += "Failed to write binary to memory \n";
|
||||
return false;
|
||||
}
|
||||
setBinary(static_cast<char*>(rawBinary_), size);
|
||||
// Set the type of binary
|
||||
setType(type);
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) {
|
||||
bool Program::defineGlobalVar(const char* name, void* dptr) {
|
||||
if (!device().isOnline()) {
|
||||
return false;
|
||||
}
|
||||
@@ -419,7 +274,7 @@ bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
|
||||
bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
|
||||
const char* global_name) const {
|
||||
if (!device().isOnline()) {
|
||||
return false;
|
||||
@@ -528,6 +383,107 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Program::createBinary(amd::option::Options* options) {
|
||||
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
||||
LogError("Failed to create ELF binary image!");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Program::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) {
|
||||
// Skip metadata look-up and kernel creation for assembly and internal kernel.
|
||||
// @note: Runtime compiles only the second level trap handler from assembly
|
||||
if ((owner()->language() != amd::Program::Assembly) || !internal_) {
|
||||
// Find the size of global variables from the binary
|
||||
if (!FindGlobalVarSize(binary, binSize)) {
|
||||
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto& kernelMeta : kernelMetadataMap_) {
|
||||
auto kernelName = kernelMeta.first;
|
||||
auto kernel = new pal::Kernel(kernelName, this, internalKernel);
|
||||
if (kernel == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (!kernel->init()) {
|
||||
buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n";
|
||||
return false;
|
||||
}
|
||||
addKernel(kernel);
|
||||
|
||||
if (codeObjectVer() < 5) {
|
||||
kernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
|
||||
if (executable_ == nullptr) {
|
||||
LogError("Error: Executable for AMD HSA Code Object isn't created.");
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_code_object_t code_object;
|
||||
code_object.handle = reinterpret_cast<uint64_t>(binary);
|
||||
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
auto uri = GetUriFromMemoryAddress(binary, binSize);
|
||||
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("Error: AMD HSA Code Object loading failed.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isInternal() && (owner()->language() == amd::Program::Assembly)) {
|
||||
// Don't register trap handler with the debugger, since user shouldn't see this kernel
|
||||
status = executable_->Freeze(nullptr);
|
||||
trapHandler_ = true;
|
||||
} else {
|
||||
status = loader_->FreezeExecutable(executable_, nullptr);
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("Error: Freezing the executable failed.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Program::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
|
||||
size_t foffset, std::string uri) {
|
||||
// Collect the information about compiled binary, except the trap handler
|
||||
if (!isNull() && (palDevice().captureMgr() != nullptr) && !isTrapHandler()) {
|
||||
apiHash_ = palDevice().captureMgr()->AddElfBinary(binary, binSize, binary, binSize,
|
||||
codeSegGpu_->iMem(), codeSegGpu_->offset());
|
||||
}
|
||||
|
||||
for (auto& kit : kernels()) {
|
||||
pal::Kernel* kernel = static_cast<pal::Kernel*>(kit.second);
|
||||
if (!kernel->postLoad()) {
|
||||
return false;
|
||||
}
|
||||
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
|
||||
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
|
||||
maxScratchRegs_ =
|
||||
std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
|
||||
maxVgprs_ = std::max(static_cast<uint>(kernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
|
||||
}
|
||||
DestroySegmentCpuAccess();
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t Program::GetTrapHandlerAddress() const {
|
||||
uint64_t address = 0;
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
auto trap_sym = executable_->GetSymbol("trap_entry", &agent);
|
||||
if (trap_sym != nullptr) {
|
||||
trap_sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &address);
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
|
||||
const amd::Isa* isa_p = amd::Isa::findIsa(name);
|
||||
return {amd::Isa::toHandle(isa_p)};
|
||||
@@ -696,12 +652,10 @@ hsa_status_t PALHSALoaderContext::SamplerDestroy(hsa_agent_t agent,
|
||||
if (!sampler_handle.handle) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
// Samplers will be destroyed by the pal::HSAILProgam destructor.
|
||||
// Samplers will be destroyed by the pal::Program destructor.
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
|
||||
static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executable_symbol_t hSymbol,
|
||||
void* data) {
|
||||
auto symbol = amd::hsa::loader::Symbol::Object(hSymbol);
|
||||
@@ -729,115 +683,4 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
bool LightningProgram::createBinary(amd::option::Options* options) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
||||
LogError("Failed to create ELF binary image!");
|
||||
return false;
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// Skip metadata look-up and kernel creation for assembly and internal kernel.
|
||||
// @note: Runtime compiles only the second level trap handler from assembly
|
||||
if ((owner()->language() != amd::Program::Assembly) || !internal_) {
|
||||
// Find the size of global variables from the binary
|
||||
if (!FindGlobalVarSize(binary, binSize)) {
|
||||
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto& kernelMeta : kernelMetadataMap_) {
|
||||
auto kernelName = kernelMeta.first;
|
||||
auto kernel = new LightningKernel(kernelName, this, internalKernel);
|
||||
if (kernel == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (!kernel->init()) {
|
||||
buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n";
|
||||
return false;
|
||||
}
|
||||
addKernel(kernel);
|
||||
|
||||
if (codeObjectVer() < 5) {
|
||||
kernel->setUniformWorkGroupSize(useUniformWorkGroupSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
|
||||
if (executable_ == nullptr) {
|
||||
LogError("Error: Executable for AMD HSA Code Object isn't created.");
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_code_object_t code_object;
|
||||
code_object.handle = reinterpret_cast<uint64_t>(binary);
|
||||
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
auto uri = GetUriFromMemoryAddress(binary, binSize);
|
||||
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("Error: AMD HSA Code Object loading failed.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isInternal() && (owner()->language() == amd::Program::Assembly)) {
|
||||
// Don't register trap handler with the debugger, since user shouldn't see this kernel
|
||||
status = executable_->Freeze(nullptr);
|
||||
trapHandler_ = true;
|
||||
} else {
|
||||
status = loader_->FreezeExecutable(executable_, nullptr);
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("Error: Freezing the executable failed.");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
|
||||
size_t foffset, std::string uri) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// Collect the information about compiled binary, except the trap handler
|
||||
if (!isNull() && (palDevice().captureMgr() != nullptr) && !isTrapHandler()) {
|
||||
apiHash_ = palDevice().captureMgr()->AddElfBinary(binary, binSize, binary, binSize,
|
||||
codeSegGpu_->iMem(), codeSegGpu_->offset());
|
||||
}
|
||||
|
||||
for (auto& kit : kernels()) {
|
||||
LightningKernel* kernel = static_cast<LightningKernel*>(kit.second);
|
||||
if (!kernel->postLoad()) {
|
||||
return false;
|
||||
}
|
||||
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
|
||||
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
|
||||
maxScratchRegs_ =
|
||||
std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
|
||||
maxVgprs_ = std::max(static_cast<uint>(kernel->workGroupInfo()->usedVGPRs_), maxVgprs_);
|
||||
}
|
||||
DestroySegmentCpuAccess();
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
uint64_t LightningProgram::GetTrapHandlerAddress() const {
|
||||
uint64_t address = 0;
|
||||
hsa_agent_t agent = {amd::Device::toHandle(&(device()))};
|
||||
auto trap_sym = executable_->GetSymbol("trap_entry", &agent);
|
||||
if (trap_sym != nullptr) {
|
||||
trap_sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &address);
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
} // namespace amd::pal
|
||||
|
||||
@@ -44,7 +44,7 @@ namespace amd::pal {
|
||||
*/
|
||||
|
||||
using namespace amd::hsa::loader;
|
||||
class HSAILProgram;
|
||||
class Program;
|
||||
|
||||
class Segment : public amd::HeapObject {
|
||||
public:
|
||||
@@ -52,7 +52,7 @@ class Segment : public amd::HeapObject {
|
||||
~Segment();
|
||||
|
||||
//! Allocates a segment
|
||||
bool alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
|
||||
bool alloc(Program& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align,
|
||||
bool zero);
|
||||
|
||||
//! Copies data from host to the segment
|
||||
@@ -81,7 +81,7 @@ class Segment : public amd::HeapObject {
|
||||
|
||||
class PALHSALoaderContext final : public hsa::loader::Context {
|
||||
public:
|
||||
PALHSALoaderContext(HSAILProgram* program) : program_(program) {}
|
||||
PALHSALoaderContext(pal::Program* program) : program_(program) {}
|
||||
|
||||
virtual ~PALHSALoaderContext() {}
|
||||
|
||||
@@ -127,26 +127,26 @@ class PALHSALoaderContext final : public hsa::loader::Context {
|
||||
const hsa_ext_sampler_descriptor_t* sampler_descriptor,
|
||||
hsa_ext_sampler_t* sampler_handle) override;
|
||||
|
||||
//! All samplers are owned by HSAILProgram and are deleted in its destructor.
|
||||
//! All samplers are owned by pal program and are deleted in its destructor.
|
||||
hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override;
|
||||
|
||||
private:
|
||||
PALHSALoaderContext(const PALHSALoaderContext& c);
|
||||
PALHSALoaderContext& operator=(const PALHSALoaderContext& c);
|
||||
|
||||
pal::HSAILProgram* program_;
|
||||
pal::Program* program_;
|
||||
};
|
||||
|
||||
//! \class HSAIL program
|
||||
class HSAILProgram : public device::Program {
|
||||
//! \class pal program
|
||||
class Program : public device::Program {
|
||||
friend class ClBinary;
|
||||
|
||||
public:
|
||||
//! Default constructor
|
||||
HSAILProgram(Device& device, amd::Program& owner);
|
||||
HSAILProgram(NullDevice& device, amd::Program& owner);
|
||||
Program(Device& device, amd::Program& owner);
|
||||
Program(NullDevice& device, amd::Program& owner);
|
||||
//! Default destructor
|
||||
virtual ~HSAILProgram();
|
||||
virtual ~Program();
|
||||
|
||||
void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); }
|
||||
|
||||
@@ -201,14 +201,9 @@ class HSAILProgram : public device::Program {
|
||||
//! Returns API hash value of the program for RGP thread trace
|
||||
uint64_t ApiHash() const { return apiHash_; }
|
||||
|
||||
protected:
|
||||
bool saveBinaryAndSetType(type_t type);
|
||||
//! Returns the load address of the trap handler
|
||||
uint64_t GetTrapHandlerAddress() const;
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options);
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
virtual const aclTargetInfo& info();
|
||||
#endif
|
||||
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) override;
|
||||
|
||||
@@ -216,6 +211,9 @@ class HSAILProgram : public device::Program {
|
||||
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
|
||||
std::string uri = std::string()) override;
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options) override;
|
||||
|
||||
protected:
|
||||
//! Destroys CPU allocations in the code segment
|
||||
void DestroySegmentCpuAccess() const {
|
||||
if (codeSegment_ != nullptr) {
|
||||
@@ -229,10 +227,10 @@ class HSAILProgram : public device::Program {
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
HSAILProgram(const HSAILProgram&);
|
||||
Program(const Program&);
|
||||
|
||||
//! Disable operator=
|
||||
HSAILProgram& operator=(const HSAILProgram&);
|
||||
Program& operator=(const Program&);
|
||||
|
||||
protected:
|
||||
//! Allocate kernel table
|
||||
@@ -256,31 +254,5 @@ class HSAILProgram : public device::Program {
|
||||
PALHSALoaderContext loaderContext_; //!< Context for HSA Loader
|
||||
};
|
||||
|
||||
//! \class Lightning Compiler Program
|
||||
class LightningProgram : public HSAILProgram {
|
||||
public:
|
||||
LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
}
|
||||
|
||||
LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
}
|
||||
virtual ~LightningProgram() {}
|
||||
uint64_t GetTrapHandlerAddress() const;
|
||||
|
||||
protected:
|
||||
virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) override;
|
||||
|
||||
virtual bool setKernels(void* binary, size_t binSize,
|
||||
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
|
||||
std::string uri = std::string()) override;
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options) override;
|
||||
};
|
||||
|
||||
/*@}*/ // namespace amd::pal
|
||||
} // namespace amd::pal
|
||||
|
||||
@@ -119,10 +119,8 @@ Settings::Settings() {
|
||||
std::min(static_cast<uint64_t>(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
|
||||
|
||||
maxCmdBuffers_ = 12;
|
||||
useLightning_ = amd::IS_HIP ? true : ((!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : false);
|
||||
enableWgpMode_ = false;
|
||||
enableWave32Mode_ = false;
|
||||
hsailExplicitXnack_ = false;
|
||||
lcWavefrontSize64_ = true;
|
||||
enableHwP2P_ = false;
|
||||
imageBufferWar_ = false;
|
||||
@@ -152,7 +150,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
}
|
||||
|
||||
enableXNACK_ = (isa.xnack() == amd::Isa::Feature::Enabled);
|
||||
hsailExplicitXnack_ = enableXNACK_;
|
||||
bool useWavefront64 = false;
|
||||
|
||||
std::string appName = {};
|
||||
@@ -192,11 +189,8 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
case Pal::AsicRevision::Navi14:
|
||||
case Pal::AsicRevision::Navi12:
|
||||
case Pal::AsicRevision::Navi10:
|
||||
useLightning_ = GPU_ENABLE_LC;
|
||||
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
|
||||
if (useLightning_) {
|
||||
enableWave32Mode_ = true;
|
||||
}
|
||||
enableWave32Mode_ = true;
|
||||
if (!flagIsDefault(GPU_ENABLE_WAVE32_MODE)) {
|
||||
enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE;
|
||||
}
|
||||
@@ -211,9 +205,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
enableHwP2P_ = true;
|
||||
enableCoopGroups_ = IS_LINUX;
|
||||
enableCoopMultiDeviceGroups_ = IS_LINUX;
|
||||
if (useLightning_) {
|
||||
singleFpDenorm_ = true;
|
||||
}
|
||||
singleFpDenorm_ = true;
|
||||
enableExtension(ClKhrFp16);
|
||||
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
|
||||
// Cache line size is 64 bytes
|
||||
@@ -279,11 +271,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
enableExtension(ClAmdCopyBufferP2P);
|
||||
}
|
||||
|
||||
if (!useLightning_) {
|
||||
enableExtension(ClAmdPopcnt);
|
||||
enableExtension(ClAmdVec3);
|
||||
enableExtension(ClAmdPrintf);
|
||||
}
|
||||
// Enable some platform extensions
|
||||
enableExtension(ClAmdDeviceAttributeQuery);
|
||||
|
||||
@@ -306,12 +293,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
enableExtension(ClKhrFp64);
|
||||
}
|
||||
|
||||
if (!useLightning_) {
|
||||
// Enable AMD double precision extension
|
||||
doublePrecision_ = true;
|
||||
enableExtension(ClAmdFp64);
|
||||
}
|
||||
|
||||
if (palProp.gpuMemoryProperties.busAddressableMemSize > 0) {
|
||||
// Enable bus addressable memory extension
|
||||
enableExtension(ClAMDBusAddressableMemory);
|
||||
|
||||
@@ -216,7 +216,7 @@ bool UberTraceCaptureMgr::Init(Pal::IPlatform* platform) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x,
|
||||
void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x,
|
||||
size_t y, size_t z) {
|
||||
// Wait for the driver to be resumed in case it's been paused.
|
||||
WaitForDriverResume();
|
||||
|
||||
@@ -45,7 +45,7 @@ class UberTraceCaptureMgr final : public ICaptureMgr {
|
||||
|
||||
bool Update(Pal::IPlatform* platform) override;
|
||||
|
||||
void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
|
||||
void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y,
|
||||
size_t z) override;
|
||||
|
||||
void PostDispatch(VirtualGPU* gpu) override;
|
||||
|
||||
@@ -2346,7 +2346,7 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQueue) {
|
||||
void VirtualGPU::PrintChildren(const pal::Kernel& hsaKernel, VirtualGPU* gpuDefQueue) {
|
||||
AmdAqlWrap* wraps = (AmdAqlWrap*)(&((AmdVQueueHeader*)gpuDefQueue->virtualQueue_->data())[1]);
|
||||
uint p = 0;
|
||||
for (uint i = 0; i < gpuDefQueue->vqHeader_->aql_slot_num; ++i) {
|
||||
@@ -2381,11 +2381,11 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
|
||||
print << wraps[i].aql.grid_size_y << ", ";
|
||||
print << wraps[i].aql.grid_size_z << "]\n";
|
||||
|
||||
HSAILKernel* child = nullptr;
|
||||
pal::Kernel* child = nullptr;
|
||||
for (auto it = hsaKernel.prog().kernels().begin(); it != hsaKernel.prog().kernels().end();
|
||||
++it) {
|
||||
if (wraps[i].aql.kernel_object == static_cast<HSAILKernel*>(it->second)->gpuAqlCode()) {
|
||||
child = static_cast<HSAILKernel*>(it->second);
|
||||
if (wraps[i].aql.kernel_object == static_cast<pal::Kernel*>(it->second)->gpuAqlCode()) {
|
||||
child = static_cast<pal::Kernel*>(it->second);
|
||||
}
|
||||
}
|
||||
if (child == nullptr) {
|
||||
@@ -2449,7 +2449,7 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel,
|
||||
bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const pal::Kernel& hsaKernel,
|
||||
VirtualGPU** gpuDefQueue, uint64_t* vmDefQueue) {
|
||||
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
|
||||
if (nullptr == defQueue) {
|
||||
@@ -2482,7 +2482,7 @@ bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel&
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel,
|
||||
void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const pal::Kernel& hsaKernel,
|
||||
VirtualGPU* gpuDefQueue, uint64_t vmDefQueue,
|
||||
uint64_t vmParentWrap, GpuEvent* gpuEvent) {
|
||||
uint32_t id = gpuEvent->id_;
|
||||
@@ -2628,7 +2628,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
state_.anyOrder_ = anyOrder;
|
||||
|
||||
// Get the HSA kernel object
|
||||
const HSAILKernel& hsaKernel = static_cast<const HSAILKernel&>(*(kernel.getDeviceKernel(dev())));
|
||||
const pal::Kernel& hsaKernel = static_cast<const pal::Kernel&>(*(kernel.getDeviceKernel(dev())));
|
||||
|
||||
// If RGP capturing is enabled, then start SQTT trace
|
||||
if (rgpCaptureEna()) {
|
||||
@@ -2696,7 +2696,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
assert((nullptr != aqlPkt) && "Couldn't load kernel arguments");
|
||||
|
||||
// Dynamic call stack size is considered to calculate private segment size and scratch regs
|
||||
// in LightningKernel::postLoad(). As it is not called during hipModuleLaunchKernel unlike
|
||||
// in pal::Kernel::postLoad(). As it is not called during hipModuleLaunchKernel unlike
|
||||
// hipLaunchKernel/hipLaunchKernelGGL, Updated value is passed to dispatch packet.
|
||||
size_t privateMemSize = hsaKernel.spillSegSize();
|
||||
if ((hsaKernel.workGroupInfo()->usedStackSize_ & 0x1) == 0x1) {
|
||||
@@ -2725,13 +2725,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
}
|
||||
dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlKd();
|
||||
dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress();
|
||||
if (!hsaKernel.prog().isLC() && hsaKernel.workGroupInfo()->wavesPerSimdHint_ != 0) {
|
||||
constexpr uint32_t kWavesPerSimdLimit = 4;
|
||||
dispatchParam.wavesPerSh =
|
||||
kWavesPerSimdLimit * dev().info().cuPerShaderArray_ * dev().info().simdPerCU_;
|
||||
} else {
|
||||
dispatchParam.wavesPerSh = 0;
|
||||
}
|
||||
dispatchParam.wavesPerSh = 0;
|
||||
dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false;
|
||||
dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize();
|
||||
dispatchParam.aqlPacketIndex = aql_index;
|
||||
@@ -3584,7 +3578,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
bool srdResource = false;
|
||||
amd::Memory* const* memories =
|
||||
reinterpret_cast<amd::Memory* const*>(params + kernelParams.memoryObjOffset());
|
||||
const HSAILKernel& hsaKernel = static_cast<const HSAILKernel&>(*(kernel.getDeviceKernel(dev())));
|
||||
const pal::Kernel& hsaKernel = static_cast<const pal::Kernel&>(*(kernel.getDeviceKernel(dev())));
|
||||
const amd::KernelSignature& signature = kernel.signature();
|
||||
ldsAddress = hsaKernel.ldsSize();
|
||||
|
||||
|
||||
@@ -45,14 +45,13 @@
|
||||
namespace amd::pal {
|
||||
|
||||
class Device;
|
||||
class Kernel;
|
||||
class Memory;
|
||||
class CalCounterReference;
|
||||
class VirtualGPU;
|
||||
class Program;
|
||||
class BlitManager;
|
||||
class ThreadTrace;
|
||||
class HSAILKernel;
|
||||
class Kernel;
|
||||
|
||||
struct AqlPacketMgmt : public amd::EmbeddedObject {
|
||||
static constexpr uint32_t kAqlPacketsListSize = 4 * Ki;
|
||||
@@ -693,19 +692,19 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata() //!< Memory copy MetaData
|
||||
);
|
||||
|
||||
void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel
|
||||
void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSAIL kernel
|
||||
VirtualGPU* gpuDefQueue //!< Device queue for children execution
|
||||
);
|
||||
|
||||
bool PreDeviceEnqueue(const amd::Kernel& kernel, //!< Parent amd kernel object
|
||||
const HSAILKernel& hsaKernel, //!< Parent HSAIL object
|
||||
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
|
||||
VirtualGPU** gpuDefQueue, //!< [Return] GPU default queue
|
||||
uint64_t* vmDefQueue //!< [Return] VM handle to the virtual queue
|
||||
);
|
||||
|
||||
void PostDeviceEnqueue(
|
||||
const amd::Kernel& kernel, //!< Parent amd kernel object
|
||||
const HSAILKernel& hsaKernel, //!< Parent HSAIL object
|
||||
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
|
||||
VirtualGPU* gpuDefQueue, //!< GPU default queue
|
||||
uint64_t vmDefQueue, //!< VM handle to the virtual queue
|
||||
uint64_t vmParentWrap, //!< VM handle to the wrapped AQL packet location
|
||||
|
||||
@@ -125,8 +125,7 @@ bool NullDevice::create(const amd::Isa& isa) {
|
||||
info_.oclcVersion_ = "OpenCL C " OPENCL_C_VERSION_STR " ";
|
||||
info_.spirVersions_ = "";
|
||||
std::stringstream ss;
|
||||
ss << AMD_BUILD_STRING " (HSA," << (settings().useLightning_ ? "LC" : "HSAIL");
|
||||
ss << ") [Offline]";
|
||||
ss << AMD_BUILD_STRING " (HSA,LC) [Offline]";
|
||||
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
|
||||
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
|
||||
return true;
|
||||
@@ -704,12 +703,7 @@ bool Device::create() {
|
||||
|
||||
// ================================================================================================
|
||||
device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) {
|
||||
device::Program* program;
|
||||
if (settings().useLightning_) {
|
||||
program = new LightningProgram(*this, owner);
|
||||
} else {
|
||||
program = new HSAILProgram(*this, owner);
|
||||
}
|
||||
device::Program* program = new roc::Program(*this, owner);
|
||||
|
||||
if (program == nullptr) {
|
||||
LogError("Memory allocation has failed!");
|
||||
@@ -722,19 +716,15 @@ bool Device::createBlitProgram() {
|
||||
bool result = true;
|
||||
std::string extraKernel;
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
if (settings().useLightning_) {
|
||||
if (amd::IS_HIP) {
|
||||
if (settings().gwsInitSupported_) {
|
||||
extraKernel = device::HipExtraSourceCode;
|
||||
} else {
|
||||
extraKernel = device::HipExtraSourceCodeNoGWS;
|
||||
}
|
||||
if (amd::IS_HIP) {
|
||||
if (settings().gwsInitSupported_) {
|
||||
extraKernel = device::HipExtraSourceCode;
|
||||
} else {
|
||||
extraKernel = SchedulerSourceCode;
|
||||
extraKernel = device::HipExtraSourceCodeNoGWS;
|
||||
}
|
||||
} else {
|
||||
extraKernel = SchedulerSourceCode;
|
||||
}
|
||||
#endif // USE_COMGR_LIBRARY
|
||||
|
||||
blitProgram_ = new BlitProgram(context_);
|
||||
// Create blit programs
|
||||
@@ -749,12 +739,7 @@ bool Device::createBlitProgram() {
|
||||
}
|
||||
|
||||
device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) {
|
||||
device::Program* program;
|
||||
if (settings().useLightning_) {
|
||||
program = new LightningProgram(*this, owner);
|
||||
} else {
|
||||
program = new HSAILProgram(*this, owner);
|
||||
}
|
||||
device::Program* program = new roc::Program(*this, owner);
|
||||
|
||||
if (program == nullptr) {
|
||||
LogError("Memory allocation has failed!");
|
||||
@@ -1305,9 +1290,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
return false;
|
||||
}
|
||||
std::stringstream ss;
|
||||
ss << AMD_BUILD_STRING " (HSA" << major << "." << minor << ","
|
||||
<< (settings().useLightning_ ? "LC" : "HSAIL");
|
||||
ss << ")";
|
||||
ss << AMD_BUILD_STRING " (HSA" << major << "." << minor << ",LC)";
|
||||
|
||||
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
|
||||
|
||||
@@ -1480,10 +1463,6 @@ bool Device::populateOCLDeviceConstants() {
|
||||
if (info_.iommuv2_ || isa().versionMajor() >= 8) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
} else if (!settings().useLightning_) {
|
||||
if (info_.iommuv2_ || (isa().versionMajor() == 8)) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
|
||||
namespace amd::roc {
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Kernel::init() { return GetAttrCodePropMetadata(); }
|
||||
|
||||
bool Kernel::postLoad() {
|
||||
@@ -157,6 +156,5 @@ bool Kernel::postLoad() {
|
||||
program()->rocDevice().AddKernel(*this);
|
||||
return true;
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
} // namespace amd::roc
|
||||
|
||||
@@ -55,6 +55,7 @@ Program::~Program() {
|
||||
Program::Program(roc::NullDevice& device, amd::Program& owner) : device::Program(device, owner) {
|
||||
hsaExecutable_.handle = 0;
|
||||
hsaCodeObjectReader_.handle = 0;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
}
|
||||
|
||||
bool Program::initClBinary(char* binaryIn, size_t size) {
|
||||
@@ -201,58 +202,16 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
|
||||
return true;
|
||||
}
|
||||
|
||||
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner)
|
||||
: roc::Program(device, owner) {}
|
||||
|
||||
HSAILProgram::~HSAILProgram() {}
|
||||
|
||||
bool HSAILProgram::saveBinaryAndSetType(type_t type) { return true; }
|
||||
|
||||
bool HSAILProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset,
|
||||
std::string uri) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
|
||||
: roc::Program(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
}
|
||||
|
||||
bool LightningProgram::createBinary(amd::option::Options* options) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Program::createBinary(amd::option::Options* options) {
|
||||
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
||||
LogError("Failed to create ELF binary image!");
|
||||
return false;
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// Write binary to memory
|
||||
if (type == TYPE_EXECUTABLE) { // handle code object binary
|
||||
assert(rawBinary != nullptr && size != 0 && "must pass in the binary");
|
||||
} else { // handle LLVM binary
|
||||
if (llvmBinary_.empty()) {
|
||||
buildLog_ += "ERROR: Tried to save empty LLVM binary \n";
|
||||
return false;
|
||||
}
|
||||
rawBinary = (void*)llvmBinary_.data();
|
||||
size = llvmBinary_.size();
|
||||
}
|
||||
clBinary()->saveBIFBinary((char*)rawBinary, size);
|
||||
|
||||
// Set the type of binary
|
||||
setType(type);
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) {
|
||||
bool Program::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) {
|
||||
// Find the size of global variables from the binary
|
||||
if (!FindGlobalVarSize(binary, binSize)) {
|
||||
buildLog_ += "Error: Cannot Find Global Var Sizes\n";
|
||||
@@ -274,9 +233,8 @@ bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUnifo
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
|
||||
size_t foffset, std::string uri) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Program::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc,
|
||||
size_t foffset, std::string uri) {
|
||||
// Stop compilation if it is an offline device - HSA runtime does not
|
||||
// support ISA compiled offline
|
||||
if (!device().isOnline()) {
|
||||
@@ -330,7 +288,6 @@ bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDes
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -29,9 +29,6 @@
|
||||
//! \namespace amd::roc HSA Device Implementation
|
||||
namespace amd::roc {
|
||||
|
||||
class HSAILProgram;
|
||||
class LightningProgram;
|
||||
|
||||
//! \class empty program
|
||||
class Program : public device::Program {
|
||||
friend class ClBinary;
|
||||
@@ -62,15 +59,6 @@ class Program : public device::Program {
|
||||
virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
|
||||
const char* global_name) const;
|
||||
|
||||
protected:
|
||||
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
|
||||
*
|
||||
* \return The build error code
|
||||
*/
|
||||
int compileBinaryToHSAIL(amd::option::Options* options //!< options for compilation
|
||||
);
|
||||
virtual bool createBinary(amd::option::Options* options) = 0;
|
||||
|
||||
protected:
|
||||
//! Disable default copy constructor
|
||||
Program(const Program&) = delete;
|
||||
@@ -79,48 +67,17 @@ class Program : public device::Program {
|
||||
|
||||
virtual bool defineGlobalVar(const char* name, void* dptr);
|
||||
|
||||
protected:
|
||||
/* HSA executable */
|
||||
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
|
||||
hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader
|
||||
};
|
||||
|
||||
class HSAILProgram : public roc::Program {
|
||||
public:
|
||||
HSAILProgram(roc::NullDevice& device, amd::Program& owner);
|
||||
virtual ~HSAILProgram();
|
||||
|
||||
protected:
|
||||
bool createBinary(amd::option::Options* options) override { return true; }
|
||||
|
||||
virtual bool setKernels(void* binary, size_t binSize,
|
||||
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
|
||||
std::string uri = std::string()) override;
|
||||
|
||||
private:
|
||||
std::string codegenOptions(amd::option::Options* options);
|
||||
|
||||
bool saveBinaryAndSetType(type_t type) override;
|
||||
};
|
||||
|
||||
class LightningProgram final : public roc::Program {
|
||||
public:
|
||||
LightningProgram(roc::NullDevice& device, amd::Program& owner);
|
||||
virtual ~LightningProgram() {}
|
||||
|
||||
protected:
|
||||
bool createBinary(amd::option::Options* options) final;
|
||||
|
||||
bool saveBinaryAndSetType(type_t type) final { return true; }
|
||||
|
||||
private:
|
||||
bool saveBinaryAndSetType(type_t type, void* rawBinary, size_t size);
|
||||
bool createBinary(amd::option::Options* options) override final;
|
||||
|
||||
bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize,
|
||||
bool internalKernel) override final;
|
||||
|
||||
bool setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc = amd::Os::FDescInit(),
|
||||
size_t foffset = 0, std::string uri = std::string()) override final;
|
||||
protected:
|
||||
/* HSA executable */
|
||||
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
|
||||
hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader
|
||||
};
|
||||
|
||||
/*@}*/ // namespace amd::roc
|
||||
|
||||
@@ -71,8 +71,6 @@ Settings::Settings() {
|
||||
numDeviceEvents_ = 1024;
|
||||
numWaitEvents_ = 8;
|
||||
|
||||
useLightning_ = (!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : true;
|
||||
|
||||
lcWavefrontSize64_ = true;
|
||||
imageBufferWar_ = false;
|
||||
|
||||
@@ -116,7 +114,6 @@ bool Settings::create(bool fullProfile, const amd::Isa& isa, bool enableXNACK, b
|
||||
pinnedXferSize_ = std::max(pinnedXferSize_, pinnedMinXferSize_);
|
||||
}
|
||||
enableXNACK_ = enableXNACK;
|
||||
hsailExplicitXnack_ = enableXNACK;
|
||||
|
||||
// Enable extensions
|
||||
enableExtension(ClKhrByteAddressableStore);
|
||||
@@ -146,17 +143,12 @@ bool Settings::create(bool fullProfile, const amd::Isa& isa, bool enableXNACK, b
|
||||
enableExtension(ClKhrFp16);
|
||||
supportDepthsRGB_ = true;
|
||||
|
||||
if (useLightning_) {
|
||||
enableExtension(ClAmdAssemblyProgram);
|
||||
// enable subnormals for gfx900 and later
|
||||
if (gfxipMajor >= 9) {
|
||||
singleFpDenorm_ = true;
|
||||
enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
|
||||
enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
|
||||
}
|
||||
} else {
|
||||
// Also enable AMD double precision extension?
|
||||
enableExtension(ClAmdFp64);
|
||||
enableExtension(ClAmdAssemblyProgram);
|
||||
// enable subnormals for gfx900 and later
|
||||
if (gfxipMajor >= 9) {
|
||||
singleFpDenorm_ = true;
|
||||
enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
|
||||
enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups;
|
||||
}
|
||||
|
||||
if ((gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10) ||
|
||||
|
||||
@@ -806,11 +806,8 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
|
||||
desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) &&
|
||||
"Unsupported address qualifier");
|
||||
|
||||
const bool readOnly =
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST ||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
(mem->getMemFlags() & CL_MEM_READ_ONLY) != 0;
|
||||
const bool readOnly = (desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST) ||
|
||||
((mem->getMemFlags() & CL_MEM_READ_ONLY) != 0);
|
||||
|
||||
if (!readOnly) {
|
||||
mem->signalWrite(&dev());
|
||||
|
||||
@@ -55,8 +55,8 @@ target_include_directories(elf_test
|
||||
PRIVATE
|
||||
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL -DWITH_LIGHTNING_COMPILER -DDEBUG)
|
||||
add_definitions(-DCOMGR_DYN_DLL -DDEBUG)
|
||||
|
||||
target_link_libraries(elf_test PRIVATE amdrocclr_static)
|
||||
|
||||
#-------------------------------------elf_test--------------------------------------#
|
||||
#-------------------------------------elf_test--------------------------------------#
|
||||
|
||||
@@ -340,7 +340,6 @@ class Kernel : public RuntimeObject {
|
||||
|
||||
virtual ObjectType objectType() const { return ObjectTypeKernel; }
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// Templated find function to retrieve the right value based on string
|
||||
template <typename V, typename T, size_t N>
|
||||
static V FindValue(const T (&structure)[N], const std::string& name);
|
||||
@@ -417,8 +416,7 @@ class Kernel : public RuntimeObject {
|
||||
static const KernelFieldMapV3Type kKernelFieldMapV3[];
|
||||
static const ArgValueKindV3Type kArgValueKindV3[];
|
||||
static const ArgFieldMapV3Type kArgFieldMapV3[];
|
||||
#endif
|
||||
}; // defined(USE_COMGR_LIBRARY)
|
||||
};
|
||||
|
||||
|
||||
/*! @}
|
||||
|
||||
@@ -20,8 +20,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
|
||||
// Static values initialization from class Kernel.
|
||||
const amd::Kernel::ArgFieldMapType amd::Kernel::kArgFieldMap[] = {
|
||||
{"Name", ArgField::Name},
|
||||
@@ -189,5 +187,3 @@ cl_int amd::Kernel::FindValue(const T (&structure)[N], const std::string& name)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
@@ -23,11 +23,6 @@
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/context.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
#include "utils/libUtils.h"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "hsailctx.hpp"
|
||||
#endif
|
||||
|
||||
#include <cstdlib> // for malloc
|
||||
#include <cstring> // for strcmp
|
||||
@@ -38,21 +33,6 @@
|
||||
|
||||
namespace amd {
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static aclTargetInfo* aclutGetTargetInfo(aclBinary* binary) {
|
||||
aclTargetInfo* tgt = NULL;
|
||||
if (binary->struct_size == sizeof(aclBinary_0_8)) {
|
||||
tgt = &reinterpret_cast<aclBinary_0_8*>(binary)->target;
|
||||
} else if (binary->struct_size == sizeof(aclBinary_0_8_1)) {
|
||||
tgt = &reinterpret_cast<aclBinary_0_8_1*>(binary)->target;
|
||||
} else {
|
||||
assert(!"Binary format not supported!");
|
||||
tgt = &binary->target;
|
||||
}
|
||||
return tgt;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void remove_g_option(std::string& option) {
|
||||
// Remove " -g " option from application.
|
||||
// People can still add -g in AMD_OCL_BUILD_OPTIONS_APPEND, if it is so desired.
|
||||
@@ -114,16 +94,7 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng
|
||||
amd::option::Options* options, const amd::Program* same_prog,
|
||||
amd::Os::FileDesc fdesc, size_t foffset, std::string uri) {
|
||||
if (image != NULL && !amd::Elf::isElfMagic((const char*)image)) {
|
||||
if (device.settings().useLightning_) {
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
else if (!amd::Hsail::ValidateBinaryImage(
|
||||
image, length,
|
||||
language_ == SPIRV ? BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM)) {
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
#endif // !defined(WITH_COMPILER_LIB)
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
|
||||
// Check if the device is already associated with this program
|
||||
@@ -138,43 +109,11 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
bool emptyOptions = (options == nullptr);
|
||||
#endif
|
||||
amd::option::Options emptyOpts;
|
||||
if (options == NULL) {
|
||||
options = &emptyOpts;
|
||||
}
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
if (image != NULL && length != 0 &&
|
||||
amd::Hsail::ValidateBinaryImage(image, length, BINARY_TYPE_ELF)) {
|
||||
acl_error errorCode;
|
||||
aclBinary* binary = amd::Hsail::ReadFromMem(image, length, &errorCode);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
|
||||
assert(symbol && "symbol not found");
|
||||
std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
|
||||
size_t symSize = 0;
|
||||
const void* opts = amd::Hsail::ExtractSymbol(device.binCompiler(), binary, &symSize, aclCOMMENT,
|
||||
symName.c_str(), &errorCode);
|
||||
// if we have options from binary and input options was not specified
|
||||
if (opts != NULL && emptyOptions) {
|
||||
std::string sBinOptions = std::string((char*)opts, symSize);
|
||||
if (!amd::option::parseAllOptions(sBinOptions, *options, false, false)) {
|
||||
programLog_ = options->optionsLog();
|
||||
LogError("Parsing compilation options from binary failed.");
|
||||
return CL_INVALID_COMPILER_OPTIONS;
|
||||
}
|
||||
}
|
||||
options->oVariables->Legacy = !device.settings().useLightning_
|
||||
? isAMDILTarget(*amd::aclutGetTargetInfo(binary))
|
||||
: isHSAILTarget(*amd::aclutGetTargetInfo(binary));
|
||||
amd::Hsail::BinaryFini(binary);
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
options->oVariables->BinaryIsSpirv = language_ == SPIRV;
|
||||
device::Program* program = rootDev.createProgram(*this, options);
|
||||
if (program == NULL) {
|
||||
@@ -272,8 +211,7 @@ int32_t Program::compile(const std::vector<Device*>& devices, size_t numHeaders,
|
||||
for (const auto& it : devices) {
|
||||
option::Options parsedOptions;
|
||||
constexpr bool LinkOptsOnly = false;
|
||||
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly,
|
||||
it->settings().useLightning_)) {
|
||||
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) {
|
||||
programLog_ = parsedOptions.optionsLog();
|
||||
LogError("Parsing compile options failed.");
|
||||
return CL_INVALID_COMPILER_OPTIONS;
|
||||
@@ -345,8 +283,7 @@ int32_t Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
for (const auto& it : devices) {
|
||||
option::Options parsedOptions;
|
||||
constexpr bool LinkOptsOnly = true;
|
||||
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly,
|
||||
it->settings().useLightning_)) {
|
||||
if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) {
|
||||
programLog_ = parsedOptions.optionsLog();
|
||||
LogError("Parsing link options failed.");
|
||||
return CL_INVALID_LINKER_OPTIONS;
|
||||
@@ -366,29 +303,6 @@ int32_t Program::link(const std::vector<Device*>& devices, size_t numInputs,
|
||||
continue;
|
||||
}
|
||||
inputDevPrograms[i] = findIt->second;
|
||||
// Check the binary's target for the first found device program.
|
||||
// TODO: Revise these binary's target checks
|
||||
// and possibly remove them after switching to HSAIL by default.
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
device::Program::binary_t binary = inputDevPrograms[i]->binary();
|
||||
if (!found && binary.first != NULL && binary.second > 0 &&
|
||||
amd::Hsail::ValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) {
|
||||
acl_error errorCode = ACL_SUCCESS;
|
||||
void* mem = const_cast<void*>(binary.first);
|
||||
aclBinary* aclBin = amd::Hsail::ReadFromMem(mem, binary.second, &errorCode);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
LogWarning("Error while linking: Could not read from raw binary.");
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
if (isHSAILTarget(*amd::aclutGetTargetInfo(aclBin))) {
|
||||
parsedOptions.oVariables->Frontend = "clang";
|
||||
parsedOptions.oVariables->Legacy = it->settings().useLightning_;
|
||||
} else if (isAMDILTarget(*amd::aclutGetTargetInfo(aclBin))) {
|
||||
parsedOptions.oVariables->Frontend = "edg";
|
||||
}
|
||||
amd::Hsail::BinaryFini(aclBin);
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
found = true;
|
||||
}
|
||||
if (inputDevPrograms.size() == 0) {
|
||||
@@ -522,8 +436,7 @@ int32_t Program::build(const std::vector<Device*>& devices, const char* options,
|
||||
for (const auto& it : devices) {
|
||||
option::Options parsedOptions;
|
||||
constexpr bool LinkOptsOnly = false;
|
||||
if ((language_ != HIP) && !ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly,
|
||||
it->settings().useLightning_)) {
|
||||
if ((language_ != HIP) && !ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) {
|
||||
programLog_ = parsedOptions.optionsLog();
|
||||
LogError("Parsing compile options failed.");
|
||||
return CL_INVALID_COMPILER_OPTIONS;
|
||||
@@ -677,7 +590,7 @@ int Program::GetOclCVersion(const char* clVer) {
|
||||
}
|
||||
|
||||
bool Program::ParseAllOptions(const std::string& options, option::Options& parsedOptions,
|
||||
bool optionChangable, bool linkOptsOnly, bool isLC) {
|
||||
bool optionChangable, bool linkOptsOnly) {
|
||||
std::string allOpts = options;
|
||||
if (optionChangable) {
|
||||
if (linkOptsOnly) {
|
||||
@@ -704,7 +617,7 @@ bool Program::ParseAllOptions(const std::string& options, option::Options& parse
|
||||
}
|
||||
}
|
||||
}
|
||||
return amd::option::parseAllOptions(allOpts, parsedOptions, linkOptsOnly, isLC);
|
||||
return amd::option::parseAllOptions(allOpts, parsedOptions, linkOptsOnly);
|
||||
}
|
||||
|
||||
bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func) {
|
||||
|
||||
@@ -225,7 +225,7 @@ class Program : public RuntimeObject {
|
||||
static int GetOclCVersion(const char* clVer);
|
||||
|
||||
bool static ParseAllOptions(const std::string& options, option::Options& parsedOptions,
|
||||
bool optionChangable, bool linkOptsOnly, bool isLC);
|
||||
bool optionChangable, bool linkOptsOnly);
|
||||
|
||||
void setVarInfoCallBack(VarInfoCallback callback) { varcallback = callback; }
|
||||
|
||||
|
||||
@@ -141,8 +141,6 @@ release(uint, PAL_MALL_POLICY, 0, \
|
||||
"2 = Allocations will always be put through the MALL") \
|
||||
release(bool, GPU_ENABLE_WAVE32_MODE, true, \
|
||||
"Enables Wave32 compilation in HW if available") \
|
||||
release(bool, GPU_ENABLE_LC, true, \
|
||||
"Enables LC path") \
|
||||
release(bool, GPU_ENABLE_HW_P2P, false, \
|
||||
"Enables HW P2P path") \
|
||||
release(bool, GPU_ENABLE_COOP_GROUPS, true, \
|
||||
|
||||
在新工单中引用
屏蔽一个用户