diff --git a/projects/clr/opencl/amdocl/amdocl.def.in b/projects/clr/opencl/amdocl/amdocl.def.in index 650c3ff7fa..506c42c830 100644 --- a/projects/clr/opencl/amdocl/amdocl.def.in +++ b/projects/clr/opencl/amdocl/amdocl.def.in @@ -128,47 +128,6 @@ clGetKernelSubGroupInfo clSetDefaultDeviceCommandQueue #endif -#if !defined(WITH_LIGHTNING_COMPILER) -aclCompilerInit -aclCompilerFini -aclCompilerVersion -aclVersionSize -aclGetErrorString -aclGetArchInfo -aclGetDeviceInfo -aclGetTargetInfo -aclGetArchitecture -aclGetFamily -aclGetChip -aclBinaryInit -aclBinaryFini -aclReadFromFile -aclReadFromMem -aclWriteToFile -aclWriteToMem -aclCreateFromBinary -aclBinaryVersion -aclInsertSection -aclRemoveSection -aclExtractSection -aclInsertSymbol -aclRemoveSymbol -aclExtractSymbol -aclDbgAddArgument -aclDbgRemoveArgument -aclQueryInfo -aclCompile -aclLink -aclGetCompilerLog -aclRetrieveType -aclSetType -aclConvertType -aclDisassemble -aclInsertKernelStatistics -aclGetDeviceBinary -aclDumpBinary -#endif // !defined(WITH_LIGHTNING_COMPILER) - #if (OPENCL_MAJOR > 2) || (OPENCL_MAJOR == 2 && OPENCL_MINOR >= 1) clCreateProgramWithIL #endif diff --git a/projects/clr/opencl/amdocl/cl_device.cpp b/projects/clr/opencl/amdocl/cl_device.cpp index 0daf2196e7..a9e189d618 100644 --- a/projects/clr/opencl/amdocl/cl_device.cpp +++ b/projects/clr/opencl/amdocl/cl_device.cpp @@ -135,9 +135,6 @@ RUNTIME_ENTRY(cl_int, clGetPlatformInfo, "cl_khr_dx9_media_sharing " #endif //_WIN32 "cl_amd_event_callback " -#if defined(WITH_COMPILER_LIB) - "cl_amd_offline_devices " -#endif // defined(WITH_COMPILER_LIB) ; break; case CL_PLATFORM_ICD_SUFFIX_KHR: diff --git a/projects/clr/rocclr/cmake/ROCclr.cmake b/projects/clr/rocclr/cmake/ROCclr.cmake index 7f2ce424a6..e30e2de56f 100644 --- a/projects/clr/rocclr/cmake/ROCclr.cmake +++ b/projects/clr/rocclr/cmake/ROCclr.cmake @@ -20,15 +20,9 @@ # ROCclr abstracts the usage of multiple AMD compilers and runtimes. # It is possible to support multiple backends concurrently in the same binary. -option(ROCCLR_ENABLE_HSAIL "Enable support for HSAIL compiler" OFF) -option(ROCCLR_ENABLE_LC "Enable support for LC compiler" ON) option(ROCCLR_ENABLE_HSA "Enable support for HSA runtime" ON) option(ROCCLR_ENABLE_PAL "Enable support for PAL runtime" OFF) -if((NOT ROCCLR_ENABLE_HSAIL) AND (NOT ROCCLR_ENABLE_LC)) - message(FATAL "Support for at least one compiler needs to be enabled!") -endif() - if((NOT ROCCLR_ENABLE_HSA) AND (NOT ROCCLR_ENABLE_PAL)) message(FATAL "Support for at least one runtime needs to be enabled!") endif() @@ -68,7 +62,6 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/device.cpp ${ROCCLR_SRC_DIR}/device/devkernel.cpp ${ROCCLR_SRC_DIR}/device/devprogram.cpp - ${ROCCLR_SRC_DIR}/device/hsailctx.cpp ${ROCCLR_SRC_DIR}/elf/elf.cpp ${ROCCLR_SRC_DIR}/os/alloc.cpp ${ROCCLR_SRC_DIR}/os/os_posix.cpp @@ -142,13 +135,7 @@ if(UNIX) target_link_libraries(rocclr PUBLIC rt) endif() -if(ROCCLR_ENABLE_HSAIL) - include(ROCclrHSAIL) -endif() - -if(ROCCLR_ENABLE_LC) - include(ROCclrLC) -endif() +include(ROCclrLC) if(ROCCLR_ENABLE_HSA) include(ROCclrHSA) diff --git a/projects/clr/rocclr/cmake/ROCclrHSAIL.cmake b/projects/clr/rocclr/cmake/ROCclrHSAIL.cmake deleted file mode 100644 index 24af48413a..0000000000 --- a/projects/clr/rocclr/cmake/ROCclrHSAIL.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -target_compile_definitions(rocclr PUBLIC WITH_COMPILER_LIB HSAIL_DYN_DLL) diff --git a/projects/clr/rocclr/cmake/ROCclrLC.cmake b/projects/clr/rocclr/cmake/ROCclrLC.cmake index d8ecf4ad02..8ff08899ce 100644 --- a/projects/clr/rocclr/cmake/ROCclrLC.cmake +++ b/projects/clr/rocclr/cmake/ROCclrLC.cmake @@ -37,7 +37,7 @@ if (NOT amd_comgr_FOUND) endif() get_target_property(_amd_comgr_lib_type amd_comgr TYPE) -target_compile_definitions(rocclr PUBLIC WITH_LIGHTNING_COMPILER USE_COMGR_LIBRARY) +target_compile_definitions(rocclr PUBLIC) if(_amd_comgr_lib_type STREQUAL "SHARED_LIBRARY") target_compile_definitions(rocclr PUBLIC COMGR_DYN_DLL) endif() diff --git a/projects/clr/rocclr/compiler/lib/include/acl.h b/projects/clr/rocclr/compiler/lib/include/acl.h deleted file mode 100644 index 44163b8ea1..0000000000 --- a/projects/clr/rocclr/compiler/lib/include/acl.h +++ /dev/null @@ -1,217 +0,0 @@ -/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _ACL_0_8_H_ -#define _ACL_0_8_H_ -#ifdef __cplusplus -extern "C" { -#endif -#include "aclTypes.h" -//!--------------------------------------------------------------------------!// -// Functions that deal with aclCompiler objects. -//!--------------------------------------------------------------------------!// -aclCompiler* ACL_API_ENTRY aclCompilerInit(aclCompilerOptions* opts, - acl_error* error_code) ACL_API_0_8; -acl_error ACL_API_ENTRY aclCompilerFini(aclCompiler* cl) ACL_API_0_8; -aclCLVersion ACL_API_ENTRY aclCompilerVersion(aclCompiler* cl, acl_error* error_code) ACL_API_0_8; -uint32_t ACL_API_ENTRY aclVersionSize(aclCLVersion num, acl_error* error_code) ACL_API_0_8; -const char* ACL_API_ENTRY aclGetErrorString(acl_error error_code) ACL_API_0_8; - -//!--------------------------------------------------------------------------!// -// Functions that deal with target specific information. -//!--------------------------------------------------------------------------!// -//! Returns in the names argument, if non-NULL, a pointer to each of the arch -// names that the compiler supports. If names is NULL and arch_size is -// non-NULL, returns the number of arch entries that are required. -acl_error ACL_API_ENTRY aclGetArchInfo(const char** arch_names, size_t* arch_size) ACL_API_0_8; - -//! Returns in the arch argument, if non-NULL, a pointer to each device -// name that the compiler supports. If device_size is non-NULL, -// returns the number of device entries that are used. -acl_error ACL_API_ENTRY aclGetDeviceInfo(const char* arch, const char** names, - size_t* device_size) ACL_API_0_8; - -//! Function that returns a correctly filled out aclTargetInfo structure based -// on the information passed into the kernel. -aclTargetInfo ACL_API_ENTRY aclGetTargetInfo(const char* arch, const char* device, - acl_error* error_code) ACL_API_0_8; - -//! Function that returns a correctly filled out aclTargetInfo structure based -// on the information passed into the kernel. -aclTargetInfo ACL_API_ENTRY aclGetTargetInfoFromChipID(const char* arch, const uint32_t chip_id, - acl_error* error_code) ACL_API_0_8; - -//! Function that returns a string representation of the target architecture. -const char* ACL_API_ENTRY aclGetArchitecture(const aclTargetInfo& target) ACL_API_0_8; - -//! Function that returns a string representation of the target chip options. -const uint64_t ACL_API_ENTRY aclGetChipOptions(const aclTargetInfo& target) ACL_API_0_8; - -//! Function that returns a string representation of the target family. -const char* ACL_API_ENTRY aclGetFamily(const aclTargetInfo& target) ACL_API_0_8; - -//! Function that returns a string representation of the target chip. -const char* ACL_API_ENTRY aclGetChip(const aclTargetInfo& target) ACL_API_0_8; - -//!--------------------------------------------------------------------------!// -// Functions that deal with aclBinary objects. -//!--------------------------------------------------------------------------!// -aclBinary* ACL_API_ENTRY aclBinaryInit(size_t struct_version, const aclTargetInfo* target, - const aclBinaryOptions* options, - acl_error* error_code) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclBinaryFini(aclBinary* bin) ACL_API_0_8; - -aclBinary* ACL_API_ENTRY aclReadFromFile(const char* str, acl_error* error_code) ACL_API_0_8; - -aclBinary* ACL_API_ENTRY aclReadFromMem(const void* mem, size_t size, - acl_error* error_code) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclWriteToFile(aclBinary* bin, const char* str) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclWriteToMem(aclBinary* bin, void** mem, size_t* size) ACL_API_0_8; - -aclBinary* ACL_API_ENTRY aclCreateFromBinary(const aclBinary* binary, - aclBIFVersion version) ACL_API_0_8; - -aclBIFVersion ACL_API_ENTRY aclBinaryVersion(const aclBinary* binary) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclInsertSection(aclCompiler* cl, aclBinary* binary, const void* data, - size_t data_size, aclSections id) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclInsertSymbol(aclCompiler* cl, aclBinary* binary, const void* data, - size_t data_size, aclSections id, - const char* symbol) ACL_API_0_8; - -const void* ACL_API_ENTRY aclExtractSection(aclCompiler* cl, const aclBinary* binary, size_t* size, - aclSections id, acl_error* error_code) ACL_API_0_8; - -const void* ACL_API_ENTRY aclExtractSymbol(aclCompiler* cl, const aclBinary* binary, size_t* size, - aclSections id, const char* symbol, - acl_error* error_code) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclRemoveSection(aclCompiler* cl, aclBinary* binary, - aclSections id) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclRemoveSymbol(aclCompiler* cl, aclBinary* binary, aclSections id, - const char* symbol) ACL_API_0_8; - -//!--------------------------------------------------------------------------!// -// Functions that deal with debug/metdata. -//!--------------------------------------------------------------------------!// -acl_error ACL_API_ENTRY aclQueryInfo(aclCompiler* cl, const aclBinary* binary, aclQueryType query, - const char* kernel, void* data_ptr, - size_t* ptr_size) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclDbgAddArgument(aclCompiler* cl, aclBinary* binary, const char* kernel, - const char* name, bool byVal) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclDbgRemoveArgument(aclCompiler* cl, aclBinary* binary, const char* kernel, - const char* name) ACL_API_0_8; - -//!--------------------------------------------------------------------------!// -// Functions that deal with various compilation phases. -//!--------------------------------------------------------------------------!// -acl_error ACL_API_ENTRY aclCompile(aclCompiler* cl, aclBinary* bin, const char* options, - aclType from, aclType to, - aclLogFunction compile_callback) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclLink(aclCompiler* cl, aclBinary* src_bin, unsigned int num_libs, - aclBinary** libs, aclType link_mode, const char* options, - aclLogFunction link_callback) ACL_API_0_8; - -const char* ACL_API_ENTRY aclGetCompilerLog(aclCompiler* cl) ACL_API_0_8; - -const void* ACL_API_ENTRY aclRetrieveType(aclCompiler* cl, const aclBinary* bin, const char* name, - size_t* data_size, aclType type, - acl_error* error_code) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclSetType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type, - const void* data, size_t size) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclConvertType(aclCompiler* cl, aclBinary* bin, const char* name, - aclType type) ACL_API_0_8; - -acl_error ACL_API_ENTRY aclDisassemble(aclCompiler* cl, aclBinary* bin, const char* kernel, - aclLogFunction disasm_callback) ACL_API_0_8; - -const void* ACL_API_ENTRY aclGetDeviceBinary(aclCompiler* cl, const aclBinary* bin, - const char* kernel, size_t* size, - acl_error* error_code) ACL_API_0_8; -//!--------------------------------------------------------------------------!// -// Functions that deal with binary image. -//!--------------------------------------------------------------------------!// -bool ACL_API_ENTRY aclValidateBinaryImage(const void* binary, size_t length, unsigned) ACL_API_0_8; -//!--------------------------------------------------------------------------!// -// Functions that deal with aclJITObjectImage objects. -//!--------------------------------------------------------------------------!// -aclJITObjectImage ACL_API_ENTRY aclJITObjectImageCreate(aclCompiler* cl, const void* buffer, - size_t length, aclBinary* bin, - acl_error* error_code); - -aclJITObjectImage ACL_API_ENTRY aclJITObjectImageCopy(aclCompiler* cl, const void* buffer, - size_t length, acl_error* error_code); - -acl_error ACL_API_ENTRY aclJITObjectImageDestroy(aclCompiler* cl, aclJITObjectImage buffer); - -acl_error ACL_API_ENTRY aclJITObjectImageFinalize(aclCompiler* cl, aclJITObjectImage image); - -size_t ACL_API_ENTRY aclJITObjectImageSize(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code); - -const char* ACL_API_ENTRY aclJITObjectImageData(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code); - -size_t ACL_API_ENTRY aclJITObjectImageGetGlobalsSize(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code); - -acl_error ACL_API_ENTRY aclJITObjectImageIterateSymbols(aclCompiler* cl, aclJITObjectImage image, - aclJITSymbolCallback callback, void* data); - -#if defined(LEGACY_COMPLIB) -char* ACL_API_ENTRY aclJITObjectImageDisassembleKernel(aclCompiler* cl, - constAclJITObjectImage image, - const char* kernel, acl_error* error_code); -#endif - -//!--------------------------------------------------------------------------!// -// Debug functionality -//!--------------------------------------------------------------------------!// -void aclDumpBinary(const aclBinary* bin); - -//!--------------------------------------------------------------------------!// -// Functions that deal with kenel statistics. -//!--------------------------------------------------------------------------!// -void aclGetKstatsSI(const void* shader, aclKernelStats& kstats); -acl_error ACL_API_ENTRY aclInsertKernelStatistics(aclCompiler* cl, aclBinary* bin); -//! Define hardware info constants for SI and above devices -static constexpr unsigned SI_sgprs_avail = 102; -static constexpr unsigned SI_vgprs_avail = 256; -static constexpr unsigned SI_ldssize_avail = 32 * 1024; - -//!--------------------------------------------------------------------------!// -// Functions that deal with memory. -// Free memory allocated by aclWriteToMem -//!--------------------------------------------------------------------------!// -acl_error ACL_API_ENTRY aclFreeMem(aclBinary* bin, void* mem); -#ifdef __cplusplus -} -#endif -#endif // _ACL_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/include/aclDefs.h b/projects/clr/rocclr/compiler/lib/include/aclDefs.h deleted file mode 100644 index 6902de681e..0000000000 --- a/projects/clr/rocclr/compiler/lib/include/aclDefs.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _ACL_DEFS_0_8_H_ -#define _ACL_DEFS_0_8_H_ - -#ifndef ACL_API_ENTRY -#if defined(_WIN32) || defined(__CYGWIN__) -#define ACL_API_ENTRY __stdcall -#else -#define ACL_API_ENTRY -#endif -#endif - -#ifndef ACL_API_0_8 -#define ACL_API_0_8 -#endif - -#ifndef BIF_API_2_0 -#define BIF_API_2_0 -#endif - -#ifndef BIF_API_2_1 -#define BIF_API_2_1 -#endif - -#ifndef BIF_API_3_0 -#define BIF_API_3_0 -#endif - -#ifndef MAX_HIDDEN_KERNARGS_NUM -#define MAX_HIDDEN_KERNARGS_NUM 6 -#else -#error "MAX_HIDDEN_KERNARGS_NUM is already defined" -#endif - -#endif // _ACL_DEFS_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/include/aclEnums.h b/projects/clr/rocclr/compiler/lib/include/aclEnums.h deleted file mode 100644 index 965934cf0d..0000000000 --- a/projects/clr/rocclr/compiler/lib/include/aclEnums.h +++ /dev/null @@ -1,364 +0,0 @@ -/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _ACL_ENUMS_0_8_H_ -#define _ACL_ENUMS_0_8_H_ - -typedef enum _acl_error_enum_0_8 { - ACL_SUCCESS = 0, - ACL_ERROR = 1, - ACL_INVALID_ARG = 2, - ACL_OUT_OF_MEM = 3, - ACL_SYS_ERROR = 4, - ACL_UNSUPPORTED = 5, - ACL_ELF_ERROR = 6, - ACL_INVALID_FILE = 7, - ACL_INVALID_COMPILER = 8, - ACL_INVALID_TARGET = 9, - ACL_INVALID_BINARY = 10, - ACL_INVALID_OPTION = 11, - ACL_INVALID_TYPE = 12, - ACL_INVALID_SECTION = 13, - ACL_INVALID_SYMBOL = 14, - ACL_INVALID_QUERY = 15, - ACL_FRONTEND_FAILURE = 16, - ACL_INVALID_BITCODE = 17, - ACL_LINKER_ERROR = 18, - ACL_OPTIMIZER_ERROR = 19, - ACL_CODEGEN_ERROR = 20, - ACL_ISAGEN_ERROR = 21, - ACL_INVALID_SOURCE = 22, - ACL_LIBRARY_ERROR = 23, - ACL_INVALID_SPIR = 24, - ACL_LWVERIFY_FAIL = 25, - ACL_HWVERIFY_FAIL = 26, - ACL_SPIRV_LOAD_FAIL = 27, - ACL_SPIRV_SAVE_FAIL = 28, - ACL_LAST_ERROR = 29 -} acl_error_0_8; - -typedef enum _comp_device_caps_enum_0_8 { - capError = 0, - capFMA = 1, - capImageSupport = 2, - capSaveSOURCE = 3, // input source - capSaveLLVMIR = 4, // output LLVMIR from frontend - capSaveCG = 5, // output from LLVM-BE - capSaveEXE = 6, // output executable - capSaveAMDIL = 7, // Save per-kernel AMDIL - capSaveHSAIL = 8, // Save per-kernel HSAIL - capEncrypted = 9, - capSaveDISASM = 10, - capSaveAS = 11, - capSaveSPIR = 12, - capDumpLast = 13 -} compDeviceCaps_0_8; - -typedef enum _comp_opt_settings_enum_0_8 { - optO0 = 0, // No optimization setting. - optO1 = 1, - optO2 = 2, - optO3 = 3, - optO4 = 4, - optOs = 5, - optError = 6, // Invalid optimization set - optLast = 7 -} compOptSettings_0_8; - -#define FLAG_SHIFT_VALUE 5 -#define FLAG_MASK_VALUE ((1 << capDumpLast) - 1) -#define FLAG_BITLOC(A) (1 << ((A) & FLAG_MASK_VALUE)) -#define FLAG_ARRAY_SIZE 4 - -//! An enumeration that defines the possible valid device types that -// can be compiled for. -typedef enum _acl_dev_type_enum_0_8 { - aclError = 0, // aclDevType of 0 is an error. - aclX86 = 1, // Targeting a 32bit X86 CPU device. - aclAMDIL = 2, // Targeting an AMDIL GPU device. - aclHSAIL = 3, // Targeting an HSAIL GPU device. - aclX64 = 4, // Targeting a 64bit X86 CPU device. - aclHSAIL64 = 5, // Targeting a 64bit HSAIL GPU device. - aclAMDIL64 = 6, // Targeting a 64bit AMDIL GPU device - aclLast = 7 -} aclDevType_0_8; - -//! Enum that represents the versions of the compiler -typedef enum _acl_cl_version_enum_0_8 { - ACL_VERSION_ERROR = 0, - ACL_VERSION_0_7 = 1, - ACL_VERSION_0_8 = 2, - ACL_VERSION_0_8_1 = 3, - ACL_VERSION_0_9 = 4, - ACL_VERSION_1_0 = 5, - ACL_VERSION_LAST = 6 -} aclCLVersion_0_8; - -//! Enum of the various aclTypes that are supported -typedef enum _acl_type_enum_0_8 { - ACL_TYPE_DEFAULT = 0, - ACL_TYPE_OPENCL = 1, - ACL_TYPE_LLVMIR_TEXT = 2, - ACL_TYPE_LLVMIR_BINARY = 3, - ACL_TYPE_SPIR_TEXT = 4, - ACL_TYPE_SPIR_BINARY = 5, - ACL_TYPE_AMDIL_TEXT = 6, - ACL_TYPE_AMDIL_BINARY = 7, - ACL_TYPE_HSAIL_TEXT = 8, - ACL_TYPE_HSAIL_BINARY = 9, - ACL_TYPE_X86_TEXT = 10, - ACL_TYPE_X86_BINARY = 11, - ACL_TYPE_CG = 12, - ACL_TYPE_SOURCE = 13, - ACL_TYPE_ISA = 14, - ACL_TYPE_HEADER = 15, - ACL_TYPE_RSLLVMIR_BINARY = 16, - ACL_TYPE_SPIRV_BINARY = 17, - ACL_TYPE_ASM_TEXT = 18, - ACL_TYPE_LAST = 19 -} aclType_0_8; - -//! Enum of the various loader types that are supported. -typedef enum _acl_loader_type_enum_0_8 { - ACL_LOADER_COMPLIB = 0, - ACL_LOADER_FRONTEND = 1, - ACL_LOADER_LINKER = 2, - ACL_LOADER_OPTIMIZER = 3, - ACL_LOADER_CODEGEN = 4, - ACL_LOADER_BACKEND = 5, - ACL_LOADER_SC = 6, - ACL_LOADER_LAST = 7 -} aclLoaderType_0_8; - -// Enumeration for the various acl versions -typedef enum _bif_version_enum_0_8 { - aclBIFVersionError = 0, // Error - aclBIFVersion20 = 1, // Version 2.0 of the OpenCL BIF - aclBIFVersion21 = 2, // Version 2.1 of the OpenCL BIF - aclBIFVersion30 = 3, // Version 3.0 of the OpenCL BIF - aclBIFVersion31 = 4, // Version 3.1 of the OpenCL BIF - aclBIFVersionLatest = aclBIFVersion31, // Most recent version of the BIF - aclBIFVersionCAL = 5, - aclBIFVersionLast = 6 -} aclBIFVersion_0_8; - -// Enumeration for the various platform types -typedef enum _bif_platform_enum_0_8 { - aclPlatformCAL = 0, // For BIF 2.0 backward compatibility - aclPlatformCPU = 1, // For BIF 2.0 backward compatibility - aclPlatformCompLib = 2, - aclPlatformLast = 3 -} aclPlatform_0_8; - -// Enumeration for the various bif sections -typedef enum _bif_sections_enum_0_8 { - aclLLVMIR = 0, - aclSOURCE = 1, - aclILTEXT = 2, // For BIF 2.0 backward compatibility - aclASTEXT = 3, // For BIF 2.0 backward compatibility - aclCAL = 4, // For BIF 2.0 backward compatibility - aclDLL = 5, // For BIF 2.0 backward compatibility - aclSTRTAB = 6, - aclSYMTAB = 7, - aclRODATA = 8, - aclSHSTRTAB = 9, - aclNOTES = 10, - aclCOMMENT = 11, - aclILDEBUG = 12, // For BIF 2.0 backward compatibility - aclDEBUG_INFO = 13, - aclDEBUG_ABBREV = 14, - aclDEBUG_LINE = 15, - aclDEBUG_PUBNAMES = 16, - aclDEBUG_PUBTYPES = 17, - aclDEBUG_LOC = 18, - aclDEBUG_ARANGES = 19, - aclDEBUG_RANGES = 20, - aclDEBUG_MACINFO = 21, - aclDEBUG_STR = 22, - aclDEBUG_FRAME = 23, - aclJITBINARY = 24, // For BIF 2.0 backward compatibility - aclCODEGEN = 25, - aclTEXT = 26, - aclINTERNAL = 27, - aclSPIR = 28, - aclHEADER = 29, - aclBRIG = 30, - aclBRIGxxx1 = 31, - aclBRIGxxx2 = 32, - aclBRIGxxx3 = 33, - aclHSADEBUG = 34, - aclKSTATS = 35, // For storing kernel statistics - aclSPIRV = 36, - aclLAST = 37 -} aclSections_0_8; - -//! An enumeration that defines what are valid queries for aclQueryInfo. -typedef enum _rt_query_types_enum_0_8 { - RT_ABI_VERSION = 0, - RT_DEVICE_NAME = 1, - RT_MEM_SIZES = 2, - RT_GPU_FUNC_CAPS = 3, - RT_GPU_FUNC_ID = 4, - RT_GPU_DEFAULT_ID = 5, - RT_WORK_GROUP_SIZE = 6, - RT_WORK_REGION_SIZE = 7, - RT_ARGUMENT_ARRAY = 8, - RT_GPU_PRINTF_ARRAY = 9, - RT_CPU_BARRIER_NAMES = 10, - RT_DEVICE_ENQUEUE = 11, - RT_KERNEL_INDEX = 12, - RT_KERNEL_NAME = 13, - RT_KERNEL_NAMES = 14, - RT_CONTAINS_LLVMIR = 15, - RT_CONTAINS_OPTIONS = 16, - RT_CONTAINS_BRIG = 17, - RT_CONTAINS_HSAIL = 18, - RT_CONTAINS_ISA = 19, - RT_CONTAINS_LOADER_MAP = 20, - RT_CONTAINS_SPIR = 21, - RT_NUM_KERNEL_HIDDEN_ARGS = 22, - RT_CONTAINS_SPIRV = 23, - RT_WAVES_PER_SIMD_HINT = 24, - RT_WORK_GROUP_SIZE_HINT = 25, - RT_VEC_TYPE_HINT = 26, - RT_LAST_TYPE = 27 -} aclQueryType_0_8; - -//! An enumeration for the various GPU capabilities -typedef enum _rt_gpu_caps_enum_0_8 { - RT_COMPILER_WRITE = 1 << 0, - RT_DATA_SECTION = 1 << 1, - RT_WGS = 1 << 2, - RT_LIMIT_WGS = 1 << 3, - RT_PACKED_REGS = 1 << 4, - RT_64BIT_ABI = 1 << 5, - RT_PRINTF = 1 << 6, - RT_ARENA_UAV = 1 << 7, - RT_LRP_MEM = 1 << 8, // Local/Region/Private Memory - RT_INDEX_TEMPS = 1 << 9, - RT_WRS = 1 << 10, - RT_GWS = 1 << 11, - RT_SWGWS = 1 << 12, - RT_GPU_CAPS_MASK = 0xFFF -} aclGPUCaps_0_8; - -//! An enumeration for the various CPU capabilities. -typedef enum _rt_cpu_caps_enum_0_8 { - RT_KERNEL_BARRIER = 1 << 0, - RT_PROGRAM_BARRIER = 1 << 1, - RT_CPU_CAPS_MASK = 0x3 -} aclCPUCaps_0_8; - -//! An enumeration that maps Resource type to index values -typedef enum _rt_gpu_resource_enum_0_8 { - RT_RES_UAV = 0, // UAV resources - RT_RES_PRI = 1, // Private resources - RT_RES_LDS = 2, // LDS resources - RT_RES_GDS = 3, // GDS resources - RT_RES_CON = 4, // Constant resources - RT_RES_LAST = 5 -} aclGPUResource_0_8; - -//! An enumeration that maps memory types to index values -typedef enum _rt_gpu_mem_sizes_enum_0_8 { - RT_MEM_HW_LOCAL = 0, - RT_MEM_SW_LOCAL = 1, - RT_MEM_HW_PRIVATE = 2, - RT_MEM_SW_PRIVATE = 3, - RT_MEM_HW_REGION = 4, - RT_MEM_SW_REGION = 5, - RT_MEM_LAST = 6 -} aclGPUMemSizes_0_8; - -// Enumerations for the various argument types. -typedef enum _acl_arg_type_enum_0_8 { - ARG_TYPE_ERROR = 0, - ARG_TYPE_SAMPLER = 1, - ARG_TYPE_IMAGE = 2, - ARG_TYPE_COUNTER = 3, - ARG_TYPE_VALUE = 4, - ARG_TYPE_POINTER = 5, - ARG_TYPE_SEMAPHORE = 6, - ARG_TYPE_QUEUE = 7, // enum for device enqueue - ARG_TYPE_LAST = 8 -} aclArgType_0_8; - -// Enumerations of the valid data types for pass by value and -// pass by pointer kernel arguments. -typedef enum _acl_data_type_enum_0_8 { - DATATYPE_ERROR = 0, - DATATYPE_i1 = 1, - DATATYPE_i8 = 2, - DATATYPE_i16 = 3, - DATATYPE_i32 = 4, - DATATYPE_i64 = 5, - DATATYPE_u8 = 6, - DATATYPE_u16 = 7, - DATATYPE_u32 = 8, - DATATYPE_u64 = 9, - DATATYPE_f16 = 10, - DATATYPE_f32 = 11, - DATATYPE_f64 = 12, - DATATYPE_f80 = 13, - DATATYPE_f128 = 14, - DATATYPE_struct = 15, - DATATYPE_union = 16, - DATATYPE_event = 17, - DATATYPE_opaque = 18, - DATATYPE_unknown = 19, - DATATYPE_LAST = 20 -} aclArgDataType_0_8; - -// Enumerations of the valid memory types for pass by pointer -// kernel arguments -typedef enum _acl_memory_type_enum_0_8 { - PTR_MT_ERROR = 0, // Error - PTR_MT_GLOBAL = 1, // global buffer - PTR_MT_SCRATCH_EMU = 2, // SW emulated private memory - PTR_MT_LDS_EMU = 3, // SW emulated local memory - PTR_MT_UAV = 4, // uniformed access vector memory - PTR_MT_CONSTANT_EMU = 5, // SW emulated constant memory - PTR_MT_GDS_EMU = 6, // SW emulated region memory - PTR_MT_LDS = 7, // HW local memory - PTR_MT_SCRATCH = 8, // HW private memory - PTR_MT_CONSTANT = 9, // HW constant memory - PTR_MT_GDS = 10, // HW region memory - PTR_MT_UAV_SCRATCH = 11, // SI and later HW private memory - PTR_MT_UAV_CONSTANT = 12, // SI and later HW constant memory - PTR_MT_LAST = 13 -} aclMemoryType_0_8; - -// Enumeration that specifies the various access types for a pointer/image. -typedef enum _acl_access_type_enum_0_8 { - ACCESS_TYPE_ERROR = 0, - ACCESS_TYPE_RO = 1, - ACCESS_TYPE_WO = 2, - ACCESS_TYPE_RW = 3, - ACCESS_TYPE_LAST = 4 -} aclAccessType_0_8; - -// Enumeration that specifies the binary types. -typedef enum _acl_binary_image_type_enum_0_8 { - BINARY_TYPE_ELF = 1, - BINARY_TYPE_LLVM = 2, - BINARY_TYPE_SPIRV = 4, -} aclBinaryImageType_0_8; - -#endif // _ACL_ENUMS_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/include/aclFunctors.h b/projects/clr/rocclr/compiler/lib/include/aclFunctors.h deleted file mode 100644 index 31b22c8d24..0000000000 --- a/projects/clr/rocclr/compiler/lib/include/aclFunctors.h +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _ACL_FUNCTORS_0_8_H_ -#define _ACL_FUNCTORS_0_8_H_ - -//! Callback for the log function function pointer that many -// API calls take to have the calling application receive -// information on what errors occur. -typedef void (*aclLogFunction_0_8)(const char* msg, size_t size); - -typedef bool (*aclJITSymbolCallback)(const char*, const void*, void*); -typedef void* aclJITObjectImage; -typedef const void* constAclJITObjectImage; - -typedef acl_error(ACL_API_ENTRY* InsertSec_0_8)(aclCompiler* cl, aclBinary* binary, - const void* data, size_t data_size, - aclSections id) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* InsertSym_0_8)(aclCompiler* cl, aclBinary* binary, - const void* data, size_t data_size, aclSections id, - const char* symbol) ACL_API_0_8; - -typedef const void*(ACL_API_ENTRY* ExtractSec_0_8)(aclCompiler* cl, const aclBinary* binary, - size_t* size, aclSections id, - acl_error* error_code)ACL_API_0_8; - -typedef const void*(ACL_API_ENTRY* ExtractSym_0_8)(aclCompiler* cl, const aclBinary* binary, - size_t* size, aclSections id, const char* symbol, - acl_error* error_code)ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* RemoveSec_0_8)(aclCompiler* cl, aclBinary* binary, - aclSections id) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* RemoveSym_0_8)(aclCompiler* cl, aclBinary* binary, aclSections id, - const char* symbol) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* QueryInfo_0_8)(aclCompiler* cl, const aclBinary* binary, - aclQueryType query, const char* kernel, - void* data_ptr, size_t* ptr_size) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* AddDbgArg_0_8)(aclCompiler* cl, aclBinary* bin, const char* kernel, - const char* name, bool byVal) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* RemoveDbgArg_0_8)(aclCompiler* cl, aclBinary* bin, - const char* kernel, - const char* name) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* Compile_0_8)(aclCompiler* cl, aclBinary* bin, const char* options, - aclType from, aclType to, - aclLogFunction_0_8 compile_callback) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* Link_0_8)(aclCompiler* cl, aclBinary* src_bin, - unsigned int num_libs, aclBinary** libs, - aclType link_mode, const char* options, - aclLogFunction_0_8 link_callback) ACL_API_0_8; - -typedef const char*(ACL_API_ENTRY* CompLog_0_8)(aclCompiler* cl)ACL_API_0_8; - -typedef const void*(ACL_API_ENTRY* RetrieveType_0_8)(aclCompiler* cl, const aclBinary* bin, - const char* name, size_t* data_size, - aclType type, - acl_error* error_code)ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* SetType_0_8)(aclCompiler* cl, aclBinary* bin, const char* name, - aclType type, const void* data, - size_t size) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* ConvertType_0_8)(aclCompiler* cl, aclBinary* bin, const char* name, - aclType type) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* Disassemble_0_8)(aclCompiler* cl, aclBinary* bin, - const char* kernel, - aclLogFunction_0_8 disasm_callback) ACL_API_0_8; - -typedef const void*(ACL_API_ENTRY* GetDevBinary_0_8)(aclCompiler* cl, const aclBinary* bin, - const char* kernel, size_t* size, - acl_error* error_code)ACL_API_0_8; - -typedef aclLoaderData*(ACL_API_ENTRY* LoaderInit_0_8)(aclCompiler* cl, aclBinary* bin, - aclLogFunction_0_8 callback, - acl_error* error); - -typedef acl_error(ACL_API_ENTRY* LoaderFini_0_8)(aclLoaderData* data); - -typedef aclModule*(ACL_API_ENTRY* FEToIR_0_8)(aclLoaderData* ald, const char* source, - size_t data_size, aclContext* ctx, - acl_error* error)ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* SourceToISA_0_8)(aclLoaderData* ald, const char* source, - size_t data_size) ACL_API_0_8; - -typedef aclModule*(ACL_API_ENTRY* IRPhase_0_8)(aclLoaderData* data, aclModule* ir, aclContext* ctx, - acl_error* error)ACL_API_0_8; - -typedef aclModule*(ACL_API_ENTRY* LinkPhase_0_8)(aclLoaderData* data, aclModule* ir, - unsigned int num_libs, aclModule** libs, - aclContext* ctx, acl_error* error)ACL_API_0_8; - -typedef const void*(ACL_API_ENTRY* CGPhase_0_8)(aclLoaderData* data, aclModule* ir, aclContext* ctx, - acl_error* error)ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* DisasmISA_0_8)(aclLoaderData* data, const char* kernel, - const void* isa_code, size_t isa_size) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* SetupLoaderObject_0_8)(aclCompiler* cl) ACL_API_0_8; - -typedef aclJITObjectImage(ACL_API_ENTRY* JITObjectImageCreate_0_8)( - const void* buffer, size_t length, aclBinary* bin, acl_error* error_code) ACL_API_0_8; - -typedef aclJITObjectImage(ACL_API_ENTRY* JITObjectImageCopy_0_8)(const void* buffer, size_t length, - acl_error* error_code) ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* JITObjectImageDestroy_0_8)(aclJITObjectImage image) ACL_API_0_8; - -typedef size_t(ACL_API_ENTRY* JITObjectImageSize_0_8)(aclJITObjectImage image, - acl_error* error_code) ACL_API_0_8; - -typedef const char*(ACL_API_ENTRY* JITObjectImageData_0_8)(aclJITObjectImage image, - acl_error* error_code)ACL_API_0_8; - -typedef acl_error(ACL_API_ENTRY* JITObjectImageFinalize_0_8)(aclJITObjectImage image) ACL_API_0_8; - -typedef size_t(ACL_API_ENTRY* JITObjectImageGetGlobalsSize_0_8)(aclJITObjectImage image, - acl_error* error_code) ACL_API_0_8; - -typedef bool (*JITSymbolCallback_0_8)(const char*, const void*, void*); - -typedef acl_error(ACL_API_ENTRY* JITObjectImageIterateSymbols_0_8)( - aclJITObjectImage image, JITSymbolCallback_0_8 jit_callback, void* data) ACL_API_0_8; - -typedef char*(ACL_API_ENTRY* JITObjectImageDisassembleKernel_0_8)(constAclJITObjectImage image, - const char* kernel, - acl_error* error_code)ACL_API_0_8; - -typedef void* (*AllocFunc_0_8)(size_t size)ACL_API_0_8; - -typedef void (*FreeFunc_0_8)(void* ptr) ACL_API_0_8; - -#endif // _ACL_FUNCTORS_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/include/aclStructs.h b/projects/clr/rocclr/compiler/lib/include/aclStructs.h deleted file mode 100644 index ccdd7460e4..0000000000 --- a/projects/clr/rocclr/compiler/lib/include/aclStructs.h +++ /dev/null @@ -1,365 +0,0 @@ -/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _ACL_STRUCTS_0_8_H_ -#define _ACL_STRUCTS_0_8_H_ -#define ACL_STRUCT_HEADER size_t struct_size -//! A structure that holds information on the various types of arguments -// The format in memory of this structure is -// ------------- -// | aclArgData | -// ------------- -// |->argStr | -// ------------- -// |->typeStr | -// ------------- -typedef struct _acl_md_arg_type_0_8 { - ACL_STRUCT_HEADER; - size_t argNameSize; - size_t typeStrSize; - const char* argStr; - const char* typeStr; - union { - struct { // Struct for sampler arguments - unsigned ID; - unsigned isKernelDefined; - unsigned value; - } sampler; - struct { // Struct for image arguments - unsigned resID; - unsigned cbNum; - unsigned cbOffset; - aclAccessType type; - bool is2D; - bool is1D; - bool isArray; - bool isBuffer; - } image; - struct { // struct for atomic counter arguments - unsigned is32bit; - unsigned resID; - unsigned cbNum; - unsigned cbOffset; - } counter; - struct { // struct for semaphore arguments - unsigned resID; - unsigned cbNum; - unsigned cbOffset; - } sema; - struct { // struct for pass by value arguments - unsigned numElements; - unsigned cbNum; - unsigned cbOffset; - aclArgDataType data; - } value; - struct { // struct for pass by pointer arguments - unsigned numElements; - unsigned cbNum; - unsigned cbOffset; - unsigned bufNum; - unsigned align; - aclArgDataType data; - aclMemoryType memory; - aclAccessType type; - bool isVolatile; - bool isRestrict; - bool isPipe; - } pointer; - struct { // Struct for queue arguments - unsigned numElements; - unsigned cbNum; - unsigned cbOffset; - aclArgDataType data; - aclMemoryType memory; - } queue; - } arg; - aclArgType type; - bool isConst; -} aclArgData_0_8; - -//! A structure that holds information for printf -// The format in memory of this structure is -// -------------- -// | aclPrintfFmt| -// -------------- -// |->argSizes | -// -------------- -// |->fmrStr | -// -------------- - -typedef struct _acl_md_printf_fmt_0_8 { - ACL_STRUCT_HEADER; - unsigned ID; - size_t numSizes; - size_t fmtStrSize; - uint32_t* argSizes; - const char* fmtStr; -} aclPrintfFmt_0_8; - -//! A structure that holds the metadata in the RODATA section. -typedef struct _acl_metadata_0_8 { - ACL_STRUCT_HEADER; // This holds the size of the structure itself for versioning. - size_t data_size; // This holds the size of all the memory allocated for this structure. - uint32_t major, minor, revision; // RT_ABI_VERSION - uint32_t gpuCaps; // RT_GPU_FUNC_CAPS - uint32_t funcID; // RT_GPU_FUNC_ID - uint32_t gpuRes[5]; // RT_GPU_DEFAULT_ID - size_t wgs[3]; // RT_WORK_GROUP_SIZE - uint32_t wrs[3]; // RT_WORK_REGION_SIZE - size_t kernelNameSize; - size_t deviceNameSize; - size_t mem[6]; // RT_MEM_SIZES - size_t numArgs; - size_t numPrintf; - - aclArgData_0_8* args; // RT_ARGUMENT_ARRAY - aclPrintfFmt_0_8* printf; // RT_GPU_PRINTF_ARRAY - const char* kernelName; // RT_KERNEL_NAME - const char* deviceName; // RT_DEVICE_NAME - bool enqueue_kernel; // RT_DEVICE_ENQUEUE - uint32_t kernel_index; // RT_KERNEL_INDEX - uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS - size_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT - size_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT - size_t vecTypeHintSize; - const char* vth; // RT_VEC_TYPE_HINT -} aclMetadata_0_8; - -//! An structure that holds information on the capabilities of the bif device. -typedef struct _acl_device_caps_rec_0_8 { - ACL_STRUCT_HEADER; - uint32_t flags[4]; - uint32_t encryptCode; -} aclDevCaps_0_8; - -//! Structure that holds information on the target that the source is -// being compiled for. -typedef struct _acl_target_info_rec_0_8 { - ACL_STRUCT_HEADER; - aclDevType arch_id; // An identifier for the architecture. - uint32_t chip_id; // A identifier for the chip. -} aclTargetInfo_0_8; - -// Structure for the version 0.8 of the structure. -typedef struct _acl_binary_opts_rec_0_8 { - ACL_STRUCT_HEADER; - uint32_t elfclass; - uint32_t bitness; - const char* temp_file; - uint32_t kernelArgAlign; -} aclBinaryOptions_0_8; - -// Structure for the version 0.8.1 of the structure. -// This versions addes in alloc/dealloc functions. -typedef struct _acl_binary_opts_rec_0_8_1 { - ACL_STRUCT_HEADER; - uint32_t elfclass; - uint32_t bitness; - const char* temp_file; - uint32_t kernelArgAlign; - AllocFunc_0_8 alloc; - FreeFunc_0_8 dealloc; -} aclBinaryOptions_0_8_1; - -//! Structure that holds the OpenCL binary information. -typedef struct _acl_bif_rec_0_8 { - ACL_STRUCT_HEADER; - aclTargetInfo_0_8 target; // Information about the target device. - aclBIF* bin; // Pointer to the acl. - aclOptions* options; // Pointer to acl options. - aclBinaryOptions_0_8 binOpts; // Pointer to the binary options. - aclDevCaps_0_8 caps; // Capabilities of the BIF. -} aclBinary_0_8; - -//! Version of the aclBinary that uses the 0_8_1 version of the aclBinaryOptions. -typedef struct _acl_bif_rec_0_8_1 { - ACL_STRUCT_HEADER; - aclTargetInfo_0_8 target; // Information about the target device. - aclBIF* bin; // Pointer to the acl. - aclOptions* options; // Pointer to acl options. - aclBinaryOptions_0_8_1 binOpts; // Pointer to the binary options. - aclDevCaps_0_8 caps; // Capabilities of the BIF. -} aclBinary_0_8_1; - -#define ACL_LOADER_COMMON \ - ACL_STRUCT_HEADER; \ - bool isBuiltin; \ - const char* libName; \ - void* handle; \ - LoaderInit init; \ - LoaderFini fini; - -// Struct that maps to the common structure between all loaders. -typedef struct _acl_common_loader_rec_0_8 { - ACL_LOADER_COMMON; -} aclCommonLoader_0_8; - -typedef struct _acl_cl_loader_rec_0_8 { - ACL_LOADER_COMMON; - Compile compile; - Link link; - CompLog getLog; - RetrieveType_0_8 retrieveType; - SetType_0_8 setType; - ConvertType_0_8 convertType; - Disassemble disassemble; - GetDevBinary_0_8 devBinary; - InsertSec insSec; - ExtractSec extSec; - RemoveSec remSec; - InsertSym insSym; - ExtractSym extSym; - RemoveSym remSym; - QueryInfo getInfo; - AddDbgArg addDbg; - RemoveDbgArg removeDbg; - SetupLoaderObject setupLoaderObject; - JITObjectImageCreate jitOICreate; - JITObjectImageCopy jitOICopy; - JITObjectImageDestroy jitOIDestroy; - JITObjectImageSize jitOISize; - JITObjectImageData jitOIData; - JITObjectImageFinalize jitOIFinalize; - JITObjectImageGetGlobalsSize jitOIGlobalSize; - JITObjectImageIterateSymbols jitOIIterateSymbols; - JITObjectImageDisassembleKernel jitOIDisassembleKernel; -} aclCLLoader_0_8; - -//! Structure that holds the required functions -// that sc exports for the SCDLL infrastructure. -typedef struct _acl_sc_loader_rec_0_8 { - ACL_LOADER_COMMON; - uint32_t /*SC_UINT32*/ sc_interface_version; - void /**SC_EXPORT_FUNCTIONS**/* scef; - // Any version specific fields go here. -} aclSCLoader_0_8; - -typedef struct _acl_fe_loader_rec_0_8 { - ACL_LOADER_COMMON; - FEToIR toIR; // Used for Source to aclModule containing LLVMIR - FEToIR toModule; // Used to convert raw SPIR/LLVM-IR to aclModule - SourceToISA toISA; // Used for Source to ISA -} aclFELoader_0_8; - -typedef struct _acl_opt_loader_rec_0_8 { - ACL_LOADER_COMMON; - IRPhase optimize; // Used for IR to IR transformation -} aclOptLoader_0_8; - -typedef struct _acl_link_loader_rec_0_8 { - ACL_LOADER_COMMON; - LinkPhase link; // Used for Linking in IR modules - IRPhase toLLVMIR; // Used for converting SPIR to LLVMIR - IRPhase toSPIR; // Used for converting LLVMIR to SPIR -} aclLinkLoader_0_8; - -typedef struct _acl_cg_loader_rec_0_8 { - ACL_LOADER_COMMON; - CGPhase codegen; // Used for converting from LLVMIR to target ASM. -} aclCGLoader_0_8; - -typedef struct _acl_be_loader_rec_0_8 { - ACL_LOADER_COMMON; - SourceToISA finalize; // Used for converting from target source to target ISA. - SourceToISA assemble; // Used for converting from target text to target binary. - DisasmISA disassemble; // Used for converting from target binary to target ISA. -} aclBELoader_0_8; - -typedef struct _acl_compiler_opts_rec_0_8 { - ACL_STRUCT_HEADER; // Size of the structure for version checking. - const char* clLib; - const char* feLib; - const char* optLib; - const char* linkLib; - const char* cgLib; - const char* beLib; - const char* scLib; -} aclCompilerOptions_0_8; - -typedef struct _acl_compiler_opts_rec_0_8_1 { - ACL_STRUCT_HEADER; // Size of the structure for version checking. - const char* clLib; - const char* feLib; - const char* optLib; - const char* linkLib; - const char* cgLib; - const char* beLib; - const char* scLib; // Name or path to the shader compiler shared library - AllocFunc alloc; - FreeFunc dealloc; -} aclCompilerOptions_0_8_1; - -//! Structure that holds the OpenCL compiler and various loaders. -typedef struct _acl_compiler_rec_0_8 { - ACL_STRUCT_HEADER; // Size of structure for version checking. - aclCLLoader clAPI; // Pointer to the compiler API. - aclFELoader feAPI; // Pointer to the FE Loader API. - aclOptLoader optAPI; // Pointer to the Opt Loader API. - aclLinkLoader linkAPI; // Pointer to the Link Loader API. - aclCGLoader cgAPI; // Pointer to the CG Loader API. - aclBELoader beAPI; // Pointer to the BE Loader API. - aclSCLoader scAPI; // Pointer to the SC Loader API. - aclCompilerOptions* opts; // The options structure for the compiler. - void* llvm_shutdown; // Pointer to the llvm shutdown object. - char* buildLog; // Pointer to the current build log. - unsigned logSize; // Size of the current build log. - aclLoaderData* apiData; // pointer to data store for the compiler API loader. -} aclCompilerHandle_0_8; - -//! Structure that holds the OpenCL compiler and various loaders. -typedef struct _acl_compiler_rec_0_8_1 { - ACL_STRUCT_HEADER; - aclCLLoader clAPI; // Pointer to the compiler API. - aclFELoader feAPI; // Pointer to the FE Loader API. - aclOptLoader optAPI; // Pointer to the Opt Loader API. - aclLinkLoader linkAPI; // Pointer to the Link Loader API. - aclCGLoader cgAPI; // Pointer to the CG Loader API. - aclBELoader beAPI; // Pointer to the BE Loader API. - aclSCLoader scAPI; // Pointer to the SC Loader API. - AllocFunc alloc; - FreeFunc dealloc; - aclCompilerOptions* opts; // The options structure for the compiler. - void* llvm_shutdown; // Pointer to the llvm shutdown object. - char* buildLog; // Pointer to the current build log. - unsigned logSize; // Size of the current build log. - aclLoaderData* apiData; // pointer to data store for the compiler API loader. -} aclCompilerHandle_0_8_1; - -//! Structure to hold kernel statistics obtained from kernel -typedef struct _acl_kernel_stats_0_8_1 { - unsigned int scratchRegs; - unsigned int scratchSize; - unsigned int availablevgprs; - unsigned int availablesgprs; - unsigned int usedvgprs; - unsigned int usedsgprs; - unsigned int availableldssize; - unsigned int usedldssize; - unsigned int availablestacksize; - unsigned int usedstacksize; - unsigned int wavefrontsize; - unsigned int wavefrontpersimd; - unsigned int threadsperworkgroup; - unsigned int reqdworkgroup_x; - unsigned int reqdworkgroup_y; - unsigned int reqdworkgroup_z; -} aclKernelStats; - -#endif // _ACL_STRUCTS_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/include/aclTypes.h b/projects/clr/rocclr/compiler/lib/include/aclTypes.h deleted file mode 100644 index 65263cdc5c..0000000000 --- a/projects/clr/rocclr/compiler/lib/include/aclTypes.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _ACL_API_TYPES_0_8_H_ -#define _ACL_API_TYPES_0_8_H_ -#include "aclDefs.h" -#include -#include - -// Typedefs that always point to the most recent versions of the objects. -typedef struct _acl_md_arg_type_0_8 aclArgData; -typedef struct _acl_md_printf_fmt_0_8 aclPrintfFmt; -typedef struct _acl_metadata_0_8 aclMetadata; -typedef struct _acl_device_caps_rec_0_8 aclDevCaps; -typedef struct _acl_target_info_rec_0_8 aclTargetInfo; -typedef struct _acl_bif_rec_0_8_1 aclBinary; -typedef struct _acl_binary_opts_rec_0_8_1 aclBinaryOptions; -typedef struct _acl_compiler_rec_0_8_1 aclCompiler; -typedef struct _acl_compiler_opts_rec_0_8_1 aclCompilerOptions; -typedef struct _acl_options_0_8* aclOptions; // Opaque pointer to amd::Options -typedef struct _acl_binary_0_8* aclBIF; // Opaque pointer to bifbase -typedef struct _acl_common_loader_rec_0_8 aclCommonLoader; -typedef struct _acl_cl_loader_rec_0_8 aclCLLoader; -typedef struct _acl_sc_loader_rec_0_8 aclSCLoader; -typedef struct _acl_fe_loader_rec_0_8 aclFELoader; -typedef struct _acl_link_loader_rec_0_8 aclLinkLoader; -typedef struct _acl_opt_loader_rec_0_8 aclOptLoader; -typedef struct _acl_cg_loader_rec_0_8 aclCGLoader; -typedef struct _acl_be_loader_rec_0_8 aclBELoader; -typedef struct _acl_llvm_module_0_8* aclModule; // Opaque pointer to llvm::Module -typedef struct _acl_llvm_context_0_8* aclContext; // Opaque pointer to llvm::Context -typedef struct _acl_loader_data_0_8* aclLoaderData; // Opaque pointer to loader data - -#include "aclEnums.h" -// Typedefs for enumerations -typedef enum _acl_error_enum_0_8 acl_error; -typedef enum _comp_device_caps_enum_0_8 compDeviceCaps; -typedef enum _comp_opt_settings_enum_0_8 compOptSettings; -typedef enum _acl_dev_type_enum_0_8 aclDevType; -typedef enum _acl_cl_version_enum_0_8 aclCLVersion; -typedef enum _acl_type_enum_0_8 aclType; -typedef enum _rt_query_types_enum_0_8 aclQueryType; -typedef enum _rt_gpu_caps_enum_0_8 aclGPUCaps; -typedef enum _rt_gpu_resource_enum_0_8 aclGPUResource; -typedef enum _rt_gpu_mem_sizes_enum_0_8 aclGPUMemSizes; -typedef enum _acl_arg_type_enum_0_8 aclArgType; -typedef enum _acl_data_type_enum_0_8 aclArgDataType; -typedef enum _acl_memory_type_enum_0_8 aclMemoryType; -typedef enum _acl_access_type_enum_0_8 aclAccessType; -typedef enum _bif_version_enum_0_8 aclBIFVersion; -typedef enum _bif_platform_enum_0_8 aclPlatform; -typedef enum _bif_sections_enum_0_8 aclSections; -typedef enum _acl_loader_type_enum_0_8 aclLoaderType; -typedef enum _acl_binary_image_type_enum_0_8 aclBinaryImageType; - -#include "aclFunctors.h" -// Typedefs for function pointers -typedef aclLogFunction_0_8 aclLogFunction; -typedef InsertSec_0_8 InsertSec; -typedef RemoveSec_0_8 RemoveSec; -typedef ExtractSec_0_8 ExtractSec; -typedef InsertSym_0_8 InsertSym; -typedef RemoveSym_0_8 RemoveSym; -typedef ExtractSym_0_8 ExtractSym; -typedef QueryInfo_0_8 QueryInfo; -typedef Compile_0_8 Compile; -typedef Link_0_8 Link; -typedef AddDbgArg_0_8 AddDbgArg; -typedef RemoveDbgArg_0_8 RemoveDbgArg; -typedef SetupLoaderObject_0_8 SetupLoaderObject; -typedef CompLog_0_8 CompLog; -typedef RetrieveType_0_8 RetrieveType; -typedef SetType_0_8 SetType; -typedef ConvertType_0_8 ConvertType; -typedef Disassemble_0_8 Disassemble; -typedef GetDevBinary_0_8 GetDevBinary; -typedef LoaderInit_0_8 LoaderInit; -typedef LoaderFini_0_8 LoaderFini; -typedef FEToIR_0_8 FEToIR; -typedef SourceToISA_0_8 SourceToISA; -typedef IRPhase_0_8 IRPhase; -typedef LinkPhase_0_8 LinkPhase; -typedef CGPhase_0_8 CGPhase; -typedef DisasmISA_0_8 DisasmISA; -typedef AllocFunc_0_8 AllocFunc; -typedef FreeFunc_0_8 FreeFunc; -typedef JITObjectImageCreate_0_8 JITObjectImageCreate; -typedef JITObjectImageCopy_0_8 JITObjectImageCopy; -typedef JITObjectImageDestroy_0_8 JITObjectImageDestroy; -typedef JITObjectImageSize_0_8 JITObjectImageSize; -typedef JITObjectImageData_0_8 JITObjectImageData; -typedef JITObjectImageFinalize_0_8 JITObjectImageFinalize; -typedef JITObjectImageGetGlobalsSize_0_8 JITObjectImageGetGlobalsSize; -typedef JITSymbolCallback_0_8 JITSymbolCallback; -typedef JITObjectImageIterateSymbols_0_8 JITObjectImageIterateSymbols; -typedef JITObjectImageDisassembleKernel_0_8 JITObjectImageDisassembleKernel; - -#include "aclStructs.h" - -#endif // _CL_API_TYPES_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/spirv/spirvUtils.h b/projects/clr/rocclr/compiler/lib/spirv/spirvUtils.h deleted file mode 100644 index 5ceaf1b7d6..0000000000 --- a/projects/clr/rocclr/compiler/lib/spirv/spirvUtils.h +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _COMPLIB_SPIRV_UTILS_H -#define _COMPLIB_SPIRV_UTILS_H - -#include - -bool validateSPIRV(const void* image, size_t length); -bool isSPIRVMagic(const void* image, size_t length); - -#endif diff --git a/projects/clr/rocclr/compiler/lib/utils/bif_section_labels.hpp b/projects/clr/rocclr/compiler/lib/utils/bif_section_labels.hpp deleted file mode 100644 index e31ef61b28..0000000000 --- a/projects/clr/rocclr/compiler/lib/utils/bif_section_labels.hpp +++ /dev/null @@ -1,230 +0,0 @@ -/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _CL_UTILS_BIF_SECTION_LABELS_HPP_ -#define _CL_UTILS_BIF_SECTION_LABELS_HPP_ -#ifdef __cplusplus -extern "C" { -#endif -namespace bif { -const unsigned PRE = 0; -const unsigned POST = 1; -} // namespace bif - -typedef enum { - symOpenclCompilerOptions, - symAMDILCompilerOptions, - symHSACompilerOptions, - symOpenclLinkerOptions, - symOpenclMeta, - symOpenclKernel, - symOpenclStub, - symOpenclGlobal, - symISAMeta, - symISABinary, - symAMDILText, - symAMDILBinary, - symHSAILText, - symBRIG, - symAMDILFMeta, - symISAText, - symBRIGxxx1, - symBRIGxxx2, - symBRIGxxx3, - symX86Barrier, - symAMDILHeader, - symDebugInfo, - symDebugilText, - symDebugilBinary, - symAsmText, - symDLL, - symLast, - symKernelStats, - symBRIGLoaderMap -} oclBIFSymbolID; - -struct oclBIFSymbolStruct { - oclBIFSymbolID id; - // pre/post fix of the symbol string - const char* str[2]; - // the BIF section that the symbol is stored for GPU/CPU - aclSections sections[2]; -}; - -// TODO: analyze the changes since 30 and remove unused anymore symbols, -// for example, symISAMeta, update convert functions, check backward compatibility. -// These are the symbols that are defined by the BIF 3.1 spec -static constexpr oclBIFSymbolStruct BIF31[28] = { - // 0: BIF 3.0 compiler options, .comment section via library support. - {symOpenclCompilerOptions, {"__OpenCL_", "compiler_options"}, {aclCOMMENT, aclCOMMENT}}, - // 1: BIF 3.0 AMDIL compile options, .comment section via -fbin-amdil. - {symAMDILCompilerOptions, {"__AMDIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}}, - // 2: BIF 3.0 HSAIL compile options, .comment section via -fbin-hsail. - {symHSACompilerOptions, {"__HSAIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}}, - // 3: BIF 3.0 linker options, .comment section via library support. - {symOpenclLinkerOptions, {"__OpenCL_", "linker_options"}, {aclCOMMENT, aclCOMMENT}}, - // 4: BIF 3.0 per kernel metadata, .cg section via -fbin-cg for CPU, - // .rodata section via -fbin-exe for GPU - {symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclCODEGEN}}, - // 5: BIF 3.0 per kernel text(x86 only), .cg section via -fbin-cg. - {symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclCODEGEN}}, - // 6: BIF 3.0 per kernel stub(x86 only), .cg section via -fbin-cg. - {symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclCODEGEN}}, - // 7: BIF 3.0 per constant buffer data, .rodata section via -fbin-exe. - {symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclRODATA}}, - // 8: BIF 3.0 per kernel ISA metadata, .rodata section via -fbin-exe. - {symISAMeta, {"__ISA_", "_metadata"}, {aclRODATA, aclLAST}}, - // 9: BIF 3.0 per kernel ISA, .text section via -fbin-exe. - {symISABinary, {"__ISA_", "_binary"}, {aclTEXT, aclLAST}}, - // 10: BIF 3.0 per kernel AMDIL source, .internal section via -fbin-amdil. - {symAMDILText, {"__AMDIL_", "_text"}, {aclINTERNAL, aclLAST}}, - // 11: BIF 3.0 per kernel AMDIL binary, .internal section via -fbin-amdil. - {symAMDILBinary, {"__AMDIL_", "_binary"}, {aclINTERNAL, aclLAST}}, - // 12: BIF 3.0 per kernel HSAIL source, .internal section via -fbin-hsail. - {symHSAILText, {"__HSAIL_", "_text"}, {aclCODEGEN, aclLAST}}, - // 13: BIF 3.0 per kernel HSAIL binary, .internal section via -fbin-hsail. - {symBRIG, {"__BRIG__", ""}, {aclBRIG, aclLAST}}, - // 14: BIF 3.0 per function metadata, .internal section via -fbin-amdil. - {symAMDILFMeta, {"__AMDIL_", "_fmetadata"}, {aclINTERNAL, aclLAST}}, - // 15: BIF 3.0 per kernel ISA text, .internal section via disassembly. - {symISAText, {"__ISA_", "_text"}, {aclINTERNAL, aclLAST}}, - // 16: BIF 3.0 BRIG operands declarations, .brig section via -fbin-brig. - {symBRIGxxx1, {"", ""}, {aclLAST, aclLAST}}, - // 17: Unused after changes in HSAIL PRM - {symBRIGxxx2, {"", ""}, {aclLAST, aclLAST}}, - // 18: BIF 3.0 BRIG strtab declarations, .brig section via -fbin-brig. - {symBRIGxxx3, {"", ""}, {aclLAST, aclLAST}}, - // 19: BIF 3.0 per kernel barrier metadata, only valid for X86. - {symX86Barrier, {"__X86_", "_barrier"}, {aclLAST, aclLAST}}, - // 20: BIF 3.0 per kernel header, .internal section via -fbin-amdil.(Legacy from bif2.x) - {symAMDILHeader, {"__AMDIL_", "_header"}, {aclINTERNAL, aclLAST}}, - // 21: BIF 3.0 HSA BRIG or ISA debug info - {symDebugInfo, {"__debug_brig__", "__debug_isa__"}, {aclHSADEBUG, aclLAST}}, - // 22: BIF 3.0 debugil text, .internal section via -g - {symDebugilText, {"__debugil_text", ""}, {aclINTERNAL, aclLAST}}, - // 23: BIF 3.0 debugil binary, .internal section, can be converted from - // __debugil_text - {symDebugilBinary, {"__debugil_binary", ""}, {aclINTERNAL, aclLAST}}, - {symAsmText, {"", ""}, {aclLAST, aclCODEGEN}}, - {symDLL, {"", ""}, {aclLAST, aclTEXT}}, - // 26: BIF 3.0 HSAIL kernel statistics - {symKernelStats, {"__HSAIL_", "_kernel_statistics"}, {aclKSTATS, aclLAST}}, - // 27: BIF 3.0 BRIG loader map - {symBRIGLoaderMap, {"__Loader_Map", ""}, {aclCODEGEN, aclLAST}}, -}; // BIF31 - -// These are the symbols that are defined by the BIF 3.0 spec -static constexpr oclBIFSymbolStruct BIF30[28] = { - // 0: BIF 3.0 compiler options, .comment section via library support. - {symOpenclCompilerOptions, {"__OpenCL_", "compiler_options"}, {aclCOMMENT, aclCOMMENT}}, - // 1: BIF 3.0 AMDIL compile options, .comment section via -fbin-amdil. - {symAMDILCompilerOptions, {"__AMDIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}}, - // 2: BIF 3.0 HSAIL compile options, .comment section via -fbin-hsail. - {symHSACompilerOptions, {"__HSAIL_", "_compiler_options"}, {aclCOMMENT, aclLAST}}, - // 3: BIF 3.0 linker options, .comment section via library support. - {symOpenclLinkerOptions, {"__OpenCL_", "linker_options"}, {aclCOMMENT, aclCOMMENT}}, - // 4: BIF 3.0 per kernel metadata, .cg section via -fbin-cg for CPU, - // .rodata section via -fbin-exe for GPU - {symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclCODEGEN}}, - // 5: BIF 3.0 per kernel text(x86 only), .cg section via -fbin-cg. - {symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclCODEGEN}}, - // 6: BIF 3.0 per kernel stub(x86 only), .cg section via -fbin-cg. - {symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclCODEGEN}}, - // 7: BIF 3.0 per constant buffer data, .rodata section via -fbin-exe. - {symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclRODATA}}, - // 8: BIF 3.0 per kernel ISA metadata, .rodata section via -fbin-exe. - {symISAMeta, {"__ISA_", "_metadata"}, {aclRODATA, aclLAST}}, - // 9: BIF 3.0 per kernel ISA, .text section via -fbin-exe. - {symISABinary, {"__ISA_", "_binary"}, {aclTEXT, aclLAST}}, - // 10: BIF 3.0 per kernel AMDIL source, .internal section via -fbin-amdil. - {symAMDILText, {"__AMDIL_", "_text"}, {aclINTERNAL, aclLAST}}, - // 11: BIF 3.0 per kernel AMDIL binary, .internal section via -fbin-amdil. - {symAMDILBinary, {"__AMDIL_", "_binary"}, {aclINTERNAL, aclLAST}}, - // 12: BIF 3.0 per kernel HSAIL source, .internal section via -fbin-hsail. - {symHSAILText, {"__HSAIL_", "_text"}, {aclCODEGEN, aclLAST}}, - // 13: BIF 3.0 per kernel HSAIL binary, .internal section via -fbin-hsail. - {symBRIG, {"__BRIG__", ""}, {aclBRIG, aclLAST}}, - // 14: BIF 3.0 per function metadata, .internal section via -fbin-amdil. - {symAMDILFMeta, {"__AMDIL_", "_fmetadata"}, {aclINTERNAL, aclLAST}}, - // 15: BIF 3.0 per kernel ISA text, .internal section via disassembly. - {symISAText, {"__ISA_", "_text"}, {aclINTERNAL, aclLAST}}, - // 16: BIF 3.0 BRIG operands declarations, .brig section via -fbin-brig. - {symBRIGxxx1, {"", ""}, {aclLAST, aclLAST}}, - // 17: Unused after changes in HSAIL PRM - {symBRIGxxx2, {"", ""}, {aclLAST, aclLAST}}, - // 18: BIF 3.0 BRIG strtab declarations, .brig section via -fbin-brig. - {symBRIGxxx3, {"", ""}, {aclLAST, aclLAST}}, - // 19: BIF 3.0 per kernel barrier metadata, only valid for X86. - {symX86Barrier, {"__X86_", "_barrier"}, {aclLAST, aclLAST}}, - // 20: BIF 3.0 per kernel header, .internal section via -fbin-amdil.(Legacy from bif2.x) - {symAMDILHeader, {"__AMDIL_", "_header"}, {aclINTERNAL, aclLAST}}, - // 21: BIF 3.0 HSA BRIG or ISA debug info - {symDebugInfo, {"__debug_brig__", "__debug_isa__"}, {aclHSADEBUG, aclLAST}}, - // 22: BIF 3.0 debugil text, .internal section via -g - {symDebugilText, {"__debugil_text", ""}, {aclINTERNAL, aclLAST}}, - // 23: BIF 3.0 debugil binary, .internal section, can be converted from - // __debugil_text - {symDebugilBinary, {"__debugil_binary", ""}, {aclINTERNAL, aclLAST}}, - {symAsmText, {"", ""}, {aclLAST, aclCODEGEN}}, - {symDLL, {"", ""}, {aclLAST, aclTEXT}}, - // 26: BIF 3.0 HSAIL kernel statistics - {symKernelStats, {"__HSAIL_", "_kernel_statistics"}, {aclKSTATS, aclLAST}}, - // 27: BIF 3.0 BRIG loader map - {symBRIGLoaderMap, {"__Loader_Map", ""}, {aclCODEGEN, aclLAST}}, -}; // BIF30 - - -// These are the sections that are defined by the BIF 2.0 spec -static constexpr oclBIFSymbolStruct BIF20[13] = { - {symOpenclCompilerOptions, {"__OpenCL_compile_options", ""}, {aclCOMMENT, aclCOMMENT}}, - {symOpenclLinkerOptions, {"__OpenCL_linker_options", ""}, {aclCOMMENT, aclCOMMENT}}, - {symOpenclKernel, {"__OpenCL_", "_kernel"}, {aclLAST, aclDLL}}, - {symISABinary, {"__OpenCL_", "_kernel"}, {aclCAL, aclLAST}}, - {symOpenclMeta, {"__OpenCL_", "_metadata"}, {aclRODATA, aclDLL}}, - {symAMDILHeader, {"__OpenCL_", "_header"}, {aclRODATA, aclLAST}}, - {symOpenclGlobal, {"__OpenCL_", "_global"}, {aclRODATA, aclLAST}}, - {symAMDILText, {"__OpenCL_", "_amdil"}, {aclILTEXT, aclLAST}}, - {symAMDILFMeta, {"__OpenCL_", "_fmetadata"}, {aclRODATA, aclLAST}}, - {symOpenclStub, {"__OpenCL_", "_stub"}, {aclLAST, aclDLL}}, - {symDebugilText, {"", ""}, {aclILDEBUG, aclLAST}}, - {symAsmText, {"", ""}, {aclLAST, aclASTEXT}}, - {symDLL, {"", ""}, {aclLAST, aclDLL}}, -}; // BIF20 - - -inline const oclBIFSymbolStruct* findBIFSymbolStruct(const oclBIFSymbolStruct* symbols, - size_t nSymbols, oclBIFSymbolID id) { - for (size_t i = 0; i < nSymbols; ++i) { - if (id == symbols[i].id) { - return &symbols[i]; - } - } - return NULL; -} - -inline const oclBIFSymbolStruct* findBIF30SymStruct(oclBIFSymbolID id) { - size_t nBIF30Symbol = sizeof(BIF30) / sizeof(oclBIFSymbolStruct); - return findBIFSymbolStruct(BIF30, nBIF30Symbol, id); -} - -#ifdef __cplusplus -} -#endif -#endif // _CL_UTILS_BIF_SECTION_LABELS_HPP_ diff --git a/projects/clr/rocclr/compiler/lib/utils/libUtils.h b/projects/clr/rocclr/compiler/lib/utils/libUtils.h deleted file mode 100644 index 6b72062f74..0000000000 --- a/projects/clr/rocclr/compiler/lib/utils/libUtils.h +++ /dev/null @@ -1,383 +0,0 @@ -/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef _CL_LIB_UTILS_0_8_H_ -#define _CL_LIB_UTILS_0_8_H_ -#include "acl.h" -#include -#include -#include -#include -#include -#include -#include "library.hpp" -#include "utils/bif_section_labels.hpp" -#include "utils/options.hpp" -using namespace bif; - -// Utility function to set a flag in option structure -// of the aclDevCaps. -void setFlag(aclDevCaps* elf, compDeviceCaps option); - -// Utility function to flip a flag in option structure -// of the aclDevCaps. -void flipFlag(aclDevCaps* elf, compDeviceCaps option); - -// Utility function to clear a flag in option structure -// of the aclDevCaps. -void clearFlag(aclDevCaps* elf, compDeviceCaps option); - -// Utility function to check that a flag in option structure -// of the aclDevCaps is set. -bool checkFlag(aclDevCaps* elf, compDeviceCaps option); - -// Utility function to initialize and elf device capabilities -void initElfDeviceCaps(aclBinary* elf); - -// Append the string to the aclCompiler log string. -void appendLogToCL(aclCompiler* cl, const std::string& logStr); - -const char* getDeviceName(const aclTargetInfo& target); - -// Select the correct library from the target information. -amd::LibrarySelector getLibraryType(const aclTargetInfo* target); - -// get family_enum from the target information. -unsigned getFamilyEnum(const aclTargetInfo* target); - -// get chip_enum from the target information. -unsigned getChipEnum(const aclTargetInfo* target); - -// get isa type name (compute capability) from the target information. -const std::string& getIsaTypeName(const aclTargetInfo* target); - -// get isa type (compute capability) from the target information. -int getIsaType(const aclTargetInfo* target); - -// get Feature String for target. -std::string getFeatureString(const aclTargetInfo& target, amd::option::Options* OptionsObj); - -// Create a copy of an ELF and duplicate all sections/symbols -aclBinary* createELFCopy(aclBinary* src); - -// Create a BIF2.1 elf from a BIF 2.0 elf -aclBinary* convertBIF20ToBIF21(aclBinary* src); - -// Create a BIF3.0 elf from a BIF 2.0 elf -aclBinary* convertBIF20ToBIF30(aclBinary* src); - -// Create a BIF3.1 elf from a BIF 2.0 elf -aclBinary* convertBIF20ToBIF31(aclBinary* src); - -// Create a BIF2.0 elf from a BIF 2.1 elf -aclBinary* convertBIF21ToBIF20(aclBinary* src); - -// Create a BIF3.0 elf from a BIF 2.1 elf -aclBinary* convertBIF21ToBIF30(aclBinary* src); - -// Create a BIF3.1 elf from a BIF 2.1 elf -aclBinary* convertBIF21ToBIF31(aclBinary* src); - -// Create a BIF2.0 elf from a BIF 3.0 elf -aclBinary* convertBIF30ToBIF20(aclBinary* src); - -// Create a BIF2.1 elf from a BIF 3.0 elf -aclBinary* convertBIF30ToBIF21(aclBinary* src); - -// Create a BIF3.1 elf from a BIF 3.0 elf -aclBinary* convertBIF30ToBIF31(aclBinary* src); - -// Create a BIF2.0 elf from a BIF 3.1 elf -aclBinary* convertBIF31ToBIF20(aclBinary* src); - -// Create a BIF2.1 elf from a BIF 3.1 elf -aclBinary* convertBIF31ToBIF21(aclBinary* src); - -// Create a BIF3.0 elf from a BIF 3.1 elf -aclBinary* convertBIF31ToBIF30(aclBinary* src); - -// get a pointer to the aclBIF irrespective of the -// binary version. -aclBIF* aclutGetBIF(aclBinary*); - -// Get a pointer to the aclOptions irrespective of -// the binary version. -aclOptions* aclutGetOptions(aclBinary*); - -// Get a pointer to the aclBinaryOptions struct -// irrespective of the binary version. -aclBinaryOptions* aclutGetBinOpts(aclBinary*); - -// Get a pointer to the target info struct -// irrespective of the binary version. -aclTargetInfo* aclutGetTargetInfo(aclBinary*); - -// Get a pointer to the device caps -// irrespective of the binary version. -aclDevCaps* aclutGetCaps(aclBinary*); - -// Copy two binary option structures irrespective -// of the binary version and uses defaults when -// things don't match up. -void aclutCopyBinOpts(aclBinaryOptions* dst, const aclBinaryOptions* src, bool is64bit); - -// Retrieve kernel statistics from binary -// and insert to elf as symbol -acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*); - -// Returns target chip name. -std::string aclutGetCodegenName(const aclTargetInfo& tgtInfo); - -// Helper function that returns the -// allocation function from the binary. -AllocFunc aclutAlloc(const aclBinary* bin); - -// Helper function that returns the -// de-allocation function from the binary. -FreeFunc aclutFree(const aclBinary* bin); - - -// Helper function that returns the -// allocation function from the compiler. -AllocFunc aclutAlloc(const aclCompiler* bin); - -// Helper function that returns the -// de-allocation function from the compiler. -FreeFunc aclutFree(const aclCompiler* bin); - -// Helper function that returns the -// allocation function from the compiler options. -AllocFunc aclutAlloc(const aclCompilerOptions* bin); - -// Helper function that returns the -// de-allocation function from the compiler options. -FreeFunc aclutFree(const aclCompilerOptions* bin); - -inline std::vector splitSpaceSeparatedString(char* str) { - std::string s(str); - std::stringstream ss(s); - std::istream_iterator beg(ss), end; - std::vector vec(beg, end); - return vec; -} - -// Helper function that returns OpenCL mangled kernel name. -inline std::string aclutOpenclMangledKernelName(const std::string& kernel_name) { - const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclKernel); - assert(sym && "symbol not found"); - return std::string("&") + sym->str[PRE] + kernel_name + sym->str[POST]; -} - -// Helper function that returns OpenCL mangled kernel metadata symbol name. -inline std::string aclutOpenclMangledKernelMetadataName(const std::string& kernel_name) { - const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta); - assert(sym && "symbol not found"); - return sym->str[PRE] + aclutOpenclMangledKernelName(kernel_name) + sym->str[POST]; -} - -#ifdef WITH_TARGET_HSAIL -// Helper function that updates metadata for all the kernels in binary; -// the updated attribute is the number of hidden kernel arguments. -inline acl_error aclutUpdateMetadataWithHiddenKernargsNum(aclCompiler* cl, aclBinary* bin, - uint32_t num) { - if (num == MAX_HIDDEN_KERNARGS_NUM) { - return ACL_SUCCESS; - } - const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta); - assert(sym && "symbol not found"); - aclSections secID = sym->sections[0]; - size_t kernelNamesSize = 0; - acl_error error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, NULL, &kernelNamesSize); - if (error_code != ACL_SUCCESS) { - return error_code; - } - char* kernelNames = new char[kernelNamesSize]; - error_code = aclQueryInfo(cl, bin, RT_KERNEL_NAMES, NULL, kernelNames, &kernelNamesSize); - if (error_code != ACL_SUCCESS) { - delete[] kernelNames; - return error_code; - } - std::vector vKernels = splitSpaceSeparatedString(kernelNames); - delete[] kernelNames; - size_t roSize = 0; - for (auto it = vKernels.begin(); it != vKernels.end(); ++it) { - std::string symbol = aclutOpenclMangledKernelMetadataName(*it); - void* roSec = - const_cast(aclExtractSymbol(cl, bin, &roSize, secID, symbol.c_str(), &error_code)); - if (error_code != ACL_SUCCESS) { - return error_code; - } - if (!roSec || roSize == 0) { - error_code = ACL_ELF_ERROR; - return error_code; - } - aclMetadata* md = reinterpret_cast(roSec); - md->numHiddenKernelArgs = num; - error_code = aclRemoveSymbol(cl, bin, secID, symbol.c_str()); - if (error_code != ACL_SUCCESS) { - return error_code; - } - error_code = aclInsertSymbol(cl, bin, md, roSize, secID, symbol.c_str()); - if (error_code != ACL_SUCCESS) { - return error_code; - } - } - return error_code; -} -#endif - -struct _target_mappings_rec; -typedef _target_mappings_rec TargetMapping; - -// Returns the TargetMapping for the specific target device. -const TargetMapping& getTargetMapping(const aclTargetInfo& target); - -inline bool is64BitTarget(const aclTargetInfo& target) { - return (target.arch_id == aclX64 || target.arch_id == aclAMDIL64 || target.arch_id == aclHSAIL64); -} - -inline bool isCpuTarget(const aclTargetInfo& target) { - return (target.arch_id == aclX64 || target.arch_id == aclX86); -} - -inline bool isGpuTarget(const aclTargetInfo& target) { - return (target.arch_id == aclAMDIL || target.arch_id == aclAMDIL64 || - target.arch_id == aclHSAIL || target.arch_id == aclHSAIL64); -} - -inline bool isAMDILTarget(const aclTargetInfo& target) { - return (target.arch_id == aclAMDIL || target.arch_id == aclAMDIL64); -} - -inline bool isHSAILTarget(const aclTargetInfo& target) { - return (target.arch_id == aclHSAIL || target.arch_id == aclHSAIL64); -} - -const std::string& getLegacyLibName(); - -inline bool isValidTarget(const aclTargetInfo& target) { - return (target.arch_id && target.chip_id); -} - -bool isChipSupported(const aclTargetInfo& target); - -enum scId { - SC_AMDIL = 0, - SC_HSAIL = 0, - SC_LAST, -}; - -// Helper function that allocates an aligned memory. -inline void* alignedMalloc(size_t size, size_t alignment) { -#if defined(_WIN32) - return ::_aligned_malloc(size, alignment); -#else - void* ptr = NULL; - if (0 == ::posix_memalign(&ptr, alignment, size)) { - return ptr; - } - return NULL; -#endif -} - -// Helper function that frees an aligned memory. -inline void alignedFree(void* ptr) { -#if defined(_WIN32) - ::_aligned_free(ptr); -#else - free(ptr); -#endif -} - -#if defined(_WIN32) -inline void convertLongAbsFilePathIfNeeded(std::string& filename) { - if (filename.empty()) { - return; - } - std::wstring ws(filename.begin(), filename.end()); - wchar_t abs_path[_MAX_ENV]; - _wfullpath(abs_path, ws.c_str(), _MAX_ENV); - std::wstring ws_abs = std::wstring(abs_path); - if (ws_abs.size() >= _MAX_PATH) { - std::string s(ws_abs.begin(), ws_abs.end()); - filename = "\\\\?\\" + s; - } -} -#endif - -inline char* readFile(std::string source_filename, size_t& size) { -#if defined(_WIN32) - convertLongAbsFilePathIfNeeded(source_filename); -#endif - FILE* fp = ::fopen(source_filename.c_str(), "rb"); - unsigned int length; - size_t offset = 0; - char* ptr; - if (!fp) { - return NULL; - } - // obtain file size - ::fseek(fp, 0, SEEK_END); - length = ::ftell(fp); - ::rewind(fp); - ptr = reinterpret_cast(::malloc(offset + length + 1)); - if (length != fread(&ptr[offset], 1, length, fp)) { - ::free(ptr); - ::fclose(fp); - return NULL; - } - ptr[offset + length] = '\0'; - size = offset + length; - ::fclose(fp); - return ptr; -} - -inline bool writeFile(std::string source_filename, const char* source, size_t size) { -#if defined(_WIN32) - convertLongAbsFilePathIfNeeded(source_filename); -#endif - FILE* fp = ::fopen(source_filename.c_str(), "wb"); - if (!fp) { - return EXIT_FAILURE; - } - if (!::fwrite(source, size, 1, fp)) { - ::fclose(fp); - return EXIT_FAILURE; - } - ::fclose(fp); - return EXIT_SUCCESS; -} - -#if !defined(BCMAG) -#define BCMAG "BC" -#define SBCMAG 2 -#endif -// Helper predicate returns true if p starts with bit code signature. -// TODO: Move it into Compiler Lib back in new 1_0 API -inline static bool isBcMagic(const char* p) { - if (p == NULL || strncmp(p, BCMAG, SBCMAG) != 0) { - return false; - } - return true; -} - -void dump(aclBinary* bin); - -#endif // _CL_LIB_UTILS_0_8_H_ diff --git a/projects/clr/rocclr/compiler/lib/utils/options.cpp b/projects/clr/rocclr/compiler/lib/utils/options.cpp index adb8d486a6..618a52b8af 100644 --- a/projects/clr/rocclr/compiler/lib/utils/options.cpp +++ b/projects/clr/rocclr/compiler/lib/utils/options.cpp @@ -593,7 +593,7 @@ int getOptionDesc(std::string& options, size_t StartPos, bool IsShortForm, Optio } bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value, bool IsPrefixOption, - bool IsOffFlag, bool IsLC) { + bool IsOffFlag) { OptionVariables* ovars = Opts.oVariables; OptionDescriptor* od = &OptDescTable[OptDescTableIx]; @@ -733,9 +733,7 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value, Opts.clcOptions.append(" -D__FAST_RELAXED_MATH__=1"); Opts.clangOptions.push_back("-D__FAST_RELAXED_MATH__=1"); - if (IsLC) { // w/a for SWDEV-116690 - Opts.clangOptions.push_back("-cl-fast-relaxed-math"); - } + Opts.clangOptions.push_back("-cl-fast-relaxed-math"); // fall-through to handle UnsafeMathOpt case OID_UnsafeMathOpt: @@ -861,10 +859,8 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value, break; case OID_OptUseNative: - if (IsLC) { - Opts.llvmOptions.append(" -mllvm -amdgpu-use-native="); - Opts.llvmOptions.append(sval); - } + Opts.llvmOptions.append(" -mllvm -amdgpu-use-native="); + Opts.llvmOptions.append(sval); break; case OID_WFComma: @@ -886,9 +882,7 @@ bool processOption(int OptDescTableIx, Options& Opts, const std::string& Value, Opts.clangOptions.push_back(sval); } else if (((OptionIdentifier)OptDescTableIx) == OID_WBComma) { Opts.llvmOptions.append(" "); - if (IsLC) { - Opts.llvmOptions.append("-mllvm "); - } + Opts.llvmOptions.append("-mllvm "); Opts.llvmOptions.append(sval); } else if (((OptionIdentifier)OptDescTableIx) == OID_WHComma) { Opts.finalizerOptions.push_back(sval); @@ -953,7 +947,7 @@ namespace amd { namespace option { -bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isLC) { +bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly) { Opts.origOptionStr = options; OptionVariables* ovars = Opts.oVariables; OptionDescriptor* od = OptDescTable; @@ -1084,8 +1078,7 @@ bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, boo if (!(OPTION_info(od) & OA_RUNTIME)) continue; } - if (!processOption(option_ndx, Opts, value, isPrefix_option, (isPrefix_mno || isPrefix_fno), - isLC)) { + if (!processOption(option_ndx, Opts, value, isPrefix_option, (isPrefix_mno || isPrefix_fno))) { // Keep the optionsLog set in processOption(). std::string tmpStr("Invalid option: "); tmpStr += options.substr(bpos, (pos == std::string::npos) ? pos : pos - bpos); diff --git a/projects/clr/rocclr/compiler/lib/utils/options.hpp b/projects/clr/rocclr/compiler/lib/utils/options.hpp index 648e98aec2..c12cc27ea7 100644 --- a/projects/clr/rocclr/compiler/lib/utils/options.hpp +++ b/projects/clr/rocclr/compiler/lib/utils/options.hpp @@ -323,9 +323,9 @@ class Options { OptionDescriptor* getOptDescTable(); bool init(); bool teardown(); -bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly, bool isLC); -inline bool parseLinkOptions(std::string& options, Options& Opts, bool isLC) { - return parseAllOptions(options, Opts, true /*linkOptsOnly*/, isLC); +bool parseAllOptions(std::string& options, Options& Opts, bool linkOptsOnly); +inline bool parseLinkOptions(std::string& options, Options& Opts) { + return parseAllOptions(options, Opts, true /*linkOptsOnly*/); } diff --git a/projects/clr/rocclr/device/comgrctx.cpp b/projects/clr/rocclr/device/comgrctx.cpp index 9c8c9922b6..bcb2e9d511 100644 --- a/projects/clr/rocclr/device/comgrctx.cpp +++ b/projects/clr/rocclr/device/comgrctx.cpp @@ -18,7 +18,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#if defined(USE_COMGR_LIBRARY) #include "os/os.hpp" #include "utils/flags.hpp" #include "comgrctx.hpp" @@ -129,4 +128,3 @@ bool Comgr::LoadLib(bool is_versioned) { } } // namespace amd -#endif diff --git a/projects/clr/rocclr/device/comgrctx.hpp b/projects/clr/rocclr/device/comgrctx.hpp index 2f44f1e63a..26950c98cd 100644 --- a/projects/clr/rocclr/device/comgrctx.hpp +++ b/projects/clr/rocclr/device/comgrctx.hpp @@ -21,7 +21,6 @@ #pragma once #include -#if defined(USE_COMGR_LIBRARY) #include "top.hpp" #include "amd_comgr/amd_comgr.h" @@ -446,4 +445,3 @@ class Comgr : public amd::AllStatic { }; } // namespace amd -#endif diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index fcab448501..5d07c2dbba 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -51,12 +51,6 @@ extern void PalDeviceUnload(); #include "blowfish/oclcrypt.hpp" #endif -#if defined(WITH_COMPILER_LIB) -#include "utils/bif_section_labels.hpp" -#include "utils/libUtils.h" -#include "spirv/spirvUtils.h" -#endif - #include #include #include @@ -641,10 +635,6 @@ bool Device::BlitProgram::create(amd::Device* device, const std::string& extraKe // Build all kernels std::string opt = "-cl-internal-kernel "; - if (!device->settings().useLightning_) { - opt += "-Wf,--force_disable_spir "; - } - if (!extraOptions.empty()) { opt += extraOptions; } @@ -786,29 +776,10 @@ Device::~Device() { } bool Device::ValidateComgr() { -#if defined(USE_COMGR_LIBRARY) // Check if Lightning compiler was requested - if (settings_->useLightning_) { - constexpr bool kComgrVersioned = false; - std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned); - // Use Lightning only if it's available - settings_->useLightning_ = amd::Comgr::IsReady(); - return settings_->useLightning_; - } -#endif - return true; -} - -bool Device::ValidateHsail() { -#if defined(WITH_COMPILER_LIB) - // Check if HSAIL compiler was requested - if (!settings_->useLightning_) { - std::call_once(amd::Hsail::initialized, amd::Hsail::LoadLib); - // Use Hsail only if it's available - return amd::Hsail::IsReady(); - } -#endif - return true; + constexpr bool kComgrVersioned = false; + std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned); + return amd::Comgr::IsReady(); } size_t GetMaxStackSize(const std::string& procName) { @@ -1272,43 +1243,6 @@ bool ClBinary::setElfTarget() { return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM); } -#if defined(WITH_COMPILER_LIB) -std::string ClBinary::getBIFSymbol(unsigned int symbolID) const { - size_t nSymbols = 0; - // Due to PRE & POST defines in bif_section_labels.hpp conflict with - // PRE & POST struct members in sp3-si-chip-registers.h - // unable to include bif_section_labels.hpp in device.hpp - //! @todo: resolve conflict by renaming defines, - // then include bif_section_labels.hpp in device.hpp & - // use oclBIFSymbolID instead of unsigned int as a parameter - const oclBIFSymbolID symID = static_cast(symbolID); - switch (format_) { - case BIF_VERSION2: { - nSymbols = sizeof(BIF20) / sizeof(oclBIFSymbolStruct); - const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID); - assert(symb && "BIF20 symbol with symbolID not found"); - if (symb) { - return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]); - } - break; - } - case BIF_VERSION3: { - nSymbols = sizeof(BIF30) / sizeof(oclBIFSymbolStruct); - const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID); - assert(symb && "BIF30 symbol with symbolID not found"); - if (symb) { - return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]); - } - break; - } - default: - assert(0 && "unexpected BIF type"); - return ""; - } - return ""; -} -#endif - void ClBinary::init(amd::option::Options* optionsObj) { // option has higher priority than environment variable. if ((flags_ & BinarySourceMask) != BinaryRemoveSource) { @@ -1588,52 +1522,6 @@ bool ClBinary::loadLlvmBinary(std::string& llvmBinary, return false; } -bool ClBinary::loadCompileOptions(std::string& compileOptions) const { - char* options = nullptr; - size_t sz; - compileOptions.clear(); -#if defined(WITH_COMPILER_LIB) - if (elfIn_->getSymbol(amd::Elf::COMMENT, getBIFSymbol(symOpenclCompilerOptions).c_str(), &options, - &sz)) { - if (sz > 0) { - compileOptions.append(options, sz); - } - return true; - } -#endif - return false; -} - -bool ClBinary::loadLinkOptions(std::string& linkOptions) const { - char* options = nullptr; - size_t sz; - linkOptions.clear(); -#if defined(WITH_COMPILER_LIB) - if (elfIn_->getSymbol(amd::Elf::COMMENT, getBIFSymbol(symOpenclLinkerOptions).c_str(), &options, - &sz)) { - if (sz > 0) { - linkOptions.append(options, sz); - } - return true; - } -#endif - return false; -} - -void ClBinary::storeCompileOptions(const std::string& compileOptions) { -#if defined(WITH_COMPILER_LIB) - elfOut()->addSymbol(amd::Elf::COMMENT, getBIFSymbol(symOpenclCompilerOptions).c_str(), - compileOptions.c_str(), compileOptions.length()); -#endif -} - -void ClBinary::storeLinkOptions(const std::string& linkOptions) { -#if defined(WITH_COMPILER_LIB) - elfOut()->addSymbol(amd::Elf::COMMENT, getBIFSymbol(symOpenclLinkerOptions).c_str(), - linkOptions.c_str(), linkOptions.length()); -#endif -} - bool ClBinary::isSPIR() const { char* section = nullptr; size_t sz = 0; diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 9bb172b8e1..c569c497ec 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -34,9 +34,6 @@ #include "devprogram.hpp" #include "devkernel.hpp" #include "amdocl/cl_profile_amd.h" -#if defined(WITH_COMPILER_LIB) -#include "hsailctx.hpp" -#endif #include "devsignal.hpp" #if defined(__clang__) @@ -692,8 +689,6 @@ class Settings : public amd::HeapObject { // that replaces generic OS allocation routines uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format uint singleFpDenorm_ : 1; //!< Support Single FP Denorm - uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device - uint useLightning_ : 1; //!< Enable LC path for this device uint enableWgpMode_ : 1; //!< Enable WGP mode for this device uint enableWave32Mode_ : 1; //!< Enable Wave32 mode for this device uint lcWavefrontSize64_ : 1; //!< Enable Wave64 mode for this device @@ -705,7 +700,7 @@ class Settings : public amd::HeapObject { uint gwsInitSupported_ : 1; //!< Check if GWS is supported on this machine. uint kernel_arg_opt_ : 1; //!< Enables kernel arg optimization for blit kernels uint kernel_arg_impl_ : 2; //!< Kernel argument implementation - uint reserved_ : 12; + uint reserved_ : 14; }; uint value_; }; @@ -968,7 +963,7 @@ class Memory : public amd::HeapObject { HostMemoryRegistered = 0x00000010, //!< Host memory was registered MemoryCpuUncached = 0x00000020, //!< Memory is uncached on CPU access(slow read) AllowedPeerAccess = 0x00000040, //!< Memory can be accessed from peer - PersistentMap = 0x00000080 //!< Map Peristent memory + PersistentMap = 0x00000080 //!< Map Persistent memory }; uint flags_; //!< Memory object flags @@ -1086,22 +1081,6 @@ class ClBinary : public amd::HeapObject { amd::Elf::ElfSections& elfSectionType //!< LLVMIR binary is in SPIR format ) const; - //! Loads compile options from OCL binary file - bool loadCompileOptions(std::string& compileOptions //!< return the compile options loaded - ) const; - - //! Loads link options from OCL binary file - bool loadLinkOptions(std::string& linkOptions //!< return the link options loaded - ) const; - - //! Store compile options into OCL binary file - void storeCompileOptions(const std::string& compileOptions //!< the compile options to be stored - ); - - //! Store link options into OCL binary file - void storeLinkOptions(const std::string& linkOptions //!< the link options to be stored - ); - //! Check if the binary is recompilable bool isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform); @@ -1165,12 +1144,6 @@ class ClBinary : public amd::HeapObject { //! Returns TRUE if binary file was allocated bool isBinaryAllocated() const { return (flags_ & BinaryAllocated) ? true : false; } -#if defined(WITH_COMPILER_LIB) - //! Returns BIF symbol name by symbolID, - //! returns empty string if not found or if BIF version is unsupported - std::string getBIFSymbol(unsigned int symbolID) const; -#endif - protected: const amd::Device& dev_; //!< Device object @@ -1377,10 +1350,7 @@ class VirtualDevice : public amd::HeapObject { mutable std::atomic queued_async_handlers_ = 0; //!< Outstanding HSA async handlers }; -#if defined(USE_COMGR_LIBRARY) extern bool getValueFromIsaMeta(const std::string& isa, const char* key, std::string& retValue); -#endif - } // namespace amd::device namespace amd { @@ -1615,9 +1585,6 @@ class Isa { */ class Device : public RuntimeObject { protected: -#if defined(WITH_COMPILER_LIB) - typedef aclCompiler Compiler; -#endif public: // The structures below for MGPU launch match the device library format @@ -1692,11 +1659,6 @@ class Device : public RuntimeObject { ); }; -#if defined(WITH_COMPILER_LIB) - virtual Compiler* compiler() const = 0; - virtual Compiler* binCompiler() const { return compiler(); } -#endif - Device(); virtual ~Device(); @@ -2115,9 +2077,6 @@ class Device : public RuntimeObject { //! Checks if OCL runtime can use code object manager for compilation bool ValidateComgr(); - //! Checks if OCL runtime can use hsail for compilation - bool ValidateHsail(); - bool IpcCreate(void* dev_ptr, size_t* mem_size, char* handle, size_t* mem_offset) const; bool IpcAttach(const char* handle, size_t mem_size, size_t mem_offset, unsigned int flags, diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index e5164f246b..c031b77140 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -25,20 +25,12 @@ #include "devkernel.hpp" #include "utils/macros.hpp" #include "utils/options.hpp" -#if defined(WITH_COMPILER_LIB) -#include "utils/bif_section_labels.hpp" -#include "utils/libUtils.h" -#endif #include "comgrctx.hpp" #include #include #include -#if defined(WITH_COMPILER_LIB) -#include "hsailctx.hpp" -#endif - namespace amd::device { // ================================================================================================ @@ -51,8 +43,6 @@ static constexpr clk_value_type_t ClkValueMapType[6][6] = { {T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16}, }; -#if defined(USE_COMGR_LIBRARY) - // ================================================================================================ amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta, std::string* str) { @@ -593,7 +583,6 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k return status; } -#endif // ================================================================================================ Kernel::Kernel(const amd::Device& dev, const std::string& name, const Program& prog) @@ -674,15 +663,6 @@ bool Kernel::createSignature(const parameters_t& params, uint32_t numParameters, // ================================================================================================ Kernel::~Kernel() { delete signature_; } -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -std::string Kernel::openclMangledName(const std::string& name) { - const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel); - assert(bifSym && "symbol not found"); - return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST]; -} -#endif - // ================================================================================================ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize, amd::NDRange& lclWorkSize) const { @@ -772,300 +752,6 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize, } // ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* isHidden) { - if (argInfo->argStr[0] == '_' && argInfo->argStr[1] == '.') { - *isHidden = true; - if (strcmp(&argInfo->argStr[2], "global_offset_0") == 0) { - return amd::KernelParameterDescriptor::HiddenGlobalOffsetX; - } else if (strcmp(&argInfo->argStr[2], "global_offset_1") == 0) { - return amd::KernelParameterDescriptor::HiddenGlobalOffsetY; - } else if (strcmp(&argInfo->argStr[2], "global_offset_2") == 0) { - return amd::KernelParameterDescriptor::HiddenGlobalOffsetZ; - } else if (strcmp(&argInfo->argStr[2], "printf_buffer") == 0) { - return amd::KernelParameterDescriptor::HiddenPrintfBuffer; - } else if (strcmp(&argInfo->argStr[2], "hostcall_buffer") == 0) { - return amd::KernelParameterDescriptor::HiddenHostcallBuffer; - } else if (strcmp(&argInfo->argStr[2], "vqueue_pointer") == 0) { - return amd::KernelParameterDescriptor::HiddenDefaultQueue; - } else if (strcmp(&argInfo->argStr[2], "aqlwrap_pointer") == 0) { - return amd::KernelParameterDescriptor::HiddenCompletionAction; - } - return amd::KernelParameterDescriptor::HiddenNone; - } - switch (argInfo->type) { - case ARG_TYPE_POINTER: - return amd::KernelParameterDescriptor::MemoryObject; - case ARG_TYPE_QUEUE: - return amd::KernelParameterDescriptor::QueueObject; - case ARG_TYPE_VALUE: - return (argInfo->arg.value.data == DATATYPE_struct) - ? amd::KernelParameterDescriptor::ReferenceObject - : amd::KernelParameterDescriptor::ValueObject; - case ARG_TYPE_IMAGE: - return amd::KernelParameterDescriptor::ImageObject; - case ARG_TYPE_SAMPLER: - return amd::KernelParameterDescriptor::SamplerObject; - case ARG_TYPE_ERROR: - default: - return amd::KernelParameterDescriptor::HiddenNone; - } -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t size = 0) { - uint sizeType; - uint numElements; - if (argInfo->type == ARG_TYPE_QUEUE) { - return T_QUEUE; - } else if (argInfo->type == ARG_TYPE_POINTER || argInfo->type == ARG_TYPE_IMAGE) { - return T_POINTER; - } else if (argInfo->type == ARG_TYPE_VALUE) { - switch (argInfo->arg.value.data) { - case DATATYPE_i8: - case DATATYPE_u8: - sizeType = 0; - numElements = size; - break; - case DATATYPE_i16: - case DATATYPE_u16: - sizeType = 1; - numElements = size / 2; - break; - case DATATYPE_i32: - case DATATYPE_u32: - sizeType = 2; - numElements = size / 4; - break; - case DATATYPE_i64: - case DATATYPE_u64: - sizeType = 3; - numElements = size / 8; - break; - case DATATYPE_f16: - sizeType = 4; - numElements = size / 2; - break; - case DATATYPE_f32: - sizeType = 4; - numElements = size / 4; - break; - case DATATYPE_f64: - sizeType = 5; - numElements = size / 8; - break; - case DATATYPE_struct: - case DATATYPE_opaque: - case DATATYPE_ERROR: - default: - return T_VOID; - } - - switch (numElements) { - case 1: - return ClkValueMapType[sizeType][0]; - case 2: - return ClkValueMapType[sizeType][1]; - case 3: - return ClkValueMapType[sizeType][2]; - case 4: - return ClkValueMapType[sizeType][3]; - case 8: - return ClkValueMapType[sizeType][4]; - case 16: - return ClkValueMapType[sizeType][5]; - default: - return T_VOID; - } - } else if (argInfo->type == ARG_TYPE_SAMPLER) { - return T_SAMPLER; - } else { - return T_VOID; - } -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) { - switch (argInfo->type) { - case ARG_TYPE_POINTER: - return sizeof(void*); - case ARG_TYPE_VALUE: - switch (argInfo->arg.value.data) { - case DATATYPE_i8: - case DATATYPE_u8: - return 1; - case DATATYPE_u16: - case DATATYPE_i16: - case DATATYPE_f16: - return 2; - case DATATYPE_u32: - case DATATYPE_i32: - case DATATYPE_f32: - return 4; - case DATATYPE_i64: - case DATATYPE_u64: - case DATATYPE_f64: - return 8; - case DATATYPE_struct: - return 128; - case DATATYPE_ERROR: - default: - return -1; - } - case ARG_TYPE_IMAGE: - return sizeof(cl_mem); - case ARG_TYPE_SAMPLER: - return sizeof(cl_sampler); - default: - return -1; - } -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) { - if (argInfo->type == ARG_TYPE_POINTER) { - return argInfo->arg.pointer.align; - } - return 1; -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline bool GetReadOnlyOCL(const aclArgData* argInfo) { - if (argInfo->type == ARG_TYPE_POINTER) { - return (argInfo->arg.pointer.type == ACCESS_TYPE_RO) ? true : false; - } else if (argInfo->type == ARG_TYPE_IMAGE) { - return (argInfo->arg.image.type == ACCESS_TYPE_RO) ? true : false; - } - return false; -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -inline static int GetArgSizeOCL(const aclArgData* argInfo) { - switch (argInfo->type) { - case ARG_TYPE_POINTER: - return sizeof(void*); - case ARG_TYPE_VALUE: - switch (argInfo->arg.value.data) { - case DATATYPE_i8: - case DATATYPE_u8: - case DATATYPE_struct: - return 1 * argInfo->arg.value.numElements; - case DATATYPE_u16: - case DATATYPE_i16: - case DATATYPE_f16: - return 2 * argInfo->arg.value.numElements; - case DATATYPE_u32: - case DATATYPE_i32: - case DATATYPE_f32: - return 4 * argInfo->arg.value.numElements; - case DATATYPE_i64: - case DATATYPE_u64: - case DATATYPE_f64: - return 8 * argInfo->arg.value.numElements; - case DATATYPE_ERROR: - default: - return -1; - } - case ARG_TYPE_IMAGE: - case ARG_TYPE_SAMPLER: - case ARG_TYPE_QUEUE: - return sizeof(void*); - default: - return -1; - } -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData* argInfo) { - if (argInfo->type == ARG_TYPE_POINTER) { - switch (argInfo->arg.pointer.memory) { - case PTR_MT_UAV_CONSTANT: - case PTR_MT_CONSTANT_EMU: - case PTR_MT_CONSTANT: - return CL_KERNEL_ARG_ADDRESS_CONSTANT; - case PTR_MT_UAV: - case PTR_MT_GLOBAL: - case PTR_MT_SCRATCH_EMU: - return CL_KERNEL_ARG_ADDRESS_GLOBAL; - case PTR_MT_LDS_EMU: - case PTR_MT_LDS: - return CL_KERNEL_ARG_ADDRESS_LOCAL; - case PTR_MT_ERROR: - default: - LogError("Unsupported address type"); - return CL_KERNEL_ARG_ADDRESS_PRIVATE; - } - } else if ((argInfo->type == ARG_TYPE_IMAGE) || (argInfo->type == ARG_TYPE_QUEUE)) { - return CL_KERNEL_ARG_ADDRESS_GLOBAL; - } - - // default for all other cases - return CL_KERNEL_ARG_ADDRESS_PRIVATE; -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgData* argInfo) { - if (argInfo->type == ARG_TYPE_IMAGE) { - switch (argInfo->arg.image.type) { - case ACCESS_TYPE_RO: - return CL_KERNEL_ARG_ACCESS_READ_ONLY; - case ACCESS_TYPE_WO: - return CL_KERNEL_ARG_ACCESS_WRITE_ONLY; - default: - return CL_KERNEL_ARG_ACCESS_READ_WRITE; - } - } - return CL_KERNEL_ARG_ACCESS_NONE; -} -#endif - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* argInfo) { - cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE; - if (argInfo->type == ARG_TYPE_POINTER) { - if (argInfo->arg.pointer.isVolatile) { - rv |= CL_KERNEL_ARG_TYPE_VOLATILE; - } - if (argInfo->arg.pointer.isRestrict) { - rv |= CL_KERNEL_ARG_TYPE_RESTRICT; - } - if (argInfo->arg.pointer.isPipe) { - rv |= CL_KERNEL_ARG_TYPE_PIPE; - } - if (argInfo->isConst) { - rv |= CL_KERNEL_ARG_TYPE_CONST; - } - switch (argInfo->arg.pointer.memory) { - case PTR_MT_CONSTANT: - case PTR_MT_UAV_CONSTANT: - case PTR_MT_CONSTANT_EMU: - rv |= CL_KERNEL_ARG_TYPE_CONST; - break; - default: - break; - } - } - return rv; -} -#endif - -// ================================================================================================ -#if defined(USE_COMGR_LIBRARY) bool Kernel::GetAttrCodePropMetadata() { amd_comgr_metadata_node_t kernelMetaNode; if (!prog().getKernelMetadata(name(), &kernelMetaNode)) { @@ -1313,89 +999,8 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) { params.insert(params.end(), hiddenParams.begin(), hiddenParams.end()); createSignature(params, numParams, amd::KernelSignature::ABIVersion_2); } -#endif // defined(USE_COMGR_LIBRARY) // ================================================================================================ -#if defined(WITH_COMPILER_LIB) -void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) { - // Iterate through the arguments and insert into parameterList - device::Kernel::parameters_t params; - device::Kernel::parameters_t hiddenParams; - amd::KernelParameterDescriptor desc; - size_t offset = 0; - size_t offsetStruct = argBufferSize; - - for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) { - size_t size = GetArgSizeOCL(aclArg); - size_t alignment = GetArgAlignmentOCL(aclArg); - bool isHidden = false; - desc.info_.oclObject_ = GetOclArgumentTypeOCL(aclArg, &isHidden); - - // Allocate the hidden arguments, but abstraction layer will skip them - if (isHidden) { - offset = amd::alignUp(offset, alignment); - desc.offset_ = offset; - desc.size_ = size; - offset += size; - hiddenParams.push_back(desc); - continue; - } - - desc.name_ = aclArg->argStr; - desc.typeName_ = aclArg->typeStr; - desc.type_ = GetOclTypeOCL(aclArg, size); - - desc.addressQualifier_ = GetOclAddrQualOCL(aclArg); - desc.accessQualifier_ = GetOclAccessQualOCL(aclArg); - desc.typeQualifier_ = GetOclTypeQualOCL(aclArg); - desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(aclArg); - desc.size_ = size; - - // Check if HSAIL expects data by reference and allocate it behind - if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) { - desc.offset_ = offsetStruct; - // Align the offset reference - offset = amd::alignUp(offset, sizeof(size_t)); - patchReferences_.insert({desc.offset_, offset}); - offsetStruct += size; - // Adjust the offset of arguments - offset += sizeof(size_t); - } else { - // These objects have forced data size to uint64_t - if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) || - (desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) || - (desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) { - offset = amd::alignUp(offset, sizeof(uint64_t)); - desc.offset_ = offset; - offset += sizeof(uint64_t); - } else { - offset = amd::alignUp(offset, alignment); - desc.offset_ = offset; - offset += size; - } - } - // Update read only flag - desc.info_.readOnly_ = GetReadOnlyOCL(aclArg); - - params.push_back(desc); - - if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) { - flags_.imageEna_ = true; - if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) { - flags_.imageWriteEna_ = true; - } - } - } - // Save the number of OCL arguments - uint32_t numParams = params.size(); - // Append the hidden arguments to the OCL arguments - params.insert(params.end(), hiddenParams.begin(), hiddenParams.end()); - createSignature(params, numParams, amd::KernelSignature::ABIVersion_1); -} -#endif - -// ================================================================================================ -#if defined(USE_COMGR_LIBRARY) void Kernel::InitPrintf(const std::vector& printfInfoStrings) { size_t HIPPrintfInfoID = 0; for (auto str : printfInfoStrings) { @@ -1501,76 +1106,4 @@ void Kernel::InitPrintf(const std::vector& printfInfoStrings) { // ] } } -#endif // defined(USE_COMGR_LIBRARY) - -// ================================================================================================ -#if defined(WITH_COMPILER_LIB) -void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) { - uint index = 0, HIPIndex = 0; - for (; aclPrintf->struct_size != 0; aclPrintf++) { - if (amd::IS_HIP) { - index = HIPIndex++; - printf_.resize(HIPIndex); - } else { - index = aclPrintf->ID; - if (printf_.size() <= index) { - printf_.resize(index + 1); - } - } - - PrintfInfo& info = printf_[index]; - const std::string& pfmt = aclPrintf->fmtStr; - bool need_nl = true; - for (size_t pos = 0; pos < pfmt.size(); ++pos) { - char symbol = pfmt[pos]; - need_nl = true; - if (symbol == '\\') { - switch (pfmt[pos + 1]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - need_nl = false; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (pfmt[pos + 2] == '2') { - pos += 2; - symbol = '\72'; - } - break; - default: - break; - } - } - info.fmtString_.push_back(symbol); - } - if (need_nl && !amd::IS_HIP) { - info.fmtString_ += "\n"; - } - uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); - for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { - info.arguments_.push_back(*tmp_ptr); - } - } -} -#endif // defined(WITH_COMPILER_LIB) } // namespace amd::device diff --git a/projects/clr/rocclr/device/devkernel.hpp b/projects/clr/rocclr/device/devkernel.hpp index c9b724af41..3640173d00 100644 --- a/projects/clr/rocclr/device/devkernel.hpp +++ b/projects/clr/rocclr/device/devkernel.hpp @@ -20,9 +20,6 @@ #pragma once -#if defined(WITH_COMPILER_LIB) -#include "aclTypes.h" -#endif #include "platform/context.hpp" #include "platform/object.hpp" #include "platform/memory.hpp" @@ -98,7 +95,6 @@ struct KernelParameterDescriptor { }; } // namespace amd -#if defined(USE_COMGR_LIBRARY) //! Runtime handle structure for device enqueue struct RuntimeHandle { uint64_t kernel_handle; //!< Pointer to amd_kernel_code_s or kernel_descriptor_t @@ -174,8 +170,6 @@ enum class KernelField : uint8_t { MaxSize = 18 }; -#endif // defined(USE_COMGR_LIBRARY) - namespace amd { namespace hsa { namespace loader { @@ -290,10 +284,6 @@ class Kernel : public amd::HeapObject { //! Return the build log const std::string& buildLog() const { return buildLog_; } -#if defined(WITH_COMPILER_LIB) - static std::string openclMangledName(const std::string& name); -#endif - const std::unordered_map& patch() const { return patchReferences_; } //! Returns TRUE if kernel uses dynamic parallelism @@ -356,7 +346,6 @@ class Kernel : public amd::HeapObject { protected: //! Initializes the abstraction layer kernel parameters -#if defined(USE_COMGR_LIBRARY) void InitParameters(const amd_comgr_metadata_node_t kernelMD); //! Retrieve kernel attribute and code properties metadata @@ -372,13 +361,7 @@ class Kernel : public amd::HeapObject { const uint32_t codeObjectVer() const { return prog().codeObjectVer(); } //! Initializes HSAIL Printf metadata and info for LC void InitPrintf(const std::vector& printfInfoStrings); -#endif -#if defined(WITH_COMPILER_LIB) - void InitParameters(const aclArgData* aclArg, //!< List of ACL arguments - uint32_t argBufferSize); - //! Initializes HSAIL Printf metadata and info - void InitPrintf(const aclPrintfFmt* aclPrintf); -#endif + //! Returns program associated with this kernel const Program& prog() const { return prog_; } @@ -426,7 +409,5 @@ class Kernel : public amd::HeapObject { KernelKind kind_{Normal}; //!< Kernel kind, is normal unless specified otherwise }; -#if defined(USE_COMGR_LIBRARY) amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta, std::string* str); -#endif // defined(USE_COMGR_LIBRARY) } // namespace amd::device diff --git a/projects/clr/rocclr/device/devprogram.cpp b/projects/clr/rocclr/device/devprogram.cpp index 0ed6f064be..8a8b72effe 100644 --- a/projects/clr/rocclr/device/devprogram.cpp +++ b/projects/clr/rocclr/device/devprogram.cpp @@ -27,10 +27,6 @@ #include "devkernel.hpp" #include "utils/macros.hpp" #include "utils/options.hpp" -#if defined(WITH_COMPILER_LIB) -#include "utils/bif_section_labels.hpp" -#include "utils/libUtils.h" -#endif #include "comgrctx.hpp" #include @@ -48,18 +44,8 @@ #include #endif // defined(ATI_OS_LINUX) -#if defined(WITH_COMPILER_LIB) -#include "spirv/spirvUtils.h" -#include "hsailctx.hpp" -#endif - namespace amd::device { -// TODO: Can this be unified with the copies in: -// runtime/device/pal/palprogram.cpp, runtime/device/gpu/gpuprogram.cpp, -// compiler/lib/utils/v0_8/libUtils.h, compiler/lib/backends/gpu/hsail_be.cpp, -// compiler/legacy-lib/utils/v0_8/libUtils.h, -// and compiler/legacy-lib/backends/gpu/hsail_be.cpp ? inline static std::vector splitSpaceSeparatedString(const char* str) { std::string s(str); std::stringstream ss(s); @@ -68,11 +54,6 @@ inline static std::vector splitSpaceSeparatedString(const char* str return vec; } -#if defined(WITH_COMPILER_LIB) -// HSAIL build lock -amd::Monitor Program::buildLock_(true); -#endif - // ================================================================================================ Program::Program(amd::Device& device, amd::Program& owner) : device_(device), @@ -86,36 +67,19 @@ Program::Program(amd::Device& device, amd::Program& owner) elfSectionType_(amd::Elf::LLVMIR), compileOptions_(), linkOptions_(), -#if defined(WITH_COMPILER_LIB) - binaryElf_(nullptr), -#endif lastBuildOptionsArg_(), buildStatus_(CL_BUILD_NONE), buildError_(CL_SUCCESS), globalVariableTotalSize_(0), - programOptions_(nullptr) { -#if defined(WITH_COMPILER_LIB) - memset(&binOpts_, 0, sizeof(binOpts_)); - binOpts_.struct_size = sizeof(binOpts_); - binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); - binOpts_.bitness = ELFDATA2LSB; - binOpts_.alloc = &::malloc; - binOpts_.dealloc = &::free; -#endif -} + programOptions_(nullptr) {} // ================================================================================================ Program::~Program() { clear(); - - if (isLC()) { -#if defined(USE_COMGR_LIBRARY) - for (auto const& kernelMeta : kernelMetadataMap_) { - amd::Comgr::destroy_metadata(kernelMeta.second); - } - amd::Comgr::destroy_metadata(metadata_); -#endif + for (auto const& kernelMeta : kernelMetadataMap_) { + amd::Comgr::destroy_metadata(kernelMeta.second); } + amd::Comgr::destroy_metadata(metadata_); } // ================================================================================================ @@ -130,19 +94,7 @@ void Program::clear() { } // ================================================================================================ -bool Program::compileImpl(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options) { - if (isLC()) { - return compileImplLC(sourceCode, headers, headerIncludeNames, options); - } else { - return compileImplHSAIL(sourceCode, headers, headerIncludeNames, options); - } -} -// ================================================================================================ - -#if defined(USE_COMGR_LIBRARY) // If buildLog is not null, and dataSet contains a log object, extract the // first log data object from dataSet and process it with // extractByteCodeBinary. @@ -549,7 +501,6 @@ bool Program::compileAndLinkExecutable(const amd_comgr_data_set_t inputs, return (status == AMD_COMGR_STATUS_SUCCESS); } -#endif // defined(USE_COMGR_LIBRARY) static std::size_t getOCLSourceHash(const std::string& sourceCode) { return std::hash()(sourceCode); @@ -561,10 +512,9 @@ static std::size_t getOCLOptionsHash(const amd::option::Options& options) { return std::hash()(opts); } -bool Program::compileImplLC(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options) { -#if defined(USE_COMGR_LIBRARY) +bool Program::compileImpl(const std::string& sourceCode, + const std::vector& headers, + const char** headerIncludeNames, amd::option::Options* options) { const char* xLang = options->oVariables->XLang; if (xLang != nullptr) { if (strcmp(xLang, "asm") == 0) { @@ -694,8 +644,7 @@ bool Program::compileImplLC(const std::string& sourceCode, } if (clBinary()->saveLLVMIR()) { clBinary()->elfOut()->addSection(amd::Elf::LLVMIR, llvmBinary_.data(), llvmBinary_.size()); - // store the original compile options - clBinary()->storeCompileOptions(compileOptions_); + compileOptions_.clear(); } } else { buildLog_ += "Error: Failed to compile source (from CL or HIP source to LLVM IR).\n"; @@ -703,143 +652,11 @@ bool Program::compileImplLC(const std::string& sourceCode, amd::Comgr::destroy_data_set(inputs); return ret; -#else // defined(USE_COMGR_LIBRARY) - return false; -#endif // defined(USE_COMGR_LIBRARY) -} - - -// ================================================================================================ - -#if defined(WITH_COMPILER_LIB) -static void logFunction(const char* msg, size_t size) { - std::cout << "Compiler Log: " << msg << std::endl; -} -#endif - -// ================================================================================================ -bool Program::compileImplHSAIL(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options) { -#if defined(WITH_COMPILER_LIB) - amd::ScopedLock sl(&buildLock_); - - acl_error errorCode; - aclTargetInfo target; - - const char* arch = LP64_SWITCH("hsail", "hsail64"); - const char* hsailName = device().isa().hsailName(); - if (!hsailName) { - // HSAIL compiler does not support device's ISA. - LogPrintfError("HSAIL compiler does not support %s", device().isa().targetId()); - return false; - } - target = amd::Hsail::GetTargetInfo(arch, hsailName, &errorCode); - - // end if asic info is ready - // We dump the source code for each program (param: headers) - // into their filenames (headerIncludeNames) into the TEMP - // folder specific to the OS and add the include path while - // compiling - - // Find the temp folder for the OS - std::string tempFolder = amd::Os::getTempPath(); - - // Iterate through each source code and dump it into tmp - std::fstream f; - std::vector newDirs; - for (size_t i = 0; i < headers.size(); ++i) { - std::string headerPath = tempFolder; - std::string headerIncludeName(headerIncludeNames[i]); - // replace / in path with current os's file separator - if (amd::Os::fileSeparator() != '/') { - for (auto& it : headerIncludeName) { - if (it == '/') it = amd::Os::fileSeparator(); - } - } - size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); - if (pos != std::string::npos) { - headerPath += amd::Os::fileSeparator(); - headerPath += headerIncludeName.substr(0, pos); - headerIncludeName = headerIncludeName.substr(pos + 1); - } - if (!amd::Os::pathExists(headerPath)) { - bool ret = amd::Os::createPath(headerPath); - assert(ret && "failed creating path!"); - newDirs.push_back(headerPath); - } - std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName; - f.open(headerFullName.c_str(), std::fstream::out); - // Should we allow asserts - assert(!f.fail() && "failed creating header file!"); - f.write(headers[i]->c_str(), headers[i]->length()); - f.close(); - } - - // Create Binary - binaryElf_ = amd::Hsail::BinaryInit(sizeof(aclBinary), &target, &binOpts_, &errorCode); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error: aclBinary init failure\n"; - LogWarning("aclBinaryInit failed"); - return false; - } - - // Insert opencl into binary - errorCode = amd::Hsail::InsertSection(device().compiler(), binaryElf_, sourceCode.c_str(), - strlen(sourceCode.c_str()), aclSOURCE); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error: Inserting openCl Source to binary\n"; - } - - // Set the options for the compiler - // Set the include path for the temp folder that contains the includes - if (!headers.empty()) { - compileOptions_.append(" -I"); - compileOptions_.append(tempFolder); - } - -#if !defined(_LP64) && defined(ATI_OS_LINUX) - if (options->origOptionStr.find("-cl-std=CL2.0") != std::string::npos) { - errorCode = ACL_UNSUPPORTED; - LogWarning("aclCompile failed"); - return false; - } -#endif - - // Compile source to IR - compileOptions_.append(ProcessOptionsFlattened(options)); - errorCode = - amd::Hsail::Compile(device().compiler(), binaryElf_, compileOptions_.c_str(), ACL_TYPE_OPENCL, - ACL_TYPE_LLVMIR_BINARY, nullptr /* logFunction */); - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - if (errorCode != ACL_SUCCESS) { - LogWarning("aclCompile failed"); - buildLog_ += "Error: Compiling CL to IR\n"; - return false; - } - - clBinary()->storeCompileOptions(compileOptions_); - - // Save the binary in the interface class - saveBinaryAndSetType(TYPE_COMPILED); -#endif // defined(WITH_COMPILER_LIB) - return true; } // ================================================================================================ -bool Program::linkImpl(const std::vector& inputPrograms, - amd::option::Options* options, bool createLibrary) { - if (isLC()) { - return linkImplLC(inputPrograms, options, createLibrary); - } else { - return linkImplHSAIL(inputPrograms, options, createLibrary); - } -} - -// ================================================================================================ -bool Program::linkImplLC(const std::vector& inputPrograms, amd::option::Options* options, - bool createLibrary) { -#if defined(USE_COMGR_LIBRARY) +bool Program::linkImpl(const std::vector& inputPrograms, amd::option::Options* options, + bool createLibrary) { amd_comgr_data_set_t inputs; if (amd::Comgr::create_data_set(&inputs) != AMD_COMGR_STATUS_SUCCESS) { @@ -920,10 +737,6 @@ bool Program::linkImplLC(const std::vector& inputPrograms, amd::option if (clBinary()->saveLLVMIR()) { clBinary()->elfOut()->addSection(amd::Elf::LLVMIR, llvmBinary_.data(), llvmBinary_.size()); - // store the original link options - clBinary()->storeLinkOptions(linkOptions_); - // store the original compile options - clBinary()->storeCompileOptions(compileOptions_); } // skip the rest if we are building an opencl library @@ -937,114 +750,9 @@ bool Program::linkImplLC(const std::vector& inputPrograms, amd::option } return linkImpl(options); -#else // defined(USE_COMGR_LIBRARY) - return false; -#endif // defined(USE_COMGR_LIBRARY) } // ================================================================================================ -bool Program::linkImplHSAIL(const std::vector& inputPrograms, - amd::option::Options* options, bool createLibrary) { -#if defined(WITH_COMPILER_LIB) - amd::ScopedLock sl(&buildLock_); - - acl_error errorCode; - - // For each program we need to extract the LLVMIR and create - // aclBinary for each - std::vector binaries_to_link; - - for (auto program : inputPrograms) { - // Check if the program was created with clCreateProgramWIthBinary - binary_t binary = program->binary(); - if ((binary.first != nullptr) && (binary.second > 0)) { - // Binary already exists -- we can also check if there is no - // opencl source code - // Need to check if LLVMIR exists in the binary - // If LLVMIR does not exist then is it valid - // We need to pull out all the compiled kernels - // We cannot do this at present because we need at least - // Hsail text to pull the kernels oout - void* mem = const_cast(binary.first); - binaryElf_ = amd::Hsail::ReadFromMem(mem, binary.second, &errorCode); - if (errorCode != ACL_SUCCESS) { - LogWarning("Error while linking : Could not read from raw binary"); - return false; - } - } - - // At this stage each Program contains a valid binary_elf - // Check if LLVMIR is in the binary - size_t boolSize = sizeof(bool); - bool containsLLLVMIR = false; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LLVMIR, nullptr, - &containsLLLVMIR, &boolSize); - - if (errorCode != ACL_SUCCESS || !containsLLLVMIR) { - bool spirv = false; - size_t boolSize = sizeof(bool); - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_SPIRV, nullptr, - &spirv, &boolSize); - if (errorCode != ACL_SUCCESS) { - spirv = false; - } - if (spirv) { - errorCode = - amd::Hsail::Compile(device().compiler(), binaryElf_, options->origOptionStr.c_str(), - ACL_TYPE_SPIRV_BINARY, ACL_TYPE_LLVMIR_BINARY, nullptr); - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error while linking: Could not load SPIR-V"; - return false; - } - } else { - buildLog_ += "Error while linking : Invalid binary (Missing LLVMIR section)"; - return false; - } - } - // Create a new aclBinary for each LLVMIR and save it in a list - aclBIFVersion ver = amd::Hsail::BinaryVersion(binaryElf_); - aclBinary* bin = amd::Hsail::CreateFromBinary(binaryElf_, ver); - binaries_to_link.push_back(bin); - } - - errorCode = - amd::Hsail::Link(device().compiler(), binaries_to_link[0], binaries_to_link.size() - 1, - binaries_to_link.size() > 1 ? &binaries_to_link[1] : nullptr, - ACL_TYPE_LLVMIR_BINARY, "-create-library", nullptr); - if (errorCode != ACL_SUCCESS) { - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - buildLog_ += "Error while linking : aclLink failed"; - return false; - } - // Store the newly linked aclBinary for this program. - binaryElf_ = binaries_to_link[0]; - // Free all the other aclBinaries - for (size_t i = 1; i < binaries_to_link.size(); i++) { - amd::Hsail::BinaryFini(binaries_to_link[i]); - } - if (createLibrary) { - saveBinaryAndSetType(TYPE_LIBRARY); - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - return true; - } - - // Now call linkImpl with the new options - return linkImpl(options); -#else - return false; -#endif // defined(WITH_COMPILER_LIB) -} - -// ================================================================================================ -bool Program::linkImpl(amd::option::Options* options) { - if (isLC()) { - return linkImplLC(options); - } else { - return linkImplHSAIL(options); - } -} - static void dumpCodeObject(const std::string& image) { char fname[30]; static std::atomic index; @@ -1057,8 +765,7 @@ static void dumpCodeObject(const std::string& image) { } // ================================================================================================ -bool Program::linkImplLC(amd::option::Options* options) { -#if defined(USE_COMGR_LIBRARY) +bool Program::linkImpl(amd::option::Options* options) { file_type_t continueCompileFrom = FILE_TYPE_LLVMIR_BINARY; internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false; @@ -1232,111 +939,6 @@ bool Program::linkImplLC(amd::option::Options* options) { setType(TYPE_EXECUTABLE); return true; -#else // defined(USE_COMGR_LIBRARY) - return false; -#endif // defined(USE_COMGR_LIBRARY) -} - - -// ================================================================================================ -bool Program::linkImplHSAIL(amd::option::Options* options) { -#if defined(WITH_COMPILER_LIB) - amd::ScopedLock sl(&buildLock_); - - acl_error errorCode; - bool finalize = true; - internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false; - // If !binaryElf_ then program must have been created using clCreateProgramWithBinary - aclType continueCompileFrom = - (!binaryElf_) ? static_cast(getNextCompilationStageFromBinary(options)) - : ACL_TYPE_LLVMIR_BINARY; - - switch (continueCompileFrom) { - case ACL_TYPE_SPIRV_BINARY: - case ACL_TYPE_SPIR_BINARY: - // Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases: - // 1. if the program is not created with binary; - // 2. if the program is created with binary and contains only .llvmir & .comment - // 3. if the program is created with binary, contains .llvmir, .comment, brig sections, - // but the binary's compile & link options differ from current ones (recompilation); - case ACL_TYPE_LLVMIR_BINARY: - // Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases: - // 1. if the program is created with binary and contains only brig sections - case ACL_TYPE_HSAIL_BINARY: - // Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases: - // 1. if the program is created with binary and contains only hsail text - case ACL_TYPE_HSAIL_TEXT: { - std::string curOptions = options->origOptionStr + ProcessOptionsFlattened(options); - errorCode = amd::Hsail::Compile(device().compiler(), binaryElf_, curOptions.c_str(), - continueCompileFrom, ACL_TYPE_CG, logFunction); - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error while BRIG Codegen phase: compilation error \n"; - return false; - } - break; - } - case ACL_TYPE_CG: - break; - case ACL_TYPE_ISA: - finalize = false; - break; - default: - buildLog_ += "Error while BRIG Codegen phase: the binary is incomplete \n"; - return false; - } - - if (finalize) { - std::string fin_options(options->origOptionStr + ProcessOptionsFlattened(options)); - // Append an option so that we can selectively enable a SCOption on CZ - // whenever IOMMUv2 is enabled. - if (device().isFineGrainedSystem(true)) { - fin_options.append(" -sc-xnack-iommu"); - } - - if (device().settings().enableWave32Mode_) { - fin_options.append(" -force-wave-size-32"); - } - - if (device().settings().enableWgpMode_) { - fin_options.append(" -force-wgp-mode"); - } - - if (device().settings().hsailExplicitXnack_) { - fin_options.append(" -xnack"); - } - - errorCode = amd::Hsail::Compile(device().compiler(), binaryElf_, fin_options.c_str(), - ACL_TYPE_CG, ACL_TYPE_ISA, logFunction); - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error: BRIG finalization to ISA failed.\n"; - return false; - } - } - - size_t binSize; - void* binary = const_cast( - amd::Hsail::ExtractSection(device().compiler(), binaryElf_, &binSize, aclTEXT, &errorCode)); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error: cannot extract ISA from compiled binary.\n"; - return false; - } - - // Call the device layer to setup all available kernels on the actual device - if (!createKernels(binary, binSize, options->oVariables->UniformWorkGroupSize, internal_)) { - buildLog_ += "Error: Cannot create kernel.\n"; - return false; - } - - // Save the binary in the interface class - saveBinaryAndSetType(TYPE_EXECUTABLE); - buildLog_ += amd::Hsail::GetCompilerLog(device().compiler()); - - return true; -#else - return false; -#endif // defined(WITH_COMPILER_LIB) } // ================================================================================================ @@ -1521,7 +1123,7 @@ int32_t Program::link(const std::vector& inputPrograms, const char* or buildLog_ += "Internal error: Get compile options failed."; } } else { - if (!amd::option::parseAllOptions(compileOptions_, options, false, isLC())) { + if (!amd::option::parseAllOptions(compileOptions_, options, false)) { buildStatus_ = CL_BUILD_ERROR; buildLog_ += options.optionsLog(); LogError("Parsing compile options failed."); @@ -1791,79 +1393,23 @@ int32_t Program::build(const std::string& sourceCode, const char* origOptions, return buildError(); } -// ================================================================================================ -bool Program::loadHSAIL() { -#if defined(WITH_COMPILER_LIB) - amd::ScopedLock sl(&buildLock_); - - acl_error errorCode; - size_t binSize; - void* bin = const_cast( - amd::Hsail::ExtractSection(device().compiler(), binaryElf_, &binSize, aclTEXT, &errorCode)); - if (errorCode != ACL_SUCCESS) { - LogError("Error: cannot extract ISA from compiled binary."); - return false; - } - // Call the device layer to setup all available kernels on the actual device - return setKernels(bin, binSize); -#else - return false; -#endif -} - -// ================================================================================================ -bool Program::loadLC() { -#if defined(USE_COMGR_LIBRARY) - return setKernels(const_cast(binary().first), binary().second, BinaryFd().first, - BinaryFd().second, BinaryURI()); -#else - return false; -#endif -} - // ================================================================================================ bool Program::load() { - bool ret; - if (isLC()) { - ret = loadLC(); - } else { - ret = loadHSAIL(); - } - if (ret) { - coLoaded_ = 1; - } - return ret; + coLoaded_ = setKernels(const_cast(binary().first), binary().second, BinaryFd().first, + BinaryFd().second, BinaryURI()); + return coLoaded_; } // ================================================================================================ std::vector Program::ProcessOptions(amd::option::Options* options) { std::vector optionsVec; - if (!isLC()) { - optionsVec.push_back("-D__AMD__=1"); - - std::string processorName = device().isa().processorName(); - const char* hsailName = device().isa().hsailName(); - - optionsVec.push_back(std::string("-D__") + processorName + "__=1"); - optionsVec.push_back(std::string("-D__") + processorName + "=1"); - if (hsailName && (strcmp(hsailName, processorName.c_str()) != 0)) { - optionsVec.push_back(std::string("-D__") + hsailName + "__=1"); - optionsVec.push_back(std::string("-D__") + hsailName + "=1"); - } - - // Set options for the standard device specific options - // All our devices support these options now - optionsVec.push_back("-DFP_FAST_FMAF=1"); - optionsVec.push_back("-DFP_FAST_FMA=1"); - } else { - if (!isHIP()) { - int major, minor; - ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor); - std::stringstream ss; - ss << "-D__OPENCL_VERSION__=" << (major * 100 + minor * 10); - optionsVec.push_back(ss.str()); - } + if (!isHIP()) { + int major, minor; + ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor); + std::stringstream ss; + ss << "-D__OPENCL_VERSION__=" << (major * 100 + minor * 10); + optionsVec.push_back(ss.str()); } if (!isHIP()) { @@ -1884,36 +1430,21 @@ std::vector Program::ProcessOptions(amd::option::Options* options) options->oVariables->UniformWorkGroupSize = true; } - if (!device().settings().useLightning_) { - if (!device().settings().singleFpDenorm_) { - optionsVec.push_back("-cl-denorms-are-zero"); - } - - // Check if the host is 64 bit or 32 bit - LP64_ONLY(optionsVec.push_back("-m64")); - } - // Tokenize the extensions string into a vector of strings std::istringstream istrstr(device().info().extensions_); std::istream_iterator sit(istrstr), end; std::vector extensions(sit, end); - if (isLC()) { - if (!extensions.empty()) { - std::ostringstream clext; + if (!extensions.empty()) { + std::ostringstream clext; - clext << "-cl-ext=+"; - std::copy(extensions.begin(), extensions.end() - 1, - std::ostream_iterator(clext, ",+")); - clext << extensions.back(); + clext << "-cl-ext=+"; + std::copy(extensions.begin(), extensions.end() - 1, + std::ostream_iterator(clext, ",+")); + clext << extensions.back(); - optionsVec.push_back("-Xclang"); - optionsVec.push_back(clext.str()); - } - } else { - for (auto e : extensions) { - optionsVec.push_back(std::string("-D") + e + "=1"); - } + optionsVec.push_back("-Xclang"); + optionsVec.push_back(clext.str()); } } @@ -1940,8 +1471,7 @@ bool Program::getCompileOptionsAtLinking(const std::vector& inputProgr amd::option::Options compileOptions2; amd::option::Options* thisCompileOptions = i == 0 ? &compileOptions : &compileOptions2; - if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions, false, - isLC())) { + if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions, false)) { buildLog_ += thisCompileOptions->optionsLog(); LogError("Parsing compile options failed."); return false; @@ -1958,7 +1488,7 @@ bool Program::getCompileOptionsAtLinking(const std::vector& inputProgr linkOptsCanOverwrite = true; } else { amd::option::Options thisLinkOptions; - if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions, isLC())) { + if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) { buildLog_ += thisLinkOptions.optionsLog(); LogError("Parsing link options failed."); return false; @@ -1987,17 +1517,6 @@ bool Program::getCompileOptionsAtLinking(const std::vector& inputProgr return true; } -// ================================================================================================ -bool isSPIRVMagicL(const void* Image, size_t Length) { - const unsigned SPRVMagicNumber = 0x07230203; - if (Image == nullptr || Length < sizeof(unsigned)) { - DevLogPrintfError("Invalid Argument, Image: 0x%x Length: %u \n", Image, Length); - return false; - } - auto Magic = static_cast(Image); - return *Magic == SPRVMagicNumber; -} - // ================================================================================================ bool Program::initClBinary(const char* binaryIn, size_t size, amd::Os::FileDesc fdesc, size_t foffset, std::string uri) { @@ -2015,78 +1534,23 @@ bool Program::initClBinary(const char* binaryIn, size_t size, amd::Os::FileDesc // unencrypted int encryptCode = 0; char* decryptedBin = nullptr; - bool isSPIRV = false; - bool isBc = false; -#if defined(WITH_COMPILER_LIB) - if (!device().settings().useLightning_) { - isSPIRV = isSPIRVMagicL(binaryIn, size); - isBc = isBcMagic(binaryIn); + size_t decryptedSize; + if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) { + DevLogError("Cannot Decrypt Elf \n"); + return false; + } + if (decryptedBin != nullptr) { + // It is decrypted binary. + bin = decryptedBin; + sz = decryptedSize; } -#endif // defined(WITH_COMPILER_LIB) - if (isSPIRV || isBc) { -#if defined(WITH_COMPILER_LIB) - acl_error err = ACL_SUCCESS; - aclBinaryOptions binOpts = {0}; - binOpts.struct_size = sizeof(binOpts); - binOpts.elfclass = - (info().arch_id == aclX64 || info().arch_id == aclHSAIL64) ? ELFCLASS64 : ELFCLASS32; - binOpts.bitness = ELFDATA2LSB; - binOpts.alloc = &::malloc; - binOpts.dealloc = &::free; - aclBinary* aclbin_v30 = amd::Hsail::BinaryInit(sizeof(aclBinary), &info(), &binOpts, &err); - if (err != ACL_SUCCESS) { - LogWarning("aclBinaryInit failed"); - amd::Hsail::BinaryFini(aclbin_v30); - return false; - } - err = amd::Hsail::InsertSection(device().compiler(), aclbin_v30, binaryIn, size, - isSPIRV ? aclSPIRV : aclSPIR); - if (ACL_SUCCESS != err) { - LogWarning("aclInsertSection failed"); - amd::Hsail::BinaryFini(aclbin_v30); - return false; - } - if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) { - err = amd::Hsail::WriteToMem(aclbin_v30, (void**)const_cast(&bin), &sz); - if (err != ACL_SUCCESS) { - LogWarning("aclWriteToMem failed"); - amd::Hsail::BinaryFini(aclbin_v30); - return false; - } - amd::Hsail::BinaryFini(aclbin_v30); - } else { - aclBinary* aclbin_v21 = amd::Hsail::CreateFromBinary(aclbin_v30, aclBIFVersion21); - err = amd::Hsail::WriteToMem(aclbin_v21, (void**)const_cast(&bin), &sz); - if (err != ACL_SUCCESS) { - LogWarning("aclWriteToMem failed"); - amd::Hsail::BinaryFini(aclbin_v30); - amd::Hsail::BinaryFini(aclbin_v21); - return false; - } - amd::Hsail::BinaryFini(aclbin_v30); - amd::Hsail::BinaryFini(aclbin_v21); - } -#endif // defined(WITH_COMPILER_LIB) - } else { - size_t decryptedSize; - if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) { - DevLogError("Cannot Decrypt Elf \n"); - return false; - } - if (decryptedBin != nullptr) { - // It is decrypted binary. - bin = decryptedBin; - sz = decryptedSize; - } - - if (!isElf(bin)) { - // Invalid binary. - delete[] decryptedBin; - DevLogError("Bin is not ELF \n"); - return false; - } + if (!isElf(bin)) { + // Invalid binary. + delete[] decryptedBin; + DevLogError("Bin is not ELF \n"); + return false; } clBinary()->setFlags(encryptCode); @@ -2157,8 +1621,8 @@ bool Program::setBinary(const char* binaryIn, size_t size, const device::Program compileOptions_ = same_dev_prog->compileOptions(); linkOptions_ = same_dev_prog->linkOptions(); } else if (!amd::IS_HIP) { - clBinary()->loadCompileOptions(compileOptions_); - clBinary()->loadLinkOptions(linkOptions_); + compileOptions_.clear(); + linkOptions_.clear(); } clBinary()->resetElfIn(); @@ -2169,170 +1633,42 @@ bool Program::setBinary(const char* binaryIn, size_t size, const device::Program Program::file_type_t Program::getCompilationStagesFromBinary( std::vector& completeStages, bool& needOptionsCheck) { Program::file_type_t from = FILE_TYPE_DEFAULT; - if (isLC()) { -#if defined(USE_COMGR_LIBRARY) - completeStages.clear(); - needOptionsCheck = true; - //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT? - // Checking llvmir in .llvmir section - bool containsLlvmirText = (type() == TYPE_COMPILED); - bool containsShaderIsa = (type() == TYPE_EXECUTABLE); - bool containsOpts = !(compileOptions_.empty() && linkOptions_.empty()); + completeStages.clear(); + needOptionsCheck = true; + //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT? + // Checking llvmir in .llvmir section + bool containsLlvmirText = (type() == TYPE_COMPILED); + bool containsShaderIsa = (type() == TYPE_EXECUTABLE); + bool containsOpts = !(compileOptions_.empty() && linkOptions_.empty()); - if (containsLlvmirText && containsOpts) { - completeStages.push_back(from); - from = FILE_TYPE_LLVMIR_BINARY; - } - if (containsShaderIsa) { - completeStages.push_back(from); - from = FILE_TYPE_ISA; - } - std::string sCurOptions = compileOptions_ + linkOptions_; - amd::option::Options curOptions; - if (!amd::option::parseAllOptions(sCurOptions, curOptions, false, isLC())) { - buildLog_ += curOptions.optionsLog(); - LogError("Parsing compile options failed."); - return FILE_TYPE_DEFAULT; - } - switch (from) { - case FILE_TYPE_CG: - case FILE_TYPE_ISA: - // do not check options, if LLVMIR is absent or might be absent or options are absent - if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) { - needOptionsCheck = false; - } - break; - // recompilation might be needed - case FILE_TYPE_LLVMIR_BINARY: - case FILE_TYPE_DEFAULT: - default: - break; - } -#endif // defined(USE_COMGR_LIBRARY) - } else { -#if defined(WITH_COMPILER_LIB) - acl_error errorCode; - size_t secSize = 0; - completeStages.clear(); - needOptionsCheck = true; - size_t boolSize = sizeof(bool); - // Checking llvmir in .llvmir section - bool containsSpirv = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_SPIRV, nullptr, - &containsSpirv, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsSpirv = false; - } - if (containsSpirv) { - completeStages.push_back(from); - from = FILE_TYPE_SPIRV_BINARY; - } - bool containsSpirText = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_SPIR, nullptr, - &containsSpirText, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsSpirText = false; - } - if (containsSpirText) { - completeStages.push_back(from); - from = FILE_TYPE_SPIR_BINARY; - } - bool containsLlvmirText = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LLVMIR, nullptr, - &containsLlvmirText, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsLlvmirText = false; - } - // Checking compile & link options in .comment section - bool containsOpts = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_OPTIONS, nullptr, - &containsOpts, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsOpts = false; - } - if (containsLlvmirText && containsOpts) { - completeStages.push_back(from); - from = FILE_TYPE_LLVMIR_BINARY; - } - // Checking HSAIL in .cg section - bool containsHsailText = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_HSAIL, nullptr, - &containsHsailText, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsHsailText = false; - } - // Checking BRIG sections - bool containsBrig = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_BRIG, nullptr, - &containsBrig, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsBrig = false; - } - if (containsBrig) { - completeStages.push_back(from); - from = FILE_TYPE_HSAIL_BINARY; - } else if (containsHsailText) { - completeStages.push_back(from); - from = FILE_TYPE_HSAIL_TEXT; - } - // Checking Loader Map symbol from CG section - bool containsLoaderMap = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LOADER_MAP, - nullptr, &containsLoaderMap, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsLoaderMap = false; - } - if (containsLoaderMap) { - completeStages.push_back(from); - from = FILE_TYPE_CG; - } - // Checking ISA in .text section - bool containsShaderIsa = true; - errorCode = amd::Hsail::QueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_ISA, nullptr, - &containsShaderIsa, &boolSize); - if (errorCode != ACL_SUCCESS) { - containsShaderIsa = false; - } - if (containsShaderIsa) { - completeStages.push_back(from); - from = FILE_TYPE_ISA; - } - std::string sCurOptions = compileOptions_ + linkOptions_; - amd::option::Options curOptions; - if (!amd::option::parseAllOptions(sCurOptions, curOptions, false, isLC())) { - buildLog_ += curOptions.optionsLog(); - LogError("Parsing compile options failed."); - return FILE_TYPE_DEFAULT; - } - switch (from) { - // compile from HSAIL text, no matter prev. stages and options - case FILE_TYPE_HSAIL_TEXT: + if (containsLlvmirText && containsOpts) { + completeStages.push_back(from); + from = FILE_TYPE_LLVMIR_BINARY; + } + if (containsShaderIsa) { + completeStages.push_back(from); + from = FILE_TYPE_ISA; + } + std::string sCurOptions = compileOptions_ + linkOptions_; + amd::option::Options curOptions; + if (!amd::option::parseAllOptions(sCurOptions, curOptions, false)) { + buildLog_ += curOptions.optionsLog(); + LogError("Parsing compile options failed."); + return FILE_TYPE_DEFAULT; + } + switch (from) { + case FILE_TYPE_CG: + case FILE_TYPE_ISA: + // do not check options, if LLVMIR is absent or might be absent or options are absent + if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) { needOptionsCheck = false; - break; - case FILE_TYPE_HSAIL_BINARY: - // do not check options, if LLVMIR is absent or might be absent or options are absent - if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) { - needOptionsCheck = false; - } - break; - case FILE_TYPE_CG: - case FILE_TYPE_ISA: - // do not check options, if LLVMIR is absent or might be absent or options are absent - if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) { - needOptionsCheck = false; - } - // do not check options, if BRIG is absent or might be absent or LoaderMap is absent - if (!curOptions.oVariables->BinCG || !containsBrig || !containsLoaderMap) { - needOptionsCheck = false; - } - break; - // recompilation might be needed - case FILE_TYPE_LLVMIR_BINARY: - case FILE_TYPE_DEFAULT: - default: - break; - } -#endif // #if defined(WITH_COMPILER_LIB) + } + break; + // recompilation might be needed + case FILE_TYPE_LLVMIR_BINARY: + case FILE_TYPE_DEFAULT: + default: + break; } return from; } @@ -2345,17 +1681,6 @@ Program::file_type_t Program::getNextCompilationStageFromBinary(amd::option::Opt std::string uri = this->BinaryURI(); // If the binary already exists if ((binary.first != nullptr) && (binary.second > 0)) { -#if defined(WITH_COMPILER_LIB) - if (amd::Hsail::ValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) { - acl_error errorCode; - binaryElf_ = amd::Hsail::ReadFromMem(binary.first, binary.second, &errorCode); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n"; - return continueCompileFrom; - } - } -#endif // defined(WITH_COMPILER_LIB) - // save the current options std::string sCurCompileOptions = compileOptions_; std::string sCurLinkOptions = linkOptions_; @@ -2384,46 +1709,15 @@ Program::file_type_t Program::getNextCompilationStageFromBinary(amd::option::Opt // If compile options are absent in binary, do not compare and recompile if (compileOptions_.empty()) break; - std::string sBinOptions; -#if defined(WITH_COMPILER_LIB) - if (binaryElf_ != nullptr) { - const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions); - assert(symbol && "symbol not found"); - std::string symName = - std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]); - size_t symSize = 0; - acl_error errorCode; - - const void* opts = amd::Hsail::ExtractSymbol(device().compiler(), binaryElf_, &symSize, - aclCOMMENT, symName.c_str(), &errorCode); - if (errorCode != ACL_SUCCESS) { - recompile = true; - break; - } - sBinOptions = std::string((char*)opts, symSize); - } else -#endif // defined(WITH_COMPILER_LIB) - { - sBinOptions = sCurOptions; - } - compileOptions_ = sCurCompileOptions; linkOptions_ = sCurLinkOptions; - amd::option::Options curOptions, binOptions; - if (!amd::option::parseAllOptions(sBinOptions, binOptions, false, isLC())) { - buildLog_ += binOptions.optionsLog(); - LogError("Parsing compile options from binary failed."); - return FILE_TYPE_DEFAULT; - } - if (!amd::option::parseAllOptions(sCurOptions, curOptions, false, isLC())) { + amd::option::Options curOptions; + if (!amd::option::parseAllOptions(sCurOptions, curOptions, false)) { buildLog_ += curOptions.optionsLog(); LogError("Parsing compile options failed."); return FILE_TYPE_DEFAULT; } - if (!curOptions.equals(binOptions)) { - recompile = true; - } break; } default: @@ -2451,7 +1745,6 @@ Program::file_type_t Program::getNextCompilationStageFromBinary(amd::option::Opt } // ================================================================================================ -#if defined(USE_COMGR_LIBRARY) bool ComgrBinaryData::create(amd_comgr_data_kind_t kind, void* binary, size_t binSize) { amd_comgr_status_t status = amd::Comgr::create_data(kind, &binaryData_); if (status != AMD_COMGR_STATUS_SUCCESS) { @@ -2655,10 +1948,8 @@ bool Program::createKernelMetadataMap(void* binary, size_t binSize) { return (status == AMD_COMGR_STATUS_SUCCESS); } -#endif bool Program::FindGlobalVarSize(void* binary, size_t binSize) { -#if defined(USE_COMGR_LIBRARY) // HIP doesn't need information about global variable size. // Hence runtime can skip expensive Elf object creation for parsing if (!amd::IS_HIP) { @@ -2706,11 +1997,9 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { buildLog_ += "Error: create kernel metadata map using COMgr\n"; return false; } -#endif // defined(USE_COMGR_LIBRARY) return true; } -#if defined(USE_COMGR_LIBRARY) amd_comgr_status_t getSymbolFromModule(amd_comgr_symbol_t symbol, void* userData) { size_t nlen = 0; size_t* userDataInfo = nullptr; @@ -2794,10 +2083,8 @@ bool Program::getSymbolsFromCodeObj(std::vector* var_names, return ret_val; } -#endif /* USE_COMGR_LIBRARY */ const bool Program::getLoweredNames(std::vector* mangledNames) const { -#if defined(USE_COMGR_LIBRARY) /* Iterate thru kernel names first */ for (auto const& kernelMeta : kernelMetadataMap_) { mangledNames->emplace_back(kernelMeta.first); @@ -2810,15 +2097,9 @@ const bool Program::getLoweredNames(std::vector* mangledNames) cons } return true; - -#else - assert(!"No COMGR loaded"); - return false; -#endif } bool Program::getDemangledName(const std::string& mangledName, std::string& demangledName) const { -#if defined(USE_COMGR_LIBRARY) amd_comgr_data_t mangled_data; amd_comgr_data_t demangled_data; @@ -2855,26 +2136,14 @@ bool Program::getDemangledName(const std::string& mangledName, std::string& dema amd::Comgr::release_data(mangled_data); amd::Comgr::release_data(demangled_data); return true; -#else - assert(!"No COMGR loaded"); - return false; -#endif } bool Program::getGlobalFuncFromCodeObj(std::vector* func_names) const { -#if defined(USE_COMGR_LIBRARY) return getSymbolsFromCodeObj(func_names, AMD_COMGR_SYMBOL_TYPE_FUNC); -#else - return true; -#endif } bool Program::getGlobalVarFromCodeObj(std::vector* var_names) const { -#if defined(USE_COMGR_LIBRARY) return getSymbolsFromCodeObj(var_names, AMD_COMGR_SYMBOL_TYPE_OBJECT); -#else - return true; -#endif } // Init Fini Launch Lock diff --git a/projects/clr/rocclr/device/devprogram.hpp b/projects/clr/rocclr/device/devprogram.hpp index e5c4cc773e..ca6e9b5b97 100644 --- a/projects/clr/rocclr/device/devprogram.hpp +++ b/projects/clr/rocclr/device/devprogram.hpp @@ -20,16 +20,10 @@ #pragma once -#if defined(WITH_COMPILER_LIB) -#include "aclTypes.h" -#endif #include "platform/context.hpp" #include "platform/object.hpp" #include "platform/memory.hpp" - -#if defined(USE_COMGR_LIBRARY) #include "amd_comgr/amd_comgr.h" -#endif // defined(USE_COMGR_LIBRARY) namespace amd { namespace hsa { @@ -120,16 +114,11 @@ class Program : public amd::HeapObject { bool runInitFiniKernel(const std::vector& kernels) const; -#if defined(WITH_COMPILER_LIB) - static amd::Monitor buildLock_; //!< Global build lock for HSAIL which isn't thread-safe -#endif - protected: union { struct { uint32_t isNull_ : 1; //!< Null program no memory allocations uint32_t internal_ : 1; //!< Internal blit program - uint32_t isLC_ : 1; //!< LC was used for the program compilation uint32_t hasGlobalStores_ : 1; //!< Program has writable program scope variables uint32_t isHIP_ : 1; //!< Determine if the program is for HIP uint32_t coLoaded_ : 1; //!< Has the code objected been loaded @@ -143,30 +132,20 @@ class Program : public amd::HeapObject { amd::Elf::ElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format std::string compileOptions_; //!< compile/build options. std::string linkOptions_; //!< link options. - //!< the option arg passed in to clCompileProgram(), clLinkProgram(), - //! or clBuildProgram(), whichever is called last -#if defined(WITH_COMPILER_LIB) - aclBinaryOptions binOpts_; //!< Binary options to create aclBinary - aclBinary* binaryElf_; //!< Binary for the new compiler library -#endif - + //!< the option arg passed in to clCompileProgram(), clLinkProgram(), + //!< or clBuildProgram(), whichever is called last std::string lastBuildOptionsArg_; mutable std::string buildLog_; //!< build log. int32_t buildStatus_; //!< build status. int32_t buildError_; //!< build error -#if defined(WITH_COMPILER_LIB) - aclTargetInfo info_; //!< The info target for this binary. -#endif size_t globalVariableTotalSize_; amd::option::Options* programOptions_; -#if defined(USE_COMGR_LIBRARY) amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata uint32_t codeObjectVer_; //!< version of code object std::map kernelMetadataMap_; //!< Map of kernel metadata -#endif //! Sanitizer lock - lock when launching init/fini kernels static amd::Monitor initFiniLock_; @@ -249,20 +228,12 @@ class Program : public amd::HeapObject { size_t globalVariableTotalSize() const { return globalVariableTotalSize_; } -#if defined(WITH_COMPILER_LIB) - //! Returns the aclBinary associated with the program - aclBinary* binaryElf() const { return static_cast(binaryElf_); } -#endif - //! Returns TRUE if the program just compiled bool isNull() const { return isNull_; } //! Returns TRUE if the program used internally by runtime bool isInternal() const { return internal_; } - //! Returns TRUE if Lightning compiler was used for this program - bool isLC() const { return isLC_; } - //! Global variables are a part of the code segment bool hasGlobalStores() const { return hasGlobalStores_; } @@ -272,7 +243,6 @@ class Program : public amd::HeapObject { //! Returns TRUE if the program is a trap handler for debugger support bool isTrapHandler() const { return trapHandler_; } -#if defined(USE_COMGR_LIBRARY) amd_comgr_metadata_node_t metadata() const { return metadata_; } //! Get the kernel metadata @@ -286,7 +256,6 @@ class Program : public amd::HeapObject { } const uint32_t codeObjectVer() const { return codeObjectVer_; } -#endif //! Check if program is HIP based const bool isHIP() const { return (isHIP_ == 1); } @@ -324,18 +293,18 @@ class Program : public amd::HeapObject { * * \return True if we successefully compiled a GPU program */ - virtual bool compileImpl(const std::string& sourceCode, //!< the program's source code - const std::vector& headers, - const char** headerIncludeNames, - amd::option::Options* options //!< compile options's object + bool compileImpl(const std::string& sourceCode, //!< the program's source code + const std::vector& headers, + const char** headerIncludeNames, + amd::option::Options* options //!< compile options's object ); //! Link the device program. - virtual bool linkImpl(amd::option::Options* options); + bool linkImpl(amd::option::Options* options); //! Link the device programs. - virtual bool linkImpl(const std::vector& inputPrograms, amd::option::Options* options, - bool createLibrary); + bool linkImpl(const std::vector& inputPrograms, amd::option::Options* options, + bool createLibrary); virtual bool createBinary(amd::option::Options* options) = 0; @@ -347,15 +316,9 @@ class Program : public amd::HeapObject { //! Initialize Binary virtual bool initClBinary(); - virtual bool saveBinaryAndSetType(type_t type) = 0; - //! Release the Binary void releaseClBinary(); -#if defined(WITH_COMPILER_LIB) - //! return target info - virtual const aclTargetInfo& info() = 0; -#endif virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, bool internalKernel) { return true; @@ -402,44 +365,13 @@ class Program : public amd::HeapObject { return false; } -#if defined(USE_COMGR_LIBRARY) bool getSymbolsFromCodeObj(std::vector* var_names, amd_comgr_symbol_type_t sym_type) const; -#endif bool getUndefinedVarInfo(std::string var_name, void** var_addr, size_t* var_size); bool defineUndefinedVars(); private: - //! Compile the device program with LC path - bool compileImplLC(const std::string& sourceCode, const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options); - //! Compile the device program with HSAIL path - bool compileImplHSAIL(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options); - - //! Link the device programs with LC path - bool linkImplLC(const std::vector& inputPrograms, amd::option::Options* options, - bool createLibrary); - - //! Link the device programs with HSAIL path - bool linkImplHSAIL(const std::vector& inputPrograms, amd::option::Options* options, - bool createLibrary); - - //! Link the device program with LC path - bool linkImplLC(amd::option::Options* options); - - //! Link the device program with HSAIL path - bool linkImplHSAIL(amd::option::Options* options); - - //! Load the device program with LC path - bool loadLC(); - - //! Load the device program with HSAIL path - bool loadHSAIL(); - -#if defined(USE_COMGR_LIBRARY) //! Dump the log data object to the build log, if a log data object is present void extractBuildLog(amd_comgr_data_set_t dataSet); //! Dump the code object data @@ -477,7 +409,6 @@ class Program : public amd::HeapObject { //! Create the map for the kernel name and its metadata for fast access bool createKernelMetadataMap(void* binary, size_t binSize); -#endif bool trySubstObjFile(const char* SubstCfgFile, const std::string& sourceCode, const amd::option::Options* options); @@ -489,8 +420,6 @@ class Program : public amd::HeapObject { Program& operator=(const Program&); }; -#if defined(USE_COMGR_LIBRARY) - class ComgrBinaryData { public: ComgrBinaryData() : binaryData_({0}), created_(false) {} @@ -503,6 +432,4 @@ class ComgrBinaryData { bool created_; }; -#endif - } // namespace amd::device diff --git a/projects/clr/rocclr/device/hsailctx.cpp b/projects/clr/rocclr/device/hsailctx.cpp deleted file mode 100644 index 3732c93558..0000000000 --- a/projects/clr/rocclr/device/hsailctx.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#if defined(WITH_COMPILER_LIB) -#include "os/os.hpp" -#include "utils/flags.hpp" -#include "hsailctx.hpp" - -namespace amd { -std::once_flag Hsail::initialized; -HsailEntryPoints Hsail::cep_; -bool Hsail::is_ready_ = false; - -bool Hsail::LoadLib() { -#if defined(HSAIL_DYN_DLL) - ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loading HSAIL library."); - static constexpr const char* HsailLibName = - LP64_SWITCH(WINDOWS_SWITCH("amdhsail32.dll", "libamdhsail32.so"), - WINDOWS_SWITCH("amdhsail64.dll", "libamdhsail64.so")); - cep_.handle = Os::loadLibrary(HsailLibName); - if (nullptr == cep_.handle) { - return false; - } -#endif - GET_HSAIL_SYMBOL(aclCompilerInit) - GET_HSAIL_SYMBOL(aclCompilerFini) - GET_HSAIL_SYMBOL(aclCompilerVersion) - GET_HSAIL_SYMBOL(aclVersionSize) - GET_HSAIL_SYMBOL(aclGetErrorString) - GET_HSAIL_SYMBOL(aclGetArchInfo) - GET_HSAIL_SYMBOL(aclGetDeviceInfo) - GET_HSAIL_SYMBOL(aclGetTargetInfo) - GET_HSAIL_SYMBOL(aclGetTargetInfoFromChipID) - GET_HSAIL_SYMBOL(aclGetArchitecture) - GET_HSAIL_SYMBOL(aclGetChipOptions) - GET_HSAIL_SYMBOL(aclGetFamily) - GET_HSAIL_SYMBOL(aclGetChip) - GET_HSAIL_SYMBOL(aclBinaryInit) - GET_HSAIL_SYMBOL(aclBinaryFini) - GET_HSAIL_SYMBOL(aclReadFromFile) - GET_HSAIL_SYMBOL(aclReadFromMem) - GET_HSAIL_SYMBOL(aclWriteToFile) - GET_HSAIL_SYMBOL(aclWriteToMem) - GET_HSAIL_SYMBOL(aclCreateFromBinary) - GET_HSAIL_SYMBOL(aclBinaryVersion) - GET_HSAIL_SYMBOL(aclInsertSection) - GET_HSAIL_SYMBOL(aclInsertSymbol) - GET_HSAIL_SYMBOL(aclExtractSection) - GET_HSAIL_SYMBOL(aclExtractSymbol) - GET_HSAIL_SYMBOL(aclRemoveSection) - GET_HSAIL_SYMBOL(aclRemoveSymbol) - GET_HSAIL_SYMBOL(aclQueryInfo) - GET_HSAIL_SYMBOL(aclDbgAddArgument) - GET_HSAIL_SYMBOL(aclDbgRemoveArgument) - GET_HSAIL_SYMBOL(aclCompile) - GET_HSAIL_SYMBOL(aclLink) - GET_HSAIL_SYMBOL(aclGetCompilerLog) - GET_HSAIL_SYMBOL(aclRetrieveType) - GET_HSAIL_SYMBOL(aclSetType) - GET_HSAIL_SYMBOL(aclConvertType) - GET_HSAIL_SYMBOL(aclDisassemble) - GET_HSAIL_SYMBOL(aclGetDeviceBinary) - GET_HSAIL_SYMBOL(aclValidateBinaryImage) - GET_HSAIL_SYMBOL(aclJITObjectImageCreate) - GET_HSAIL_SYMBOL(aclJITObjectImageCopy) - GET_HSAIL_SYMBOL(aclJITObjectImageDestroy) - GET_HSAIL_SYMBOL(aclJITObjectImageFinalize) - GET_HSAIL_SYMBOL(aclJITObjectImageSize) - GET_HSAIL_SYMBOL(aclJITObjectImageData) - GET_HSAIL_SYMBOL(aclJITObjectImageGetGlobalsSize) - GET_HSAIL_SYMBOL(aclJITObjectImageIterateSymbols) - GET_HSAIL_SYMBOL(aclDumpBinary) - GET_HSAIL_SYMBOL(aclGetKstatsSI) - GET_HSAIL_SYMBOL(aclInsertKernelStatistics) - GET_HSAIL_SYMBOL(aclFreeMem) - is_ready_ = true; - return true; -} - -} // namespace amd -#endif diff --git a/projects/clr/rocclr/device/hsailctx.hpp b/projects/clr/rocclr/device/hsailctx.hpp deleted file mode 100644 index 0d498cf768..0000000000 --- a/projects/clr/rocclr/device/hsailctx.hpp +++ /dev/null @@ -1,394 +0,0 @@ -/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#pragma once - -#include -#if defined(WITH_COMPILER_LIB) -#include "top.hpp" -#include "acl.h" - -#ifndef ACL_API_ENTRY -#if defined(_WIN32) || defined(__CYGWIN__) -#define ACL_API_ENTRY __stdcall -#else -#define ACL_API_ENTRY -#endif -#endif - -namespace amd { -typedef aclCompiler*(ACL_API_ENTRY* t_aclCompilerInit)(aclCompilerOptions* opts, - acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclCompilerFini)(aclCompiler* cl); -typedef aclCLVersion(ACL_API_ENTRY* t_aclCompilerVersion)(aclCompiler* cl, acl_error* error_code); -typedef uint32_t(ACL_API_ENTRY* t_aclVersionSize)(aclCLVersion num, acl_error* error_code); -typedef const char*(ACL_API_ENTRY* t_aclGetErrorString)(acl_error error_code); -typedef acl_error(ACL_API_ENTRY* t_aclGetArchInfo)(const char** arch_names, size_t* arch_size); -typedef acl_error(ACL_API_ENTRY* t_aclGetDeviceInfo)(const char* arch, const char** names, - size_t* device_size); -typedef aclTargetInfo(ACL_API_ENTRY* t_aclGetTargetInfo)(const char* arch, const char* device, - acl_error* error_code); -typedef aclTargetInfo(ACL_API_ENTRY* t_aclGetTargetInfoFromChipID)(const char* arch, - const uint32_t chip_id, - acl_error* error_code); -typedef const char*(ACL_API_ENTRY* t_aclGetArchitecture)(const aclTargetInfo& target); -typedef const uint64_t(ACL_API_ENTRY* t_aclGetChipOptions)(const aclTargetInfo& target); -typedef const char*(ACL_API_ENTRY* t_aclGetFamily)(const aclTargetInfo& target); -typedef const char*(ACL_API_ENTRY* t_aclGetChip)(const aclTargetInfo& target); -typedef aclBinary*(ACL_API_ENTRY* t_aclBinaryInit)(size_t struct_version, - const aclTargetInfo* target, - const aclBinaryOptions* options, - acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclBinaryFini)(aclBinary* bin); -typedef aclBinary*(ACL_API_ENTRY* t_aclReadFromFile)(const char* str, acl_error* error_code); -typedef aclBinary*(ACL_API_ENTRY* t_aclReadFromMem)(const void* mem, size_t size, - acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclWriteToFile)(aclBinary* bin, const char* str); -typedef acl_error(ACL_API_ENTRY* t_aclWriteToMem)(aclBinary* bin, void** mem, size_t* size); -typedef aclBinary*(ACL_API_ENTRY* t_aclCreateFromBinary)(const aclBinary* binary, - aclBIFVersion version); -typedef aclBIFVersion(ACL_API_ENTRY* t_aclBinaryVersion)(const aclBinary* binary); -typedef acl_error(ACL_API_ENTRY* t_aclInsertSection)(aclCompiler* cl, aclBinary* binary, - const void* data, size_t data_size, - aclSections id); -typedef acl_error(ACL_API_ENTRY* t_aclInsertSymbol)(aclCompiler* cl, aclBinary* binary, - const void* data, size_t data_size, - aclSections id, const char* symbol); -typedef const void*(ACL_API_ENTRY* t_aclExtractSection)(aclCompiler* cl, const aclBinary* binary, - size_t* size, aclSections id, - acl_error* error_code); -typedef const void*(ACL_API_ENTRY* t_aclExtractSymbol)(aclCompiler* cl, const aclBinary* binary, - size_t* size, aclSections id, - const char* symbol, acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclRemoveSection)(aclCompiler* cl, aclBinary* binary, - aclSections id); -typedef acl_error(ACL_API_ENTRY* t_aclRemoveSymbol)(aclCompiler* cl, aclBinary* binary, - aclSections id, const char* symbol); -typedef acl_error(ACL_API_ENTRY* t_aclQueryInfo)(aclCompiler* cl, const aclBinary* binary, - aclQueryType query, const char* kernel, - void* data_ptr, size_t* ptr_size); -typedef acl_error(ACL_API_ENTRY* t_aclDbgAddArgument)(aclCompiler* cl, aclBinary* binary, - const char* kernel, const char* name, - bool byVal); -typedef acl_error(ACL_API_ENTRY* t_aclDbgRemoveArgument)(aclCompiler* cl, aclBinary* binary, - const char* kernel, const char* name); -typedef acl_error(ACL_API_ENTRY* t_aclCompile)(aclCompiler* cl, aclBinary* bin, const char* options, - aclType from, aclType to, - aclLogFunction compile_callback); -typedef acl_error(ACL_API_ENTRY* t_aclLink)(aclCompiler* cl, aclBinary* src_bin, - unsigned int num_libs, aclBinary** libs, - aclType link_mode, const char* options, - aclLogFunction link_callback); -typedef const char*(ACL_API_ENTRY* t_aclGetCompilerLog)(aclCompiler* cl); -typedef const void*(ACL_API_ENTRY* t_aclRetrieveType)(aclCompiler* cl, const aclBinary* bin, - const char* name, size_t* data_size, - aclType type, acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclSetType)(aclCompiler* cl, aclBinary* bin, const char* name, - aclType type, const void* data, size_t size); -typedef acl_error(ACL_API_ENTRY* t_aclConvertType)(aclCompiler* cl, aclBinary* bin, - const char* name, aclType type); -typedef acl_error(ACL_API_ENTRY* t_aclDisassemble)(aclCompiler* cl, aclBinary* bin, - const char* kernel, - aclLogFunction disasm_callback); -typedef const void*(ACL_API_ENTRY* t_aclGetDeviceBinary)(aclCompiler* cl, const aclBinary* bin, - const char* kernel, size_t* size, - acl_error* error_code); -typedef bool(ACL_API_ENTRY* t_aclValidateBinaryImage)(const void* binary, size_t length, - unsigned type); -typedef aclJITObjectImage(ACL_API_ENTRY* t_aclJITObjectImageCreate)(aclCompiler* cl, - const void* buffer, - size_t length, aclBinary* bin, - acl_error* error_code); -typedef aclJITObjectImage(ACL_API_ENTRY* t_aclJITObjectImageCopy)(aclCompiler* cl, - const void* buffer, size_t length, - acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageDestroy)(aclCompiler* cl, - aclJITObjectImage buffer); -typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageFinalize)(aclCompiler* cl, - aclJITObjectImage image); -typedef size_t(ACL_API_ENTRY* t_aclJITObjectImageSize)(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code); -typedef const char*(ACL_API_ENTRY* t_aclJITObjectImageData)(aclCompiler* cl, - aclJITObjectImage image, - acl_error* error_code); -typedef size_t(ACL_API_ENTRY* t_aclJITObjectImageGetGlobalsSize)(aclCompiler* cl, - aclJITObjectImage image, - acl_error* error_code); -typedef acl_error(ACL_API_ENTRY* t_aclJITObjectImageIterateSymbols)(aclCompiler* cl, - aclJITObjectImage image, - aclJITSymbolCallback callback, - void* data); -typedef void(ACL_API_ENTRY* t_aclDumpBinary)(const aclBinary* bin); -typedef void(ACL_API_ENTRY* t_aclGetKstatsSI)(const void* shader, aclKernelStats& kstats); -typedef acl_error(ACL_API_ENTRY* t_aclInsertKernelStatistics)(aclCompiler* cl, aclBinary* bin); -typedef acl_error(ACL_API_ENTRY* t_aclFreeMem)(aclBinary* bin, void* mem); - -struct HsailEntryPoints { - void* handle; - t_aclCompilerInit aclCompilerInit; - t_aclCompilerFini aclCompilerFini; - t_aclCompilerVersion aclCompilerVersion; - t_aclVersionSize aclVersionSize; - t_aclGetErrorString aclGetErrorString; - t_aclGetArchInfo aclGetArchInfo; - t_aclGetDeviceInfo aclGetDeviceInfo; - t_aclGetTargetInfo aclGetTargetInfo; - t_aclGetTargetInfoFromChipID aclGetTargetInfoFromChipID; - t_aclGetArchitecture aclGetArchitecture; - t_aclGetChipOptions aclGetChipOptions; - t_aclGetFamily aclGetFamily; - t_aclGetChip aclGetChip; - t_aclBinaryInit aclBinaryInit; - t_aclBinaryFini aclBinaryFini; - t_aclReadFromFile aclReadFromFile; - t_aclReadFromMem aclReadFromMem; - t_aclWriteToFile aclWriteToFile; - t_aclWriteToMem aclWriteToMem; - t_aclCreateFromBinary aclCreateFromBinary; - t_aclBinaryVersion aclBinaryVersion; - t_aclInsertSection aclInsertSection; - t_aclInsertSymbol aclInsertSymbol; - t_aclExtractSection aclExtractSection; - t_aclExtractSymbol aclExtractSymbol; - t_aclRemoveSection aclRemoveSection; - t_aclRemoveSymbol aclRemoveSymbol; - t_aclQueryInfo aclQueryInfo; - t_aclDbgAddArgument aclDbgAddArgument; - t_aclDbgRemoveArgument aclDbgRemoveArgument; - t_aclCompile aclCompile; - t_aclLink aclLink; - t_aclGetCompilerLog aclGetCompilerLog; - t_aclRetrieveType aclRetrieveType; - t_aclSetType aclSetType; - t_aclConvertType aclConvertType; - t_aclDisassemble aclDisassemble; - t_aclGetDeviceBinary aclGetDeviceBinary; - t_aclValidateBinaryImage aclValidateBinaryImage; - t_aclJITObjectImageCreate aclJITObjectImageCreate; - t_aclJITObjectImageCopy aclJITObjectImageCopy; - t_aclJITObjectImageDestroy aclJITObjectImageDestroy; - t_aclJITObjectImageFinalize aclJITObjectImageFinalize; - t_aclJITObjectImageSize aclJITObjectImageSize; - t_aclJITObjectImageData aclJITObjectImageData; - t_aclJITObjectImageGetGlobalsSize aclJITObjectImageGetGlobalsSize; - t_aclJITObjectImageIterateSymbols aclJITObjectImageIterateSymbols; - t_aclDumpBinary aclDumpBinary; - t_aclGetKstatsSI aclGetKstatsSI; - t_aclInsertKernelStatistics aclInsertKernelStatistics; - t_aclFreeMem aclFreeMem; -}; - -#ifdef HSAIL_DYN_DLL -#define HSAIL_DYN(NAME) cep_.NAME -#define GET_HSAIL_SYMBOL(NAME) \ - cep_.NAME = reinterpret_cast(Os::getSymbol(cep_.handle, #NAME)); \ - if (nullptr == cep_.NAME) { \ - return false; \ - } -#else -#define HSAIL_DYN(NAME) NAME -#define GET_HSAIL_SYMBOL(NAME) -#endif - -class Hsail : public amd::AllStatic { - public: - static std::once_flag initialized; - - static bool LoadLib(); - - static bool IsReady() { return is_ready_; } - - static aclCompiler* CompilerInit(aclCompilerOptions* opts, acl_error* error_code) { - return HSAIL_DYN(aclCompilerInit)(opts, error_code); - } - static acl_error CompilerFini(aclCompiler* cl) { return HSAIL_DYN(aclCompilerFini)(cl); } - static aclCLVersion CompilerVersion(aclCompiler* cl, acl_error* error_code) { - return HSAIL_DYN(aclCompilerVersion)(cl, error_code); - } - static uint32_t VersionSize(aclCLVersion num, acl_error* error_code) { - return HSAIL_DYN(aclVersionSize)(num, error_code); - } - static const char* GetErrorString(acl_error error_code) { - return HSAIL_DYN(aclGetErrorString)(error_code); - } - static acl_error GetArchInfo(const char** arch_names, size_t* arch_size) { - return HSAIL_DYN(aclGetArchInfo)(arch_names, arch_size); - } - static acl_error GetDeviceInfo(const char* arch, const char** names, size_t* device_size) { - return HSAIL_DYN(aclGetDeviceInfo)(arch, names, device_size); - } - static aclTargetInfo GetTargetInfo(const char* arch, const char* device, acl_error* error_code) { - return HSAIL_DYN(aclGetTargetInfo)(arch, device, error_code); - } - static aclTargetInfo GetTargetInfoFromChipID(const char* arch, const uint32_t chip_id, - acl_error* error_code) { - return HSAIL_DYN(aclGetTargetInfoFromChipID)(arch, chip_id, error_code); - } - static const char* GetArchitecture(const aclTargetInfo& target) { - return HSAIL_DYN(aclGetArchitecture)(target); - } - static uint64_t GetChipOptions(const aclTargetInfo& target) { - return HSAIL_DYN(aclGetChipOptions)(target); - } - static const char* GetFamily(const aclTargetInfo& target) { - return HSAIL_DYN(aclGetFamily)(target); - } - static const char* GetChip(const aclTargetInfo& target) { return HSAIL_DYN(aclGetChip)(target); } - static aclBinary* BinaryInit(size_t struct_version, const aclTargetInfo* target, - const aclBinaryOptions* options, acl_error* error_code) { - return HSAIL_DYN(aclBinaryInit)(struct_version, target, options, error_code); - } - static acl_error BinaryFini(aclBinary* bin) { return HSAIL_DYN(aclBinaryFini)(bin); } - static aclBinary* ReadFromFile(const char* str, acl_error* error_code) { - return HSAIL_DYN(aclReadFromFile)(str, error_code); - } - static aclBinary* ReadFromMem(const void* mem, size_t size, acl_error* error_code) { - return HSAIL_DYN(aclReadFromMem)(mem, size, error_code); - } - static acl_error WriteToFile(aclBinary* bin, const char* str) { - return HSAIL_DYN(aclWriteToFile)(bin, str); - } - static acl_error WriteToMem(aclBinary* bin, void** mem, size_t* size) { - return HSAIL_DYN(aclWriteToMem)(bin, mem, size); - } - static aclBinary* CreateFromBinary(const aclBinary* binary, aclBIFVersion version) { - return HSAIL_DYN(aclCreateFromBinary)(binary, version); - } - static aclBIFVersion BinaryVersion(const aclBinary* binary) { - return HSAIL_DYN(aclBinaryVersion)(binary); - } - static acl_error InsertSection(aclCompiler* cl, aclBinary* binary, const void* data, - size_t data_size, aclSections id) { - return HSAIL_DYN(aclInsertSection)(cl, binary, data, data_size, id); - } - static const acl_error InsertSymbol(aclCompiler* cl, aclBinary* binary, const void* data, - size_t data_size, aclSections id, const char* symbol) { - return HSAIL_DYN(aclInsertSymbol)(cl, binary, data, data_size, id, symbol); - } - static const void* ExtractSection(aclCompiler* cl, const aclBinary* binary, size_t* size, - aclSections id, acl_error* error_code) { - return HSAIL_DYN(aclExtractSection)(cl, binary, size, id, error_code); - } - static const void* ExtractSymbol(aclCompiler* cl, const aclBinary* binary, size_t* size, - aclSections id, const char* symbol, acl_error* error_code) { - return HSAIL_DYN(aclExtractSymbol)(cl, binary, size, id, symbol, error_code); - } - static acl_error RemoveSection(aclCompiler* cl, aclBinary* binary, aclSections id) { - return HSAIL_DYN(aclRemoveSection)(cl, binary, id); - } - static acl_error RemoveSymbol(aclCompiler* cl, aclBinary* binary, aclSections id, - const char* symbol) { - return HSAIL_DYN(aclRemoveSymbol)(cl, binary, id, symbol); - } - static acl_error QueryInfo(aclCompiler* cl, const aclBinary* binary, aclQueryType query, - const char* kernel, void* data_ptr, size_t* ptr_size) { - return HSAIL_DYN(aclQueryInfo)(cl, binary, query, kernel, data_ptr, ptr_size); - } - static acl_error DbgAddArgument(aclCompiler* cl, aclBinary* binary, const char* kernel, - const char* name, bool byVal) { - return HSAIL_DYN(aclDbgAddArgument)(cl, binary, kernel, name, byVal); - } - static acl_error DbgRemoveArgument(aclCompiler* cl, aclBinary* binary, const char* kernel, - const char* name) { - return HSAIL_DYN(aclDbgRemoveArgument)(cl, binary, kernel, name); - } - static acl_error Compile(aclCompiler* cl, aclBinary* bin, const char* options, aclType from, - aclType to, aclLogFunction compile_callback) { - return HSAIL_DYN(aclCompile)(cl, bin, options, from, to, compile_callback); - } - static acl_error Link(aclCompiler* cl, aclBinary* src_bin, unsigned int num_libs, - aclBinary** libs, aclType link_mode, const char* options, - aclLogFunction link_callback) { - return HSAIL_DYN(aclLink)(cl, src_bin, num_libs, libs, link_mode, options, link_callback); - } - static const char* GetCompilerLog(aclCompiler* cl) { return HSAIL_DYN(aclGetCompilerLog)(cl); } - static const void* RetrieveType(aclCompiler* cl, const aclBinary* bin, const char* name, - size_t* data_size, aclType type, acl_error* error_code) { - return HSAIL_DYN(aclRetrieveType)(cl, bin, name, data_size, type, error_code); - } - static acl_error SetType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type, - const void* data, size_t size) { - return HSAIL_DYN(aclSetType)(cl, bin, name, type, data, size); - } - static acl_error ConvertType(aclCompiler* cl, aclBinary* bin, const char* name, aclType type) { - return HSAIL_DYN(aclConvertType)(cl, bin, name, type); - } - static acl_error Disassemble(aclCompiler* cl, aclBinary* bin, const char* kernel, - aclLogFunction disasm_callback) { - return HSAIL_DYN(aclDisassemble)(cl, bin, kernel, disasm_callback); - } - static const void* GetDeviceBinary(aclCompiler* cl, const aclBinary* bin, const char* kernel, - size_t* size, acl_error* error_code) { - return HSAIL_DYN(aclGetDeviceBinary)(cl, bin, kernel, size, error_code); - } - static const bool ValidateBinaryImage(const void* binary, size_t length, unsigned type) { -#if defined(HSAIL_DYN_DLL) - if (cep_.aclValidateBinaryImage == nullptr) { - return false; - } -#endif // defined(HSAIL_DYN_DLL) - return HSAIL_DYN(aclValidateBinaryImage)(binary, length, type); - } - static aclJITObjectImage JITObjectImageCreate(aclCompiler* cl, const void* buffer, size_t length, - aclBinary* bin, acl_error* error_code) { - return HSAIL_DYN(aclJITObjectImageCreate)(cl, buffer, length, bin, error_code); - } - static aclJITObjectImage JITObjectImageCopy(aclCompiler* cl, const void* buffer, size_t length, - acl_error* error_code) { - return HSAIL_DYN(aclJITObjectImageCopy)(cl, buffer, length, error_code); - } - static acl_error JITObjectImageDestroy(aclCompiler* cl, aclJITObjectImage buffer) { - return HSAIL_DYN(aclJITObjectImageDestroy)(cl, buffer); - } - static acl_error JITObjectImageFinalize(aclCompiler* cl, aclJITObjectImage image) { - return HSAIL_DYN(aclJITObjectImageFinalize)(cl, image); - } - static size_t JITObjectImageSize(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code) { - return HSAIL_DYN(aclJITObjectImageSize)(cl, image, error_code); - } - static const char* JITObjectImageData(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code) { - return HSAIL_DYN(aclJITObjectImageData)(cl, image, error_code); - } - static size_t JITObjectImageGetGlobalsSize(aclCompiler* cl, aclJITObjectImage image, - acl_error* error_code) { - return HSAIL_DYN(aclJITObjectImageGetGlobalsSize)(cl, image, error_code); - } - static acl_error JITObjectImageIterateSymbols(aclCompiler* cl, aclJITObjectImage image, - aclJITSymbolCallback callback, void* data) { - return HSAIL_DYN(aclJITObjectImageIterateSymbols)(cl, image, callback, data); - } - static void DumpBinary(const aclBinary* bin) { HSAIL_DYN(aclDumpBinary)(bin); } - static void GetKstatsSI(const void* shader, aclKernelStats& kstats) { - return HSAIL_DYN(aclGetKstatsSI)(shader, kstats); - } - static acl_error InsertKernelStatistics(aclCompiler* cl, aclBinary* bin) { - return HSAIL_DYN(aclInsertKernelStatistics)(cl, bin); - } - static acl_error FreeMem(aclBinary* bin, void* mem) { return HSAIL_DYN(aclFreeMem)(bin, mem); } - - private: - static HsailEntryPoints cep_; - static bool is_ready_; -}; - -} // namespace amd -#endif diff --git a/projects/clr/rocclr/device/pal/palblitcl.cpp b/projects/clr/rocclr/device/pal/palblitcl.cpp index c60c2bd5af..5c1350328c 100644 --- a/projects/clr/rocclr/device/pal/palblitcl.cpp +++ b/projects/clr/rocclr/device/pal/palblitcl.cpp @@ -49,7 +49,7 @@ extern void __amd_scheduler_pal(__global void*, __global void*, uint); * The trap handler source is copied from the above URL, with the following * modifications: * - Add the following directive to declare the trap_entry symbol (this is - * later used by LightningProgram::GetTrapHandlerAddress to locate the load + * later used by pal::Program::GetTrapHandlerAddress to locate the load * address of the trap handler): * * .globl trap_entry diff --git a/projects/clr/rocclr/device/pal/palcapturemgr.hpp b/projects/clr/rocclr/device/pal/palcapturemgr.hpp index f307acd5d9..9c676d853f 100644 --- a/projects/clr/rocclr/device/pal/palcapturemgr.hpp +++ b/projects/clr/rocclr/device/pal/palcapturemgr.hpp @@ -27,7 +27,7 @@ namespace amd::pal { class Device; class VirtualGPU; -class HSAILKernel; +class Kernel; // ================================================================================================ // RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1) @@ -256,7 +256,7 @@ class ICaptureMgr { public: virtual bool Update(Pal::IPlatform* platform) = 0; - virtual void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, + virtual void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) = 0; virtual void PostDispatch(VirtualGPU* gpu) = 0; diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 9a8c709f3a..ff1e94f24d 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -37,7 +37,6 @@ #include "palPlatform.h" #include "palDevice.h" #include "palQueueSemaphore.h" -#include "hsailctx.hpp" #include "vdi_common.hpp" @@ -167,10 +166,6 @@ namespace amd::pal { Util::GenericAllocator NullDevice::allocator_; char* Device::platformObj_; Pal::IPlatform* Device::platform_; - -#if defined(WITH_COMPILER_LIB) -NullDevice::Compiler* NullDevice::compiler_; -#endif AppProfile Device::appProfile_; Pal::IDevice* gDeviceList[Pal::MaxDevices] = {}; @@ -261,25 +256,12 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId()); return false; } - if (!settings().useLightning_) { - if ((isa.hsailName() != nullptr)) { - palName_ = isa.hsailName(); - } else { - return false; - } - } - if (!ValidateComgr()) { LogPrintfError("Code object manager initialization failed for offline PAL device %s", isa.targetId()); return false; } - if (!ValidateHsail()) { - LogPrintfError("HSAIL initialization failed for offline PAL device %s", isa.targetId()); - return false; - } - if (!amd::Device::create(isa)) { LogPrintfError("Unable to setup device for PAL offline device %s", isa.targetId()); return false; @@ -293,37 +275,12 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64; - if (!settings().useLightning_) { -#if defined(WITH_COMPILER_LIB) - const char* library = getenv("HSA_COMPILER_LIBRARY"); - aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8), - library, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr}; - // Initialize the compiler handle - acl_error error; - compiler_ = amd::Hsail::CompilerInit(&opts, &error); - if (error != ACL_SUCCESS) { - LogPrintfError("Error initializing the compiler for offline PAL device %s", isa.targetId()); - return false; - } -#endif // defined(WITH_COMPILER_LIB) - } - return true; } device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) { device::Program* program; - if (settings().useLightning_) { - program = new LightningProgram(*this, owner); - } else { - program = new HSAILProgram(*this, owner); - } + program = new pal::Program(*this, owner); if (program == nullptr) { LogError("Memory allocation has failed!"); @@ -471,10 +428,8 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.globalMemSize_ = std::min(4 * info_.maxMemAllocSize_, info_.globalMemSize_); // Use 64 bit pointers - if (settings().use64BitPtr_) { - info_.addressBits_ = 64; - } else { - info_.addressBits_ = (settings().useLightning_) ? 64 : 32; + info_.addressBits_ = 64; + if (!settings().use64BitPtr_) { // Limit total size with 3GB for 32 bit info_.globalMemSize_ = std::min(info_.globalMemSize_, uint64_t(3 * Gi)); } @@ -531,11 +486,10 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.platform_ = AMD_PLATFORM; - ::strncpy(info_.name_, settings().useLightning_ ? isa().targetId() : palName_, - sizeof(info_.name_)); + ::strncpy(info_.name_, isa().targetId(), sizeof(info_.name_)); ::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1); - ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)%s", - settings().useLightning_ ? ",LC" : ",HSAIL", isOnline() ? "" : " [Offline]"); + ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, + AMD_BUILD_STRING " (PAL,LC)%s", isOnline() ? "" : " [Offline]"); info_.profile_ = "FULL_PROFILE"; info_.spirVersions_ = ""; @@ -1037,11 +991,6 @@ bool Device::create(Pal::IDevice* device) { return false; } - if (!ValidateHsail()) { - LogError("Hsail initialization failed!"); - return false; - } - computeEnginesId_.resize(std::min(numComputeEngines(), settings().numComputeRings_)); amd::Context::Info info = {0}; @@ -1086,27 +1035,6 @@ bool Device::create(Pal::IDevice* device) { allocedMem[i] = 0; } - if (!settings().useLightning_) { -#if defined(WITH_COMPILER_LIB) - const char* library = getenv("HSA_COMPILER_LIBRARY"); - aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8), - library, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr}; - // Initialize the compiler handle - acl_error error; - compiler_ = amd::Hsail::CompilerInit(&opts, &error); - if (error != ACL_SUCCESS) { - LogError("Error initializing the compiler"); - return false; - } -#endif // defined(WITH_COMPILER_LIB) - } - // Allocate SRD manager srdManager_ = new SrdManager(*this, std::max(HsaImageObjectSize, HsaSamplerObjectSize), 64 * Ki); if (srdManager_ == nullptr) { @@ -1117,7 +1045,7 @@ bool Device::create(Pal::IDevice* device) { } // ================================================================================================ -// Master function that handles developer callbacks from PAL. +// Primary function that handles developer callbacks from PAL. void PAL_STDCALL Device::PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex, Pal::Developer::CallbackType type, void* pCbData) { #ifdef PAL_GPUOPEN_OCL @@ -1248,7 +1176,7 @@ bool Device::initializeHeapResources() { // Setup trap handler if available if (trap_handler_ != nullptr) { auto program = - reinterpret_cast(trap_handler_->getDeviceProgram(*this)); + reinterpret_cast(trap_handler_->getDeviceProgram(*this)); if (program != nullptr) { Pal::Result result{Pal::Result::Success}; Pal::GpuMemoryRef memRef = {}; @@ -1314,12 +1242,7 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) { } device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) { - device::Program* program; - if (settings().useLightning_) { - program = new LightningProgram(*this, owner); - } else { - program = new HSAILProgram(*this, owner); - } + device::Program* program = new pal::Program(*this, owner); if (program == nullptr) { LogError("We failed memory allocation for program!"); } @@ -1534,12 +1457,6 @@ void Device::tearDown() { delete platformObj_; platform_ = nullptr; } -#if defined(WITH_COMPILER_LIB) - if (compiler_ != nullptr) { - amd::Hsail::CompilerFini(compiler_); - compiler_ = nullptr; - } -#endif // defined(WITH_COMPILER_LIB) } Memory* Device::getGpuMemory(amd::Memory* mem) const { @@ -2361,7 +2278,7 @@ bool Device::validateKernel(const amd::Kernel& kernel, const device::VirtualDevi } } - const HSAILKernel* hsaKernel = static_cast(devKernel); + const pal::Kernel* hsaKernel = static_cast(devKernel); if (hsaKernel->dynamicParallelism()) { if (settings().useDeviceQueue_) { amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(*this); @@ -2805,39 +2722,33 @@ bool Device::createBlitProgram() { } else { if (settings().oclVersion_ >= OpenCL20) { extraBlits = iDev()->GetDispatchKernelSource(); - if (settings().useLightning_) { - extraBlits.append(SchedulerSourceCode20); - } else { - extraBlits.append(SchedulerSourceCode); - } + extraBlits.append(SchedulerSourceCode20); ocl20 = "-cl-std=CL2.0"; } } - if (settings().useLightning_) { - const std::string TrapHandlerAsm = TrapHandlerCode; - // Create a program for trap handler - // note: It's not critical for runtime functionality to fail trap handler initialization - auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly); - if (asm_program != nullptr) { - std::vector devices; - devices.push_back(this); - std::string opt = "-cl-internal-kernel "; - if (auto retval = - asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) { - DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval); - asm_program->release(); - } else { - if (asm_program->load()) { - trap_handler_ = asm_program; - } else { - DevLogError("Could not load the trap handler \n"); - asm_program->release(); - } - } + const std::string TrapHandlerAsm = TrapHandlerCode; + // Create a program for trap handler + // note: It's not critical for runtime functionality to fail trap handler initialization + auto asm_program = new amd::Program(*context_, TrapHandlerAsm.c_str(), amd::Program::Assembly); + if (asm_program != nullptr) { + std::vector devices; + devices.push_back(this); + std::string opt = "-cl-internal-kernel "; + if (auto retval = + asm_program->build(devices, opt.c_str(), nullptr, nullptr, false) != CL_SUCCESS) { + DevLogPrintfError("Build failed for trap handler with error code: %d\n", retval); + asm_program->release(); } else { - DevLogError("Trap handler creation failed\n"); + if (asm_program->load()) { + trap_handler_ = asm_program; + } else { + DevLogError("Could not load the trap handler \n"); + asm_program->release(); + } } + } else { + DevLogError("Trap handler creation failed\n"); } blitProgram_ = new BlitProgram(context_); diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp index 5461430639..2c7d0e539d 100644 --- a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -38,7 +38,6 @@ #include "device/pal/palappprofile.hpp" #include "device/pal/palcapturemgr.hpp" #include "device/pal/palsignal.hpp" -#include "acl.h" #include "memory" #include @@ -58,16 +57,6 @@ namespace amd::pal { //! A nil device object class NullDevice : public amd::Device { - protected: -#if defined(WITH_COMPILER_LIB) - static Compiler* compiler_; -#endif - - public: -#if defined(WITH_COMPILER_LIB) - Compiler* compiler() const { return compiler_; } -#endif - public: static bool init(void); diff --git a/projects/clr/rocclr/device/pal/palgpuopen.cpp b/projects/clr/rocclr/device/pal/palgpuopen.cpp index de16c0ab0a..c8875ae9a4 100644 --- a/projects/clr/rocclr/device/pal/palgpuopen.cpp +++ b/projects/clr/rocclr/device/pal/palgpuopen.cpp @@ -358,7 +358,7 @@ Pal::Result RgpCaptureMgr::CheckForTraceResults() { // ================================================================================================ // Called after a swap chain presents. This signals a (next) frame-begin boundary and is // used to coordinate RGP trace start/stop. -void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, +void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) { // Wait for the driver to be resumed in case it's been paused. WaitForDriverResume(); diff --git a/projects/clr/rocclr/device/pal/palgpuopen.hpp b/projects/clr/rocclr/device/pal/palgpuopen.hpp index b6fc464410..f7c872d1cd 100644 --- a/projects/clr/rocclr/device/pal/palgpuopen.hpp +++ b/projects/clr/rocclr/device/pal/palgpuopen.hpp @@ -41,7 +41,7 @@ namespace amd::pal { class Settings; class Device; class VirtualGPU; -class HSAILKernel; +class Kernel; // ================================================================================================ enum class RgpSqqtBarrierReason : uint32_t { @@ -99,7 +99,7 @@ class RgpCaptureMgr final : public ICaptureMgr { static RgpCaptureMgr* Create(Pal::IPlatform* platform, const Device& device); - void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, + void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) override; void PostDispatch(VirtualGPU* gpu) override; @@ -230,7 +230,7 @@ class RgpCaptureMgr { Pal::SubmitInfo& submitInfo) const { return Pal::Result::Success; } - void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, size_t z) {} + void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) {} void PostDispatch(VirtualGPU* gpu) {} void FinishRGPTrace(VirtualGPU* gpu, bool aborted) {} bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const { diff --git a/projects/clr/rocclr/device/pal/palkernel.cpp b/projects/clr/rocclr/device/pal/palkernel.cpp index ad351358cb..4d51bd2729 100644 --- a/projects/clr/rocclr/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/device/pal/palkernel.cpp @@ -25,7 +25,6 @@ #include "device/pal/palsched.hpp" #include "platform/commandqueue.hpp" #include "utils/options.hpp" -#include "hsailctx.hpp" #include #include #include @@ -36,9 +35,9 @@ namespace amd::pal { -void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize, - const uint32_t groupSegmentSize, const uint16_t numSGPRs, - const uint16_t numVGPRs) { +void Kernel::setWorkGroupInfo(const uint32_t privateSegmentSize, + const uint32_t groupSegmentSize, const uint16_t numSGPRs, + const uint16_t numVGPRs) { workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint32_t); // Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave constexpr uint32_t ScratchRegAlignment = 256; @@ -71,7 +70,7 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize, static_cast(workGroupInfo_.availableLDSSize_ - workGroupInfo_.localMemSize_); } -bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) { +bool Kernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) { if (!sym) { return false; } @@ -86,185 +85,94 @@ bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t return true; } -HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel) +Kernel::Kernel(std::string name, pal::Program* prog, bool internalKernel) : device::Kernel(prog->device(), name, *prog), index_(0), code_(0), codeSize_(0) { flags_.hsa_ = true; flags_.internalKernel_ = internalKernel; } -HSAILKernel::~HSAILKernel() {} +Kernel::~Kernel() {} -bool HSAILKernel::postLoad() { return true; } +bool Kernel::postLoad() { + if (codeObjectVer() == 2) { + symbolName_ = name(); + } -bool HSAILKernel::init() { -#if defined(WITH_COMPILER_LIB) + // Copy codeobject of this kernel from the program CPU segment hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; - std::string openClKernelName = openclMangledName(name()); - amd::hsa::loader::Symbol* sym = prog().getSymbol(openClKernelName.c_str(), &agent); - if (!sym) { - LogPrintfError("Error: Getting kernel ISA code symbol %s from AMD HSA Code Object failed.\n", - openClKernelName.c_str()); + + auto sym = prog().getSymbol(symbolName().c_str(), &agent); + + if (!setKernelDescriptor(sym, &akd_)) { return false; } - - amd_kernel_code_t* akc = &akc_; - - if (!setKernelCode(sym, akc)) { - LogError("Error: setKernelCode() failed."); + if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, + reinterpret_cast(&kernelHasDynamicCallStack_))) { return false; } + if (!prog().isNull()) { + codeSize_ = prog().codeSegGpu().owner()->getSize(); - if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE, - reinterpret_cast(&codeSize_))) { - LogError("Error: sym->GetInfo() failed."); - return false; + // handle device enqueue + if (!RuntimeHandle().empty()) { + amd::hsa::loader::Symbol* rth_symbol; + + // Get the runtime handle symbol GPU address + rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent); + uint64_t symbol_address; + rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address); + + // Copy the kernel_object pointer to the runtime handle symbol GPU address + const Memory& codeSegGpu = prog().codeSegGpu(); + uint64_t offset = symbol_address - codeSegGpu.vmAddress(); + uint64_t kernel_object = gpuAqlCode(); + VirtualGPU* gpu = codeSegGpu.dev().xferQueue(); + + const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()}; + + codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true); + } } // Setup the the workgroup info - setWorkGroupInfo(akc->workitem_private_segment_byte_size, akc->workgroup_group_segment_byte_size, - akc->wavefront_sgpr_count, akc->workitem_vgpr_count); - - workgroupGroupSegmentByteSize_ = workGroupInfo_.usedLDSSize_; - kernargSegmentByteSize_ = akc->kernarg_segment_byte_size; - - // Pull out metadata from the ELF - size_t sizeOfArgList; - acl_error error = - amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY, - openClKernelName.c_str(), nullptr, &sizeOfArgList); - if (error != ACL_SUCCESS) { - return false; - } - - char* aclArgList = new char[sizeOfArgList]; - if (nullptr == aclArgList) { - return false; - } - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_ARGUMENT_ARRAY, - openClKernelName.c_str(), aclArgList, &sizeOfArgList); - if (error != ACL_SUCCESS) { - return false; - } - // Set the argList - InitParameters(reinterpret_cast(aclArgList), argsBufferSize()); - delete[] aclArgList; - - size_t sizeOfWorkGroupSize; - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE, - openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize); - if (error != ACL_SUCCESS) { - return false; - } - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_WORK_GROUP_SIZE, - openClKernelName.c_str(), workGroupInfo_.compileSize_, - &sizeOfWorkGroupSize); - if (error != ACL_SUCCESS) { - return false; - } + setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(), + workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_); // Copy wavefront size workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_; - // Find total workgroup size - if (workGroupInfo_.compileSize_[0] != 0) { - workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] * - workGroupInfo_.compileSize_[2]; - } else { - workGroupInfo_.size_ = device().info().preferredWorkGroupSize_; + workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_; + if (workGroupInfo_.size_ == 0) { + return false; + } + if ((workGroupInfo_.usedStackSize_ & 0x1) == 0x1) { + workGroupInfo_.scratchRegs_ = + std::max(device().StackSize(), workGroupInfo_.scratchRegs_ * sizeof(uint32_t)); + workGroupInfo_.scratchRegs_ = amd::alignUp(workGroupInfo_.scratchRegs_, 16) / sizeof(uint32_t); + workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t); } - // Pull out printf metadata from the ELF - size_t sizeOfPrintfList; - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY, - openClKernelName.c_str(), nullptr, &sizeOfPrintfList); - if (error != ACL_SUCCESS) { + // handle the printf metadata if any + std::vector printfStr; + if (!GetPrintfStr(&printfStr)) { return false; } - // Make sure kernel has any printf info - if (0 != sizeOfPrintfList) { - char* aclPrintfList = new char[sizeOfPrintfList]; - if (nullptr == aclPrintfList) { - return false; - } - error = - amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_GPU_PRINTF_ARRAY, - openClKernelName.c_str(), aclPrintfList, &sizeOfPrintfList); - if (error != ACL_SUCCESS) { - return false; - } - - // Set the PrintfList - InitPrintf(reinterpret_cast(aclPrintfList)); - delete[] aclPrintfList; + if (!printfStr.empty()) { + InitPrintf(printfStr); } - aclMetadata md; - md.enqueue_kernel = false; - size_t sizeOfDeviceEnqueue = sizeof(md.enqueue_kernel); - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_DEVICE_ENQUEUE, - openClKernelName.c_str(), &md.enqueue_kernel, &sizeOfDeviceEnqueue); - if (error != ACL_SUCCESS) { - return false; - } - flags_.dynamicParallelism_ = md.enqueue_kernel; - - md.kernel_index = -1; - size_t sizeOfIndex = sizeof(md.kernel_index); - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_KERNEL_INDEX, - openClKernelName.c_str(), &md.kernel_index, &sizeOfIndex); - if (error != ACL_SUCCESS) { - return false; - } - index_ = md.kernel_index; - - size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_); - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), - RT_WAVES_PER_SIMD_HINT, openClKernelName.c_str(), - &workGroupInfo_.wavesPerSimdHint_, &sizeOfWavesPerSimdHint); - if (error != ACL_SUCCESS) { - return false; - } - - size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_); - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), - RT_WORK_GROUP_SIZE_HINT, openClKernelName.c_str(), - workGroupInfo_.compileSizeHint_, &sizeOfWorkGroupSizeHint); - if (error != ACL_SUCCESS) { - return false; - } - - size_t sizeOfVecTypeHint; - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT, - openClKernelName.c_str(), NULL, &sizeOfVecTypeHint); - if (error != ACL_SUCCESS) { - return false; - } - - if (0 != sizeOfVecTypeHint) { - char* VecTypeHint = new char[sizeOfVecTypeHint + 1]; - if (NULL == VecTypeHint) { - return false; - } - error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), RT_VEC_TYPE_HINT, - openClKernelName.c_str(), VecTypeHint, &sizeOfVecTypeHint); - if (error != ACL_SUCCESS) { - return false; - } - VecTypeHint[sizeOfVecTypeHint] = '\0'; - workGroupInfo_.compileVecTypeHint_ = std::string(VecTypeHint); - delete[] VecTypeHint; - } - -#endif // defined(WITH_COMPILER_LIB) return true; } -const HSAILProgram& HSAILKernel::prog() const { - return reinterpret_cast(prog_); +bool Kernel::init() { + return GetAttrCodePropMetadata(); } -// ================================================================================================ -hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel, +const pal::Program& Kernel::prog() const { + return reinterpret_cast(prog_); +} + +hsa_kernel_dispatch_packet_t* Kernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel, const amd::NDRangeContainer& sizes, const_address params, size_t ldsAddress, uint64_t vmDefQueue, @@ -496,86 +404,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const return hsaDisp; } -// ================================================================================================ -const LightningProgram& LightningKernel::prog() const { - return reinterpret_cast(prog_); -} - -#if defined(USE_COMGR_LIBRARY) -bool LightningKernel::init() { return GetAttrCodePropMetadata(); } - -bool LightningKernel::postLoad() { - if (codeObjectVer() == 2) { - symbolName_ = name(); - } - - // Copy codeobject of this kernel from the program CPU segment - hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; - - auto sym = prog().getSymbol(symbolName().c_str(), &agent); - - if (!setKernelDescriptor(sym, &akd_)) { - return false; - } - if (!sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK, - reinterpret_cast(&kernelHasDynamicCallStack_))) { - return false; - } - if (!prog().isNull()) { - codeSize_ = prog().codeSegGpu().owner()->getSize(); - - // handle device enqueue - if (!RuntimeHandle().empty()) { - amd::hsa::loader::Symbol* rth_symbol; - - // Get the runtime handle symbol GPU address - rth_symbol = prog().getSymbol(RuntimeHandle().c_str(), &agent); - uint64_t symbol_address; - rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address); - - // Copy the kernel_object pointer to the runtime handle symbol GPU address - const Memory& codeSegGpu = prog().codeSegGpu(); - uint64_t offset = symbol_address - codeSegGpu.vmAddress(); - uint64_t kernel_object = gpuAqlCode(); - VirtualGPU* gpu = codeSegGpu.dev().xferQueue(); - - const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()}; - - codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true); - } - } - - // Setup the the workgroup info - setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(), - workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_); - - // Copy wavefront size - workGroupInfo_.wavefrontSize_ = device().info().wavefrontWidth_; - workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_; - if (workGroupInfo_.size_ == 0) { - return false; - } - if ((workGroupInfo_.usedStackSize_ & 0x1) == 0x1) { - workGroupInfo_.scratchRegs_ = - std::max(device().StackSize(), workGroupInfo_.scratchRegs_ * sizeof(uint32_t)); - workGroupInfo_.scratchRegs_ = amd::alignUp(workGroupInfo_.scratchRegs_, 16) / sizeof(uint32_t); - workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t); - } - - // handle the printf metadata if any - std::vector printfStr; - if (!GetPrintfStr(&printfStr)) { - return false; - } - - if (!printfStr.empty()) { - InitPrintf(printfStr); - } - - return true; -} - -bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym, +bool Kernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd) { if (!sym) { return false; @@ -591,6 +420,4 @@ bool LightningKernel::setKernelDescriptor(amd::hsa::loader::Symbol* sym, return true; } -#endif // defined(USE_COMGR_LIBRARY) - } // namespace amd::pal diff --git a/projects/clr/rocclr/device/pal/palkernel.hpp b/projects/clr/rocclr/device/pal/palkernel.hpp index e94ff6a754..ccbd9a2093 100644 --- a/projects/clr/rocclr/device/pal/palkernel.hpp +++ b/projects/clr/rocclr/device/pal/palkernel.hpp @@ -52,17 +52,16 @@ namespace amd::pal { class VirtualGPU; class Device; class NullDevice; -class HSAILProgram; -class LightningProgram; +class Program; /*! \addtogroup pal PAL Device Implementation * @{ */ -class HSAILKernel : public device::Kernel { +class Kernel : public device::Kernel { public: - HSAILKernel(std::string name, HSAILProgram* prog, bool internalKernel); + Kernel(std::string name, pal::Program* prog, bool internalKernel); - virtual ~HSAILKernel(); + virtual ~Kernel(); //! Initializes the metadata required for this kernel, bool init(); @@ -80,7 +79,7 @@ class HSAILKernel : public device::Kernel { } //! Returns HSA program associated with this kernel - const HSAILProgram& prog() const; + const pal::Program& prog() const; //! Returns LDS size used in this kernel uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); } @@ -119,12 +118,15 @@ class HSAILKernel : public device::Kernel { //! Returns the kernel index in the program uint index() const { return index_; } + //! Get the kernel descriptor and copy the code object from the program CPU segment + bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd); + private: //! Disable copy constructor - HSAILKernel(const HSAILKernel&); + Kernel(const pal::Kernel&); //! Disable operator= - HSAILKernel& operator=(const HSAILKernel&); + Kernel& operator=(const pal::Kernel&); protected: //! Get the kernel code and copy the code object from the program CPU segment @@ -144,24 +146,5 @@ class HSAILKernel : public device::Kernel { size_t codeSize_; //!< Size of ISA code }; -class LightningKernel : public HSAILKernel { - public: - LightningKernel(const std::string& name, HSAILProgram* prog, bool internalKernel) - : HSAILKernel(name, prog, internalKernel) {} - - //! Returns Lightning program associated with this kernel - const LightningProgram& prog() const; - -#if defined(USE_COMGR_LIBRARY) - //! Get the kernel descriptor and copy the code object from the program CPU segment - bool setKernelDescriptor(amd::hsa::loader::Symbol* sym, llvm::amdhsa::kernel_descriptor_t* akd); - //! Initializes the metadata required for this kernel - bool init(); - - //! Setup after code object loading - bool postLoad(); -#endif -}; - /*@}*/ // namespace amd::pal } // namespace amd::pal diff --git a/projects/clr/rocclr/device/pal/palprogram.cpp b/projects/clr/rocclr/device/pal/palprogram.cpp index 1fc3897235..ba5c5e153a 100644 --- a/projects/clr/rocclr/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/device/pal/palprogram.cpp @@ -20,7 +20,6 @@ #include "os/os.hpp" #include "utils/flags.hpp" -#include "aclTypes.h" #include "device/pal/palprogram.hpp" #include "device/pal/palblit.hpp" #include "utils/options.hpp" @@ -67,7 +66,7 @@ bool Segment::gpuAddressOffset(uint64_t offAddr, size_t* offset) { return true; } -bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align, +bool Segment::alloc(pal::Program& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align, bool zero) { if (prog.isNull()) { LogError("[OCL] cannot create a mem object on an offline device!"); @@ -174,8 +173,9 @@ bool Segment::freeze(bool destroySysmem) { return result; } -HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) - : Program(device, owner), +// ================================================================================================ +Program::Program(Device& device, amd::Program& owner) + : device::Program(device, owner), rawBinary_(nullptr), kernels_(nullptr), codeSegGpu_(nullptr), @@ -186,10 +186,11 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) loaderContext_(this) { assert(device.isOnline()); loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); + isHIP_ = (owner.language() == amd::Program::HIP); } -HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) - : Program(device, owner), +Program::Program(NullDevice& device, amd::Program& owner) + : device::Program(device, owner), rawBinary_(nullptr), kernels_(nullptr), codeSegGpu_(nullptr), @@ -201,26 +202,14 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) assert(!device.isOnline()); isNull_ = true; loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); + isHIP_ = (owner.language() == amd::Program::HIP); } -HSAILProgram::~HSAILProgram() { +Program::~Program() { // Destroy internal static samplers for (auto& it : staticSamplers_) { delete it; } -#if defined(WITH_COMPILER_LIB) - if (rawBinary_ != nullptr) { - amd::Hsail::FreeMem(binaryElf_, rawBinary_); - } - acl_error error; - // Free the elf binary - if (binaryElf_ != nullptr) { - error = amd::Hsail::BinaryFini(binaryElf_); - if (error != ACL_SUCCESS) { - LogWarning("Error while destroying the acl binary \n"); - } - } -#endif // defined(WITH_COMPILER_LIB) releaseClBinary(); if (executable_) { loader_->DestroyExecutable(executable_); @@ -233,15 +222,6 @@ HSAILProgram::~HSAILProgram() { } } - -inline static std::vector splitSpaceSeparatedString(char* str) { - std::string s(str); - std::stringstream ss(s); - std::istream_iterator beg(ss), end; - std::vector vec(beg, end); - return vec; -} - inline static std::string GetUriFromMemoryAddress(const void* memory, size_t size) { int pid = amd::Os::getProcessId(); std::ostringstream uri_stream; @@ -250,100 +230,7 @@ inline static std::string GetUriFromMemoryAddress(const void* memory, size_t siz return uri_stream.str(); } -bool HSAILProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, - bool internalKernel) { -#if defined(WITH_COMPILER_LIB) - // ACL_TYPE_CG stage is not performed for offline compilation - executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr); - if (executable_ == nullptr) { - buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n"; - return false; - } - size_t size = binSize; - hsa_code_object_t code_object; - code_object.handle = reinterpret_cast(binary); - - hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; - auto uri = GetUriFromMemoryAddress(binary, binSize); - hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: AMD HSA Code Object loading failed.\n"; - return false; - } - status = loader_->FreezeExecutable(executable_, nullptr); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: AMD HSA Code Object freeze failed.\n"; - return false; - } - - size_t kernelNamesSize = 0; - acl_error errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, - RT_KERNEL_NAMES, nullptr, nullptr, &kernelNamesSize); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error: Querying of kernel names size from the binary failed.\n"; - return false; - } - if (kernelNamesSize > 0) { - std::vector kernelNames(kernelNamesSize); - errorCode = amd::Hsail::QueryInfo(palNullDevice().compiler(), binaryElf_, RT_KERNEL_NAMES, - nullptr, kernelNames.data(), &kernelNamesSize); - if (errorCode != ACL_SUCCESS) { - buildLog_ += "Error: Querying of kernel names from the binary failed.\n"; - return false; - } - std::vector vKernels = splitSpaceSeparatedString(kernelNames.data()); - for (const auto& it : vKernels) { - std::string kernelName(it); - - HSAILKernel* aKernel = new HSAILKernel(kernelName, this, internalKernel); - addKernel(aKernel); - - if (!aKernel->init()) { - buildLog_ += "Error: Kernel initialization failed.\n"; - return false; - } - - aKernel->setUniformWorkGroupSize(useUniformWorkGroupSize); - } - } - - DestroySegmentCpuAccess(); -#endif // defined(WITH_COMPILER_LIB) - return true; -} - -bool HSAILProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset, - std::string uri) { -#if defined(WITH_COMPILER_LIB) - if (!device().isOnline()) { - return true; - } - - bool dynamicParallelism = false; - for (auto& kit : kernels()) { - HSAILKernel* aKernel = static_cast(kit.second); - if (!aKernel->postLoad()) { - return false; - } - dynamicParallelism |= aKernel->dynamicParallelism(); - // Find max scratch regs used in the program. It's used for scratch buffer preallocation - // with dynamic parallelism, since runtime doesn't know which child kernel will be called - maxScratchRegs_ = - std::max(static_cast(aKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_); - maxVgprs_ = std::max(static_cast(aKernel->workGroupInfo()->usedVGPRs_), maxVgprs_); - } - - // Allocate kernel table for device enqueuing - if (!isNull() && dynamicParallelism && !allocKernelTable()) { - return false; - } -#endif // defined(WITH_COMPILER_LIB) - return true; -} - -bool HSAILProgram::createBinary(amd::option::Options* options) { return true; } - -bool HSAILProgram::allocKernelTable() { +bool Program::allocKernelTable() { if (isNull()) { // Cannot create a kernel table for offline devices. return false; @@ -359,7 +246,7 @@ bool HSAILProgram::allocKernelTable() { } else { size_t* table = reinterpret_cast(kernels_->map(nullptr, pal::Resource::WriteOnly)); for (auto& it : kernels()) { - HSAILKernel* kernel = static_cast(it.second); + pal::Kernel* kernel = static_cast(it.second); table[kernel->index()] = static_cast(kernel->gpuAqlCode()); } kernels_->unmap(nullptr); @@ -367,41 +254,9 @@ bool HSAILProgram::allocKernelTable() { return true; } -void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); } +void Program::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); } -#if defined(WITH_COMPILER_LIB) -const aclTargetInfo& HSAILProgram::info() { - acl_error err; - info_ = amd::Hsail::GetTargetInfo(palNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail", - device().isa().hsailName(), &err); - if (err != ACL_SUCCESS) { - LogWarning("aclGetTargetInfo failed"); - } - return info_; -} -#endif - -bool HSAILProgram::saveBinaryAndSetType(type_t type) { -#if defined(WITH_COMPILER_LIB) - // Write binary to memory - if (rawBinary_ != nullptr) { - // Free memory containing rawBinary - amd::Hsail::FreeMem(binaryElf_, rawBinary_); - rawBinary_ = nullptr; - } - size_t size = 0; - if (amd::Hsail::WriteToMem(binaryElf_, &rawBinary_, &size) != ACL_SUCCESS) { - buildLog_ += "Failed to write binary to memory \n"; - return false; - } - setBinary(static_cast(rawBinary_), size); - // Set the type of binary - setType(type); -#endif // defined(WITH_COMPILER_LIB) - return true; -} - -bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) { +bool Program::defineGlobalVar(const char* name, void* dptr) { if (!device().isOnline()) { return false; } @@ -419,7 +274,7 @@ bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) { return true; } -bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes, +bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes, const char* global_name) const { if (!device().isOnline()) { return false; @@ -528,6 +383,107 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p return true; } +bool Program::createBinary(amd::option::Options* options) { + if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) { + LogError("Failed to create ELF binary image!"); + return false; + } + return true; +} + +bool Program::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, + bool internalKernel) { + // Skip metadata look-up and kernel creation for assembly and internal kernel. + // @note: Runtime compiles only the second level trap handler from assembly + if ((owner()->language() != amd::Program::Assembly) || !internal_) { + // Find the size of global variables from the binary + if (!FindGlobalVarSize(binary, binSize)) { + buildLog_ += "Error: Cannot Find Global Var Sizes\n"; + return false; + } + + for (const auto& kernelMeta : kernelMetadataMap_) { + auto kernelName = kernelMeta.first; + auto kernel = new pal::Kernel(kernelName, this, internalKernel); + if (kernel == nullptr) { + return false; + } + if (!kernel->init()) { + buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n"; + return false; + } + addKernel(kernel); + + if (codeObjectVer() < 5) { + kernel->setUniformWorkGroupSize(useUniformWorkGroupSize); + } + } + } + executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr); + if (executable_ == nullptr) { + LogError("Error: Executable for AMD HSA Code Object isn't created."); + return false; + } + + hsa_code_object_t code_object; + code_object.handle = reinterpret_cast(binary); + + hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; + auto uri = GetUriFromMemoryAddress(binary, binSize); + hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri); + if (status != HSA_STATUS_SUCCESS) { + LogError("Error: AMD HSA Code Object loading failed."); + return false; + } + + if (isInternal() && (owner()->language() == amd::Program::Assembly)) { + // Don't register trap handler with the debugger, since user shouldn't see this kernel + status = executable_->Freeze(nullptr); + trapHandler_ = true; + } else { + status = loader_->FreezeExecutable(executable_, nullptr); + } + if (status != HSA_STATUS_SUCCESS) { + LogError("Error: Freezing the executable failed."); + return false; + } + return true; +} + +bool Program::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, + size_t foffset, std::string uri) { + // Collect the information about compiled binary, except the trap handler + if (!isNull() && (palDevice().captureMgr() != nullptr) && !isTrapHandler()) { + apiHash_ = palDevice().captureMgr()->AddElfBinary(binary, binSize, binary, binSize, + codeSegGpu_->iMem(), codeSegGpu_->offset()); + } + + for (auto& kit : kernels()) { + pal::Kernel* kernel = static_cast(kit.second); + if (!kernel->postLoad()) { + return false; + } + // Find max scratch regs used in the program. It's used for scratch buffer preallocation + // with dynamic parallelism, since runtime doesn't know which child kernel will be called + maxScratchRegs_ = + std::max(static_cast(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_); + maxVgprs_ = std::max(static_cast(kernel->workGroupInfo()->usedVGPRs_), maxVgprs_); + } + DestroySegmentCpuAccess(); + return true; +} + +uint64_t Program::GetTrapHandlerAddress() const { + uint64_t address = 0; + hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; + auto trap_sym = executable_->GetSymbol("trap_entry", &agent); + if (trap_sym != nullptr) { + trap_sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &address); + } + return address; +} + +// ================================================================================================ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) { const amd::Isa* isa_p = amd::Isa::findIsa(name); return {amd::Isa::toHandle(isa_p)}; @@ -696,12 +652,10 @@ hsa_status_t PALHSALoaderContext::SamplerDestroy(hsa_agent_t agent, if (!sampler_handle.handle) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - // Samplers will be destroyed by the pal::HSAILProgam destructor. + // Samplers will be destroyed by the pal::Program destructor. return HSA_STATUS_SUCCESS; } -#if defined(USE_COMGR_LIBRARY) - static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executable_symbol_t hSymbol, void* data) { auto symbol = amd::hsa::loader::Symbol::Object(hSymbol); @@ -729,115 +683,4 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl return HSA_STATUS_SUCCESS; } -#endif // defined(USE_COMGR_LIBRARY) - -bool LightningProgram::createBinary(amd::option::Options* options) { -#if defined(USE_COMGR_LIBRARY) - if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) { - LogError("Failed to create ELF binary image!"); - return false; - } -#endif // defined(USE_COMGR_LIBRARY) - return true; -} - -// ================================================================================================ -bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, - bool internalKernel) { -#if defined(USE_COMGR_LIBRARY) - // Skip metadata look-up and kernel creation for assembly and internal kernel. - // @note: Runtime compiles only the second level trap handler from assembly - if ((owner()->language() != amd::Program::Assembly) || !internal_) { - // Find the size of global variables from the binary - if (!FindGlobalVarSize(binary, binSize)) { - buildLog_ += "Error: Cannot Find Global Var Sizes\n"; - return false; - } - - for (const auto& kernelMeta : kernelMetadataMap_) { - auto kernelName = kernelMeta.first; - auto kernel = new LightningKernel(kernelName, this, internalKernel); - if (kernel == nullptr) { - return false; - } - if (!kernel->init()) { - buildLog_ += "[ROC][Kernel] Could not get Code Prop Meta Data \n"; - return false; - } - addKernel(kernel); - - if (codeObjectVer() < 5) { - kernel->setUniformWorkGroupSize(useUniformWorkGroupSize); - } - } - } - executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr); - if (executable_ == nullptr) { - LogError("Error: Executable for AMD HSA Code Object isn't created."); - return false; - } - - hsa_code_object_t code_object; - code_object.handle = reinterpret_cast(binary); - - hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; - auto uri = GetUriFromMemoryAddress(binary, binSize); - hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr, uri); - if (status != HSA_STATUS_SUCCESS) { - LogError("Error: AMD HSA Code Object loading failed."); - return false; - } - - if (isInternal() && (owner()->language() == amd::Program::Assembly)) { - // Don't register trap handler with the debugger, since user shouldn't see this kernel - status = executable_->Freeze(nullptr); - trapHandler_ = true; - } else { - status = loader_->FreezeExecutable(executable_, nullptr); - } - if (status != HSA_STATUS_SUCCESS) { - LogError("Error: Freezing the executable failed."); - return false; - } -#endif - return true; -} - -// ================================================================================================ -bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, - size_t foffset, std::string uri) { -#if defined(USE_COMGR_LIBRARY) - // Collect the information about compiled binary, except the trap handler - if (!isNull() && (palDevice().captureMgr() != nullptr) && !isTrapHandler()) { - apiHash_ = palDevice().captureMgr()->AddElfBinary(binary, binSize, binary, binSize, - codeSegGpu_->iMem(), codeSegGpu_->offset()); - } - - for (auto& kit : kernels()) { - LightningKernel* kernel = static_cast(kit.second); - if (!kernel->postLoad()) { - return false; - } - // Find max scratch regs used in the program. It's used for scratch buffer preallocation - // with dynamic parallelism, since runtime doesn't know which child kernel will be called - maxScratchRegs_ = - std::max(static_cast(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_); - maxVgprs_ = std::max(static_cast(kernel->workGroupInfo()->usedVGPRs_), maxVgprs_); - } - DestroySegmentCpuAccess(); -#endif // defined(USE_COMGR_LIBRARY) - return true; -} - -// ================================================================================================ -uint64_t LightningProgram::GetTrapHandlerAddress() const { - uint64_t address = 0; - hsa_agent_t agent = {amd::Device::toHandle(&(device()))}; - auto trap_sym = executable_->GetSymbol("trap_entry", &agent); - if (trap_sym != nullptr) { - trap_sym->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &address); - } - return address; -} - } // namespace amd::pal diff --git a/projects/clr/rocclr/device/pal/palprogram.hpp b/projects/clr/rocclr/device/pal/palprogram.hpp index 7ae4cc0926..f10b9dbe95 100644 --- a/projects/clr/rocclr/device/pal/palprogram.hpp +++ b/projects/clr/rocclr/device/pal/palprogram.hpp @@ -44,7 +44,7 @@ namespace amd::pal { */ using namespace amd::hsa::loader; -class HSAILProgram; +class Program; class Segment : public amd::HeapObject { public: @@ -52,7 +52,7 @@ class Segment : public amd::HeapObject { ~Segment(); //! Allocates a segment - bool alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align, + bool alloc(Program& prog, amdgpu_hsa_elf_segment_t segment, size_t size, size_t align, bool zero); //! Copies data from host to the segment @@ -81,7 +81,7 @@ class Segment : public amd::HeapObject { class PALHSALoaderContext final : public hsa::loader::Context { public: - PALHSALoaderContext(HSAILProgram* program) : program_(program) {} + PALHSALoaderContext(pal::Program* program) : program_(program) {} virtual ~PALHSALoaderContext() {} @@ -127,26 +127,26 @@ class PALHSALoaderContext final : public hsa::loader::Context { const hsa_ext_sampler_descriptor_t* sampler_descriptor, hsa_ext_sampler_t* sampler_handle) override; - //! All samplers are owned by HSAILProgram and are deleted in its destructor. + //! All samplers are owned by pal program and are deleted in its destructor. hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override; private: PALHSALoaderContext(const PALHSALoaderContext& c); PALHSALoaderContext& operator=(const PALHSALoaderContext& c); - pal::HSAILProgram* program_; + pal::Program* program_; }; -//! \class HSAIL program -class HSAILProgram : public device::Program { +//! \class pal program +class Program : public device::Program { friend class ClBinary; public: //! Default constructor - HSAILProgram(Device& device, amd::Program& owner); - HSAILProgram(NullDevice& device, amd::Program& owner); + Program(Device& device, amd::Program& owner); + Program(NullDevice& device, amd::Program& owner); //! Default destructor - virtual ~HSAILProgram(); + virtual ~Program(); void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); } @@ -201,14 +201,9 @@ class HSAILProgram : public device::Program { //! Returns API hash value of the program for RGP thread trace uint64_t ApiHash() const { return apiHash_; } - protected: - bool saveBinaryAndSetType(type_t type); + //! Returns the load address of the trap handler + uint64_t GetTrapHandlerAddress() const; - virtual bool createBinary(amd::option::Options* options); - -#if defined(WITH_COMPILER_LIB) - virtual const aclTargetInfo& info(); -#endif virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, bool internalKernel) override; @@ -216,6 +211,9 @@ class HSAILProgram : public device::Program { amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0, std::string uri = std::string()) override; + virtual bool createBinary(amd::option::Options* options) override; + + protected: //! Destroys CPU allocations in the code segment void DestroySegmentCpuAccess() const { if (codeSegment_ != nullptr) { @@ -229,10 +227,10 @@ class HSAILProgram : public device::Program { private: //! Disable default copy constructor - HSAILProgram(const HSAILProgram&); + Program(const Program&); //! Disable operator= - HSAILProgram& operator=(const HSAILProgram&); + Program& operator=(const Program&); protected: //! Allocate kernel table @@ -256,31 +254,5 @@ class HSAILProgram : public device::Program { PALHSALoaderContext loaderContext_; //!< Context for HSA Loader }; -//! \class Lightning Compiler Program -class LightningProgram : public HSAILProgram { - public: - LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) { - isLC_ = true; - isHIP_ = (owner.language() == amd::Program::HIP); - } - - LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) { - isLC_ = true; - isHIP_ = (owner.language() == amd::Program::HIP); - } - virtual ~LightningProgram() {} - uint64_t GetTrapHandlerAddress() const; - - protected: - virtual bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, - bool internalKernel) override; - - virtual bool setKernels(void* binary, size_t binSize, - amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0, - std::string uri = std::string()) override; - - virtual bool createBinary(amd::option::Options* options) override; -}; - /*@}*/ // namespace amd::pal } // namespace amd::pal diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index c7be6ab9ec..c7a8c50340 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -119,10 +119,8 @@ Settings::Settings() { std::min(static_cast(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_); maxCmdBuffers_ = 12; - useLightning_ = amd::IS_HIP ? true : ((!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : false); enableWgpMode_ = false; enableWave32Mode_ = false; - hsailExplicitXnack_ = false; lcWavefrontSize64_ = true; enableHwP2P_ = false; imageBufferWar_ = false; @@ -152,7 +150,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, } enableXNACK_ = (isa.xnack() == amd::Isa::Feature::Enabled); - hsailExplicitXnack_ = enableXNACK_; bool useWavefront64 = false; std::string appName = {}; @@ -192,11 +189,8 @@ bool Settings::create(const Pal::DeviceProperties& palProp, case Pal::AsicRevision::Navi14: case Pal::AsicRevision::Navi12: case Pal::AsicRevision::Navi10: - useLightning_ = GPU_ENABLE_LC; enableWgpMode_ = GPU_ENABLE_WGP_MODE; - if (useLightning_) { - enableWave32Mode_ = true; - } + enableWave32Mode_ = true; if (!flagIsDefault(GPU_ENABLE_WAVE32_MODE)) { enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE; } @@ -211,9 +205,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp, enableHwP2P_ = true; enableCoopGroups_ = IS_LINUX; enableCoopMultiDeviceGroups_ = IS_LINUX; - if (useLightning_) { - singleFpDenorm_ = true; - } + singleFpDenorm_ = true; enableExtension(ClKhrFp16); threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE; // Cache line size is 64 bytes @@ -279,11 +271,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, enableExtension(ClAmdCopyBufferP2P); } - if (!useLightning_) { - enableExtension(ClAmdPopcnt); - enableExtension(ClAmdVec3); - enableExtension(ClAmdPrintf); - } // Enable some platform extensions enableExtension(ClAmdDeviceAttributeQuery); @@ -306,12 +293,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, enableExtension(ClKhrFp64); } - if (!useLightning_) { - // Enable AMD double precision extension - doublePrecision_ = true; - enableExtension(ClAmdFp64); - } - if (palProp.gpuMemoryProperties.busAddressableMemSize > 0) { // Enable bus addressable memory extension enableExtension(ClAMDBusAddressableMemory); diff --git a/projects/clr/rocclr/device/pal/palubercapturemgr.cpp b/projects/clr/rocclr/device/pal/palubercapturemgr.cpp index 650c8e2a08..d0153d1448 100644 --- a/projects/clr/rocclr/device/pal/palubercapturemgr.cpp +++ b/projects/clr/rocclr/device/pal/palubercapturemgr.cpp @@ -216,7 +216,7 @@ bool UberTraceCaptureMgr::Init(Pal::IPlatform* platform) { } // ================================================================================================ -void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, +void UberTraceCaptureMgr::PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) { // Wait for the driver to be resumed in case it's been paused. WaitForDriverResume(); diff --git a/projects/clr/rocclr/device/pal/palubercapturemgr.hpp b/projects/clr/rocclr/device/pal/palubercapturemgr.hpp index c3bce414c8..8d8009711f 100644 --- a/projects/clr/rocclr/device/pal/palubercapturemgr.hpp +++ b/projects/clr/rocclr/device/pal/palubercapturemgr.hpp @@ -45,7 +45,7 @@ class UberTraceCaptureMgr final : public ICaptureMgr { bool Update(Pal::IPlatform* platform) override; - void PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y, + void PreDispatch(VirtualGPU* gpu, const pal::Kernel& kernel, size_t x, size_t y, size_t z) override; void PostDispatch(VirtualGPU* gpu) override; diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 9cd3be0f95..33445a1ceb 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -2346,7 +2346,7 @@ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { } // ================================================================================================ -void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQueue) { +void VirtualGPU::PrintChildren(const pal::Kernel& hsaKernel, VirtualGPU* gpuDefQueue) { AmdAqlWrap* wraps = (AmdAqlWrap*)(&((AmdVQueueHeader*)gpuDefQueue->virtualQueue_->data())[1]); uint p = 0; for (uint i = 0; i < gpuDefQueue->vqHeader_->aql_slot_num; ++i) { @@ -2381,11 +2381,11 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ print << wraps[i].aql.grid_size_y << ", "; print << wraps[i].aql.grid_size_z << "]\n"; - HSAILKernel* child = nullptr; + pal::Kernel* child = nullptr; for (auto it = hsaKernel.prog().kernels().begin(); it != hsaKernel.prog().kernels().end(); ++it) { - if (wraps[i].aql.kernel_object == static_cast(it->second)->gpuAqlCode()) { - child = static_cast(it->second); + if (wraps[i].aql.kernel_object == static_cast(it->second)->gpuAqlCode()) { + child = static_cast(it->second); } } if (child == nullptr) { @@ -2449,7 +2449,7 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ } // ================================================================================================ -bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel, +bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const pal::Kernel& hsaKernel, VirtualGPU** gpuDefQueue, uint64_t* vmDefQueue) { amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev()); if (nullptr == defQueue) { @@ -2482,7 +2482,7 @@ bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& } // ================================================================================================ -void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel, +void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const pal::Kernel& hsaKernel, VirtualGPU* gpuDefQueue, uint64_t vmDefQueue, uint64_t vmParentWrap, GpuEvent* gpuEvent) { uint32_t id = gpuEvent->id_; @@ -2628,7 +2628,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const state_.anyOrder_ = anyOrder; // Get the HSA kernel object - const HSAILKernel& hsaKernel = static_cast(*(kernel.getDeviceKernel(dev()))); + const pal::Kernel& hsaKernel = static_cast(*(kernel.getDeviceKernel(dev()))); // If RGP capturing is enabled, then start SQTT trace if (rgpCaptureEna()) { @@ -2696,7 +2696,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const assert((nullptr != aqlPkt) && "Couldn't load kernel arguments"); // Dynamic call stack size is considered to calculate private segment size and scratch regs - // in LightningKernel::postLoad(). As it is not called during hipModuleLaunchKernel unlike + // in pal::Kernel::postLoad(). As it is not called during hipModuleLaunchKernel unlike // hipLaunchKernel/hipLaunchKernelGGL, Updated value is passed to dispatch packet. size_t privateMemSize = hsaKernel.spillSegSize(); if ((hsaKernel.workGroupInfo()->usedStackSize_ & 0x1) == 0x1) { @@ -2725,13 +2725,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const } dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlKd(); dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress(); - if (!hsaKernel.prog().isLC() && hsaKernel.workGroupInfo()->wavesPerSimdHint_ != 0) { - constexpr uint32_t kWavesPerSimdLimit = 4; - dispatchParam.wavesPerSh = - kWavesPerSimdLimit * dev().info().cuPerShaderArray_ * dev().info().simdPerCU_; - } else { - dispatchParam.wavesPerSh = 0; - } + dispatchParam.wavesPerSh = 0; dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false; dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize(); dispatchParam.aqlPacketIndex = aql_index; @@ -3584,7 +3578,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p bool srdResource = false; amd::Memory* const* memories = reinterpret_cast(params + kernelParams.memoryObjOffset()); - const HSAILKernel& hsaKernel = static_cast(*(kernel.getDeviceKernel(dev()))); + const pal::Kernel& hsaKernel = static_cast(*(kernel.getDeviceKernel(dev()))); const amd::KernelSignature& signature = kernel.signature(); ldsAddress = hsaKernel.ldsSize(); diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index 98d276429b..30d216f7a6 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -45,14 +45,13 @@ namespace amd::pal { class Device; -class Kernel; class Memory; class CalCounterReference; class VirtualGPU; class Program; class BlitManager; class ThreadTrace; -class HSAILKernel; +class Kernel; struct AqlPacketMgmt : public amd::EmbeddedObject { static constexpr uint32_t kAqlPacketsListSize = 4 * Ki; @@ -693,19 +692,19 @@ class VirtualGPU : public device::VirtualDevice { amd::CopyMetadata copyMetadata = amd::CopyMetadata() //!< Memory copy MetaData ); - void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel + void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSAIL kernel VirtualGPU* gpuDefQueue //!< Device queue for children execution ); bool PreDeviceEnqueue(const amd::Kernel& kernel, //!< Parent amd kernel object - const HSAILKernel& hsaKernel, //!< Parent HSAIL object + const pal::Kernel& hsaKernel, //!< Parent HSAIL object VirtualGPU** gpuDefQueue, //!< [Return] GPU default queue uint64_t* vmDefQueue //!< [Return] VM handle to the virtual queue ); void PostDeviceEnqueue( const amd::Kernel& kernel, //!< Parent amd kernel object - const HSAILKernel& hsaKernel, //!< Parent HSAIL object + const pal::Kernel& hsaKernel, //!< Parent HSAIL object VirtualGPU* gpuDefQueue, //!< GPU default queue uint64_t vmDefQueue, //!< VM handle to the virtual queue uint64_t vmParentWrap, //!< VM handle to the wrapped AQL packet location diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 03dae4db2f..2a21335698 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -125,8 +125,7 @@ bool NullDevice::create(const amd::Isa& isa) { info_.oclcVersion_ = "OpenCL C " OPENCL_C_VERSION_STR " "; info_.spirVersions_ = ""; std::stringstream ss; - ss << AMD_BUILD_STRING " (HSA," << (settings().useLightning_ ? "LC" : "HSAIL"); - ss << ") [Offline]"; + ss << AMD_BUILD_STRING " (HSA,LC) [Offline]"; ::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1); info_.version_ = "OpenCL " OPENCL_VERSION_STR " "; return true; @@ -704,12 +703,7 @@ bool Device::create() { // ================================================================================================ device::Program* NullDevice::createProgram(amd::Program& owner, amd::option::Options* options) { - device::Program* program; - if (settings().useLightning_) { - program = new LightningProgram(*this, owner); - } else { - program = new HSAILProgram(*this, owner); - } + device::Program* program = new roc::Program(*this, owner); if (program == nullptr) { LogError("Memory allocation has failed!"); @@ -722,19 +716,15 @@ bool Device::createBlitProgram() { bool result = true; std::string extraKernel; -#if defined(USE_COMGR_LIBRARY) - if (settings().useLightning_) { - if (amd::IS_HIP) { - if (settings().gwsInitSupported_) { - extraKernel = device::HipExtraSourceCode; - } else { - extraKernel = device::HipExtraSourceCodeNoGWS; - } + if (amd::IS_HIP) { + if (settings().gwsInitSupported_) { + extraKernel = device::HipExtraSourceCode; } else { - extraKernel = SchedulerSourceCode; + extraKernel = device::HipExtraSourceCodeNoGWS; } + } else { + extraKernel = SchedulerSourceCode; } -#endif // USE_COMGR_LIBRARY blitProgram_ = new BlitProgram(context_); // Create blit programs @@ -749,12 +739,7 @@ bool Device::createBlitProgram() { } device::Program* Device::createProgram(amd::Program& owner, amd::option::Options* options) { - device::Program* program; - if (settings().useLightning_) { - program = new LightningProgram(*this, owner); - } else { - program = new HSAILProgram(*this, owner); - } + device::Program* program = new roc::Program(*this, owner); if (program == nullptr) { LogError("Memory allocation has failed!"); @@ -1305,9 +1290,7 @@ bool Device::populateOCLDeviceConstants() { return false; } std::stringstream ss; - ss << AMD_BUILD_STRING " (HSA" << major << "." << minor << "," - << (settings().useLightning_ ? "LC" : "HSAIL"); - ss << ")"; + ss << AMD_BUILD_STRING " (HSA" << major << "." << minor << ",LC)"; ::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1); @@ -1480,10 +1463,6 @@ bool Device::populateOCLDeviceConstants() { if (info_.iommuv2_ || isa().versionMajor() >= 8) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } - } else if (!settings().useLightning_) { - if (info_.iommuv2_ || (isa().versionMajor() == 8)) { - info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; - } } } diff --git a/projects/clr/rocclr/device/rocm/rockernel.cpp b/projects/clr/rocclr/device/rocm/rockernel.cpp index af36fa78cd..c32a3ba18a 100644 --- a/projects/clr/rocclr/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/device/rocm/rockernel.cpp @@ -24,7 +24,6 @@ namespace amd::roc { -#if defined(USE_COMGR_LIBRARY) bool Kernel::init() { return GetAttrCodePropMetadata(); } bool Kernel::postLoad() { @@ -157,6 +156,5 @@ bool Kernel::postLoad() { program()->rocDevice().AddKernel(*this); return true; } -#endif // defined(USE_COMGR_LIBRARY) } // namespace amd::roc diff --git a/projects/clr/rocclr/device/rocm/rocprogram.cpp b/projects/clr/rocclr/device/rocm/rocprogram.cpp index 63502560fc..31fb6599bb 100644 --- a/projects/clr/rocclr/device/rocm/rocprogram.cpp +++ b/projects/clr/rocclr/device/rocm/rocprogram.cpp @@ -55,6 +55,7 @@ Program::~Program() { Program::Program(roc::NullDevice& device, amd::Program& owner) : device::Program(device, owner) { hsaExecutable_.handle = 0; hsaCodeObjectReader_.handle = 0; + isHIP_ = (owner.language() == amd::Program::HIP); } bool Program::initClBinary(char* binaryIn, size_t size) { @@ -201,58 +202,16 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, return true; } -HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) - : roc::Program(device, owner) {} - -HSAILProgram::~HSAILProgram() {} - -bool HSAILProgram::saveBinaryAndSetType(type_t type) { return true; } - -bool HSAILProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset, - std::string uri) { - return true; -} - - -LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner) - : roc::Program(device, owner) { - isLC_ = true; - isHIP_ = (owner.language() == amd::Program::HIP); -} - -bool LightningProgram::createBinary(amd::option::Options* options) { -#if defined(USE_COMGR_LIBRARY) +bool Program::createBinary(amd::option::Options* options) { if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) { LogError("Failed to create ELF binary image!"); return false; } -#endif // defined(USE_COMGR_LIBRARY) return true; } -bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) { -#if defined(USE_COMGR_LIBRARY) - // Write binary to memory - if (type == TYPE_EXECUTABLE) { // handle code object binary - assert(rawBinary != nullptr && size != 0 && "must pass in the binary"); - } else { // handle LLVM binary - if (llvmBinary_.empty()) { - buildLog_ += "ERROR: Tried to save empty LLVM binary \n"; - return false; - } - rawBinary = (void*)llvmBinary_.data(); - size = llvmBinary_.size(); - } - clBinary()->saveBIFBinary((char*)rawBinary, size); - - // Set the type of binary - setType(type); -#endif // defined(USE_COMGR_LIBRARY) - return true; -} - -bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, - bool internalKernel) { +bool Program::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, + bool internalKernel) { // Find the size of global variables from the binary if (!FindGlobalVarSize(binary, binSize)) { buildLog_ += "Error: Cannot Find Global Var Sizes\n"; @@ -274,9 +233,8 @@ bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUnifo return true; } -bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, - size_t foffset, std::string uri) { -#if defined(USE_COMGR_LIBRARY) +bool Program::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, + size_t foffset, std::string uri) { // Stop compilation if it is an offline device - HSA runtime does not // support ISA compiled offline if (!device().isOnline()) { @@ -330,7 +288,6 @@ bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDes return false; } } -#endif // defined(USE_COMGR_LIBRARY) return true; } diff --git a/projects/clr/rocclr/device/rocm/rocprogram.hpp b/projects/clr/rocclr/device/rocm/rocprogram.hpp index 6d97cfce7e..8c518c0445 100644 --- a/projects/clr/rocclr/device/rocm/rocprogram.hpp +++ b/projects/clr/rocclr/device/rocm/rocprogram.hpp @@ -29,9 +29,6 @@ //! \namespace amd::roc HSA Device Implementation namespace amd::roc { -class HSAILProgram; -class LightningProgram; - //! \class empty program class Program : public device::Program { friend class ClBinary; @@ -62,15 +59,6 @@ class Program : public device::Program { virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes, const char* global_name) const; - protected: - /*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen) - * - * \return The build error code - */ - int compileBinaryToHSAIL(amd::option::Options* options //!< options for compilation - ); - virtual bool createBinary(amd::option::Options* options) = 0; - protected: //! Disable default copy constructor Program(const Program&) = delete; @@ -79,48 +67,17 @@ class Program : public device::Program { virtual bool defineGlobalVar(const char* name, void* dptr); - protected: - /* HSA executable */ - hsa_executable_t hsaExecutable_; //!< Handle to HSA executable - hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader -}; - -class HSAILProgram : public roc::Program { - public: - HSAILProgram(roc::NullDevice& device, amd::Program& owner); - virtual ~HSAILProgram(); - - protected: - bool createBinary(amd::option::Options* options) override { return true; } - - virtual bool setKernels(void* binary, size_t binSize, - amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0, - std::string uri = std::string()) override; - - private: - std::string codegenOptions(amd::option::Options* options); - - bool saveBinaryAndSetType(type_t type) override; -}; - -class LightningProgram final : public roc::Program { - public: - LightningProgram(roc::NullDevice& device, amd::Program& owner); - virtual ~LightningProgram() {} - - protected: - bool createBinary(amd::option::Options* options) final; - - bool saveBinaryAndSetType(type_t type) final { return true; } - - private: - bool saveBinaryAndSetType(type_t type, void* rawBinary, size_t size); + bool createBinary(amd::option::Options* options) override final; bool createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, bool internalKernel) override final; bool setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0, std::string uri = std::string()) override final; + protected: + /* HSA executable */ + hsa_executable_t hsaExecutable_; //!< Handle to HSA executable + hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader }; /*@}*/ // namespace amd::roc diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp index ff1b73f327..48aa4328ca 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp @@ -71,8 +71,6 @@ Settings::Settings() { numDeviceEvents_ = 1024; numWaitEvents_ = 8; - useLightning_ = (!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : true; - lcWavefrontSize64_ = true; imageBufferWar_ = false; @@ -116,7 +114,6 @@ bool Settings::create(bool fullProfile, const amd::Isa& isa, bool enableXNACK, b pinnedXferSize_ = std::max(pinnedXferSize_, pinnedMinXferSize_); } enableXNACK_ = enableXNACK; - hsailExplicitXnack_ = enableXNACK; // Enable extensions enableExtension(ClKhrByteAddressableStore); @@ -146,17 +143,12 @@ bool Settings::create(bool fullProfile, const amd::Isa& isa, bool enableXNACK, b enableExtension(ClKhrFp16); supportDepthsRGB_ = true; - if (useLightning_) { - enableExtension(ClAmdAssemblyProgram); - // enable subnormals for gfx900 and later - if (gfxipMajor >= 9) { - singleFpDenorm_ = true; - enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups; - enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups; - } - } else { - // Also enable AMD double precision extension? - enableExtension(ClAmdFp64); + enableExtension(ClAmdAssemblyProgram); + // enable subnormals for gfx900 and later + if (gfxipMajor >= 9) { + singleFpDenorm_ = true; + enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups; + enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS & coop_groups; } if ((gfxipMajor == 9 && gfxipMinor == 0 && gfxStepping == 10) || diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 96ae29e4e3..fb73e9b97b 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -806,11 +806,8 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) && "Unsupported address qualifier"); - const bool readOnly = -#if defined(USE_COMGR_LIBRARY) - desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST || -#endif // defined(USE_COMGR_LIBRARY) - (mem->getMemFlags() & CL_MEM_READ_ONLY) != 0; + const bool readOnly = (desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST) || + ((mem->getMemFlags() & CL_MEM_READ_ONLY) != 0); if (!readOnly) { mem->signalWrite(&dev()); diff --git a/projects/clr/rocclr/elf/test/CMakeLists.txt b/projects/clr/rocclr/elf/test/CMakeLists.txt index 5ff4318f7b..2924616e0c 100644 --- a/projects/clr/rocclr/elf/test/CMakeLists.txt +++ b/projects/clr/rocclr/elf/test/CMakeLists.txt @@ -55,8 +55,8 @@ target_include_directories(elf_test PRIVATE $) -add_definitions(-DUSE_COMGR_LIBRARY -DCOMGR_DYN_DLL -DWITH_LIGHTNING_COMPILER -DDEBUG) +add_definitions(-DCOMGR_DYN_DLL -DDEBUG) target_link_libraries(elf_test PRIVATE amdrocclr_static) -#-------------------------------------elf_test--------------------------------------# \ No newline at end of file +#-------------------------------------elf_test--------------------------------------# diff --git a/projects/clr/rocclr/platform/kernel.hpp b/projects/clr/rocclr/platform/kernel.hpp index 17f7f12634..0749e6f071 100644 --- a/projects/clr/rocclr/platform/kernel.hpp +++ b/projects/clr/rocclr/platform/kernel.hpp @@ -340,7 +340,6 @@ class Kernel : public RuntimeObject { virtual ObjectType objectType() const { return ObjectTypeKernel; } -#if defined(USE_COMGR_LIBRARY) // Templated find function to retrieve the right value based on string template static V FindValue(const T (&structure)[N], const std::string& name); @@ -417,8 +416,7 @@ class Kernel : public RuntimeObject { static const KernelFieldMapV3Type kKernelFieldMapV3[]; static const ArgValueKindV3Type kArgValueKindV3[]; static const ArgFieldMapV3Type kArgFieldMapV3[]; -#endif -}; // defined(USE_COMGR_LIBRARY) +}; /*! @} diff --git a/projects/clr/rocclr/platform/kernel_init.hpp b/projects/clr/rocclr/platform/kernel_init.hpp index 8c2804ddb2..559549719d 100644 --- a/projects/clr/rocclr/platform/kernel_init.hpp +++ b/projects/clr/rocclr/platform/kernel_init.hpp @@ -20,8 +20,6 @@ #pragma once -#if defined(USE_COMGR_LIBRARY) - // Static values initialization from class Kernel. const amd::Kernel::ArgFieldMapType amd::Kernel::kArgFieldMap[] = { {"Name", ArgField::Name}, @@ -189,5 +187,3 @@ cl_int amd::Kernel::FindValue(const T (&structure)[N], const std::string& name) } return 0; } - -#endif // defined(USE_COMGR_LIBRARY) \ No newline at end of file diff --git a/projects/clr/rocclr/platform/program.cpp b/projects/clr/rocclr/platform/program.cpp index 8f53f813ad..a62ca6ab73 100644 --- a/projects/clr/rocclr/platform/program.cpp +++ b/projects/clr/rocclr/platform/program.cpp @@ -23,11 +23,6 @@ #include "platform/program.hpp" #include "platform/context.hpp" #include "utils/options.hpp" -#if defined(WITH_COMPILER_LIB) -#include "utils/libUtils.h" -#include "utils/bif_section_labels.hpp" -#include "hsailctx.hpp" -#endif #include // for malloc #include // for strcmp @@ -38,21 +33,6 @@ namespace amd { -#if defined(WITH_COMPILER_LIB) -static aclTargetInfo* aclutGetTargetInfo(aclBinary* binary) { - aclTargetInfo* tgt = NULL; - if (binary->struct_size == sizeof(aclBinary_0_8)) { - tgt = &reinterpret_cast(binary)->target; - } else if (binary->struct_size == sizeof(aclBinary_0_8_1)) { - tgt = &reinterpret_cast(binary)->target; - } else { - assert(!"Binary format not supported!"); - tgt = &binary->target; - } - return tgt; -} -#endif - static void remove_g_option(std::string& option) { // Remove " -g " option from application. // People can still add -g in AMD_OCL_BUILD_OPTIONS_APPEND, if it is so desired. @@ -114,16 +94,7 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng amd::option::Options* options, const amd::Program* same_prog, amd::Os::FileDesc fdesc, size_t foffset, std::string uri) { if (image != NULL && !amd::Elf::isElfMagic((const char*)image)) { - if (device.settings().useLightning_) { - return CL_INVALID_BINARY; - } -#if defined(WITH_COMPILER_LIB) - else if (!amd::Hsail::ValidateBinaryImage( - image, length, - language_ == SPIRV ? BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM)) { - return CL_INVALID_BINARY; - } -#endif // !defined(WITH_COMPILER_LIB) + return CL_INVALID_BINARY; } // Check if the device is already associated with this program @@ -138,43 +109,11 @@ int32_t Program::addDeviceProgram(Device& device, const void* image, size_t leng return CL_SUCCESS; } -#if defined(WITH_COMPILER_LIB) - bool emptyOptions = (options == nullptr); -#endif amd::option::Options emptyOpts; if (options == NULL) { options = &emptyOpts; } -#if defined(WITH_COMPILER_LIB) - if (image != NULL && length != 0 && - amd::Hsail::ValidateBinaryImage(image, length, BINARY_TYPE_ELF)) { - acl_error errorCode; - aclBinary* binary = amd::Hsail::ReadFromMem(image, length, &errorCode); - if (errorCode != ACL_SUCCESS) { - return CL_INVALID_BINARY; - } - const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions); - assert(symbol && "symbol not found"); - std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]); - size_t symSize = 0; - const void* opts = amd::Hsail::ExtractSymbol(device.binCompiler(), binary, &symSize, aclCOMMENT, - symName.c_str(), &errorCode); - // if we have options from binary and input options was not specified - if (opts != NULL && emptyOptions) { - std::string sBinOptions = std::string((char*)opts, symSize); - if (!amd::option::parseAllOptions(sBinOptions, *options, false, false)) { - programLog_ = options->optionsLog(); - LogError("Parsing compilation options from binary failed."); - return CL_INVALID_COMPILER_OPTIONS; - } - } - options->oVariables->Legacy = !device.settings().useLightning_ - ? isAMDILTarget(*amd::aclutGetTargetInfo(binary)) - : isHSAILTarget(*amd::aclutGetTargetInfo(binary)); - amd::Hsail::BinaryFini(binary); - } -#endif // defined(WITH_COMPILER_LIB) options->oVariables->BinaryIsSpirv = language_ == SPIRV; device::Program* program = rootDev.createProgram(*this, options); if (program == NULL) { @@ -272,8 +211,7 @@ int32_t Program::compile(const std::vector& devices, size_t numHeaders, for (const auto& it : devices) { option::Options parsedOptions; constexpr bool LinkOptsOnly = false; - if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly, - it->settings().useLightning_)) { + if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) { programLog_ = parsedOptions.optionsLog(); LogError("Parsing compile options failed."); return CL_INVALID_COMPILER_OPTIONS; @@ -345,8 +283,7 @@ int32_t Program::link(const std::vector& devices, size_t numInputs, for (const auto& it : devices) { option::Options parsedOptions; constexpr bool LinkOptsOnly = true; - if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly, - it->settings().useLightning_)) { + if (!ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) { programLog_ = parsedOptions.optionsLog(); LogError("Parsing link options failed."); return CL_INVALID_LINKER_OPTIONS; @@ -366,29 +303,6 @@ int32_t Program::link(const std::vector& devices, size_t numInputs, continue; } inputDevPrograms[i] = findIt->second; -// Check the binary's target for the first found device program. -// TODO: Revise these binary's target checks -// and possibly remove them after switching to HSAIL by default. -#if defined(WITH_COMPILER_LIB) - device::Program::binary_t binary = inputDevPrograms[i]->binary(); - if (!found && binary.first != NULL && binary.second > 0 && - amd::Hsail::ValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) { - acl_error errorCode = ACL_SUCCESS; - void* mem = const_cast(binary.first); - aclBinary* aclBin = amd::Hsail::ReadFromMem(mem, binary.second, &errorCode); - if (errorCode != ACL_SUCCESS) { - LogWarning("Error while linking: Could not read from raw binary."); - return CL_INVALID_BINARY; - } - if (isHSAILTarget(*amd::aclutGetTargetInfo(aclBin))) { - parsedOptions.oVariables->Frontend = "clang"; - parsedOptions.oVariables->Legacy = it->settings().useLightning_; - } else if (isAMDILTarget(*amd::aclutGetTargetInfo(aclBin))) { - parsedOptions.oVariables->Frontend = "edg"; - } - amd::Hsail::BinaryFini(aclBin); - } -#endif // defined(WITH_COMPILER_LIB) found = true; } if (inputDevPrograms.size() == 0) { @@ -522,8 +436,7 @@ int32_t Program::build(const std::vector& devices, const char* options, for (const auto& it : devices) { option::Options parsedOptions; constexpr bool LinkOptsOnly = false; - if ((language_ != HIP) && !ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly, - it->settings().useLightning_)) { + if ((language_ != HIP) && !ParseAllOptions(cppstr, parsedOptions, optionChangable, LinkOptsOnly)) { programLog_ = parsedOptions.optionsLog(); LogError("Parsing compile options failed."); return CL_INVALID_COMPILER_OPTIONS; @@ -677,7 +590,7 @@ int Program::GetOclCVersion(const char* clVer) { } bool Program::ParseAllOptions(const std::string& options, option::Options& parsedOptions, - bool optionChangable, bool linkOptsOnly, bool isLC) { + bool optionChangable, bool linkOptsOnly) { std::string allOpts = options; if (optionChangable) { if (linkOptsOnly) { @@ -704,7 +617,7 @@ bool Program::ParseAllOptions(const std::string& options, option::Options& parse } } } - return amd::option::parseAllOptions(allOpts, parsedOptions, linkOptsOnly, isLC); + return amd::option::parseAllOptions(allOpts, parsedOptions, linkOptsOnly); } bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func) { diff --git a/projects/clr/rocclr/platform/program.hpp b/projects/clr/rocclr/platform/program.hpp index 5104a0d61b..e9444d6d82 100644 --- a/projects/clr/rocclr/platform/program.hpp +++ b/projects/clr/rocclr/platform/program.hpp @@ -225,7 +225,7 @@ class Program : public RuntimeObject { static int GetOclCVersion(const char* clVer); bool static ParseAllOptions(const std::string& options, option::Options& parsedOptions, - bool optionChangable, bool linkOptsOnly, bool isLC); + bool optionChangable, bool linkOptsOnly); void setVarInfoCallBack(VarInfoCallback callback) { varcallback = callback; } diff --git a/projects/clr/rocclr/utils/flags.hpp b/projects/clr/rocclr/utils/flags.hpp index a8e8daa7f2..b31b3834e6 100644 --- a/projects/clr/rocclr/utils/flags.hpp +++ b/projects/clr/rocclr/utils/flags.hpp @@ -141,8 +141,6 @@ release(uint, PAL_MALL_POLICY, 0, \ "2 = Allocations will always be put through the MALL") \ release(bool, GPU_ENABLE_WAVE32_MODE, true, \ "Enables Wave32 compilation in HW if available") \ -release(bool, GPU_ENABLE_LC, true, \ - "Enables LC path") \ release(bool, GPU_ENABLE_HW_P2P, false, \ "Enables HW P2P path") \ release(bool, GPU_ENABLE_COOP_GROUPS, true, \