diff --git a/projects/clr/opencl/README.md b/projects/clr/opencl/README.md index 0e7af8a4d2..832b687efe 100644 --- a/projects/clr/opencl/README.md +++ b/projects/clr/opencl/README.md @@ -38,7 +38,7 @@ Run these commands: ```bash cd "$CLR_DIR" mkdir -p build; cd build -cmake -DUSE_COMGR_LIBRARY=ON -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCLR_BUILD_HIP=OFF -DCLR_BUILD_OCL=ON .. +cmake -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCLR_BUILD_HIP=OFF -DCLR_BUILD_OCL=ON .. make -j$(nproc) ``` diff --git a/projects/clr/rocclr/compiler/lib/backends/common/library.hpp b/projects/clr/rocclr/compiler/lib/backends/common/library.hpp deleted file mode 100644 index a8cf0dd7af..0000000000 --- a/projects/clr/rocclr/compiler/lib/backends/common/library.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
*/ - -#ifndef LIBRARY_H_ -#define LIBRARY_H_ - -#include -#include -namespace amd { - -typedef enum _library_selector { - LibraryUndefined = 0, - GPU_Library_7xx, - GPU_Library_Evergreen, - GPU_Library_SI, - CPU_Library_Generic, - CPU_Library_AVX, - CPU_Library_FMA4, - GPU_Library_Generic, - CPU64_Library_Generic, - CPU64_Library_AVX, - CPU64_Library_FMA4, - GPU64_Library_Evergreen, - GPU64_Library_SI, - GPU64_Library_Generic, - GPU_Library_CI, - GPU64_Library_CI, - GPU_Library_HSAIL, - LibraryTotal -} LibrarySelector; - -/** Integrated Bitcode Libararies **/ -class LibraryDescriptor { - public: - enum { MAX_NUM_LIBRARY_DESCS = 11 }; - - const char* start; - size_t size; -}; - -int getLibDescs(LibrarySelector LibType, // input - LibraryDescriptor* LibDesc, // output - int& LibDescSize // output -- LibDesc[0:LibDescSize-1] -); - -static constexpr const char* amdRTFuns[] = {"__amdrt_div_i64", "__amdrt_div_u64", - "__amdrt_mod_i64", "__amdrt_mod_u64", - "__amdrt_cvt_f64_to_u64", "__amdrt_cvt_f32_to_u64"}; -} // namespace amd - -#endif // LIBRARY_H_ diff --git a/projects/clr/rocclr/compiler/lib/utils/options.cpp b/projects/clr/rocclr/compiler/lib/utils/options.cpp index 618a52b8af..19ad08e5a3 100644 --- a/projects/clr/rocclr/compiler/lib/utils/options.cpp +++ b/projects/clr/rocclr/compiler/lib/utils/options.cpp @@ -1233,8 +1233,7 @@ Options::Options() dumpFileRoot(), currKernelName(NULL), encryptCode(0), - MemoryHandles(), - libraryType_(amd::LibraryUndefined) { + MemoryHandles() { oVariables = new OptionVariables(); ::memset(flags, 0, sizeof(flags)); diff --git a/projects/clr/rocclr/compiler/lib/utils/options.hpp b/projects/clr/rocclr/compiler/lib/utils/options.hpp index c12cc27ea7..2be2d33edf 100644 --- a/projects/clr/rocclr/compiler/lib/utils/options.hpp +++ b/projects/clr/rocclr/compiler/lib/utils/options.hpp @@ -25,7 +25,6 @@ #include #include #include "top.hpp" -#include "library.hpp" #include #include #ifdef __linux__ @@ -316,7 +315,6 @@ class Options { void 
setDumpFileName(const char* val); public: - LibrarySelector libraryType_; std::string sourceFileName_; }; diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 6f421f98b4..a1585bb870 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -107,159 +107,159 @@ std::pair Isa::supportedIsas() { // the ANY value. If the target feature is disabled then use a '-' suffix, // and if enabled use a '+' suffix. // - // If the HSAIL or AMD IL compilers do not support the target, then use + // If the AMD IL compilers do not support the target, then use // nullptr for the ID. // - // -------------- Compiler ---------- - Runtime - ---- IP ---- -- Target -- ---------- - // Target Properties ---------- - // Supported Version Features - // SIMD/ - // SIMD - // Instr - // Bank LDS - // Mem - // Target ID HSAIL ID ROC PAL Maj/Min/Stp SRAMECC XNACK CU Width - // Width Width Size Banks LDSAlignment - {"gfx801", nullptr, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx801:xnack-", nullptr, true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx801:xnack+", "gfx801", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx802", "gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx803", "gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx810", nullptr, true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx810:xnack-", nullptr, true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx810:xnack+", "gfx810", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx900:xnack-", "gfx900", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - 
{"gfx900:xnack+", "gfx901", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx902:xnack-", "gfx902", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx902:xnack+", "gfx903", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx904:xnack-", "gfx904", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx904:xnack+", "gfx905", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:sramecc-", "gfx907", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:sramecc+", nullptr, true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:xnack-", "gfx906", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:xnack+", "gfx907", true, true, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:sramecc-:xnack-", "gfx906", true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki, + // -- Compiler --|-- Runtime --|-- IP --|-- Target --|-- Target Properties -- + // | Supported | Version| Features | + // --------------|-------------|--------|------------|----------------------- + // Target ID | ROC PAL | Major | SRAMECC | SIMD/CU + // | | Minor | XNACK | SIMD Width + // | | Step | | Instr Width + // | | | | Bank Width + // | | | | LDS Size + // | | | | Mem Banks + // | | | | LDS Alignment + {"gfx801", true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx801:xnack-", true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx801:xnack+", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx803", true, true, 8, 0, 3, 
NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx805", true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx810", true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx810:xnack-", true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx810:xnack+", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx900", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx900:xnack-", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx900:xnack+", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx902", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx902:xnack-", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx902:xnack+", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx904", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx904:xnack-", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx904:xnack+", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906:sramecc-", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906:sramecc+", true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906:xnack-", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906:xnack+", true, true, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906:sramecc-:xnack-", true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:sramecc-:xnack+", "gfx907", true, true, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki, + {"gfx906:sramecc-:xnack+", true, true, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:sramecc+:xnack-", nullptr, true, true, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx906:sramecc+:xnack+", nullptr, true, true, 9, 0, 6, ON, ON, 4, 
16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908", nullptr, true, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:sramecc-", nullptr, true, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:sramecc+", nullptr, true, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:xnack-", nullptr, true, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:xnack+", nullptr, true, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:sramecc-:xnack-", nullptr, true, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki, + {"gfx906:sramecc+:xnack-", true, true, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx906:sramecc+:xnack+", true, true, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx908", true, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx908:sramecc-", true, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx908:sramecc+", true, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx908:xnack-", true, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx908:xnack+", true, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx908:sramecc-:xnack-", true, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:sramecc-:xnack+", nullptr, true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, + {"gfx908:sramecc-:xnack+", true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:sramecc+:xnack-", nullptr, true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, + {"gfx908:sramecc+:xnack-", true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx908:sramecc+:xnack+", nullptr, true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx909:xnack-", nullptr, false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx909:xnack+", nullptr, false, 
true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a", nullptr, true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:sramecc-", nullptr, true, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:sramecc+", nullptr, true, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:xnack-", nullptr, true, false, 9, 0, 10, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:xnack+", nullptr, true, false, 9, 0, 10, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:sramecc-:xnack-", nullptr, true, false, 9, 0, 10, OFF, OFF, 4, 16, 1, 256, 64 * Ki, + {"gfx908:sramecc+:xnack+", true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx909", false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx909:xnack-", false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx909:xnack+", false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90a", true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90a:sramecc-", true, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90a:sramecc+", true, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90a:xnack-", true, false, 9, 0, 10, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90a:xnack+", true, false, 9, 0, 10, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90a:sramecc-:xnack-", true, false, 9, 0, 10, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:sramecc-:xnack+", nullptr, true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki, + {"gfx90a:sramecc-:xnack+", true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:sramecc+:xnack-", nullptr, true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki, + {"gfx90a:sramecc+:xnack-", true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90a:sramecc+:xnack+", nullptr, true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki, + 
{"gfx90a:sramecc+:xnack+", true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942", nullptr, true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:sramecc-", nullptr, true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:sramecc+", nullptr, true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:xnack-", nullptr, true, false, 9, 4, 2, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:xnack+", nullptr, true, false, 9, 4, 2, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:sramecc-:xnack-", nullptr, true, false, 9, 4, 2, OFF, OFF, 4, 16, 1, 256, 64 * Ki, + {"gfx942", true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx942:sramecc-", true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx942:sramecc+", true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx942:xnack-", true, false, 9, 4, 2, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx942:xnack+", true, false, 9, 4, 2, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx942:sramecc-:xnack-", true, false, 9, 4, 2, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:sramecc-:xnack+", nullptr, true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki, + {"gfx942:sramecc-:xnack+", true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:sramecc+:xnack-", nullptr, true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki, + {"gfx942:sramecc+:xnack-", true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx942:sramecc+:xnack+", nullptr, true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx950", nullptr, true, false, 9, 5, 0, ANY, ANY, 4, 16, 1, 
256, 160 * Ki, 64, 1280}, - {"gfx950:sramecc-", nullptr, true, false, 9, 5, 0, OFF, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:sramecc+", nullptr, true, false, 9, 5, 0, ON, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:xnack-", nullptr, true, false, 9, 5, 0, ANY, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:xnack+", nullptr, true, false, 9, 5, 0, ANY, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:sramecc-:xnack-", nullptr, true, false, 9, 5, 0, OFF, OFF, 4, 16, 1, 256, 160 * Ki, + {"gfx942:sramecc+:xnack+", true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90c", true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90c:xnack-", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx90c:xnack+", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx950", true, false, 9, 5, 0, ANY, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280}, + {"gfx950:sramecc-", true, false, 9, 5, 0, OFF, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280}, + {"gfx950:sramecc+", true, false, 9, 5, 0, ON, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280}, + {"gfx950:xnack-", true, false, 9, 5, 0, ANY, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280}, + {"gfx950:xnack+", true, false, 9, 5, 0, ANY, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280}, + {"gfx950:sramecc-:xnack-", true, false, 9, 5, 0, OFF, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:sramecc-:xnack+", nullptr, true, false, 9, 5, 0, OFF, ON, 4, 16, 1, 256, 160 * Ki, + {"gfx950:sramecc-:xnack+", true, false, 9, 5, 0, OFF, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:sramecc+:xnack-", nullptr, true, false, 9, 5, 0, ON, OFF, 4, 16, 1, 256, 160 * Ki, + {"gfx950:sramecc+:xnack-", true, false, 9, 5, 0, ON, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx950:sramecc+:xnack+", nullptr, true, false, 9, 5, 0, ON, ON, 4, 16, 1, 256, 160 * Ki, + {"gfx950:sramecc+:xnack+", true, false, 9, 5, 0, ON, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280}, - {"gfx9-generic", nullptr, true, true, 9, 
0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-generic:xnack-", nullptr, true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-generic:xnack+", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic", nullptr, true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:sramecc-", nullptr, true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki, + {"gfx9-generic", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx9-generic:xnack-", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx9-generic:xnack+", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx9-4-generic", true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx9-4-generic:sramecc-", true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:sramecc+", nullptr, true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki, + {"gfx9-4-generic:sramecc+", true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:xnack-", nullptr, true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:xnack+", nullptr, true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:sramecc-:xnack-", nullptr, true, true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256, + {"gfx9-4-generic:xnack-", true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx9-4-generic:xnack+", true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, + {"gfx9-4-generic:sramecc-:xnack-", true, true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:sramecc-:xnack+", nullptr, true, true, 9, 4, 0, OFF, ON, 4, 16, 1, 256, + {"gfx9-4-generic:sramecc-:xnack+", true, true, 9, 4, 0, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:sramecc+:xnack-", nullptr, true, true, 9, 4, 0, ON, OFF, 4, 16, 1, 256, + {"gfx9-4-generic:sramecc+:xnack-", true, true, 9, 4, 0, 
ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx9-4-generic:sramecc+:xnack+", nullptr, true, true, 9, 4, 0, ON, ON, 4, 16, 1, 256, + {"gfx9-4-generic:sramecc+:xnack+", true, true, 9, 4, 0, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512}, - {"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1011", "gfx1011", true, true, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1011:xnack-", "gfx1011", true, true, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1011:xnack+", nullptr, true, true, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1012", "gfx1012", true, true, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1012:xnack-", "gfx1012", true, true, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1012:xnack+", nullptr, true, true, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx10-1-generic:xnack-", nullptr, true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, + {"gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1010:xnack-", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1010:xnack+", true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1011", true, true, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1011:xnack-", true, true, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 
64 * Ki, 32, 1024}, + {"gfx1011:xnack+", true, true, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1012", true, true, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1012:xnack-", true, true, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1012:xnack+", true, true, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1013:xnack-", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1013:xnack+", true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx10-1-generic", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx10-1-generic:xnack-", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx10-1-generic:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, + {"gfx10-1-generic:xnack+", true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1033", "gfx1033", true, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1034", "gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1035", "gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1036", "gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx10-3-generic", nullptr, true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1100", "gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1101", "gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1102", "gfx1102", true, true, 11, 0, 2, 
NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1103", "gfx1103", true, true, 11, 0, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1150", "gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1151", "gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1152", "gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1153", "gfx1153", true, true, 11, 5, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx11-generic", nullptr, true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1200", "gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx1201", "gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, - {"gfx12-generic", nullptr, true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1033", true, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx10-3-generic", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1103", true, true, 11, 0, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + 
{"gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1153", true, true, 11, 5, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx11-generic", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, + {"gfx12-generic", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024}, }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); } @@ -1240,7 +1240,7 @@ bool ClBinary::setElfTarget() { static const uint32_t Target = 21; assert(((0xFFFF8000 & Target) == 0) && "ASIC target ID >= 2^15"); uint16_t elf_target = static_cast(0x7FFF & Target); - return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM); + return elfOut()->setTarget(elf_target); } void ClBinary::init(amd::option::Options* optionsObj) { @@ -1272,37 +1272,6 @@ void ClBinary::init(amd::option::Options* optionsObj) { } } -bool ClBinary::isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform) { - /* It is recompilable if there is llvmir that was generated for - the same platform (CPU or GPU) and with the same bitness. - - Note: the bitness has been checked in initClBinary(), no need - to check it here. 
- */ - if (llvmBinary.empty()) { - DevLogError("LLVM Binary string is empty \n"); - return false; - } - - uint16_t elf_target; - amd::Elf::ElfPlatform platform; - if (elfIn()->getTarget(elf_target, platform)) { - if (platform == thePlatform) { - return true; - } - if ((platform == amd::Elf::COMPLIB_PLATFORM) && - (((thePlatform == amd::Elf::CAL_PLATFORM) && - ((elf_target == (uint16_t)EM_HSAIL) || (elf_target == (uint16_t)EM_HSAIL_64))) || - ((thePlatform == amd::Elf::CPU_PLATFORM) && - ((elf_target == (uint16_t)EM_386) || (elf_target == (uint16_t)EM_X86_64))))) { - return true; - } - } - - DevLogPrintfError("LLVM_Binary: %s is not recompilable \n", llvmBinary.c_str()); - return false; -} - void ClBinary::release() { if (isBinaryAllocated() && (binary_ != nullptr)) { delete[] binary_; diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index faa9830e58..254ef55e69 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1083,9 +1083,6 @@ class ClBinary : public amd::HeapObject { amd::Elf::ElfSections& elfSectionType //!< LLVMIR binary is in SPIR format ) const; - //! Check if the binary is recompilable - bool isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform); - void saveOrigBinary(const char* origBinary, size_t origSize) { origBinary_ = origBinary; origSize_ = origSize; @@ -1455,9 +1452,6 @@ class Isa { /// @returns This Isa's target ID name. const char* targetId() const { return targetId_; } - /// @returns This Isa's name to use with the HSAIL compiler. - const char* hsailName() const { return hsailId_; } - /// @returns If the ROCm runtime supports the ISA. 
bool runtimeRocSupported() const { if (!IS_HIP && (versionMajor_ == 8)) { @@ -1534,13 +1528,12 @@ class Isa { static const Isa* end(); private: - constexpr Isa(const char* targetId, const char* hsailId, bool runtimeRocSupported, - bool runtimePalSupported, uint32_t versionMajor, uint32_t versionMinor, - uint32_t versionStepping, Feature sramecc, Feature xnack, uint32_t simdPerCU, - uint32_t simdWidth, uint32_t simdInstructionWidth, uint32_t memChannelBankWidth, + constexpr Isa(const char* targetId, bool runtimeRocSupported, bool runtimePalSupported, + uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping, + Feature sramecc, Feature xnack, uint32_t simdPerCU, uint32_t simdWidth, + uint32_t simdInstructionWidth, uint32_t memChannelBankWidth, uint32_t localMemSizePerCU, uint32_t localMemBanks, uint32_t ldsAlignment) : targetId_(targetId), - hsailId_(hsailId), runtimeRocSupported_(runtimeRocSupported), runtimePalSupported_(runtimePalSupported), versionMajor_(versionMajor), @@ -1563,11 +1556,6 @@ class Isa { // compilations. const char* targetId_; - // @brief Isa's HSAIL name. Used for the Compiler Library for HSAIL - // compilation using the Shader Compiler Finalizer. Empty string if - // unsupported. - const char* hsailId_; - bool runtimeRocSupported_; //!< ROCm runtime is supported. bool runtimePalSupported_; //!< PAL runtime is supported. uint32_t versionMajor_; //!< Isa's major version. 
diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index c031b77140..ea9925e648 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -997,7 +997,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) { uint32_t numParams = params.size(); // Append the hidden arguments to the OCL arguments params.insert(params.end(), hiddenParams.begin(), hiddenParams.end()); - createSignature(params, numParams, amd::KernelSignature::ABIVersion_2); + createSignature(params, numParams, amd::KernelSignature::ABIVersion_LC); } // ================================================================================================ diff --git a/projects/clr/rocclr/device/devprogram.cpp b/projects/clr/rocclr/device/devprogram.cpp index 8a8b72effe..5589cc1078 100644 --- a/projects/clr/rocclr/device/devprogram.cpp +++ b/projects/clr/rocclr/device/devprogram.cpp @@ -1702,7 +1702,6 @@ Program::file_type_t Program::getNextCompilationStageFromBinary(amd::option::Opt bool recompile = false; //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT? 
switch (continueCompileFrom) { - case FILE_TYPE_HSAIL_BINARY: case FILE_TYPE_CG: case FILE_TYPE_ISA: { // Compare options loaded from binary with current ones, recompile if differ; diff --git a/projects/clr/rocclr/device/devprogram.hpp b/projects/clr/rocclr/device/devprogram.hpp index ca6e9b5b97..87a45d536b 100644 --- a/projects/clr/rocclr/device/devprogram.hpp +++ b/projects/clr/rocclr/device/devprogram.hpp @@ -87,18 +87,16 @@ class Program : public amd::HeapObject { FILE_TYPE_SPIR_BINARY = 5, FILE_TYPE_AMDIL_TEXT = 6, FILE_TYPE_AMDIL_BINARY = 7, - FILE_TYPE_HSAIL_TEXT = 8, - FILE_TYPE_HSAIL_BINARY = 9, - FILE_TYPE_X86_TEXT = 10, - FILE_TYPE_X86_BINARY = 11, - FILE_TYPE_CG = 12, - FILE_TYPE_SOURCE = 13, - FILE_TYPE_ISA = 14, - FILE_TYPE_HEADER = 15, - FILE_TYPE_RSLLVMIR_BINARY = 16, - FILE_TYPE_SPIRV_BINARY = 17, - FILE_TYPE_ASM_TEXT = 18, - FILE_TYPE_LAST = 19 + FILE_TYPE_X86_TEXT = 8, + FILE_TYPE_X86_BINARY = 9, + FILE_TYPE_CG = 10, + FILE_TYPE_SOURCE = 11, + FILE_TYPE_ISA = 12, + FILE_TYPE_HEADER = 13, + FILE_TYPE_RSLLVMIR_BINARY = 14, + FILE_TYPE_SPIRV_BINARY = 15, + FILE_TYPE_ASM_TEXT = 16, + FILE_TYPE_LAST = 17 } file_type_t; private: diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index ff1e94f24d..6de934fb48 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -122,7 +122,7 @@ static std::tuple findIsa(uint32_t gfxipMajor, uin sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled, xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled); return std::make_tuple( - isa, (palDeviceIter->gfxipMajor_ > 8) ? isa->hsailName() : palDeviceIter->palName_); + isa, (palDeviceIter->gfxipMajor_ > 8) ? 
isa->targetId() : palDeviceIter->palName_); } static std::tuple findPal(uint32_t gfxipMajor, diff --git a/projects/clr/rocclr/device/pal/palkernel.cpp b/projects/clr/rocclr/device/pal/palkernel.cpp index e8074e1ee5..0a6bd674f8 100644 --- a/projects/clr/rocclr/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/device/pal/palkernel.cpp @@ -193,7 +193,7 @@ Kernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel, } // The check below handles a special case of single context with multiple devices - // when the devices use different compilers(HSAIL and LC) and have different signatures + // when the devices have different signatures const amd::KernelSignature& signature = (this->signature().version() == kernel.signature().version()) ? kernel.signature() : this->signature(); diff --git a/projects/clr/rocclr/device/pal/palkernel.hpp b/projects/clr/rocclr/device/pal/palkernel.hpp index afd2e6b468..6ebab27cfb 100644 --- a/projects/clr/rocclr/device/pal/palkernel.hpp +++ b/projects/clr/rocclr/device/pal/palkernel.hpp @@ -84,9 +84,6 @@ class Kernel : public device::Kernel { //! Returns LDS size used in this kernel uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); } - //! Returns pointer on CPU to AQL code info - const amd_kernel_code_t* cpuAqlCode() const { return &akc_; } - //! 
Returns pointer on CPU to AQL kernel descriptor info const llvm::amdhsa::kernel_descriptor_t* cpuAqlKd() const { return &akd_; } @@ -135,14 +132,10 @@ class Kernel : public device::Kernel { void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize, const uint16_t numSGPRs, const uint16_t numVGPRs); - union { - amd_kernel_code_t akc_; //!< AQL kernel code on CPU, used by HSAIL - llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC - }; - uint index_; //!< Kernel index in the program - - uint64_t code_; //!< GPU memory pointer to the kernel - size_t codeSize_; //!< Size of ISA code + llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC + uint index_; //!< Kernel index in the program + uint64_t code_; //!< GPU memory pointer to the kernel + size_t codeSize_; //!< Size of ISA code }; /*@}*/ // namespace amd::pal diff --git a/projects/clr/rocclr/device/pal/palprintf.cpp b/projects/clr/rocclr/device/pal/palprintf.cpp index 0e4ec849db..52c0601fc4 100644 --- a/projects/clr/rocclr/device/pal/palprintf.cpp +++ b/projects/clr/rocclr/device/pal/palprintf.cpp @@ -591,7 +591,7 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, return false; } - // Get memory pointer to the satged buffer + // Get memory pointer to the staged buffer uint32_t* dbgBufferPtr = reinterpret_cast(xferBufRead_->map(&gpu)); if (nullptr == dbgBufferPtr) { return false; diff --git a/projects/clr/rocclr/device/pal/palresource.cpp b/projects/clr/rocclr/device/pal/palresource.cpp index 77d40a871f..6bf0a6b5ca 100644 --- a/projects/clr/rocclr/device/pal/palresource.cpp +++ b/projects/clr/rocclr/device/pal/palresource.cpp @@ -407,7 +407,7 @@ Resource::~Resource() { } // ================================================================================================ -static uint32_t GetHSAILImageFormatType(const cl_image_format& format) { +static uint32_t GetHSAImageFormatType(const cl_image_format& 
format) { static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8, HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16, HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8, @@ -431,7 +431,7 @@ static uint32_t GetHSAILImageFormatType(const cl_image_format& format) { } // ================================================================================================ -static uint32_t GetHSAILImageOrderType(const cl_image_format& format) { +static uint32_t GetHSAImageOrderType(const cl_image_format& format) { static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R, HSA_EXT_IMAGE_CHANNEL_ORDER_A, HSA_EXT_IMAGE_CHANNEL_ORDER_RG, @@ -569,8 +569,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) { } dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_); - hwState_[8] = GetHSAILImageFormatType(desc().format_); - hwState_[9] = GetHSAILImageOrderType(desc().format_); + hwState_[8] = GetHSAImageFormatType(desc().format_); + hwState_[9] = GetHSAImageOrderType(desc().format_); hwState_[10] = static_cast(desc().width_); hwState_[11] = 0; // one extra reserved field in the argument return true; @@ -741,8 +741,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) { viewInfo.subresRange = ImgSubresRange; dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_); - hwState_[8] = GetHSAILImageFormatType(desc().format_); - hwState_[9] = GetHSAILImageOrderType(desc().format_); + hwState_[8] = GetHSAImageFormatType(desc().format_); + hwState_[9] = GetHSAImageOrderType(desc().format_); hwState_[10] = static_cast(desc().width_); hwState_[11] = 0; // one extra reserved field in the argument @@ -961,8 +961,8 @@ bool Resource::CreateInterop(CreateParams* params) { viewInfo.possibleLayouts.usages = Pal::LayoutShaderWrite; dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_); - hwState_[8] = GetHSAILImageFormatType(desc().format_); - hwState_[9] = GetHSAILImageOrderType(desc().format_); + hwState_[8] = 
GetHSAImageFormatType(desc().format_); + hwState_[9] = GetHSAImageOrderType(desc().format_); hwState_[10] = static_cast(desc().width_); hwState_[11] = 0; // one extra reserved field in the argument } @@ -983,8 +983,8 @@ bool Resource::CreateInterop(CreateParams* params) { } dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_); - hwState_[8] = GetHSAILImageFormatType(desc().format_); - hwState_[9] = GetHSAILImageOrderType(desc().format_); + hwState_[8] = GetHSAImageFormatType(desc().format_); + hwState_[9] = GetHSAImageOrderType(desc().format_); hwState_[10] = static_cast(desc().width_); hwState_[11] = 0; // one extra reserved field in the argument } else { @@ -1047,8 +1047,8 @@ bool Resource::CreateInterop(CreateParams* params) { (desc().format_.image_channel_data_type == CL_UNORM_INT24)) { hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000; } - hwState_[8] = GetHSAILImageFormatType(desc().format_); - hwState_[9] = GetHSAILImageOrderType(desc().format_); + hwState_[8] = GetHSAImageFormatType(desc().format_); + hwState_[9] = GetHSAImageOrderType(desc().format_); hwState_[10] = static_cast(desc().width_); hwState_[11] = 0; // one extra reserved field in the argument } diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index c7a8c50340..1f242f46d1 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -58,8 +58,6 @@ Settings::Settings() { hostMemDirectAccess_ = HostMemDisable; - libSelector_ = amd::LibraryUndefined; - // By default use host blit blitEngine_ = BlitEngineHost; pinnedXferSize_ = GPU_PINNED_XFER_SIZE * Mi; @@ -213,7 +211,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, // L1 cache size is 16KB cacheSize_ = 16 * Ki; - libSelector_ = amd::GPU_Library_CI; if (LP64_SWITCH(false, true)) { oclVersion_ = !reportAsOCL12Device ? 
XCONCAT(OpenCL, XCONCAT(OPENCL_MAJOR, OPENCL_MINOR)) : OpenCL12; diff --git a/projects/clr/rocclr/device/pal/palsettings.hpp b/projects/clr/rocclr/device/pal/palsettings.hpp index 7e94a28aba..eb83475980 100644 --- a/projects/clr/rocclr/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/device/pal/palsettings.hpp @@ -21,7 +21,6 @@ #pragma once #include "top.hpp" -#include "library.hpp" #include "palDevice.h" /*! \addtogroup pal PAL Resource Implementation @@ -63,7 +62,7 @@ class Settings : public device::Settings { uint imageSupport_ : 1; //!< Report images support uint doublePrecision_ : 1; //!< Enables double precision support uint use64BitPtr_ : 1; //!< Use 64bit pointers on GPU - uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU + uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG path on GPU uint imageDMA_ : 1; //!< Enable direct image DMA transfers uint threadTraceEnable_ : 1; //!< Thread trace enable uint svmAtomics_ : 1; //!< SVM device atomics @@ -108,8 +107,6 @@ class Settings : public device::Settings { uint64_t subAllocationMaxSize_; //!< Maximum size allowed with suballocations uint64_t subAllocationChunkSize_; //!< Chunk size for suballocaitons - amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler - size_t prepinnedMinSize_; //!< minimal memory size for prepinned transfer uint32_t limit_blit_wg_; //!< The number of workgroups for blit execution diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index 4f3feb103a..2901d2a314 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -700,19 +700,19 @@ class VirtualGPU : public device::VirtualDevice { amd::CopyMetadata copyMetadata = amd::CopyMetadata() //!< Memory copy MetaData ); - void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSAIL kernel + void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent 
HSA kernel VirtualGPU* gpuDefQueue //!< Device queue for children execution ); bool PreDeviceEnqueue(const amd::Kernel& kernel, //!< Parent amd kernel object - const pal::Kernel& hsaKernel, //!< Parent HSAIL object + const pal::Kernel& hsaKernel, //!< Parent HSA kernel object VirtualGPU** gpuDefQueue, //!< [Return] GPU default queue uint64_t* vmDefQueue //!< [Return] VM handle to the virtual queue ); void PostDeviceEnqueue( const amd::Kernel& kernel, //!< Parent amd kernel object - const pal::Kernel& hsaKernel, //!< Parent HSAIL object + const pal::Kernel& hsaKernel, //!< Parent HSA kernel object VirtualGPU* gpuDefQueue, //!< GPU default queue uint64_t vmDefQueue, //!< VM handle to the virtual queue uint64_t vmParentWrap, //!< VM handle to the wrapped AQL packet location diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index e233524a96..dcc5e54071 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -148,7 +148,7 @@ class NullDevice : public amd::Device { const Settings& settings() const { return static_cast(*settings_); } - //! Construct an HSAIL program object from the ELF assuming it is valid + //! Construct a device program object from the ELF assuming it is valid device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr) override; @@ -373,7 +373,7 @@ class Device : public NullDevice { //! Instantiate a new virtual device virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = nullptr); - //! Construct an HSAIL program object from the ELF assuming it is valid + //! 
Construct a device program object from the ELF assuming it is valid virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr); diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index db9db931b7..7567703b8c 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -53,22 +53,22 @@ #endif /** - * HSA image object size in bytes (see HSAIL spec) + * HSA image object size in bytes (see HSA spec) */ #define HSA_IMAGE_OBJECT_SIZE 48 /** - * HSA image object alignment in bytes (see HSAIL spec) + * HSA image object alignment in bytes (see HSA spec) */ #define HSA_IMAGE_OBJECT_ALIGNMENT 16 /** - * HSA sampler object size in bytes (see HSAIL spec) + * HSA sampler object size in bytes (see HSA spec) */ #define HSA_SAMPLER_OBJECT_SIZE 32 /** - * HSA sampler object alignment in bytes (see HSAIL spec) + * HSA sampler object alignment in bytes (see HSA spec) */ #define HSA_SAMPLER_OBJECT_ALIGNMENT 16 diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 5e3e37b0e6..7f23fa4a28 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -411,7 +411,7 @@ class VirtualGPU : public device::VirtualDevice { //! Returns memory dependency class MemoryDependency& memoryDependency() { return memoryDependency_; } - //! Detects memory dependency for HSAIL kernels and uses appropriate AQL header + //! 
Detects memory dependency for HSA kernels and uses appropriate AQL header bool processMemObjects(const amd::Kernel& kernel, //!< AMD kernel object for execution const_address params, //!< Pointer to the param's store size_t& ldsAddress, //!< LDS usage diff --git a/projects/clr/rocclr/elf/elf.cpp b/projects/clr/rocclr/elf/elf.cpp index 704c035464..9d7fa2d210 100644 --- a/projects/clr/rocclr/elf/elf.cpp +++ b/projects/clr/rocclr/elf/elf.cpp @@ -369,36 +369,9 @@ bool Elf::setupShdr(ElfSections id, section* section, Elf64_Word shlink) const { return true; } -bool Elf::getTarget(uint16_t& machine, ElfPlatform& platform) const { - Elf64_Half mach = _elfio.get_machine(); - if ((mach >= CPU_FIRST) && (mach <= CPU_LAST)) { - platform = CPU_PLATFORM; - machine = mach - CPU_BASE; - } else if (mach == EM_386 || mach == EM_HSAIL || mach == EM_HSAIL_64 || mach == EM_AMDIL || - mach == EM_AMDIL_64 || mach == EM_X86_64) { - platform = COMPLIB_PLATFORM; - machine = mach; - } else { - // Invalid machine - LogElfError("failed: Invalid machine=0x%04x(%d)", mach, mach); - return false; - } - LogElfInfo("succeeded: machine=0x%04x, platform=%d", machine, platform); - return true; -} - -bool Elf::setTarget(uint16_t machine, ElfPlatform platform) { - Elf64_Half mach; - if (platform == CPU_PLATFORM) - mach = machine + CPU_BASE; - else if (platform == CAL_PLATFORM) - mach = machine + CAL_BASE; - else - mach = machine; - +bool Elf::setTarget(uint16_t machine) { + Elf64_Half mach = machine + CAL_BASE; _elfio.set_machine(mach); - LogElfInfo("succeeded: machine=0x%04x(%d), platform=%d", machine, machine, platform); - return true; } diff --git a/projects/clr/rocclr/elf/elf.hpp b/projects/clr/rocclr/elf/elf.hpp index 4d3a8cab81..160b3f4b3b 100644 --- a/projects/clr/rocclr/elf/elf.hpp +++ b/projects/clr/rocclr/elf/elf.hpp @@ -36,12 +36,6 @@ using amd::ELFIO::Elf64_Shdr; // These two definitions need to stay in sync with // the definitions elfdefinitions.h until they get // properly upstreamed to 
gcc/libelf. -#ifndef EM_HSAIL -#define EM_HSAIL 0xAF5A -#endif -#ifndef EM_HSAIL_64 -#define EM_HSAIL_64 0xAF5B -#endif #ifndef EM_AMDIL #define EM_AMDIL 0x4154 #endif @@ -57,9 +51,6 @@ using amd::ELFIO::Elf64_Shdr; #ifndef ELFOSABI_AMD_OPENCL #define ELFOSABI_AMD_OPENCL 201 #endif -#ifndef ELFOSABI_HSAIL -#define ELFOSABI_HSAIL 202 -#endif #ifndef ELFOSABI_AMDIL #define ELFOSABI_AMDIL 203 #endif @@ -89,14 +80,6 @@ class Elf { OCL_TARGETS_LAST, } ElfTargets; - typedef enum { - CAL_PLATFORM = 0, - CPU_PLATFORM = 1, - COMPLIB_PLATFORM = 2, - LC_PLATFORM = 3, - LAST_PLATFORM = 4 - } ElfPlatform; - typedef enum { LLVMIR = 0, SOURCE, @@ -301,9 +284,8 @@ class Elf { bool getNote(const char* noteName, char** noteDesc, size_t* descSize); - /* Get/set machine and platform (target) for which elf is built */ - bool getTarget(uint16_t& machine, ElfPlatform& platform) const; - bool setTarget(uint16_t machine, ElfPlatform platform); + /* Set machine (target) for which elf is built */ + bool setTarget(uint16_t machine); /* Get/set elf type field from header */ bool getType(uint16_t& type) const; diff --git a/projects/clr/rocclr/elf/test/CMakeLists.txt b/projects/clr/rocclr/elf/test/CMakeLists.txt deleted file mode 100644 index 2924616e0c..0000000000 --- a/projects/clr/rocclr/elf/test/CMakeLists.txt +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -#-------------------------------------elf_test--------------------------------------# -# This is unit test for amd::Elf. -# The test is on top of rocclr, so rocclr must be built and installed firstly. -# This file is seperate from cmake file of rocclr to prevent interference. 
- -find_package(amd_comgr REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/amd_comgr - lib/cmake/amd_comgr) - -find_package(hsa-runtime64 REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/hsa-runtime64) - -find_package(Threads REQUIRED) - -# Look for ROCclr which contains elfio -find_package(ROCclr REQUIRED CONFIG - PATHS - /opt/rocm - /opt/rocm/rocclr) - -add_executable(elf_test main.cpp) -set_target_properties( - elf_test PROPERTIES - CXX_STANDARD 11 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF - RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) -target_include_directories(elf_test - PRIVATE - $) - -add_definitions(-DCOMGR_DYN_DLL -DDEBUG) - -target_link_libraries(elf_test PRIVATE amdrocclr_static) - -#-------------------------------------elf_test--------------------------------------# diff --git a/projects/clr/rocclr/elf/test/Readme.txt b/projects/clr/rocclr/elf/test/Readme.txt deleted file mode 100644 index cd701a1556..0000000000 --- a/projects/clr/rocclr/elf/test/Readme.txt +++ /dev/null @@ -1,21 +0,0 @@ -1. To build release version -In test folder, -mkdir release (if release doesn't exist) -cd release -cmake .. -make - - -2. To build debug version -In test folder, -mkdir debug (if debug doesn't exist) -cd debug -cmake -DCMAKE_BUILD_TYPE=Debug .. -make - -3. Run test -rm -f *.bin -./elf_test - -To get debug log, -AMD_LOG_LEVEL=5 ./elf_test diff --git a/projects/clr/rocclr/elf/test/main.cpp b/projects/clr/rocclr/elf/test/main.cpp deleted file mode 100644 index c77d80a4d6..0000000000 --- a/projects/clr/rocclr/elf/test/main.cpp +++ /dev/null @@ -1,360 +0,0 @@ -/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
*/ - -#include -#include -#include -#include - -using namespace amd::ELFIO; - -static constexpr uint32_t target_ = 11; -static constexpr char comment_[] = "comment text"; -static constexpr size_t commentSize_ = strlen(comment_) + 1; - -// Elf::RODATA, ".rodata", 1, SHT_PROGBITS, SHF_ALLOC, -static const amd::Elf::SymbolInfo rodataSymbolInfos_[] = { - {".rodata", nullptr, 0, "data__fmetadata", "fmetatdata", strlen("fmetatdata") + 1}, - {".rodata", nullptr, 0, "data__amdil", "amdildata", strlen("amdildata") + 1}, - {".rodata", nullptr, 0, "data__metadata", "metadata", strlen("metadata") + 1}, - {".rodata", nullptr, 0, "data__header", "header", strlen("header") + 1}, - {".rodata", nullptr, 0, "data__global", "global", strlen("global") + 1}, - {".rodata", nullptr, 0, "data__randome0", "xu\0e\0\0l", sizeof("xu\0e\0\0l")}, // binary - {".rodata", nullptr, 0, "data__randome1", "\0j\0\0w\0", sizeof("\0j\0\0w\0")}, // binary -}; - -static constexpr size_t rodataSymbolInfosSize_ = - sizeof(rodataSymbolInfos_) / sizeof(rodataSymbolInfos_[0]); - -// Elf::COMMENT, ".comment", 1, SHT_PROGBITS, 0, -static const amd::Elf::SymbolInfo commentSymbolInfos_[] = { - {".comment", nullptr, 0, "compile", "-g -I/opt/include", strlen("-g -I/opt/include") + 1}, - {".comment", nullptr, 0, "link", "-g -l/opt/rocm/lib", strlen("-g -l/opt/rocm/lib") + 1}, -}; -static constexpr size_t commentSymbolInfosSize_ = - sizeof(commentSymbolInfos_) / sizeof(commentSymbolInfos_[0]); - -struct NoteInfo { - const char* noteName; - const char* noteDesc; - size_t descSize; -}; - -static constexpr NoteInfo noteInfos_[] = { - {"notename0", "sjfasdfe2Afs", strlen("sjfasdfe2Afs") + 1}, - {"notename1", "AsdmvdfFfkd", strlen("AsdmvdfFfkd") + 1}, - {"notename2", "d\0kelH\0D", sizeof("d\0kelH\0D")}, // binary - {"notename3", "\0F\0kA\0", sizeof("\0F\0kA\0")}, // binary -}; - -static const size_t noteInfosSize_ = sizeof(noteInfos_) / sizeof(noteInfos_[0]); - -bool set(amd::Elf* elf) { - if (!elf->setTarget(target_, 
amd::Elf::CPU_PLATFORM)) { - LogError("elf->setTarget() failed"); - return false; - } - - if (!elf->setType(ET_EXEC)) { - LogError("elf->elf() failed"); - return false; - } - - if (!elf->addSection(amd::Elf::COMMENT, comment_, commentSize_)) { - LogError("elf->addSection() failed"); - return false; - } - - size_t i = 0; - LogInfo("writing rodataSymbolInfo"); - - for (i = 0; i < rodataSymbolInfosSize_; i++) { - auto& info = rodataSymbolInfos_[i]; - if (!elf->addSymbol(amd::Elf::RODATA, info.sym_name.c_str(), info.address, info.size)) { - LogPrintfError("elf->addSymbol(RODATA) failed at index %zu", i); - return false; - } - } - - LogInfo("Succeeded"); - LogInfo("writing commentSymbolInfo"); - - for (i = 0; i < commentSymbolInfosSize_; i++) { - auto& info = commentSymbolInfos_[i]; - if (!elf->addSymbol(amd::Elf::COMMENT, info.sym_name.c_str(), info.address, info.size)) { - LogPrintfError("elf->addSymbol(COMMENT) failed at index %zu", i); - return false; - } - } - - LogInfo("Succeeded"); - LogInfo("writing noteInfos"); - - for (i = 0; i < noteInfosSize_; i++) { - auto& info = noteInfos_[i]; - if (!elf->addNote(info.noteName, info.noteDesc, info.descSize)) { - LogPrintfError("elf->addNote() failed at index %zu", i); - return false; - } - } - - LogInfo("Succeeded"); - - return true; -} - -bool verify(amd::Elf* elf) { - uint16_t machine = amd::Elf::OCL_TARGETS_LAST; - amd::Elf::ElfPlatform platform = amd::Elf::LAST_PLATFORM; - if (!elf->getTarget(machine, platform)) { - LogError("elf->getTarget() failed"); - return false; - } - - LogPrintfInfo("getTarget(machine=%u, platform=%d)", machine, platform); - - if (machine != target_) { - LogPrintfError("machine(%u) != target_(%d)", machine, target_); - return false; - } - - if (platform != amd::Elf::CPU_PLATFORM) { - LogPrintfError("platform(%d) != CAL_PLATFORM(%d)", platform, amd::Elf::CPU_PLATFORM); - return false; - } - - uint16_t type = ET_NONE; - - if (!elf->getType(type)) { - LogError("elf->elf() failed"); - return false; 
- } - - LogPrintfInfo("getType(%u)", type); - - if (type != ET_EXEC) { - LogError("type != ET_EXEC"); - return false; - } - - char* buffer = nullptr; - size_t size = 0; - - if (!elf->getSection(amd::Elf::COMMENT, &buffer, &size)) { - LogError("elf->getSection(COMMENT) failed"); - return false; - } - - LogPrintfInfo("getSection(COMMENT, buffer=%s, size=%zu)", buffer, size); - - if (size < commentSize_ || memcmp(comment_, buffer, commentSize_) != 0) { - LogPrintfError("Not matched section: size = %zu, buffer = %s, expected: %zu, %s", size, buffer, - commentSize_, comment_); - return false; - } - - LogInfo("Reading rodataSymbolInfo"); - - size_t i = 0; - buffer = nullptr; - size = 0; - for (i = 0; i < rodataSymbolInfosSize_; i++) { - auto& info = rodataSymbolInfos_[i]; - if (!elf->getSymbol(amd::Elf::RODATA, info.sym_name.c_str(), &buffer, &size)) { - LogPrintfError("elf->getSymbol(RODATA, %s) failed at index %zu", info.sym_name.c_str(), i); - return false; - } - LogPrintfInfo("getSymbol(amd::Elf::RODATA, sym_name=%s, buffer=%s, size=%zu)", - info.sym_name.c_str(), buffer, size); // Will possibly print part of buffer - - if (size != info.size || memcmp(buffer, info.address, info.size)) { - LogPrintfError("Not matched symbol(%s): size = %zu, buff = %s, expected: %zu, %s", - info.sym_name.c_str(), size, buffer, info.size, info.address); - return false; - } - } - - LogInfo("Succeeded"); - LogInfo("reading commentSymbolInfo"); - - buffer = nullptr; - size = 0; - for (i = 0; i < commentSymbolInfosSize_; i++) { - auto& info = commentSymbolInfos_[i]; - if (!elf->getSymbol(amd::Elf::COMMENT, info.sym_name.c_str(), &buffer, &size)) { - LogPrintfError("elf->getSymbol(COMMENT, %s) failed at index %zu", info.sym_name.c_str(), i); - return false; - } - LogPrintfInfo("getSymbol(COMMENT, sym_name=%s, buffer=%s, size=%zu)", info.sym_name.c_str(), - buffer, size); // Will possibly print part of buffer - if (size != info.size || memcmp(buffer, info.address, info.size)) { - 
LogPrintfError("Not matched symbol(%s): size = %zu, buff = %s, expected: %zu, %s", - info.sym_name.c_str(), size, buffer, info.size, info.address); - return false; - } - } - - // Test another way - auto symbolNum = elf->getSymbolNum(); - if (symbolNum != (rodataSymbolInfosSize_ + commentSymbolInfosSize_)) { - LogPrintfError( - "Not matched: symbolNum(%u) != rodataSymbolInfosSize_(%u) +" - " commentSymbolInfosSize_(%u)", - symbolNum, rodataSymbolInfosSize_, commentSymbolInfosSize_); - return false; - } - - for (i = 0; i < rodataSymbolInfosSize_; i++) { - auto& info = rodataSymbolInfos_[i]; - amd::Elf::SymbolInfo symInfo; - - if (!elf->getSymbolInfo(i, &symInfo)) { - LogPrintfError("getSymbolInfo(%zu) failed", i); - return false; - } - LogPrintfInfo( - "getSymbolInfo(%zu): amd::Elf::RODATA: sec_name=%s, sym_name=%s, " - "address=%s, size=%lu, sec_addr=%s, sec_size=%lu)", - i, symInfo.sec_name.c_str(), symInfo.sym_name.c_str(), - symInfo.address, // Will possibly print part of buffer - symInfo.size, symInfo.sec_addr, symInfo.sec_size); - if (symInfo.sec_name == info.sec_name && symInfo.sym_name == info.sym_name && - symInfo.size == info.size && ::memcmp(symInfo.address, info.address, info.size) == 0) { - continue; - } - LogPrintfError("getSymbolInfo(%zu) returned not matched", i); - return false; - } - - for (; i < symbolNum; i++) { - auto& info = commentSymbolInfos_[i - rodataSymbolInfosSize_]; - amd::Elf::SymbolInfo symInfo; - - if (!elf->getSymbolInfo(i, &symInfo)) { - LogPrintfError("getSymbolInfo(%zu) failed", i); - return false; - } - LogPrintfInfo( - "getSymbolInfo(%zu): amd::Elf::COMMENT: sec_name=%s, sym_name=%s, " - "address=%s, size=%lu, sec_addr=%s, sec_size=%lu)", - i, symInfo.sec_name.c_str(), symInfo.sym_name.c_str(), - symInfo.address, // Will possibly print part of buffer - symInfo.size, symInfo.sec_addr, symInfo.sec_size); - if (symInfo.sec_name == info.sec_name && symInfo.sym_name == info.sym_name && - symInfo.size == info.size && 
::memcmp(symInfo.address, info.address, info.size) == 0) { - continue; - } - LogPrintfError("getSymbolInfo(%zu) returned not matched", i); - return false; - } - - LogInfo("Succeeded"); - LogError("Reading noteInfos"); - - buffer = nullptr; - size = 0; - for (i = 0; i < noteInfosSize_; i++) { - auto& info = noteInfos_[i]; - if (!elf->getNote(info.noteName, &buffer, &size)) { - LogPrintfError("elf->getNote(%s) failed at index %zu", info.noteName, i); - return false; - } - // Will possibly print part of buffer - LogPrintfInfo("getNote(noteName=%s, buffer=%s, size=%zu)", info.noteName, buffer, size); - if (size != info.descSize || memcmp(buffer, info.noteDesc, info.descSize)) { - LogPrintfError("Not matched note(%s): size = %zu, buff = %s, expected: %zu, %s", - info.noteName, size, buffer, info.descSize, info.noteDesc); - return false; - } - } - - LogPrintfInfo("%s: Succeeded", __func__); - - return true; -} - -bool test(unsigned char eclass = ELFCLASS64, const char* outFile = nullptr) { - amd::Elf* writer = new amd::Elf(eclass, nullptr, 0, outFile, amd::Elf::ELF_C_WRITE); - amd::Elf* reader = nullptr; - bool ret = false; - do { - if ((writer == nullptr) || !writer->isSuccessful()) { - LogError("Creating writter ELF object failed"); - break; - } - - // Writing - if (!set(writer)) { - break; - } - - // Verifying - if (!verify(writer)) { - break; - } - - char* buff = nullptr; - unsigned long len = 0; - if (writer->dumpImage(&buff, &len)) { - LogPrintfInfo("dumpImage succeed: buff=%p, len=%u)", buff, len); - - reader = new amd::Elf(eclass, buff, len, nullptr, amd::Elf::ELF_C_READ); - - delete[] buff; - - if ((reader == nullptr) || !reader->isSuccessful()) { - LogError("Creating reader ELF object failed"); - break; - } - - ret = verify(reader); - - delete reader; - } - } while (false); - - if (writer) { - delete writer; - } - if (reader) { - delete reader; - } - LogPrintfError("%s(%s, %s): %s", __func__, eclass == ELFCLASS64 ? "ELFCLASS64" : "ELFCLASS32", - outFile ? 
outFile : "nullptr", ret ? "Succeeded" : "Failed"); - return ret; -} - -int main() { - bool ret = false; - amd::Flag::init(); - unsigned char eclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); - const char* outFile = eclass == ELFCLASS32 ? "elf32.bin" : "elf64.bin"; - - ret = test(eclass, outFile); - printf("%s: test(%s, %s) %s!\n", __func__, eclass == ELFCLASS32 ? "ELFCLASS32" : "ELFCLASS64", - outFile, ret ? "Succeeded" : "Failed"); - - if (ret) { - ret = test(eclass, nullptr); - printf("%s: test(%s, nullptr) %s!\n", __func__, - eclass == ELFCLASS32 ? "ELFCLASS32" : "ELFCLASS64", ret ? "Succeeded" : "Failed"); - } - return 0; -} diff --git a/projects/clr/rocclr/platform/kernel.hpp b/projects/clr/rocclr/platform/kernel.hpp index 0749e6f071..2b789d175d 100644 --- a/projects/clr/rocclr/platform/kernel.hpp +++ b/projects/clr/rocclr/platform/kernel.hpp @@ -63,9 +63,8 @@ class KernelSignature : public HeapObject { public: enum { - ABIVersion_0 = 0, //! ABI constructed based on the OCL semantics - ABIVersion_1 = 1, //! ABI constructed based on the HW ABI returned from HSAIL - ABIVersion_2 = 2 //! ABI constructed based on the HW ABI returned from LC + ABIVersion_OCL = 0, //! ABI constructed based on the OCL semantics + ABIVersion_LC = 1 //! ABI constructed based on the HW ABI returned from LC }; //! Default constructor @@ -75,7 +74,7 @@ class KernelSignature : public HeapObject { numMemories_(0), numSamplers_(0), numQueues_(0), - version_(ABIVersion_0) {} + version_(ABIVersion_OCL) {} //! Construct a new signature. KernelSignature(const std::vector& params, const std::string& attrib,