SWDEV-556684 - HSAIL cleanup (#1657)
Этот коммит содержится в:
@@ -38,7 +38,7 @@ Run these commands:
|
||||
```bash
|
||||
cd "$CLR_DIR"
|
||||
mkdir -p build; cd build
|
||||
cmake -DUSE_COMGR_LIBRARY=ON -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCLR_BUILD_HIP=OFF -DCLR_BUILD_OCL=ON ..
|
||||
cmake -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCLR_BUILD_HIP=OFF -DCLR_BUILD_OCL=ON ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef LIBRARY_H_
|
||||
#define LIBRARY_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
namespace amd {
|
||||
|
||||
// Selector enumeration naming the library variants a caller can request
// (it is the type of the first parameter of getLibDescs() declared below).
// Only the first enumerator has an explicit value (0); all others take the
// next consecutive value in declaration order.
// NOTE(review): the CPU/GPU/CPU64/GPU64 prefixes and the 7xx/Evergreen/SI/CI/
// HSAIL/AVX/FMA4 suffixes presumably identify target families and bitness --
// confirm against the library tables that consume this enum.
typedef enum _library_selector {
|
||||
LibraryUndefined = 0,
|
||||
GPU_Library_7xx,
|
||||
GPU_Library_Evergreen,
|
||||
GPU_Library_SI,
|
||||
CPU_Library_Generic,
|
||||
CPU_Library_AVX,
|
||||
CPU_Library_FMA4,
|
||||
GPU_Library_Generic,
|
||||
CPU64_Library_Generic,
|
||||
CPU64_Library_AVX,
|
||||
CPU64_Library_FMA4,
|
||||
GPU64_Library_Evergreen,
|
||||
GPU64_Library_SI,
|
||||
GPU64_Library_Generic,
|
||||
GPU_Library_CI,
|
||||
GPU64_Library_CI,
|
||||
GPU_Library_HSAIL,
|
||||
// Declared last, so its value equals the number of enumerators above it.
LibraryTotal
|
||||
} LibrarySelector;
|
||||
|
||||
/** Integrated Bitcode Libraries **/
|
||||
// Plain descriptor with two public data members: a const char pointer and a
// size. It declares no constructors or member functions.
// NOTE(review): presumably describes one in-memory library image (start
// address plus length in bytes) -- confirm against the getLibDescs() definition.
class LibraryDescriptor {
|
||||
public:
|
||||
// NOTE(review): presumably the maximum number of descriptors a caller's
// output array must be able to hold -- confirm.
enum { MAX_NUM_LIBRARY_DESCS = 11 };
|
||||
|
||||
// Pointer to the first byte of the described data.
const char* start;
|
||||
// Size associated with `start`; units are not stated here (presumably bytes).
size_t size;
|
||||
};
|
||||
|
||||
// Declaration only; the definition is not visible in this header.
// Per the inline parameter notes: LibType is the input selector, LibDesc is an
// output array of descriptors, and LibDescSize is an output count such that the
// valid entries are LibDesc[0 .. LibDescSize-1].
// NOTE(review): the meaning of the int return value is not shown here -- confirm
// against the definition before relying on it.
int getLibDescs(LibrarySelector LibType, // input
|
||||
LibraryDescriptor* LibDesc, // output
|
||||
int& LibDescSize // output -- LibDesc[0:LibDescSize-1]
|
||||
);
|
||||
|
||||
// Compile-time table of six "__amdrt_*" function names.
// Because it is declared `static` in a header, every translation unit that
// includes this header gets its own copy of the array.
// NOTE(review): judging by the names these are runtime helpers for 64-bit
// integer div/mod and float/double-to-u64 conversion -- confirm where used.
static constexpr const char* amdRTFuns[] = {"__amdrt_div_i64", "__amdrt_div_u64",
|
||||
"__amdrt_mod_i64", "__amdrt_mod_u64",
|
||||
"__amdrt_cvt_f64_to_u64", "__amdrt_cvt_f32_to_u64"};
|
||||
} // namespace amd
|
||||
|
||||
#endif // LIBRARY_H_
|
||||
@@ -1233,8 +1233,7 @@ Options::Options()
|
||||
dumpFileRoot(),
|
||||
currKernelName(NULL),
|
||||
encryptCode(0),
|
||||
MemoryHandles(),
|
||||
libraryType_(amd::LibraryUndefined) {
|
||||
MemoryHandles() {
|
||||
oVariables = new OptionVariables();
|
||||
::memset(flags, 0, sizeof(flags));
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
#include "top.hpp"
|
||||
#include "library.hpp"
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#ifdef __linux__
|
||||
@@ -316,7 +315,6 @@ class Options {
|
||||
void setDumpFileName(const char* val);
|
||||
|
||||
public:
|
||||
LibrarySelector libraryType_;
|
||||
std::string sourceFileName_;
|
||||
};
|
||||
|
||||
|
||||
@@ -107,159 +107,159 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
|
||||
// the ANY value. If the target feature is disabled then use a '-' suffix,
|
||||
// and if enabled use a '+' suffix.
|
||||
//
|
||||
// If the HSAIL or AMD IL compilers do not support the target, then use
|
||||
// If the AMD IL compilers do not support the target, then use
|
||||
// nullptr for the ID.
|
||||
//
|
||||
// -------------- Compiler ---------- - Runtime - ---- IP ---- -- Target -- ----------
|
||||
// Target Properties ----------
|
||||
// Supported Version Features
|
||||
// SIMD/
|
||||
// SIMD
|
||||
// Instr
|
||||
// Bank LDS
|
||||
// Mem
|
||||
// Target ID HSAIL ID ROC PAL Maj/Min/Stp SRAMECC XNACK CU Width
|
||||
// Width Width Size Banks LDSAlignment
|
||||
{"gfx801", nullptr, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx801:xnack-", nullptr, true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx801:xnack+", "gfx801", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx802", "gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx803", "gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx810", nullptr, true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx810:xnack-", nullptr, true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx810:xnack+", "gfx810", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx900:xnack-", "gfx900", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx900:xnack+", "gfx901", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx902:xnack-", "gfx902", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx902:xnack+", "gfx903", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx904:xnack-", "gfx904", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx904:xnack+", "gfx905", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc-", "gfx907", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc+", nullptr, true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:xnack-", "gfx906", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:xnack+", "gfx907", true, true, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc-:xnack-", "gfx906", true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
// -- Compiler --|-- Runtime --|-- IP --|-- Target --|-- Target Properties --
|
||||
// | Supported | Version| Features |
|
||||
// --------------|-------------|--------|------------|-----------------------
|
||||
// Target ID | ROC PAL | Major | SRAMECC | SIMD/CU
|
||||
// | | Minor | XNACK | SIMD Width
|
||||
// | | Step | | Instr Width
|
||||
// | | | | Bank Width
|
||||
// | | | | LDS Size
|
||||
// | | | | Mem Banks
|
||||
// | | | | LDS Alignment
|
||||
{"gfx801", true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx801:xnack-", true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx801:xnack+", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx805", true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx810", true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx810:xnack-", true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx810:xnack+", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx900", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx900:xnack-", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx900:xnack+", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx902", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx902:xnack-", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx902:xnack+", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx904", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx904:xnack-", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx904:xnack+", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc-", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc+", true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:xnack-", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:xnack+", true, true, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc-:xnack-", true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx906:sramecc-:xnack+", "gfx907", true, true, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx906:sramecc-:xnack+", true, true, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx906:sramecc+:xnack-", nullptr, true, true, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc+:xnack+", nullptr, true, true, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908", nullptr, true, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:sramecc-", nullptr, true, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:sramecc+", nullptr, true, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:xnack-", nullptr, true, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:xnack+", nullptr, true, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:sramecc-:xnack-", nullptr, true, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx906:sramecc+:xnack-", true, true, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx906:sramecc+:xnack+", true, true, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908", true, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:sramecc-", true, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:sramecc+", true, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:xnack-", true, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:xnack+", true, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx908:sramecc-:xnack-", true, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx908:sramecc-:xnack+", nullptr, true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx908:sramecc-:xnack+", true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx908:sramecc+:xnack-", nullptr, true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx908:sramecc+:xnack-", true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx908:sramecc+:xnack+", nullptr, true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx909:xnack-", nullptr, false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx909:xnack+", nullptr, false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a", nullptr, true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:sramecc-", nullptr, true, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:sramecc+", nullptr, true, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:xnack-", nullptr, true, false, 9, 0, 10, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:xnack+", nullptr, true, false, 9, 0, 10, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:sramecc-:xnack-", nullptr, true, false, 9, 0, 10, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx908:sramecc+:xnack+", true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx909", false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx909:xnack-", false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx909:xnack+", false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a", true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:sramecc-", true, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:sramecc+", true, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:xnack-", true, false, 9, 0, 10, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:xnack+", true, false, 9, 0, 10, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90a:sramecc-:xnack-", true, false, 9, 0, 10, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx90a:sramecc-:xnack+", nullptr, true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx90a:sramecc-:xnack+", true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx90a:sramecc+:xnack-", nullptr, true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx90a:sramecc+:xnack-", true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx90a:sramecc+:xnack+", nullptr, true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx90a:sramecc+:xnack+", true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx942", nullptr, true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:sramecc-", nullptr, true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:sramecc+", nullptr, true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:xnack-", nullptr, true, false, 9, 4, 2, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:xnack+", nullptr, true, false, 9, 4, 2, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:sramecc-:xnack-", nullptr, true, false, 9, 4, 2, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx942", true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:sramecc-", true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:sramecc+", true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:xnack-", true, false, 9, 4, 2, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:xnack+", true, false, 9, 4, 2, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx942:sramecc-:xnack-", true, false, 9, 4, 2, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx942:sramecc-:xnack+", nullptr, true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx942:sramecc-:xnack+", true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx942:sramecc+:xnack-", nullptr, true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx942:sramecc+:xnack-", true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx942:sramecc+:xnack+", nullptr, true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx950", nullptr, true, false, 9, 5, 0, ANY, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:sramecc-", nullptr, true, false, 9, 5, 0, OFF, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:sramecc+", nullptr, true, false, 9, 5, 0, ON, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:xnack-", nullptr, true, false, 9, 5, 0, ANY, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:xnack+", nullptr, true, false, 9, 5, 0, ANY, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:sramecc-:xnack-", nullptr, true, false, 9, 5, 0, OFF, OFF, 4, 16, 1, 256, 160 * Ki,
|
||||
{"gfx942:sramecc+:xnack+", true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90c", true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90c:xnack-", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx90c:xnack+", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx950", true, false, 9, 5, 0, ANY, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:sramecc-", true, false, 9, 5, 0, OFF, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:sramecc+", true, false, 9, 5, 0, ON, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:xnack-", true, false, 9, 5, 0, ANY, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:xnack+", true, false, 9, 5, 0, ANY, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280},
|
||||
{"gfx950:sramecc-:xnack-", true, false, 9, 5, 0, OFF, OFF, 4, 16, 1, 256, 160 * Ki,
|
||||
64, 1280},
|
||||
{"gfx950:sramecc-:xnack+", nullptr, true, false, 9, 5, 0, OFF, ON, 4, 16, 1, 256, 160 * Ki,
|
||||
{"gfx950:sramecc-:xnack+", true, false, 9, 5, 0, OFF, ON, 4, 16, 1, 256, 160 * Ki,
|
||||
64, 1280},
|
||||
{"gfx950:sramecc+:xnack-", nullptr, true, false, 9, 5, 0, ON, OFF, 4, 16, 1, 256, 160 * Ki,
|
||||
{"gfx950:sramecc+:xnack-", true, false, 9, 5, 0, ON, OFF, 4, 16, 1, 256, 160 * Ki,
|
||||
64, 1280},
|
||||
{"gfx950:sramecc+:xnack+", nullptr, true, false, 9, 5, 0, ON, ON, 4, 16, 1, 256, 160 * Ki,
|
||||
{"gfx950:sramecc+:xnack+", true, false, 9, 5, 0, ON, ON, 4, 16, 1, 256, 160 * Ki,
|
||||
64, 1280},
|
||||
{"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-generic:xnack-", nullptr, true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-generic:xnack+", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic", nullptr, true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc-", nullptr, true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx9-generic", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-generic:xnack-", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-generic:xnack+", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic", true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc-", true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx9-4-generic:sramecc+", nullptr, true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki,
|
||||
{"gfx9-4-generic:sramecc+", true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki,
|
||||
32, 512},
|
||||
{"gfx9-4-generic:xnack-", nullptr, true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:xnack+", nullptr, true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc-:xnack-", nullptr, true, true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256,
|
||||
{"gfx9-4-generic:xnack-", true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:xnack+", true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc-:xnack-", true, true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256,
|
||||
64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc-:xnack+", nullptr, true, true, 9, 4, 0, OFF, ON, 4, 16, 1, 256,
|
||||
{"gfx9-4-generic:sramecc-:xnack+", true, true, 9, 4, 0, OFF, ON, 4, 16, 1, 256,
|
||||
64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc+:xnack-", nullptr, true, true, 9, 4, 0, ON, OFF, 4, 16, 1, 256,
|
||||
{"gfx9-4-generic:sramecc+:xnack-", true, true, 9, 4, 0, ON, OFF, 4, 16, 1, 256,
|
||||
64 * Ki, 32, 512},
|
||||
{"gfx9-4-generic:sramecc+:xnack+", nullptr, true, true, 9, 4, 0, ON, ON, 4, 16, 1, 256,
|
||||
{"gfx9-4-generic:sramecc+:xnack+", true, true, 9, 4, 0, ON, ON, 4, 16, 1, 256,
|
||||
64 * Ki, 32, 512},
|
||||
{"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1011", "gfx1011", true, true, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1011:xnack-", "gfx1011", true, true, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1011:xnack+", nullptr, true, true, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1012", "gfx1012", true, true, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1012:xnack-", "gfx1012", true, true, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1012:xnack+", nullptr, true, true, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx10-1-generic:xnack-", nullptr, true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki,
|
||||
{"gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1010:xnack-", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1010:xnack+", true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1011", true, true, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1011:xnack-", true, true, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1011:xnack+", true, true, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1012", true, true, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1012:xnack-", true, true, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1012:xnack+", true, true, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1013:xnack-", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1013:xnack+", true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx10-1-generic", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx10-1-generic:xnack-", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki,
|
||||
32, 1024},
|
||||
{"gfx10-1-generic:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki,
|
||||
{"gfx10-1-generic:xnack+", true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki,
|
||||
32, 1024},
|
||||
{"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1033", "gfx1033", true, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1034", "gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1035", "gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1036", "gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx10-3-generic", nullptr, true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1100", "gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1101", "gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1102", "gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1103", "gfx1103", true, true, 11, 0, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1150", "gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1151", "gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1152", "gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1153", "gfx1153", true, true, 11, 5, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx11-generic", nullptr, true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1200", "gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1201", "gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx12-generic", nullptr, true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1033", true, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx10-3-generic", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1103", true, true, 11, 0, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1153", true, true, 11, 5, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx11-generic", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
{"gfx12-generic", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
|
||||
};
|
||||
return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_));
|
||||
}
|
||||
@@ -1240,7 +1240,7 @@ bool ClBinary::setElfTarget() {
|
||||
static const uint32_t Target = 21;
|
||||
assert(((0xFFFF8000 & Target) == 0) && "ASIC target ID >= 2^15");
|
||||
uint16_t elf_target = static_cast<uint16_t>(0x7FFF & Target);
|
||||
return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM);
|
||||
return elfOut()->setTarget(elf_target);
|
||||
}
|
||||
|
||||
void ClBinary::init(amd::option::Options* optionsObj) {
|
||||
@@ -1272,37 +1272,6 @@ void ClBinary::init(amd::option::Options* optionsObj) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether the input binary can be recompiled for `thePlatform`.
//
// @param llvmBinary   LLVM binary contents; an empty string is rejected.
// @param thePlatform  Platform the caller wants to recompile for.
// @returns true when the platform reported by elfIn()->getTarget() equals
//          thePlatform, or when that platform is COMPLIB_PLATFORM and the ELF
//          target matches the requested platform (EM_HSAIL / EM_HSAIL_64 for
//          CAL_PLATFORM, EM_386 / EM_X86_64 for CPU_PLATFORM). Returns false,
//          after logging, in every other case, including when llvmBinary is
//          empty or getTarget() yields a false value.
bool ClBinary::isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform) {
|
||||
/* It is recompilable if there is llvmir that was generated for
|
||||
the same platform (CPU or GPU) and with the same bitness.
|
||||
|
||||
Note: the bitness has been checked in initClBinary(), no need
|
||||
to check it here.
|
||||
*/
|
||||
// Nothing to recompile from without LLVM IR.
if (llvmBinary.empty()) {
|
||||
DevLogError("LLVM Binary string is empty \n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Both values are filled in by getTarget() below.
uint16_t elf_target;
|
||||
amd::Elf::ElfPlatform platform;
|
||||
if (elfIn()->getTarget(elf_target, platform)) {
|
||||
// Exact platform match: recompilable.
if (platform == thePlatform) {
|
||||
return true;
|
||||
}
|
||||
// COMPLIB_PLATFORM binaries are accepted when the ELF machine value is one
// of the two listed for the requested platform.
if ((platform == amd::Elf::COMPLIB_PLATFORM) &&
|
||||
(((thePlatform == amd::Elf::CAL_PLATFORM) &&
|
||||
((elf_target == (uint16_t)EM_HSAIL) || (elf_target == (uint16_t)EM_HSAIL_64))) ||
|
||||
((thePlatform == amd::Elf::CPU_PLATFORM) &&
|
||||
((elf_target == (uint16_t)EM_386) || (elf_target == (uint16_t)EM_X86_64))))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Reached when getTarget() yielded false or no platform/target rule matched.
DevLogPrintfError("LLVM_Binary: %s is not recompilable \n", llvmBinary.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
void ClBinary::release() {
|
||||
if (isBinaryAllocated() && (binary_ != nullptr)) {
|
||||
delete[] binary_;
|
||||
|
||||
@@ -1083,9 +1083,6 @@ class ClBinary : public amd::HeapObject {
|
||||
amd::Elf::ElfSections& elfSectionType //!< LLVMIR binary is in SPIR format
|
||||
) const;
|
||||
|
||||
//! Check if the binary is recompilable
|
||||
bool isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform);
|
||||
|
||||
void saveOrigBinary(const char* origBinary, size_t origSize) {
|
||||
origBinary_ = origBinary;
|
||||
origSize_ = origSize;
|
||||
@@ -1455,9 +1452,6 @@ class Isa {
|
||||
/// @returns This Isa's target ID name.
|
||||
const char* targetId() const { return targetId_; }
|
||||
|
||||
/// @returns This Isa's name to use with the HSAIL compiler.
|
||||
const char* hsailName() const { return hsailId_; }
|
||||
|
||||
/// @returns If the ROCm runtime supports the ISA.
|
||||
bool runtimeRocSupported() const {
|
||||
if (!IS_HIP && (versionMajor_ == 8)) {
|
||||
@@ -1534,13 +1528,12 @@ class Isa {
|
||||
static const Isa* end();
|
||||
|
||||
private:
|
||||
constexpr Isa(const char* targetId, const char* hsailId, bool runtimeRocSupported,
|
||||
bool runtimePalSupported, uint32_t versionMajor, uint32_t versionMinor,
|
||||
uint32_t versionStepping, Feature sramecc, Feature xnack, uint32_t simdPerCU,
|
||||
uint32_t simdWidth, uint32_t simdInstructionWidth, uint32_t memChannelBankWidth,
|
||||
constexpr Isa(const char* targetId, bool runtimeRocSupported, bool runtimePalSupported,
|
||||
uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
|
||||
Feature sramecc, Feature xnack, uint32_t simdPerCU, uint32_t simdWidth,
|
||||
uint32_t simdInstructionWidth, uint32_t memChannelBankWidth,
|
||||
uint32_t localMemSizePerCU, uint32_t localMemBanks, uint32_t ldsAlignment)
|
||||
: targetId_(targetId),
|
||||
hsailId_(hsailId),
|
||||
runtimeRocSupported_(runtimeRocSupported),
|
||||
runtimePalSupported_(runtimePalSupported),
|
||||
versionMajor_(versionMajor),
|
||||
@@ -1563,11 +1556,6 @@ class Isa {
|
||||
// compilations.
|
||||
const char* targetId_;
|
||||
|
||||
// @brief Isa's HSAIL name. Used for the Compiler Library for HSAIL
|
||||
// compilation using the Shader Compiler Finalizer. Empty string if
|
||||
// unsupported.
|
||||
const char* hsailId_;
|
||||
|
||||
bool runtimeRocSupported_; //!< ROCm runtime is supported.
|
||||
bool runtimePalSupported_; //!< PAL runtime is supported.
|
||||
uint32_t versionMajor_; //!< Isa's major version.
|
||||
|
||||
@@ -997,7 +997,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
|
||||
uint32_t numParams = params.size();
|
||||
// Append the hidden arguments to the OCL arguments
|
||||
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
|
||||
createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
|
||||
createSignature(params, numParams, amd::KernelSignature::ABIVersion_LC);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
@@ -1702,7 +1702,6 @@ Program::file_type_t Program::getNextCompilationStageFromBinary(amd::option::Opt
|
||||
bool recompile = false;
|
||||
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
||||
switch (continueCompileFrom) {
|
||||
case FILE_TYPE_HSAIL_BINARY:
|
||||
case FILE_TYPE_CG:
|
||||
case FILE_TYPE_ISA: {
|
||||
// Compare options loaded from binary with current ones, recompile if differ;
|
||||
|
||||
@@ -87,18 +87,16 @@ class Program : public amd::HeapObject {
|
||||
FILE_TYPE_SPIR_BINARY = 5,
|
||||
FILE_TYPE_AMDIL_TEXT = 6,
|
||||
FILE_TYPE_AMDIL_BINARY = 7,
|
||||
FILE_TYPE_HSAIL_TEXT = 8,
|
||||
FILE_TYPE_HSAIL_BINARY = 9,
|
||||
FILE_TYPE_X86_TEXT = 10,
|
||||
FILE_TYPE_X86_BINARY = 11,
|
||||
FILE_TYPE_CG = 12,
|
||||
FILE_TYPE_SOURCE = 13,
|
||||
FILE_TYPE_ISA = 14,
|
||||
FILE_TYPE_HEADER = 15,
|
||||
FILE_TYPE_RSLLVMIR_BINARY = 16,
|
||||
FILE_TYPE_SPIRV_BINARY = 17,
|
||||
FILE_TYPE_ASM_TEXT = 18,
|
||||
FILE_TYPE_LAST = 19
|
||||
FILE_TYPE_X86_TEXT = 8,
|
||||
FILE_TYPE_X86_BINARY = 9,
|
||||
FILE_TYPE_CG = 10,
|
||||
FILE_TYPE_SOURCE = 11,
|
||||
FILE_TYPE_ISA = 12,
|
||||
FILE_TYPE_HEADER = 13,
|
||||
FILE_TYPE_RSLLVMIR_BINARY = 14,
|
||||
FILE_TYPE_SPIRV_BINARY = 15,
|
||||
FILE_TYPE_ASM_TEXT = 16,
|
||||
FILE_TYPE_LAST = 17
|
||||
} file_type_t;
|
||||
|
||||
private:
|
||||
|
||||
@@ -122,7 +122,7 @@ static std::tuple<const amd::Isa*, const char*> findIsa(uint32_t gfxipMajor, uin
|
||||
sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled,
|
||||
xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled);
|
||||
return std::make_tuple(
|
||||
isa, (palDeviceIter->gfxipMajor_ > 8) ? isa->hsailName() : palDeviceIter->palName_);
|
||||
isa, (palDeviceIter->gfxipMajor_ > 8) ? isa->targetId() : palDeviceIter->palName_);
|
||||
}
|
||||
|
||||
static std::tuple<Pal::GfxIpLevel, Pal::AsicRevision, const char*> findPal(uint32_t gfxipMajor,
|
||||
|
||||
@@ -193,7 +193,7 @@ Kernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
|
||||
}
|
||||
|
||||
// The check below handles a special case of single context with multiple devices
|
||||
// when the devices use different compilers(HSAIL and LC) and have different signatures
|
||||
// when the devices have different signatures
|
||||
const amd::KernelSignature& signature =
|
||||
(this->signature().version() == kernel.signature().version()) ? kernel.signature()
|
||||
: this->signature();
|
||||
|
||||
@@ -84,9 +84,6 @@ class Kernel : public device::Kernel {
|
||||
//! Returns LDS size used in this kernel
|
||||
uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }
|
||||
|
||||
//! Returns pointer on CPU to AQL code info
|
||||
const amd_kernel_code_t* cpuAqlCode() const { return &akc_; }
|
||||
|
||||
//! Returns pointer on CPU to AQL kernel descriptor info
|
||||
const llvm::amdhsa::kernel_descriptor_t* cpuAqlKd() const { return &akd_; }
|
||||
|
||||
@@ -135,14 +132,10 @@ class Kernel : public device::Kernel {
|
||||
void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize,
|
||||
const uint16_t numSGPRs, const uint16_t numVGPRs);
|
||||
|
||||
union {
|
||||
amd_kernel_code_t akc_; //!< AQL kernel code on CPU, used by HSAIL
|
||||
llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC
|
||||
};
|
||||
uint index_; //!< Kernel index in the program
|
||||
|
||||
uint64_t code_; //!< GPU memory pointer to the kernel
|
||||
size_t codeSize_; //!< Size of ISA code
|
||||
llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC
|
||||
uint index_; //!< Kernel index in the program
|
||||
uint64_t code_; //!< GPU memory pointer to the kernel
|
||||
size_t codeSize_; //!< Size of ISA code
|
||||
};
|
||||
|
||||
/*@}*/ // namespace amd::pal
|
||||
|
||||
@@ -591,7 +591,7 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get memory pointer to the satged buffer
|
||||
// Get memory pointer to the staged buffer
|
||||
uint32_t* dbgBufferPtr = reinterpret_cast<uint32_t*>(xferBufRead_->map(&gpu));
|
||||
if (nullptr == dbgBufferPtr) {
|
||||
return false;
|
||||
|
||||
@@ -407,7 +407,7 @@ Resource::~Resource() {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
|
||||
static uint32_t GetHSAImageFormatType(const cl_image_format& format) {
|
||||
static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16,
|
||||
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8,
|
||||
@@ -431,7 +431,7 @@ static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
|
||||
static uint32_t GetHSAImageOrderType(const cl_image_format& format) {
|
||||
static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_A,
|
||||
HSA_EXT_IMAGE_CHANNEL_ORDER_RG,
|
||||
@@ -569,8 +569,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
|
||||
}
|
||||
|
||||
dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
|
||||
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
||||
hwState_[8] = GetHSAImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAImageOrderType(desc().format_);
|
||||
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
||||
hwState_[11] = 0; // one extra reserved field in the argument
|
||||
return true;
|
||||
@@ -741,8 +741,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
|
||||
viewInfo.subresRange = ImgSubresRange;
|
||||
dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
|
||||
|
||||
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
||||
hwState_[8] = GetHSAImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAImageOrderType(desc().format_);
|
||||
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
||||
hwState_[11] = 0; // one extra reserved field in the argument
|
||||
|
||||
@@ -961,8 +961,8 @@ bool Resource::CreateInterop(CreateParams* params) {
|
||||
viewInfo.possibleLayouts.usages = Pal::LayoutShaderWrite;
|
||||
dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
|
||||
|
||||
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
||||
hwState_[8] = GetHSAImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAImageOrderType(desc().format_);
|
||||
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
||||
hwState_[11] = 0; // one extra reserved field in the argument
|
||||
}
|
||||
@@ -983,8 +983,8 @@ bool Resource::CreateInterop(CreateParams* params) {
|
||||
}
|
||||
|
||||
dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
|
||||
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
||||
hwState_[8] = GetHSAImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAImageOrderType(desc().format_);
|
||||
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
||||
hwState_[11] = 0; // one extra reserved field in the argument
|
||||
} else {
|
||||
@@ -1047,8 +1047,8 @@ bool Resource::CreateInterop(CreateParams* params) {
|
||||
(desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
|
||||
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
|
||||
}
|
||||
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
||||
hwState_[8] = GetHSAImageFormatType(desc().format_);
|
||||
hwState_[9] = GetHSAImageOrderType(desc().format_);
|
||||
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
||||
hwState_[11] = 0; // one extra reserved field in the argument
|
||||
}
|
||||
|
||||
@@ -58,8 +58,6 @@ Settings::Settings() {
|
||||
|
||||
hostMemDirectAccess_ = HostMemDisable;
|
||||
|
||||
libSelector_ = amd::LibraryUndefined;
|
||||
|
||||
// By default use host blit
|
||||
blitEngine_ = BlitEngineHost;
|
||||
pinnedXferSize_ = GPU_PINNED_XFER_SIZE * Mi;
|
||||
@@ -213,7 +211,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
// L1 cache size is 16KB
|
||||
cacheSize_ = 16 * Ki;
|
||||
|
||||
libSelector_ = amd::GPU_Library_CI;
|
||||
if (LP64_SWITCH(false, true)) {
|
||||
oclVersion_ =
|
||||
!reportAsOCL12Device ? XCONCAT(OpenCL, XCONCAT(OPENCL_MAJOR, OPENCL_MINOR)) : OpenCL12;
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "top.hpp"
|
||||
#include "library.hpp"
|
||||
#include "palDevice.h"
|
||||
|
||||
/*! \addtogroup pal PAL Resource Implementation
|
||||
@@ -63,7 +62,7 @@ class Settings : public device::Settings {
|
||||
uint imageSupport_ : 1; //!< Report images support
|
||||
uint doublePrecision_ : 1; //!< Enables double precision support
|
||||
uint use64BitPtr_ : 1; //!< Use 64bit pointers on GPU
|
||||
uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU
|
||||
uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG path on GPU
|
||||
uint imageDMA_ : 1; //!< Enable direct image DMA transfers
|
||||
uint threadTraceEnable_ : 1; //!< Thread trace enable
|
||||
uint svmAtomics_ : 1; //!< SVM device atomics
|
||||
@@ -108,8 +107,6 @@ class Settings : public device::Settings {
|
||||
uint64_t subAllocationMaxSize_; //!< Maximum size allowed with suballocations
|
||||
uint64_t subAllocationChunkSize_; //!< Chunk size for suballocaitons
|
||||
|
||||
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
|
||||
|
||||
size_t prepinnedMinSize_; //!< minimal memory size for prepinned transfer
|
||||
uint32_t limit_blit_wg_; //!< The number of workgroups for blit execution
|
||||
|
||||
|
||||
@@ -700,19 +700,19 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata() //!< Memory copy MetaData
|
||||
);
|
||||
|
||||
void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSAIL kernel
|
||||
void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSA kernel
|
||||
VirtualGPU* gpuDefQueue //!< Device queue for children execution
|
||||
);
|
||||
|
||||
bool PreDeviceEnqueue(const amd::Kernel& kernel, //!< Parent amd kernel object
|
||||
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
|
||||
const pal::Kernel& hsaKernel, //!< Parent HSA kernel object
|
||||
VirtualGPU** gpuDefQueue, //!< [Return] GPU default queue
|
||||
uint64_t* vmDefQueue //!< [Return] VM handle to the virtual queue
|
||||
);
|
||||
|
||||
void PostDeviceEnqueue(
|
||||
const amd::Kernel& kernel, //!< Parent amd kernel object
|
||||
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
|
||||
const pal::Kernel& hsaKernel, //!< Parent HSA kernel object
|
||||
VirtualGPU* gpuDefQueue, //!< GPU default queue
|
||||
uint64_t vmDefQueue, //!< VM handle to the virtual queue
|
||||
uint64_t vmParentWrap, //!< VM handle to the wrapped AQL packet location
|
||||
|
||||
@@ -148,7 +148,7 @@ class NullDevice : public amd::Device {
|
||||
|
||||
const Settings& settings() const { return static_cast<Settings&>(*settings_); }
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
//! Construct a device program object from the ELF assuming it is valid
|
||||
device::Program* createProgram(amd::Program& owner,
|
||||
amd::option::Options* options = nullptr) override;
|
||||
|
||||
@@ -373,7 +373,7 @@ class Device : public NullDevice {
|
||||
//! Instantiate a new virtual device
|
||||
virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = nullptr);
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
//! Construct a device program object from the ELF assuming it is valid
|
||||
virtual device::Program* createProgram(amd::Program& owner,
|
||||
amd::option::Options* options = nullptr);
|
||||
|
||||
|
||||
@@ -53,22 +53,22 @@
|
||||
#endif
|
||||
|
||||
/**
|
||||
* HSA image object size in bytes (see HSAIL spec)
|
||||
* HSA image object size in bytes (see HSA spec)
|
||||
*/
|
||||
#define HSA_IMAGE_OBJECT_SIZE 48
|
||||
|
||||
/**
|
||||
* HSA image object alignment in bytes (see HSAIL spec)
|
||||
* HSA image object alignment in bytes (see HSA spec)
|
||||
*/
|
||||
#define HSA_IMAGE_OBJECT_ALIGNMENT 16
|
||||
|
||||
/**
|
||||
* HSA sampler object size in bytes (see HSAIL spec)
|
||||
* HSA sampler object size in bytes (see HSA spec)
|
||||
*/
|
||||
#define HSA_SAMPLER_OBJECT_SIZE 32
|
||||
|
||||
/**
|
||||
* HSA sampler object alignment in bytes (see HSAIL spec)
|
||||
* HSA sampler object alignment in bytes (see HSA spec)
|
||||
*/
|
||||
#define HSA_SAMPLER_OBJECT_ALIGNMENT 16
|
||||
|
||||
|
||||
@@ -411,7 +411,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Returns memory dependency class
|
||||
MemoryDependency& memoryDependency() { return memoryDependency_; }
|
||||
|
||||
//! Detects memory dependency for HSAIL kernels and uses appropriate AQL header
|
||||
//! Detects memory dependency for HSA kernels and uses appropriate AQL header
|
||||
bool processMemObjects(const amd::Kernel& kernel, //!< AMD kernel object for execution
|
||||
const_address params, //!< Pointer to the param's store
|
||||
size_t& ldsAddress, //!< LDS usage
|
||||
|
||||
@@ -369,36 +369,9 @@ bool Elf::setupShdr(ElfSections id, section* section, Elf64_Word shlink) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Elf::getTarget(uint16_t& machine, ElfPlatform& platform) const {
|
||||
Elf64_Half mach = _elfio.get_machine();
|
||||
if ((mach >= CPU_FIRST) && (mach <= CPU_LAST)) {
|
||||
platform = CPU_PLATFORM;
|
||||
machine = mach - CPU_BASE;
|
||||
} else if (mach == EM_386 || mach == EM_HSAIL || mach == EM_HSAIL_64 || mach == EM_AMDIL ||
|
||||
mach == EM_AMDIL_64 || mach == EM_X86_64) {
|
||||
platform = COMPLIB_PLATFORM;
|
||||
machine = mach;
|
||||
} else {
|
||||
// Invalid machine
|
||||
LogElfError("failed: Invalid machine=0x%04x(%d)", mach, mach);
|
||||
return false;
|
||||
}
|
||||
LogElfInfo("succeeded: machine=0x%04x, platform=%d", machine, platform);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Elf::setTarget(uint16_t machine, ElfPlatform platform) {
|
||||
Elf64_Half mach;
|
||||
if (platform == CPU_PLATFORM)
|
||||
mach = machine + CPU_BASE;
|
||||
else if (platform == CAL_PLATFORM)
|
||||
mach = machine + CAL_BASE;
|
||||
else
|
||||
mach = machine;
|
||||
|
||||
bool Elf::setTarget(uint16_t machine) {
|
||||
Elf64_Half mach = machine + CAL_BASE;
|
||||
_elfio.set_machine(mach);
|
||||
LogElfInfo("succeeded: machine=0x%04x(%d), platform=%d", machine, machine, platform);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -36,12 +36,6 @@ using amd::ELFIO::Elf64_Shdr;
|
||||
// These two definitions need to stay in sync with
|
||||
// the definitions elfdefinitions.h until they get
|
||||
// properly upstreamed to gcc/libelf.
|
||||
#ifndef EM_HSAIL
|
||||
#define EM_HSAIL 0xAF5A
|
||||
#endif
|
||||
#ifndef EM_HSAIL_64
|
||||
#define EM_HSAIL_64 0xAF5B
|
||||
#endif
|
||||
#ifndef EM_AMDIL
|
||||
#define EM_AMDIL 0x4154
|
||||
#endif
|
||||
@@ -57,9 +51,6 @@ using amd::ELFIO::Elf64_Shdr;
|
||||
#ifndef ELFOSABI_AMD_OPENCL
|
||||
#define ELFOSABI_AMD_OPENCL 201
|
||||
#endif
|
||||
#ifndef ELFOSABI_HSAIL
|
||||
#define ELFOSABI_HSAIL 202
|
||||
#endif
|
||||
#ifndef ELFOSABI_AMDIL
|
||||
#define ELFOSABI_AMDIL 203
|
||||
#endif
|
||||
@@ -89,14 +80,6 @@ class Elf {
|
||||
OCL_TARGETS_LAST,
|
||||
} ElfTargets;
|
||||
|
||||
typedef enum {
|
||||
CAL_PLATFORM = 0,
|
||||
CPU_PLATFORM = 1,
|
||||
COMPLIB_PLATFORM = 2,
|
||||
LC_PLATFORM = 3,
|
||||
LAST_PLATFORM = 4
|
||||
} ElfPlatform;
|
||||
|
||||
typedef enum {
|
||||
LLVMIR = 0,
|
||||
SOURCE,
|
||||
@@ -301,9 +284,8 @@ class Elf {
|
||||
bool getNote(const char* noteName, char** noteDesc, size_t* descSize);
|
||||
|
||||
|
||||
/* Get/set machine and platform (target) for which elf is built */
|
||||
bool getTarget(uint16_t& machine, ElfPlatform& platform) const;
|
||||
bool setTarget(uint16_t machine, ElfPlatform platform);
|
||||
/* Set machine and platform (target) for which elf is built */
|
||||
bool setTarget(uint16_t machine);
|
||||
|
||||
/* Get/set elf type field from header */
|
||||
bool getType(uint16_t& type) const;
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
#-------------------------------------elf_test--------------------------------------#
|
||||
# This is unit test for amd::Elf.
|
||||
# The test is built on top of rocclr, so rocclr must be built and installed first.
|
||||
# This file is separate from the rocclr CMake file to prevent interference.
|
||||
|
||||
find_package(amd_comgr REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm/
|
||||
PATH_SUFFIXES
|
||||
cmake/amd_comgr
|
||||
lib/cmake/amd_comgr)
|
||||
|
||||
find_package(hsa-runtime64 REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm/
|
||||
PATH_SUFFIXES
|
||||
cmake/hsa-runtime64)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
# Look for ROCclr which contains elfio
|
||||
find_package(ROCclr REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm
|
||||
/opt/rocm/rocclr)
|
||||
|
||||
add_executable(elf_test main.cpp)
|
||||
set_target_properties(
|
||||
elf_test PROPERTIES
|
||||
CXX_STANDARD 11
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
CXX_EXTENSIONS OFF
|
||||
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(elf_test
|
||||
PRIVATE
|
||||
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
add_definitions(-DCOMGR_DYN_DLL -DDEBUG)
|
||||
|
||||
target_link_libraries(elf_test PRIVATE amdrocclr_static)
|
||||
|
||||
#-------------------------------------elf_test--------------------------------------#
|
||||
@@ -1,21 +0,0 @@
|
||||
1. To build release version
|
||||
In test folder,
|
||||
mkdir release (if release doesn't exist)
|
||||
cd release
|
||||
cmake ..
|
||||
make
|
||||
|
||||
|
||||
2. To build debug version
|
||||
In test folder,
|
||||
mkdir debug (if debug doesn't exist)
|
||||
cd debug
|
||||
cmake -DCMAKE_BUILD_TYPE=Debug ..
|
||||
make
|
||||
|
||||
3. Run test
|
||||
rm -f *.bin
|
||||
./elf_test
|
||||
|
||||
To get debug log,
|
||||
AMD_LOG_LEVEL=5 ./elf_test
|
||||
@@ -1,360 +0,0 @@
|
||||
/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <elf/elf.hpp>
|
||||
#include <string>
|
||||
#include <utils/flags.hpp>
|
||||
#include <utils/debug.hpp>
|
||||
|
||||
using namespace amd::ELFIO;
|
||||
|
||||
// Machine id passed to setTarget() and expected back from getTarget().
static constexpr uint32_t target_ = 11;
// Payload added as the amd::Elf::COMMENT section.
static constexpr char comment_[] = "comment text";
// Size of comment_ in bytes, including the terminating NUL.
// sizeof is used rather than strlen() + 1: the value is the same, but strlen() is
// not a constant expression in standard C++ and would also require <cstring>.
static constexpr size_t commentSize_ = sizeof(comment_);
|
||||
|
||||
// Elf::RODATA, ".rodata", 1, SHT_PROGBITS, SHF_ALLOC,
|
||||
static const amd::Elf::SymbolInfo rodataSymbolInfos_[] = {
|
||||
{".rodata", nullptr, 0, "data__fmetadata", "fmetatdata", strlen("fmetatdata") + 1},
|
||||
{".rodata", nullptr, 0, "data__amdil", "amdildata", strlen("amdildata") + 1},
|
||||
{".rodata", nullptr, 0, "data__metadata", "metadata", strlen("metadata") + 1},
|
||||
{".rodata", nullptr, 0, "data__header", "header", strlen("header") + 1},
|
||||
{".rodata", nullptr, 0, "data__global", "global", strlen("global") + 1},
|
||||
{".rodata", nullptr, 0, "data__randome0", "xu\0e\0\0l", sizeof("xu\0e\0\0l")}, // binary
|
||||
{".rodata", nullptr, 0, "data__randome1", "\0j\0\0w\0", sizeof("\0j\0\0w\0")}, // binary
|
||||
};
|
||||
|
||||
static constexpr size_t rodataSymbolInfosSize_ =
|
||||
sizeof(rodataSymbolInfos_) / sizeof(rodataSymbolInfos_[0]);
|
||||
|
||||
// Elf::COMMENT, ".comment", 1, SHT_PROGBITS, 0,
|
||||
static const amd::Elf::SymbolInfo commentSymbolInfos_[] = {
|
||||
{".comment", nullptr, 0, "compile", "-g -I/opt/include", strlen("-g -I/opt/include") + 1},
|
||||
{".comment", nullptr, 0, "link", "-g -l/opt/rocm/lib", strlen("-g -l/opt/rocm/lib") + 1},
|
||||
};
|
||||
static constexpr size_t commentSymbolInfosSize_ =
|
||||
sizeof(commentSymbolInfos_) / sizeof(commentSymbolInfos_[0]);
|
||||
|
||||
// One ELF note used as test data.
struct NoteInfo {
  const char* noteName;  // Name the note is stored and looked up under
  const char* noteDesc;  // Note payload; may contain embedded NULs
  size_t descSize;       // Payload size in bytes, including the terminating NUL
};

// Notes written by set() and read back by verify().
// Sizes are taken with sizeof on the literal: for the text payloads this equals
// strlen() + 1, and unlike strlen() it is a constant expression in standard C++,
// which this constexpr table requires.
static constexpr NoteInfo noteInfos_[] = {
    {"notename0", "sjfasdfe2Afs", sizeof("sjfasdfe2Afs")},
    {"notename1", "AsdmvdfFfkd", sizeof("AsdmvdfFfkd")},
    {"notename2", "d\0kelH\0D", sizeof("d\0kelH\0D")},  // binary
    {"notename3", "\0F\0kA\0", sizeof("\0F\0kA\0")},    // binary
};

// Number of entries in noteInfos_.
static constexpr size_t noteInfosSize_ = sizeof(noteInfos_) / sizeof(noteInfos_[0]);
|
||||
|
||||
// Writes the test fixture into `elf`: target, type, the COMMENT section, every
// RODATA and COMMENT symbol, and every note.
// Returns true on success; on the first failing call it logs the failure and
// returns false without attempting the remaining steps.
bool set(amd::Elf* elf) {
  if (!elf->setTarget(target_, amd::Elf::CPU_PLATFORM)) {
    LogError("elf->setTarget() failed");
    return false;
  }

  if (!elf->setType(ET_EXEC)) {
    // Previously reported as "elf->elf() failed", which names no real call.
    LogError("elf->setType() failed");
    return false;
  }

  if (!elf->addSection(amd::Elf::COMMENT, comment_, commentSize_)) {
    LogError("elf->addSection() failed");
    return false;
  }

  LogInfo("writing rodataSymbolInfo");

  for (size_t i = 0; i < rodataSymbolInfosSize_; i++) {
    auto& info = rodataSymbolInfos_[i];
    if (!elf->addSymbol(amd::Elf::RODATA, info.sym_name.c_str(), info.address, info.size)) {
      LogPrintfError("elf->addSymbol(RODATA) failed at index %zu", i);
      return false;
    }
  }

  LogInfo("Succeeded");
  LogInfo("writing commentSymbolInfo");

  for (size_t i = 0; i < commentSymbolInfosSize_; i++) {
    auto& info = commentSymbolInfos_[i];
    if (!elf->addSymbol(amd::Elf::COMMENT, info.sym_name.c_str(), info.address, info.size)) {
      LogPrintfError("elf->addSymbol(COMMENT) failed at index %zu", i);
      return false;
    }
  }

  LogInfo("Succeeded");
  LogInfo("writing noteInfos");

  for (size_t i = 0; i < noteInfosSize_; i++) {
    auto& info = noteInfos_[i];
    if (!elf->addNote(info.noteName, info.noteDesc, info.descSize)) {
      LogPrintfError("elf->addNote() failed at index %zu", i);
      return false;
    }
  }

  LogInfo("Succeeded");

  return true;
}
|
||||
|
||||
// Checks that `elf` holds the data set() writes: target and platform, ELF type,
// the COMMENT section, every RODATA/COMMENT symbol (looked up by name and then
// by index) and every note.
// Returns true when everything matches; otherwise logs the first mismatch and
// returns false.
bool verify(amd::Elf* elf) {
  uint16_t machine = amd::Elf::OCL_TARGETS_LAST;
  amd::Elf::ElfPlatform platform = amd::Elf::LAST_PLATFORM;
  if (!elf->getTarget(machine, platform)) {
    LogError("elf->getTarget() failed");
    return false;
  }

  LogPrintfInfo("getTarget(machine=%u, platform=%d)", machine, platform);

  if (machine != target_) {
    // target_ is unsigned, so it is printed with %u.
    LogPrintfError("machine(%u) != target_(%u)", machine, target_);
    return false;
  }

  if (platform != amd::Elf::CPU_PLATFORM) {
    // The message names the platform that is actually compared against.
    LogPrintfError("platform(%d) != CPU_PLATFORM(%d)", platform, amd::Elf::CPU_PLATFORM);
    return false;
  }

  uint16_t type = ET_NONE;

  if (!elf->getType(type)) {
    LogError("elf->getType() failed");
    return false;
  }

  LogPrintfInfo("getType(%u)", type);

  if (type != ET_EXEC) {
    LogError("type != ET_EXEC");
    return false;
  }

  char* buffer = nullptr;
  size_t size = 0;

  if (!elf->getSection(amd::Elf::COMMENT, &buffer, &size)) {
    LogError("elf->getSection(COMMENT) failed");
    return false;
  }

  LogPrintfInfo("getSection(COMMENT, buffer=%s, size=%zu)", buffer, size);

  // The section may be larger than comment_ (symbols are added to it as well),
  // so only its leading commentSize_ bytes are compared.
  if (size < commentSize_ || memcmp(comment_, buffer, commentSize_) != 0) {
    LogPrintfError("Not matched section: size = %zu, buffer = %s, expected: %zu, %s", size, buffer,
                   commentSize_, comment_);
    return false;
  }

  LogInfo("Reading rodataSymbolInfo");

  size_t i = 0;
  buffer = nullptr;
  size = 0;
  for (i = 0; i < rodataSymbolInfosSize_; i++) {
    auto& info = rodataSymbolInfos_[i];
    if (!elf->getSymbol(amd::Elf::RODATA, info.sym_name.c_str(), &buffer, &size)) {
      LogPrintfError("elf->getSymbol(RODATA, %s) failed at index %zu", info.sym_name.c_str(), i);
      return false;
    }
    LogPrintfInfo("getSymbol(amd::Elf::RODATA, sym_name=%s, buffer=%s, size=%zu)",
                  info.sym_name.c_str(), buffer, size);  // Will possibly print part of buffer

    if (size != info.size || memcmp(buffer, info.address, info.size)) {
      LogPrintfError("Not matched symbol(%s): size = %zu, buff = %s, expected: %zu, %s",
                     info.sym_name.c_str(), size, buffer, info.size, info.address);
      return false;
    }
  }

  LogInfo("Succeeded");
  LogInfo("reading commentSymbolInfo");

  buffer = nullptr;
  size = 0;
  for (i = 0; i < commentSymbolInfosSize_; i++) {
    auto& info = commentSymbolInfos_[i];
    if (!elf->getSymbol(amd::Elf::COMMENT, info.sym_name.c_str(), &buffer, &size)) {
      LogPrintfError("elf->getSymbol(COMMENT, %s) failed at index %zu", info.sym_name.c_str(), i);
      return false;
    }
    LogPrintfInfo("getSymbol(COMMENT, sym_name=%s, buffer=%s, size=%zu)", info.sym_name.c_str(),
                  buffer, size);  // Will possibly print part of buffer
    if (size != info.size || memcmp(buffer, info.address, info.size)) {
      LogPrintfError("Not matched symbol(%s): size = %zu, buff = %s, expected: %zu, %s",
                     info.sym_name.c_str(), size, buffer, info.size, info.address);
      return false;
    }
  }

  // Test another way: enumerate the symbols by index. The RODATA symbols are
  // expected first, followed by the COMMENT symbols.
  auto symbolNum = elf->getSymbolNum();
  if (symbolNum != (rodataSymbolInfosSize_ + commentSymbolInfosSize_)) {
    // All three values are printed as size_t; passing size_t to %u is undefined
    // where size_t is wider than unsigned int.
    LogPrintfError(
        "Not matched: symbolNum(%zu) != rodataSymbolInfosSize_(%zu) +"
        " commentSymbolInfosSize_(%zu)",
        static_cast<size_t>(symbolNum), rodataSymbolInfosSize_, commentSymbolInfosSize_);
    return false;
  }

  for (i = 0; i < rodataSymbolInfosSize_; i++) {
    auto& info = rodataSymbolInfos_[i];
    amd::Elf::SymbolInfo symInfo;

    if (!elf->getSymbolInfo(i, &symInfo)) {
      LogPrintfError("getSymbolInfo(%zu) failed", i);
      return false;
    }
    LogPrintfInfo(
        "getSymbolInfo(%zu): amd::Elf::RODATA: sec_name=%s, sym_name=%s, "
        "address=%s, size=%lu, sec_addr=%s, sec_size=%lu)",
        i, symInfo.sec_name.c_str(), symInfo.sym_name.c_str(),
        symInfo.address,  // Will possibly print part of buffer
        symInfo.size, symInfo.sec_addr, symInfo.sec_size);
    if (symInfo.sec_name == info.sec_name && symInfo.sym_name == info.sym_name &&
        symInfo.size == info.size && ::memcmp(symInfo.address, info.address, info.size) == 0) {
      continue;
    }
    LogPrintfError("getSymbolInfo(%zu) returned not matched", i);
    return false;
  }

  // `i` continues from rodataSymbolInfosSize_; the remaining indices map onto
  // commentSymbolInfos_.
  for (; i < symbolNum; i++) {
    auto& info = commentSymbolInfos_[i - rodataSymbolInfosSize_];
    amd::Elf::SymbolInfo symInfo;

    if (!elf->getSymbolInfo(i, &symInfo)) {
      LogPrintfError("getSymbolInfo(%zu) failed", i);
      return false;
    }
    LogPrintfInfo(
        "getSymbolInfo(%zu): amd::Elf::COMMENT: sec_name=%s, sym_name=%s, "
        "address=%s, size=%lu, sec_addr=%s, sec_size=%lu)",
        i, symInfo.sec_name.c_str(), symInfo.sym_name.c_str(),
        symInfo.address,  // Will possibly print part of buffer
        symInfo.size, symInfo.sec_addr, symInfo.sec_size);
    if (symInfo.sec_name == info.sec_name && symInfo.sym_name == info.sym_name &&
        symInfo.size == info.size && ::memcmp(symInfo.address, info.address, info.size) == 0) {
      continue;
    }
    LogPrintfError("getSymbolInfo(%zu) returned not matched", i);
    return false;
  }

  LogInfo("Succeeded");
  // Progress message, not a failure: logged at info level like its siblings.
  LogInfo("Reading noteInfos");

  buffer = nullptr;
  size = 0;
  for (i = 0; i < noteInfosSize_; i++) {
    auto& info = noteInfos_[i];
    if (!elf->getNote(info.noteName, &buffer, &size)) {
      LogPrintfError("elf->getNote(%s) failed at index %zu", info.noteName, i);
      return false;
    }
    // Will possibly print part of buffer
    LogPrintfInfo("getNote(noteName=%s, buffer=%s, size=%zu)", info.noteName, buffer, size);
    if (size != info.descSize || memcmp(buffer, info.noteDesc, info.descSize)) {
      LogPrintfError("Not matched note(%s): size = %zu, buff = %s, expected: %zu, %s",
                     info.noteName, size, buffer, info.descSize, info.noteDesc);
      return false;
    }
  }

  LogPrintfInfo("%s: Succeeded", __func__);

  return true;
}
|
||||
|
||||
bool test(unsigned char eclass = ELFCLASS64, const char* outFile = nullptr) {
|
||||
amd::Elf* writer = new amd::Elf(eclass, nullptr, 0, outFile, amd::Elf::ELF_C_WRITE);
|
||||
amd::Elf* reader = nullptr;
|
||||
bool ret = false;
|
||||
do {
|
||||
if ((writer == nullptr) || !writer->isSuccessful()) {
|
||||
LogError("Creating writter ELF object failed");
|
||||
break;
|
||||
}
|
||||
|
||||
// Writing
|
||||
if (!set(writer)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Verifying
|
||||
if (!verify(writer)) {
|
||||
break;
|
||||
}
|
||||
|
||||
char* buff = nullptr;
|
||||
unsigned long len = 0;
|
||||
if (writer->dumpImage(&buff, &len)) {
|
||||
LogPrintfInfo("dumpImage succeed: buff=%p, len=%u)", buff, len);
|
||||
|
||||
reader = new amd::Elf(eclass, buff, len, nullptr, amd::Elf::ELF_C_READ);
|
||||
|
||||
delete[] buff;
|
||||
|
||||
if ((reader == nullptr) || !reader->isSuccessful()) {
|
||||
LogError("Creating reader ELF object failed");
|
||||
break;
|
||||
}
|
||||
|
||||
ret = verify(reader);
|
||||
|
||||
delete reader;
|
||||
}
|
||||
} while (false);
|
||||
|
||||
if (writer) {
|
||||
delete writer;
|
||||
}
|
||||
if (reader) {
|
||||
delete reader;
|
||||
}
|
||||
LogPrintfError("%s(%s, %s): %s", __func__, eclass == ELFCLASS64 ? "ELFCLASS64" : "ELFCLASS32",
|
||||
outFile ? outFile : "nullptr", ret ? "Succeeded" : "Failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main() {
|
||||
bool ret = false;
|
||||
amd::Flag::init();
|
||||
unsigned char eclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
||||
const char* outFile = eclass == ELFCLASS32 ? "elf32.bin" : "elf64.bin";
|
||||
|
||||
ret = test(eclass, outFile);
|
||||
printf("%s: test(%s, %s) %s!\n", __func__, eclass == ELFCLASS32 ? "ELFCLASS32" : "ELFCLASS64",
|
||||
outFile, ret ? "Succeeded" : "Failed");
|
||||
|
||||
if (ret) {
|
||||
ret = test(eclass, nullptr);
|
||||
printf("%s: test(%s, nullptr) %s!\n", __func__,
|
||||
eclass == ELFCLASS32 ? "ELFCLASS32" : "ELFCLASS64", ret ? "Succeeded" : "Failed");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -63,9 +63,8 @@ class KernelSignature : public HeapObject {
|
||||
|
||||
public:
|
||||
enum {
|
||||
ABIVersion_0 = 0, //! ABI constructed based on the OCL semantics
|
||||
ABIVersion_1 = 1, //! ABI constructed based on the HW ABI returned from HSAIL
|
||||
ABIVersion_2 = 2 //! ABI constructed based on the HW ABI returned from LC
|
||||
ABIVersion_OCL = 0, //! ABI constructed based on the OCL semantics
|
||||
ABIVersion_LC = 1 //! ABI constructed based on the HW ABI returned from LC
|
||||
};
|
||||
|
||||
//! Default constructor
|
||||
@@ -75,7 +74,7 @@ class KernelSignature : public HeapObject {
|
||||
numMemories_(0),
|
||||
numSamplers_(0),
|
||||
numQueues_(0),
|
||||
version_(ABIVersion_0) {}
|
||||
version_(ABIVersion_OCL) {}
|
||||
|
||||
//! Construct a new signature.
|
||||
KernelSignature(const std::vector<KernelParameterDescriptor>& params, const std::string& attrib,
|
||||
|
||||
Ссылка в новой задаче
Block a user