Этот коммит содержится в:
Pengda Xie
2025-11-08 02:22:03 -08:00
коммит произвёл GitHub
родитель 5dd15e22ca
Коммит 93947241d0
26 изменённых файлов: 182 добавлений и 801 удалений
+1 -1
Просмотреть файл
@@ -38,7 +38,7 @@ Run these commands:
```bash
cd "$CLR_DIR"
mkdir -p build; cd build
cmake -DUSE_COMGR_LIBRARY=ON -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCLR_BUILD_HIP=OFF -DCLR_BUILD_OCL=ON ..
cmake -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCLR_BUILD_HIP=OFF -DCLR_BUILD_OCL=ON ..
make -j$(nproc)
```
-68
Просмотреть файл
@@ -1,68 +0,0 @@
/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef LIBRARY_H_
#define LIBRARY_H_
#include <vector>
#include <string>
namespace amd {

/// Identifies one of the integrated bitcode library sets.
///
/// The numeric values are consecutive and start at zero; LibraryTotal is the
/// number of selectors that precede it and therefore has to remain the last
/// enumerator.
typedef enum _library_selector {
  LibraryUndefined = 0,

  // 32-bit GPU / CPU selectors
  GPU_Library_7xx = 1,
  GPU_Library_Evergreen = 2,
  GPU_Library_SI = 3,
  CPU_Library_Generic = 4,
  CPU_Library_AVX = 5,
  CPU_Library_FMA4 = 6,
  GPU_Library_Generic = 7,

  // 64-bit CPU / GPU selectors
  CPU64_Library_Generic = 8,
  CPU64_Library_AVX = 9,
  CPU64_Library_FMA4 = 10,
  GPU64_Library_Evergreen = 11,
  GPU64_Library_SI = 12,
  GPU64_Library_Generic = 13,

  // Later additions
  GPU_Library_CI = 14,
  GPU64_Library_CI = 15,
  GPU_Library_HSAIL = 16,

  LibraryTotal  // count of the selectors above; keep last
} LibrarySelector;

/// Descriptor of one integrated bitcode library.
class LibraryDescriptor {
 public:
  /// Upper bound on descriptor count.
  /// NOTE(review): presumably the capacity callers must reserve for the
  /// LibDesc array passed to getLibDescs() -- confirm against the definition.
  enum { MAX_NUM_LIBRARY_DESCS = 11 };

  const char* start;  ///< Start of the library data (presumably the bitcode blob).
  size_t size;        ///< Size of the library data (presumably in bytes).
};

/// Retrieves the library descriptors for a given selector.
///
/// @param LibType     [in]  selector of the requested library set.
/// @param LibDesc     [out] receives the descriptors.
/// @param LibDescSize [out] number of descriptors written, i.e. the valid
///                          entries are LibDesc[0 : LibDescSize-1].
/// @returns an int whose meaning is not visible from this header.
int getLibDescs(LibrarySelector LibType, LibraryDescriptor* LibDesc, int& LibDescSize);

/// Names of the AMD runtime helper functions (64-bit integer division and
/// modulo, float/double to unsigned 64-bit conversion).
static constexpr const char* amdRTFuns[] = {
    "__amdrt_div_i64",
    "__amdrt_div_u64",
    "__amdrt_mod_i64",
    "__amdrt_mod_u64",
    "__amdrt_cvt_f64_to_u64",
    "__amdrt_cvt_f32_to_u64",
};

}  // namespace amd
#endif // LIBRARY_H_
+1 -2
Просмотреть файл
@@ -1233,8 +1233,7 @@ Options::Options()
dumpFileRoot(),
currKernelName(NULL),
encryptCode(0),
MemoryHandles(),
libraryType_(amd::LibraryUndefined) {
MemoryHandles() {
oVariables = new OptionVariables();
::memset(flags, 0, sizeof(flags));
-2
Просмотреть файл
@@ -25,7 +25,6 @@
#include <vector>
#include <cstdio>
#include "top.hpp"
#include "library.hpp"
#include <cassert>
#include <sstream>
#ifdef __linux__
@@ -316,7 +315,6 @@ class Options {
void setDumpFileName(const char* val);
public:
LibrarySelector libraryType_;
std::string sourceFileName_;
};
+128 -159
Просмотреть файл
@@ -107,159 +107,159 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
// the ANY value. If the target feature is disabled then use a '-' suffix,
// and if enabled use a '+' suffix.
//
// If the HSAIL or AMD IL compilers do not support the target, then use
// If the AMD IL compilers do not support the target, then use
// nullptr for the ID.
//
// -------------- Compiler ---------- - Runtime - ---- IP ---- -- Target -- ----------
// Target Properties ----------
// Supported Version Features
// SIMD/
// SIMD
// Instr
// Bank LDS
// Mem
// Target ID HSAIL ID ROC PAL Maj/Min/Stp SRAMECC XNACK CU Width
// Width Width Size Banks LDSAlignment
{"gfx801", nullptr, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx801:xnack-", nullptr, true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx801:xnack+", "gfx801", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx802", "gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx803", "gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx805", nullptr, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx810", nullptr, true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx810:xnack-", nullptr, true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx810:xnack+", "gfx810", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx900", "gfx901", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx900:xnack-", "gfx900", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx900:xnack+", "gfx901", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx902", "gfx903", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx902:xnack-", "gfx902", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx902:xnack+", "gfx903", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx904", "gfx905", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx904:xnack-", "gfx904", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx904:xnack+", "gfx905", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906", "gfx907", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc-", "gfx907", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc+", nullptr, true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:xnack-", "gfx906", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:xnack+", "gfx907", true, true, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc-:xnack-", "gfx906", true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
// -- Compiler --|-- Runtime --|-- IP --|-- Target --|-- Target Properties --
// | Supported | Version| Features |
// --------------|-------------|--------|------------|-----------------------
// Target ID | ROC PAL | Major | SRAMECC | SIMD/CU
// | | Minor | XNACK | SIMD Width
// | | Step | | Instr Width
// | | | | Bank Width
// | | | | LDS Size
// | | | | Mem Banks
// | | | | LDS Alignment
{"gfx801", true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx801:xnack-", true, false, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx801:xnack+", true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx802", true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx803", true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx805", true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx810", true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx810:xnack-", true, false, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx810:xnack+", true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx900", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx900:xnack-", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx900:xnack+", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx902", true, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx902:xnack-", true, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx902:xnack+", true, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx904", true, true, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx904:xnack-", true, true, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx904:xnack+", true, true, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906", true, true, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc-", true, true, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc+", true, true, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:xnack-", true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:xnack+", true, true, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc-:xnack-", true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx906:sramecc-:xnack+", "gfx907", true, true, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki,
{"gfx906:sramecc-:xnack+", true, true, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx906:sramecc+:xnack-", nullptr, true, true, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc+:xnack+", nullptr, true, true, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908", nullptr, true, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:sramecc-", nullptr, true, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:sramecc+", nullptr, true, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:xnack-", nullptr, true, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:xnack+", nullptr, true, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:sramecc-:xnack-", nullptr, true, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
{"gfx906:sramecc+:xnack-", true, true, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx906:sramecc+:xnack+", true, true, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908", true, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:sramecc-", true, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:sramecc+", true, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:xnack-", true, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:xnack+", true, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx908:sramecc-:xnack-", true, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx908:sramecc-:xnack+", nullptr, true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki,
{"gfx908:sramecc-:xnack+", true, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx908:sramecc+:xnack-", nullptr, true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki,
{"gfx908:sramecc+:xnack-", true, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx908:sramecc+:xnack+", nullptr, true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx909", nullptr, false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx909:xnack-", nullptr, false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx909:xnack+", nullptr, false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a", nullptr, true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:sramecc-", nullptr, true, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:sramecc+", nullptr, true, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:xnack-", nullptr, true, false, 9, 0, 10, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:xnack+", nullptr, true, false, 9, 0, 10, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:sramecc-:xnack-", nullptr, true, false, 9, 0, 10, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
{"gfx908:sramecc+:xnack+", true, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx909", false, true, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx909:xnack-", false, true, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx909:xnack+", false, true, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a", true, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:sramecc-", true, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:sramecc+", true, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:xnack-", true, false, 9, 0, 10, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:xnack+", true, false, 9, 0, 10, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90a:sramecc-:xnack-", true, false, 9, 0, 10, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx90a:sramecc-:xnack+", nullptr, true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki,
{"gfx90a:sramecc-:xnack+", true, false, 9, 0, 10, OFF, ON, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx90a:sramecc+:xnack-", nullptr, true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki,
{"gfx90a:sramecc+:xnack-", true, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx90a:sramecc+:xnack+", nullptr, true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki,
{"gfx90a:sramecc+:xnack+", true, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx942", nullptr, true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:sramecc-", nullptr, true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:sramecc+", nullptr, true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:xnack-", nullptr, true, false, 9, 4, 2, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:xnack+", nullptr, true, false, 9, 4, 2, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:sramecc-:xnack-", nullptr, true, false, 9, 4, 2, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
{"gfx942", true, false, 9, 4, 2, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:sramecc-", true, false, 9, 4, 2, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:sramecc+", true, false, 9, 4, 2, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:xnack-", true, false, 9, 4, 2, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:xnack+", true, false, 9, 4, 2, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx942:sramecc-:xnack-", true, false, 9, 4, 2, OFF, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx942:sramecc-:xnack+", nullptr, true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki,
{"gfx942:sramecc-:xnack+", true, false, 9, 4, 2, OFF, ON, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx942:sramecc+:xnack-", nullptr, true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki,
{"gfx942:sramecc+:xnack-", true, false, 9, 4, 2, ON, OFF, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx942:sramecc+:xnack+", nullptr, true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx950", nullptr, true, false, 9, 5, 0, ANY, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:sramecc-", nullptr, true, false, 9, 5, 0, OFF, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:sramecc+", nullptr, true, false, 9, 5, 0, ON, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:xnack-", nullptr, true, false, 9, 5, 0, ANY, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:xnack+", nullptr, true, false, 9, 5, 0, ANY, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:sramecc-:xnack-", nullptr, true, false, 9, 5, 0, OFF, OFF, 4, 16, 1, 256, 160 * Ki,
{"gfx942:sramecc+:xnack+", true, false, 9, 4, 2, ON, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90c", true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90c:xnack-", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx90c:xnack+", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx950", true, false, 9, 5, 0, ANY, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:sramecc-", true, false, 9, 5, 0, OFF, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:sramecc+", true, false, 9, 5, 0, ON, ANY, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:xnack-", true, false, 9, 5, 0, ANY, OFF, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:xnack+", true, false, 9, 5, 0, ANY, ON, 4, 16, 1, 256, 160 * Ki, 64, 1280},
{"gfx950:sramecc-:xnack-", true, false, 9, 5, 0, OFF, OFF, 4, 16, 1, 256, 160 * Ki,
64, 1280},
{"gfx950:sramecc-:xnack+", nullptr, true, false, 9, 5, 0, OFF, ON, 4, 16, 1, 256, 160 * Ki,
{"gfx950:sramecc-:xnack+", true, false, 9, 5, 0, OFF, ON, 4, 16, 1, 256, 160 * Ki,
64, 1280},
{"gfx950:sramecc+:xnack-", nullptr, true, false, 9, 5, 0, ON, OFF, 4, 16, 1, 256, 160 * Ki,
{"gfx950:sramecc+:xnack-", true, false, 9, 5, 0, ON, OFF, 4, 16, 1, 256, 160 * Ki,
64, 1280},
{"gfx950:sramecc+:xnack+", nullptr, true, false, 9, 5, 0, ON, ON, 4, 16, 1, 256, 160 * Ki,
{"gfx950:sramecc+:xnack+", true, false, 9, 5, 0, ON, ON, 4, 16, 1, 256, 160 * Ki,
64, 1280},
{"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-generic:xnack-", nullptr, true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-generic:xnack+", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic", nullptr, true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc-", nullptr, true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki,
{"gfx9-generic", true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-generic:xnack-", true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-generic:xnack+", true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic", true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc-", true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx9-4-generic:sramecc+", nullptr, true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki,
{"gfx9-4-generic:sramecc+", true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki,
32, 512},
{"gfx9-4-generic:xnack-", nullptr, true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic:xnack+", nullptr, true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc-:xnack-", nullptr, true, true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256,
{"gfx9-4-generic:xnack-", true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic:xnack+", true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc-:xnack-", true, true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256,
64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc-:xnack+", nullptr, true, true, 9, 4, 0, OFF, ON, 4, 16, 1, 256,
{"gfx9-4-generic:sramecc-:xnack+", true, true, 9, 4, 0, OFF, ON, 4, 16, 1, 256,
64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc+:xnack-", nullptr, true, true, 9, 4, 0, ON, OFF, 4, 16, 1, 256,
{"gfx9-4-generic:sramecc+:xnack-", true, true, 9, 4, 0, ON, OFF, 4, 16, 1, 256,
64 * Ki, 32, 512},
{"gfx9-4-generic:sramecc+:xnack+", nullptr, true, true, 9, 4, 0, ON, ON, 4, 16, 1, 256,
{"gfx9-4-generic:sramecc+:xnack+", true, true, 9, 4, 0, ON, ON, 4, 16, 1, 256,
64 * Ki, 32, 512},
{"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1011", "gfx1011", true, true, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1011:xnack-", "gfx1011", true, true, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1011:xnack+", nullptr, true, true, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1012", "gfx1012", true, true, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1012:xnack-", "gfx1012", true, true, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1012:xnack+", nullptr, true, true, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx10-1-generic:xnack-", nullptr, true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki,
{"gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1010:xnack-", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1010:xnack+", true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1011", true, true, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1011:xnack-", true, true, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1011:xnack+", true, true, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1012", true, true, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1012:xnack-", true, true, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1012:xnack+", true, true, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1013:xnack-", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1013:xnack+", true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx10-1-generic", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx10-1-generic:xnack-", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki,
32, 1024},
{"gfx10-1-generic:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki,
{"gfx10-1-generic:xnack+", true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki,
32, 1024},
{"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1033", "gfx1033", true, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1034", "gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1035", "gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1036", "gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx10-3-generic", nullptr, true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1100", "gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1101", "gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1102", "gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1103", "gfx1103", true, true, 11, 0, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1150", "gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1151", "gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1152", "gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1153", "gfx1153", true, true, 11, 5, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx11-generic", nullptr, true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1200", "gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1201", "gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx12-generic", nullptr, true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1033", true, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx10-3-generic", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1103", true, true, 11, 0, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1153", true, true, 11, 5, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx11-generic", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
{"gfx12-generic", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32, 1024},
};
return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_));
}
@@ -1240,7 +1240,7 @@ bool ClBinary::setElfTarget() {
static const uint32_t Target = 21;
assert(((0xFFFF8000 & Target) == 0) && "ASIC target ID >= 2^15");
uint16_t elf_target = static_cast<uint16_t>(0x7FFF & Target);
return elfOut()->setTarget(elf_target, amd::Elf::CAL_PLATFORM);
return elfOut()->setTarget(elf_target);
}
void ClBinary::init(amd::option::Options* optionsObj) {
@@ -1272,37 +1272,6 @@ void ClBinary::init(amd::option::Options* optionsObj) {
}
}
bool ClBinary::isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform) {
/* It is recompilable if there is llvmir that was generated for
the same platform (CPU or GPU) and with the same bitness.
Note: the bitness has been checked in initClBinary(), no need
to check it here.
*/
if (llvmBinary.empty()) {
DevLogError("LLVM Binary string is empty \n");
return false;
}
uint16_t elf_target;
amd::Elf::ElfPlatform platform;
if (elfIn()->getTarget(elf_target, platform)) {
if (platform == thePlatform) {
return true;
}
if ((platform == amd::Elf::COMPLIB_PLATFORM) &&
(((thePlatform == amd::Elf::CAL_PLATFORM) &&
((elf_target == (uint16_t)EM_HSAIL) || (elf_target == (uint16_t)EM_HSAIL_64))) ||
((thePlatform == amd::Elf::CPU_PLATFORM) &&
((elf_target == (uint16_t)EM_386) || (elf_target == (uint16_t)EM_X86_64))))) {
return true;
}
}
DevLogPrintfError("LLVM_Binary: %s is not recompilable \n", llvmBinary.c_str());
return false;
}
void ClBinary::release() {
if (isBinaryAllocated() && (binary_ != nullptr)) {
delete[] binary_;
+4 -16
Просмотреть файл
@@ -1083,9 +1083,6 @@ class ClBinary : public amd::HeapObject {
amd::Elf::ElfSections& elfSectionType //!< LLVMIR binary is in SPIR format
) const;
//! Check if the binary is recompilable
bool isRecompilable(std::string& llvmBinary, amd::Elf::ElfPlatform thePlatform);
void saveOrigBinary(const char* origBinary, size_t origSize) {
origBinary_ = origBinary;
origSize_ = origSize;
@@ -1455,9 +1452,6 @@ class Isa {
/// @returns This Isa's target ID name.
const char* targetId() const { return targetId_; }
/// @returns This Isa's name to use with the HSAIL compiler.
const char* hsailName() const { return hsailId_; }
/// @returns If the ROCm runtime supports the ISA.
bool runtimeRocSupported() const {
if (!IS_HIP && (versionMajor_ == 8)) {
@@ -1534,13 +1528,12 @@ class Isa {
static const Isa* end();
private:
constexpr Isa(const char* targetId, const char* hsailId, bool runtimeRocSupported,
bool runtimePalSupported, uint32_t versionMajor, uint32_t versionMinor,
uint32_t versionStepping, Feature sramecc, Feature xnack, uint32_t simdPerCU,
uint32_t simdWidth, uint32_t simdInstructionWidth, uint32_t memChannelBankWidth,
constexpr Isa(const char* targetId, bool runtimeRocSupported, bool runtimePalSupported,
uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
Feature sramecc, Feature xnack, uint32_t simdPerCU, uint32_t simdWidth,
uint32_t simdInstructionWidth, uint32_t memChannelBankWidth,
uint32_t localMemSizePerCU, uint32_t localMemBanks, uint32_t ldsAlignment)
: targetId_(targetId),
hsailId_(hsailId),
runtimeRocSupported_(runtimeRocSupported),
runtimePalSupported_(runtimePalSupported),
versionMajor_(versionMajor),
@@ -1563,11 +1556,6 @@ class Isa {
// compilations.
const char* targetId_;
// @brief Isa's HSAIL name. Used for the Compiler Library for HSAIL
// compilation using the Shader Compiler Finalizer. Empty string if
// unsupported.
const char* hsailId_;
bool runtimeRocSupported_; //!< ROCm runtime is supported.
bool runtimePalSupported_; //!< PAL runtime is supported.
uint32_t versionMajor_; //!< Isa's major version.
+1 -1
Просмотреть файл
@@ -997,7 +997,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
uint32_t numParams = params.size();
// Append the hidden arguments to the OCL arguments
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
createSignature(params, numParams, amd::KernelSignature::ABIVersion_LC);
}
// ================================================================================================
-1
Просмотреть файл
@@ -1702,7 +1702,6 @@ Program::file_type_t Program::getNextCompilationStageFromBinary(amd::option::Opt
bool recompile = false;
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
switch (continueCompileFrom) {
case FILE_TYPE_HSAIL_BINARY:
case FILE_TYPE_CG:
case FILE_TYPE_ISA: {
// Compare options loaded from binary with current ones, recompile if differ;
+10 -12
Просмотреть файл
@@ -87,18 +87,16 @@ class Program : public amd::HeapObject {
FILE_TYPE_SPIR_BINARY = 5,
FILE_TYPE_AMDIL_TEXT = 6,
FILE_TYPE_AMDIL_BINARY = 7,
FILE_TYPE_HSAIL_TEXT = 8,
FILE_TYPE_HSAIL_BINARY = 9,
FILE_TYPE_X86_TEXT = 10,
FILE_TYPE_X86_BINARY = 11,
FILE_TYPE_CG = 12,
FILE_TYPE_SOURCE = 13,
FILE_TYPE_ISA = 14,
FILE_TYPE_HEADER = 15,
FILE_TYPE_RSLLVMIR_BINARY = 16,
FILE_TYPE_SPIRV_BINARY = 17,
FILE_TYPE_ASM_TEXT = 18,
FILE_TYPE_LAST = 19
FILE_TYPE_X86_TEXT = 8,
FILE_TYPE_X86_BINARY = 9,
FILE_TYPE_CG = 10,
FILE_TYPE_SOURCE = 11,
FILE_TYPE_ISA = 12,
FILE_TYPE_HEADER = 13,
FILE_TYPE_RSLLVMIR_BINARY = 14,
FILE_TYPE_SPIRV_BINARY = 15,
FILE_TYPE_ASM_TEXT = 16,
FILE_TYPE_LAST = 17
} file_type_t;
private:
+1 -1
Просмотреть файл
@@ -122,7 +122,7 @@ static std::tuple<const amd::Isa*, const char*> findIsa(uint32_t gfxipMajor, uin
sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled,
xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled);
return std::make_tuple(
isa, (palDeviceIter->gfxipMajor_ > 8) ? isa->hsailName() : palDeviceIter->palName_);
isa, (palDeviceIter->gfxipMajor_ > 8) ? isa->targetId() : palDeviceIter->palName_);
}
static std::tuple<Pal::GfxIpLevel, Pal::AsicRevision, const char*> findPal(uint32_t gfxipMajor,
+1 -1
Просмотреть файл
@@ -193,7 +193,7 @@ Kernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
}
// The check below handles a special case of single context with multiple devices
// when the devices use different compilers(HSAIL and LC) and have different signatures
// when the devices have different signatures
const amd::KernelSignature& signature =
(this->signature().version() == kernel.signature().version()) ? kernel.signature()
: this->signature();
+4 -11
Просмотреть файл
@@ -84,9 +84,6 @@ class Kernel : public device::Kernel {
//! Returns LDS size used in this kernel
uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }
//! Returns pointer on CPU to AQL code info
const amd_kernel_code_t* cpuAqlCode() const { return &akc_; }
//! Returns pointer on CPU to AQL kernel descriptor info
const llvm::amdhsa::kernel_descriptor_t* cpuAqlKd() const { return &akd_; }
@@ -135,14 +132,10 @@ class Kernel : public device::Kernel {
void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize,
const uint16_t numSGPRs, const uint16_t numVGPRs);
union {
amd_kernel_code_t akc_; //!< AQL kernel code on CPU, used by HSAIL
llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC
};
uint index_; //!< Kernel index in the program
uint64_t code_; //!< GPU memory pointer to the kernel
size_t codeSize_; //!< Size of ISA code
llvm::amdhsa::kernel_descriptor_t akd_; //!< AQL kernel descriptor on CPU, used by LC
uint index_; //!< Kernel index in the program
uint64_t code_; //!< GPU memory pointer to the kernel
size_t codeSize_; //!< Size of ISA code
};
/*@}*/ // namespace amd::pal
+1 -1
Просмотреть файл
@@ -591,7 +591,7 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
return false;
}
// Get memory pointer to the satged buffer
// Get memory pointer to the staged buffer
uint32_t* dbgBufferPtr = reinterpret_cast<uint32_t*>(xferBufRead_->map(&gpu));
if (nullptr == dbgBufferPtr) {
return false;
+12 -12
Просмотреть файл
@@ -407,7 +407,7 @@ Resource::~Resource() {
}
// ================================================================================================
static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
static uint32_t GetHSAImageFormatType(const cl_image_format& format) {
static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8,
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16,
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8,
@@ -431,7 +431,7 @@ static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
}
// ================================================================================================
static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
static uint32_t GetHSAImageOrderType(const cl_image_format& format) {
static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R,
HSA_EXT_IMAGE_CHANNEL_ORDER_A,
HSA_EXT_IMAGE_CHANNEL_ORDER_RG,
@@ -569,8 +569,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
}
dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
hwState_[8] = GetHSAILImageFormatType(desc().format_);
hwState_[9] = GetHSAILImageOrderType(desc().format_);
hwState_[8] = GetHSAImageFormatType(desc().format_);
hwState_[9] = GetHSAImageOrderType(desc().format_);
hwState_[10] = static_cast<uint32_t>(desc().width_);
hwState_[11] = 0; // one extra reserved field in the argument
return true;
@@ -741,8 +741,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
viewInfo.subresRange = ImgSubresRange;
dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
hwState_[8] = GetHSAILImageFormatType(desc().format_);
hwState_[9] = GetHSAILImageOrderType(desc().format_);
hwState_[8] = GetHSAImageFormatType(desc().format_);
hwState_[9] = GetHSAImageOrderType(desc().format_);
hwState_[10] = static_cast<uint32_t>(desc().width_);
hwState_[11] = 0; // one extra reserved field in the argument
@@ -961,8 +961,8 @@ bool Resource::CreateInterop(CreateParams* params) {
viewInfo.possibleLayouts.usages = Pal::LayoutShaderWrite;
dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
hwState_[8] = GetHSAILImageFormatType(desc().format_);
hwState_[9] = GetHSAILImageOrderType(desc().format_);
hwState_[8] = GetHSAImageFormatType(desc().format_);
hwState_[9] = GetHSAImageOrderType(desc().format_);
hwState_[10] = static_cast<uint32_t>(desc().width_);
hwState_[11] = 0; // one extra reserved field in the argument
}
@@ -983,8 +983,8 @@ bool Resource::CreateInterop(CreateParams* params) {
}
dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
hwState_[8] = GetHSAILImageFormatType(desc().format_);
hwState_[9] = GetHSAILImageOrderType(desc().format_);
hwState_[8] = GetHSAImageFormatType(desc().format_);
hwState_[9] = GetHSAImageOrderType(desc().format_);
hwState_[10] = static_cast<uint32_t>(desc().width_);
hwState_[11] = 0; // one extra reserved field in the argument
} else {
@@ -1047,8 +1047,8 @@ bool Resource::CreateInterop(CreateParams* params) {
(desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
}
hwState_[8] = GetHSAILImageFormatType(desc().format_);
hwState_[9] = GetHSAILImageOrderType(desc().format_);
hwState_[8] = GetHSAImageFormatType(desc().format_);
hwState_[9] = GetHSAImageOrderType(desc().format_);
hwState_[10] = static_cast<uint32_t>(desc().width_);
hwState_[11] = 0; // one extra reserved field in the argument
}
-3
Просмотреть файл
@@ -58,8 +58,6 @@ Settings::Settings() {
hostMemDirectAccess_ = HostMemDisable;
libSelector_ = amd::LibraryUndefined;
// By default use host blit
blitEngine_ = BlitEngineHost;
pinnedXferSize_ = GPU_PINNED_XFER_SIZE * Mi;
@@ -213,7 +211,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
// L1 cache size is 16KB
cacheSize_ = 16 * Ki;
libSelector_ = amd::GPU_Library_CI;
if (LP64_SWITCH(false, true)) {
oclVersion_ =
!reportAsOCL12Device ? XCONCAT(OpenCL, XCONCAT(OPENCL_MAJOR, OPENCL_MINOR)) : OpenCL12;
+1 -4
Просмотреть файл
@@ -21,7 +21,6 @@
#pragma once
#include "top.hpp"
#include "library.hpp"
#include "palDevice.h"
/*! \addtogroup pal PAL Resource Implementation
@@ -63,7 +62,7 @@ class Settings : public device::Settings {
uint imageSupport_ : 1; //!< Report images support
uint doublePrecision_ : 1; //!< Enables double precision support
uint use64BitPtr_ : 1; //!< Use 64bit pointers on GPU
uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU
uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG path on GPU
uint imageDMA_ : 1; //!< Enable direct image DMA transfers
uint threadTraceEnable_ : 1; //!< Thread trace enable
uint svmAtomics_ : 1; //!< SVM device atomics
@@ -108,8 +107,6 @@ class Settings : public device::Settings {
uint64_t subAllocationMaxSize_; //!< Maximum size allowed with suballocations
uint64_t subAllocationChunkSize_; //!< Chunk size for suballocaitons
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
size_t prepinnedMinSize_; //!< minimal memory size for prepinned transfer
uint32_t limit_blit_wg_; //!< The number of workgroups for blit execution
+3 -3
Просмотреть файл
@@ -700,19 +700,19 @@ class VirtualGPU : public device::VirtualDevice {
amd::CopyMetadata copyMetadata = amd::CopyMetadata() //!< Memory copy MetaData
);
void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSAIL kernel
void PrintChildren(const pal::Kernel& hsaKernel, //!< The parent HSA kernel
VirtualGPU* gpuDefQueue //!< Device queue for children execution
);
bool PreDeviceEnqueue(const amd::Kernel& kernel, //!< Parent amd kernel object
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
const pal::Kernel& hsaKernel, //!< Parent HSA kernel object
VirtualGPU** gpuDefQueue, //!< [Return] GPU default queue
uint64_t* vmDefQueue //!< [Return] VM handle to the virtual queue
);
void PostDeviceEnqueue(
const amd::Kernel& kernel, //!< Parent amd kernel object
const pal::Kernel& hsaKernel, //!< Parent HSAIL object
const pal::Kernel& hsaKernel, //!< Parent HSA kernel object
VirtualGPU* gpuDefQueue, //!< GPU default queue
uint64_t vmDefQueue, //!< VM handle to the virtual queue
uint64_t vmParentWrap, //!< VM handle to the wrapped AQL packet location
+2 -2
Просмотреть файл
@@ -148,7 +148,7 @@ class NullDevice : public amd::Device {
const Settings& settings() const { return static_cast<Settings&>(*settings_); }
//! Construct an HSAIL program object from the ELF assuming it is valid
//! Construct a device program object from the ELF assuming it is valid
device::Program* createProgram(amd::Program& owner,
amd::option::Options* options = nullptr) override;
@@ -373,7 +373,7 @@ class Device : public NullDevice {
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = nullptr);
//! Construct an HSAIL program object from the ELF assuming it is valid
//! Construct a device program object from the ELF assuming it is valid
virtual device::Program* createProgram(amd::Program& owner,
amd::option::Options* options = nullptr);
+4 -4
Просмотреть файл
@@ -53,22 +53,22 @@
#endif
/**
* HSA image object size in bytes (see HSAIL spec)
* HSA image object size in bytes (see HSA spec)
*/
#define HSA_IMAGE_OBJECT_SIZE 48
/**
* HSA image object alignment in bytes (see HSAIL spec)
* HSA image object alignment in bytes (see HSA spec)
*/
#define HSA_IMAGE_OBJECT_ALIGNMENT 16
/**
* HSA sampler object size in bytes (see HSAIL spec)
* HSA sampler object size in bytes (see HSA spec)
*/
#define HSA_SAMPLER_OBJECT_SIZE 32
/**
* HSA sampler object alignment in bytes (see HSAIL spec)
* HSA sampler object alignment in bytes (see HSA spec)
*/
#define HSA_SAMPLER_OBJECT_ALIGNMENT 16
+1 -1
Просмотреть файл
@@ -411,7 +411,7 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns memory dependency class
MemoryDependency& memoryDependency() { return memoryDependency_; }
//! Detects memory dependency for HSAIL kernels and uses appropriate AQL header
//! Detects memory dependency for HSA kernels and uses appropriate AQL header
bool processMemObjects(const amd::Kernel& kernel, //!< AMD kernel object for execution
const_address params, //!< Pointer to the param's store
size_t& ldsAddress, //!< LDS usage
+2 -29
Просмотреть файл
@@ -369,36 +369,9 @@ bool Elf::setupShdr(ElfSections id, section* section, Elf64_Word shlink) const {
return true;
}
bool Elf::getTarget(uint16_t& machine, ElfPlatform& platform) const {
Elf64_Half mach = _elfio.get_machine();
if ((mach >= CPU_FIRST) && (mach <= CPU_LAST)) {
platform = CPU_PLATFORM;
machine = mach - CPU_BASE;
} else if (mach == EM_386 || mach == EM_HSAIL || mach == EM_HSAIL_64 || mach == EM_AMDIL ||
mach == EM_AMDIL_64 || mach == EM_X86_64) {
platform = COMPLIB_PLATFORM;
machine = mach;
} else {
// Invalid machine
LogElfError("failed: Invalid machine=0x%04x(%d)", mach, mach);
return false;
}
LogElfInfo("succeeded: machine=0x%04x, platform=%d", machine, platform);
return true;
}
bool Elf::setTarget(uint16_t machine, ElfPlatform platform) {
Elf64_Half mach;
if (platform == CPU_PLATFORM)
mach = machine + CPU_BASE;
else if (platform == CAL_PLATFORM)
mach = machine + CAL_BASE;
else
mach = machine;
bool Elf::setTarget(uint16_t machine) {
Elf64_Half mach = machine + CAL_BASE;
_elfio.set_machine(mach);
LogElfInfo("succeeded: machine=0x%04x(%d), platform=%d", machine, machine, platform);
return true;
}
+2 -20
Просмотреть файл
@@ -36,12 +36,6 @@ using amd::ELFIO::Elf64_Shdr;
// These two definitions need to stay in sync with
// the definitions elfdefinitions.h until they get
// properly upstreamed to gcc/libelf.
#ifndef EM_HSAIL
#define EM_HSAIL 0xAF5A
#endif
#ifndef EM_HSAIL_64
#define EM_HSAIL_64 0xAF5B
#endif
#ifndef EM_AMDIL
#define EM_AMDIL 0x4154
#endif
@@ -57,9 +51,6 @@ using amd::ELFIO::Elf64_Shdr;
#ifndef ELFOSABI_AMD_OPENCL
#define ELFOSABI_AMD_OPENCL 201
#endif
#ifndef ELFOSABI_HSAIL
#define ELFOSABI_HSAIL 202
#endif
#ifndef ELFOSABI_AMDIL
#define ELFOSABI_AMDIL 203
#endif
@@ -89,14 +80,6 @@ class Elf {
OCL_TARGETS_LAST,
} ElfTargets;
typedef enum {
CAL_PLATFORM = 0,
CPU_PLATFORM = 1,
COMPLIB_PLATFORM = 2,
LC_PLATFORM = 3,
LAST_PLATFORM = 4
} ElfPlatform;
typedef enum {
LLVMIR = 0,
SOURCE,
@@ -301,9 +284,8 @@ class Elf {
bool getNote(const char* noteName, char** noteDesc, size_t* descSize);
/* Get/set machine and platform (target) for which elf is built */
bool getTarget(uint16_t& machine, ElfPlatform& platform) const;
bool setTarget(uint16_t machine, ElfPlatform platform);
/* Set machine and platform (target) for which elf is built */
bool setTarget(uint16_t machine);
/* Get/set elf type field from header */
bool getType(uint16_t& type) const;
-62
Просмотреть файл
@@ -1,62 +0,0 @@
# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#-------------------------------------elf_test--------------------------------------#
# This is unit test for amd::Elf.
# The test is built on top of rocclr, so rocclr must be built and installed first.
# This file is kept separate from the rocclr CMake file to prevent interference.
# Locate the amd_comgr config package; REQUIRED makes configuration fail if it
# is missing. /opt/rocm/ is searched with the listed path suffixes.
find_package(amd_comgr REQUIRED CONFIG
PATHS
/opt/rocm/
PATH_SUFFIXES
cmake/amd_comgr
lib/cmake/amd_comgr)
# Locate the hsa-runtime64 config package under /opt/rocm/.
find_package(hsa-runtime64 REQUIRED CONFIG
PATHS
/opt/rocm/
PATH_SUFFIXES
cmake/hsa-runtime64)
# Threading library for the host toolchain.
find_package(Threads REQUIRED)
# Look for ROCclr which contains elfio
find_package(ROCclr REQUIRED CONFIG
PATHS
/opt/rocm
/opt/rocm/rocclr)
# The test is a single translation unit.
add_executable(elf_test main.cpp)
# Build as strict C++11 (no compiler extensions) and place the binary in the
# top-level build directory.
set_target_properties(
elf_test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED ON
CXX_EXTENSIONS OFF
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
# Reuse the include directories exported by the amdrocclr_static target.
target_include_directories(elf_test
PRIVATE
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
# Directory-wide compile definitions: COMGR_DYN_DLL and DEBUG.
add_definitions(-DCOMGR_DYN_DLL -DDEBUG)
# Link against the static ROCclr library.
target_link_libraries(elf_test PRIVATE amdrocclr_static)
#-------------------------------------elf_test--------------------------------------#
-21
Просмотреть файл
@@ -1,21 +0,0 @@
1. To build release version
In test folder,
mkdir release (if release doesn't exist)
cd release
cmake ..
make
2. To build debug version
In test folder,
mkdir debug (if debug doesn't exist)
cd debug
cmake -DCMAKE_BUILD_TYPE=Debug ..
make
3. Run test
rm -f *.bin
./elf_test
To get debug log,
AMD_LOG_LEVEL=5 ./elf_test
-360
Просмотреть файл
@@ -1,360 +0,0 @@
/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <elf/elf.hpp>
#include <string>
#include <utils/flags.hpp>
#include <utils/debug.hpp>
using namespace amd::ELFIO;
// ---------------------------------------------------------------------------
// Fixture data shared by set() (which writes it into an ELF object) and
// verify() (which reads it back and compares).
// ---------------------------------------------------------------------------

// Machine id passed to Elf::setTarget() and expected back from getTarget().
static constexpr uint32_t target_ = 11;
// Payload stored in the COMMENT section; the size includes the trailing NUL.
static constexpr char comment_[] = "comment text";
// NOTE(review): strlen() in a constant expression relies on a compiler
// builtin; it is not constexpr in standard C++ - confirm the toolchain.
static constexpr size_t commentSize_ = strlen(comment_) + 1;
// Elf::RODATA, ".rodata", 1, SHT_PROGBITS, SHF_ALLOC,
// Symbols added to the RODATA section. Judging by how the entries are used
// below, the initializer order is presumably
// (sec_name, sec_addr, sec_size, sym_name, address, size) - confirm against
// amd::Elf::SymbolInfo.
static const amd::Elf::SymbolInfo rodataSymbolInfos_[] = {
{".rodata", nullptr, 0, "data__fmetadata", "fmetatdata", strlen("fmetatdata") + 1},
{".rodata", nullptr, 0, "data__amdil", "amdildata", strlen("amdildata") + 1},
{".rodata", nullptr, 0, "data__metadata", "metadata", strlen("metadata") + 1},
{".rodata", nullptr, 0, "data__header", "header", strlen("header") + 1},
{".rodata", nullptr, 0, "data__global", "global", strlen("global") + 1},
// The last two payloads contain embedded NULs, so their size is taken with
// sizeof (whole literal) rather than strlen (which stops at the first NUL).
{".rodata", nullptr, 0, "data__randome0", "xu\0e\0\0l", sizeof("xu\0e\0\0l")}, // binary
{".rodata", nullptr, 0, "data__randome1", "\0j\0\0w\0", sizeof("\0j\0\0w\0")}, // binary
};
// Number of entries in rodataSymbolInfos_.
static constexpr size_t rodataSymbolInfosSize_ =
sizeof(rodataSymbolInfos_) / sizeof(rodataSymbolInfos_[0]);
// Elf::COMMENT, ".comment", 1, SHT_PROGBITS, 0,
// Symbols added to the COMMENT section (same field layout as above).
static const amd::Elf::SymbolInfo commentSymbolInfos_[] = {
{".comment", nullptr, 0, "compile", "-g -I/opt/include", strlen("-g -I/opt/include") + 1},
{".comment", nullptr, 0, "link", "-g -l/opt/rocm/lib", strlen("-g -l/opt/rocm/lib") + 1},
};
// Number of entries in commentSymbolInfos_.
static constexpr size_t commentSymbolInfosSize_ =
sizeof(commentSymbolInfos_) / sizeof(commentSymbolInfos_[0]);
// One ELF note used by the test: its name, its descriptor bytes and the
// descriptor size in bytes.
struct NoteInfo {
const char* noteName;
const char* noteDesc;
size_t descSize;
};
// Notes written with addNote() and read back with getNote(). Entries marked
// "binary" carry embedded NULs and therefore use sizeof for their size.
static constexpr NoteInfo noteInfos_[] = {
{"notename0", "sjfasdfe2Afs", strlen("sjfasdfe2Afs") + 1},
{"notename1", "AsdmvdfFfkd", strlen("AsdmvdfFfkd") + 1},
{"notename2", "d\0kelH\0D", sizeof("d\0kelH\0D")}, // binary
{"notename3", "\0F\0kA\0", sizeof("\0F\0kA\0")}, // binary
};
// Number of entries in noteInfos_.
static const size_t noteInfosSize_ = sizeof(noteInfos_) / sizeof(noteInfos_[0]);
//! Writes the test fixtures into \p elf: target, type, the COMMENT section,
//! the RODATA and COMMENT symbols, and the notes.
//!
//! \param elf ELF object opened for writing
//! \return true when every write succeeded; false on the first failure
//!         (the failing step is logged)
bool set(amd::Elf* elf) {
  if (!elf->setTarget(target_, amd::Elf::CPU_PLATFORM)) {
    LogError("elf->setTarget() failed");
    return false;
  }

  if (!elf->setType(ET_EXEC)) {
    // Name the call that actually failed (was a copy-pasted "elf->elf()").
    LogError("elf->setType() failed");
    return false;
  }

  if (!elf->addSection(amd::Elf::COMMENT, comment_, commentSize_)) {
    LogError("elf->addSection() failed");
    return false;
  }

  LogInfo("writing rodataSymbolInfo");
  for (size_t i = 0; i < rodataSymbolInfosSize_; i++) {
    const auto& info = rodataSymbolInfos_[i];
    if (!elf->addSymbol(amd::Elf::RODATA, info.sym_name.c_str(), info.address, info.size)) {
      LogPrintfError("elf->addSymbol(RODATA) failed at index %zu", i);
      return false;
    }
  }
  LogInfo("Succeeded");

  LogInfo("writing commentSymbolInfo");
  for (size_t i = 0; i < commentSymbolInfosSize_; i++) {
    const auto& info = commentSymbolInfos_[i];
    if (!elf->addSymbol(amd::Elf::COMMENT, info.sym_name.c_str(), info.address, info.size)) {
      LogPrintfError("elf->addSymbol(COMMENT) failed at index %zu", i);
      return false;
    }
  }
  LogInfo("Succeeded");

  LogInfo("writing noteInfos");
  for (size_t i = 0; i < noteInfosSize_; i++) {
    const auto& info = noteInfos_[i];
    if (!elf->addNote(info.noteName, info.noteDesc, info.descSize)) {
      LogPrintfError("elf->addNote() failed at index %zu", i);
      return false;
    }
  }
  LogInfo("Succeeded");

  return true;
}
//! Reads back everything set() wrote into \p elf and compares it with the
//! fixtures: target/platform, type, the COMMENT section, the symbols (both by
//! name and by index) and the notes.
//!
//! \param elf ELF object to check (the writer itself or a reader built from
//!            its dumped image)
//! \return true when all values match; false on the first mismatch or failed
//!         query (the failing step is logged)
bool verify(amd::Elf* elf) {
  uint16_t machine = amd::Elf::OCL_TARGETS_LAST;
  amd::Elf::ElfPlatform platform = amd::Elf::LAST_PLATFORM;
  if (!elf->getTarget(machine, platform)) {
    LogError("elf->getTarget() failed");
    return false;
  }
  LogPrintfInfo("getTarget(machine=%u, platform=%d)", machine, platform);
  if (machine != target_) {
    LogPrintfError("machine(%u) != target_(%d)", machine, target_);
    return false;
  }
  if (platform != amd::Elf::CPU_PLATFORM) {
    // The message now names the platform that is actually compared against.
    LogPrintfError("platform(%d) != CPU_PLATFORM(%d)", platform, amd::Elf::CPU_PLATFORM);
    return false;
  }

  uint16_t type = ET_NONE;
  if (!elf->getType(type)) {
    // Name the call that actually failed (was a copy-pasted "elf->elf()").
    LogError("elf->getType() failed");
    return false;
  }
  LogPrintfInfo("getType(%u)", type);
  if (type != ET_EXEC) {
    LogError("type != ET_EXEC");
    return false;
  }

  char* buffer = nullptr;
  size_t size = 0;
  if (!elf->getSection(amd::Elf::COMMENT, &buffer, &size)) {
    LogError("elf->getSection(COMMENT) failed");
    return false;
  }
  LogPrintfInfo("getSection(COMMENT, buffer=%s, size=%zu)", buffer, size);
  // The section may hold more than the comment text, so only its prefix is
  // compared.
  if (size < commentSize_ || memcmp(comment_, buffer, commentSize_) != 0) {
    LogPrintfError("Not matched section: size = %zu, buffer = %s, expected: %zu, %s", size, buffer,
                   commentSize_, comment_);
    return false;
  }

  LogInfo("Reading rodataSymbolInfo");
  size_t i = 0;
  buffer = nullptr;
  size = 0;
  for (i = 0; i < rodataSymbolInfosSize_; i++) {
    auto& info = rodataSymbolInfos_[i];
    if (!elf->getSymbol(amd::Elf::RODATA, info.sym_name.c_str(), &buffer, &size)) {
      LogPrintfError("elf->getSymbol(RODATA, %s) failed at index %zu", info.sym_name.c_str(), i);
      return false;
    }
    LogPrintfInfo("getSymbol(amd::Elf::RODATA, sym_name=%s, buffer=%s, size=%zu)",
                  info.sym_name.c_str(), buffer, size);  // Will possibly print part of buffer
    if (size != info.size || memcmp(buffer, info.address, info.size)) {
      LogPrintfError("Not matched symbol(%s): size = %zu, buff = %s, expected: %zu, %s",
                     info.sym_name.c_str(), size, buffer, info.size, info.address);
      return false;
    }
  }
  LogInfo("Succeeded");

  LogInfo("reading commentSymbolInfo");
  buffer = nullptr;
  size = 0;
  for (i = 0; i < commentSymbolInfosSize_; i++) {
    auto& info = commentSymbolInfos_[i];
    if (!elf->getSymbol(amd::Elf::COMMENT, info.sym_name.c_str(), &buffer, &size)) {
      LogPrintfError("elf->getSymbol(COMMENT, %s) failed at index %zu", info.sym_name.c_str(), i);
      return false;
    }
    LogPrintfInfo("getSymbol(COMMENT, sym_name=%s, buffer=%s, size=%zu)", info.sym_name.c_str(),
                  buffer, size);  // Will possibly print part of buffer
    if (size != info.size || memcmp(buffer, info.address, info.size)) {
      LogPrintfError("Not matched symbol(%s): size = %zu, buff = %s, expected: %zu, %s",
                     info.sym_name.c_str(), size, buffer, info.size, info.address);
      return false;
    }
  }

  // Test another way: enumerate the symbols by index. The checks below expect
  // the RODATA symbols first, followed by the COMMENT symbols.
  auto symbolNum = elf->getSymbolNum();
  if (symbolNum != (rodataSymbolInfosSize_ + commentSymbolInfosSize_)) {
    // size_t values must be printed with %zu; %u is undefined on LP64.
    LogPrintfError(
        "Not matched: symbolNum(%zu) != rodataSymbolInfosSize_(%zu) +"
        " commentSymbolInfosSize_(%zu)",
        static_cast<size_t>(symbolNum), rodataSymbolInfosSize_, commentSymbolInfosSize_);
    return false;
  }

  for (i = 0; i < rodataSymbolInfosSize_; i++) {
    auto& info = rodataSymbolInfos_[i];
    amd::Elf::SymbolInfo symInfo;
    if (!elf->getSymbolInfo(i, &symInfo)) {
      LogPrintfError("getSymbolInfo(%zu) failed", i);
      return false;
    }
    LogPrintfInfo(
        "getSymbolInfo(%zu): amd::Elf::RODATA: sec_name=%s, sym_name=%s, "
        "address=%s, size=%lu, sec_addr=%s, sec_size=%lu)",
        i, symInfo.sec_name.c_str(), symInfo.sym_name.c_str(),
        symInfo.address,  // Will possibly print part of buffer
        symInfo.size, symInfo.sec_addr, symInfo.sec_size);
    if (symInfo.sec_name == info.sec_name && symInfo.sym_name == info.sym_name &&
        symInfo.size == info.size && ::memcmp(symInfo.address, info.address, info.size) == 0) {
      continue;
    }
    LogPrintfError("getSymbolInfo(%zu) returned not matched", i);
    return false;
  }

  // i deliberately continues from rodataSymbolInfosSize_.
  for (; i < symbolNum; i++) {
    auto& info = commentSymbolInfos_[i - rodataSymbolInfosSize_];
    amd::Elf::SymbolInfo symInfo;
    if (!elf->getSymbolInfo(i, &symInfo)) {
      LogPrintfError("getSymbolInfo(%zu) failed", i);
      return false;
    }
    LogPrintfInfo(
        "getSymbolInfo(%zu): amd::Elf::COMMENT: sec_name=%s, sym_name=%s, "
        "address=%s, size=%lu, sec_addr=%s, sec_size=%lu)",
        i, symInfo.sec_name.c_str(), symInfo.sym_name.c_str(),
        symInfo.address,  // Will possibly print part of buffer
        symInfo.size, symInfo.sec_addr, symInfo.sec_size);
    if (symInfo.sec_name == info.sec_name && symInfo.sym_name == info.sym_name &&
        symInfo.size == info.size && ::memcmp(symInfo.address, info.address, info.size) == 0) {
      continue;
    }
    LogPrintfError("getSymbolInfo(%zu) returned not matched", i);
    return false;
  }
  LogInfo("Succeeded");

  // Progress message, not a failure: log it at info level like its siblings.
  LogInfo("Reading noteInfos");
  buffer = nullptr;
  size = 0;
  for (i = 0; i < noteInfosSize_; i++) {
    auto& info = noteInfos_[i];
    if (!elf->getNote(info.noteName, &buffer, &size)) {
      LogPrintfError("elf->getNote(%s) failed at index %zu", info.noteName, i);
      return false;
    }
    // Will possibly print part of buffer
    LogPrintfInfo("getNote(noteName=%s, buffer=%s, size=%zu)", info.noteName, buffer, size);
    if (size != info.descSize || memcmp(buffer, info.noteDesc, info.descSize)) {
      LogPrintfError("Not matched note(%s): size = %zu, buff = %s, expected: %zu, %s",
                     info.noteName, size, buffer, info.descSize, info.noteDesc);
      return false;
    }
  }

  LogPrintfInfo("%s: Succeeded", __func__);
  return true;
}
//! Runs one write/verify/dump/re-read/verify round trip.
//!
//! A writer ELF object is filled by set() and checked by verify(). Its image
//! is then dumped and loaded into a second, read-only ELF object, which is
//! checked by verify() as well.
//!
//! \param eclass  ELF class (ELFCLASS32 or ELFCLASS64) used for both objects
//! \param outFile output file name handed to the writer; may be nullptr
//! \return true only when the re-read image verified successfully
bool test(unsigned char eclass = ELFCLASS64, const char* outFile = nullptr) {
  amd::Elf* writer = new amd::Elf(eclass, nullptr, 0, outFile, amd::Elf::ELF_C_WRITE);
  amd::Elf* reader = nullptr;
  bool ret = false;

  // Single-pass loop so that every failure can "break" to the common cleanup.
  do {
    if ((writer == nullptr) || !writer->isSuccessful()) {
      LogError("Creating writter ELF object failed");
      break;
    }

    // Writing
    if (!set(writer)) {
      break;
    }

    // Verifying
    if (!verify(writer)) {
      break;
    }

    char* buff = nullptr;
    unsigned long len = 0;
    if (writer->dumpImage(&buff, &len)) {
      // %p requires void*, and unsigned long requires %lu.
      LogPrintfInfo("dumpImage succeed: buff=%p, len=%lu)", static_cast<void*>(buff), len);
      reader = new amd::Elf(eclass, buff, len, nullptr, amd::Elf::ELF_C_READ);
      delete[] buff;
      if ((reader == nullptr) || !reader->isSuccessful()) {
        LogError("Creating reader ELF object failed");
        break;
      }
      ret = verify(reader);
      // reader is released exactly once in the cleanup below; deleting it
      // here as well caused a double delete on the success path.
    }
  } while (false);

  // delete on a null pointer is a no-op, so no guards are needed.
  delete reader;
  delete writer;

  // Emitted at error level for both outcomes, as before (presumably so the
  // summary is visible at any log level - confirm).
  LogPrintfError("%s(%s, %s): %s", __func__, eclass == ELFCLASS64 ? "ELFCLASS64" : "ELFCLASS32",
                 outFile ? outFile : "nullptr", ret ? "Succeeded" : "Failed");
  return ret;
}
//! Test driver: runs the round trip once with an output file and, if that
//! passes, once more without one.
//!
//! \return 0 when every executed round trip succeeded, 1 otherwise, so that
//!         scripts and CI can detect a failing test
int main() {
  amd::Flag::init();

  const unsigned char eclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
  const bool is32Bit = (eclass == ELFCLASS32);
  const char* outFile = is32Bit ? "elf32.bin" : "elf64.bin";

  bool ret = test(eclass, outFile);
  printf("%s: test(%s, %s) %s!\n", __func__, is32Bit ? "ELFCLASS32" : "ELFCLASS64",
         outFile, ret ? "Succeeded" : "Failed");

  if (ret) {
    ret = test(eclass, nullptr);
    printf("%s: test(%s, nullptr) %s!\n", __func__,
           is32Bit ? "ELFCLASS32" : "ELFCLASS64", ret ? "Succeeded" : "Failed");
  }

  // Propagate the outcome; the test previously exited with 0 even on failure.
  return ret ? 0 : 1;
}
+3 -4
Просмотреть файл
@@ -63,9 +63,8 @@ class KernelSignature : public HeapObject {
public:
enum {
ABIVersion_0 = 0, //! ABI constructed based on the OCL semantics
ABIVersion_1 = 1, //! ABI constructed based on the HW ABI returned from HSAIL
ABIVersion_2 = 2 //! ABI constructed based on the HW ABI returned from LC
ABIVersion_OCL = 0, //! ABI constructed based on the OCL semantics
ABIVersion_LC = 1 //! ABI constructed based on the HW ABI returned from LC
};
//! Default constructor
@@ -75,7 +74,7 @@ class KernelSignature : public HeapObject {
numMemories_(0),
numSamplers_(0),
numQueues_(0),
version_(ABIVersion_0) {}
version_(ABIVersion_OCL) {}
//! Construct a new signature.
KernelSignature(const std::vector<KernelParameterDescriptor>& params, const std::string& attrib,