This introduces LipoProteinLipase (lpl), a simple tool for creating fat binaries. It represents a direct replacement of the creaky hccgenco.sh script, which had various issues. The format it uses is that of a code object bundle, generated by the Clang Offload Bundler. The output is always suffixed with the ".adipose" extension. It is shared with HCC. The hipcc script and associated tests are modified to use lpl. Help can be obtained by invoking lpl --help. A more computer-sciency / corporate friendly name is likely to be beneficial, which is a reason for choosing easily searchable/replaceable names such as lpl or adipose.

[ROCm/clr commit: 4e0739c68a]
Этот коммит содержится в:
Alex Voicu
2017-12-08 04:22:57 +00:00
родитель a25a01f2c9
Коммит aa48cc7b55
17 изменённых файлов: 3958 добавлений и 94 удалений
+12
Просмотреть файл
@@ -0,0 +1,12 @@
add_executable(lpl lpl.cpp)
set_target_properties(
lpl PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED ON
CXX_EXTENSIONS OFF
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
target_include_directories(lpl PUBLIC ${PROJECT_SOURCE_DIR}/src)
# Install LPL if platform is hcc
if (HIP_PLATFORM STREQUAL "hcc")
install(TARGETS lpl RUNTIME DESTINATION bin)
endif ()
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+56
Просмотреть файл
@@ -0,0 +1,56 @@
#include "lpl.hpp"
#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace clara;
using namespace hip_impl;
using namespace std;
int main(int argc, char** argv)
{
try {
if (!hipcc_and_lpl_colocated()) {
throw runtime_error{
"The LPL executable and hipcc must be in the same directory."};
}
bool help = false;
string flags;
string output;
vector<string> sources;
string targets;
auto cmd = cmdline_parser(help, sources, targets, flags, output);
const auto r = cmd.parse(Args{argc, argv});
if (!r) throw runtime_error{r.errorMessage()};
if (help) cout << cmd << endl;
else {
if (sources.empty()) throw runtime_error{"No inputs specified."};
auto tmp = tokenize_targets(targets);
if (tmp.empty()) {
tmp.assign(amdgpu_targets().cbegin(), amdgpu_targets().cend());
}
else validate_targets(tmp);
if (output.empty()) for (auto&& x : tmp) output += x;
generate_fat_binary(sources, tmp, flags, output);
}
}
catch (const exception& ex) {
cerr << ex.what() << endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
+246
Просмотреть файл
@@ -0,0 +1,246 @@
#include "clara/clara.hpp"
#include "pstreams/pstream.h"
#include "../src/elfio/elfio.hpp"
#include <unistd.h>
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdlib>
#include <mutex>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
namespace hip_impl
{
inline
const std::unordered_set<std::string>& amdgpu_targets()
{ // The evolving list lives at:
// https://www.llvm.org/docs/AMDGPUUsage.html#processors.
static const std::unordered_set<std::string> r{
"gfx701", "gfx801", "gfx802", "gfx803", "gfx900"};
return r;
}
inline
const std::string& fat_binary_extension()
{
static const std::string r{".adipose"};
return r;
}
inline
const std::string& kernel_section()
{
static const std::string r{".kernel"};
return r;
}
inline
const std::string& path_to_self()
{
static constexpr const char self[] = "/proc/self/exe";
static std::string r(PATH_MAX, '\0');
static std::once_flag f;
std::call_once(f, []() {
decltype(readlink(self, &r.front(), r.size())) read_cnt;
do {
read_cnt = readlink(self, &r.front(), r.size());
} while (read_cnt == -1);
r.resize(read_cnt);
});
return r;
}
inline
const std::string& path_to_hipcc()
{
assert(!path_to_self().empty());
static const auto r = path_to_self().substr(
0, path_to_self().find_last_of('/')) += "/hipcc";
return r;
}
inline
std::string make_hipcc_call(
const std::vector<std::string>& sources,
const std::vector<std::string>& targets,
const std::string& flags,
const std::string& hipcc_output)
{
assert(!sources.empty() && !targets.empty() && !hipcc_output.empty());
std::string r{path_to_hipcc() + ' '};
for (auto&& x : sources) r += x + ' ';
r += "-o " + hipcc_output + ' ';
for (auto&& x : targets) r += "--amdgpu-target=" + x + ' ';
r += flags + " -fPIC -shared";
return r;
}
inline
void copy_kernel_section_to_fat_binary(
const std::string& tmp, const std::string& output)
{
ELFIO::elfio reader;
if (!reader.load(tmp)) {
throw std::runtime_error{
"The result of the compilation is inaccessible."};
}
const auto it = std::find_if(
reader.sections.begin(),
reader.sections.end(),
[](const ELFIO::section* x) {
return x->get_name() == kernel_section();
});
std::ofstream out{output + fat_binary_extension()};
if (it == reader.sections.end()) {
std::cerr << "Warning: no kernels were generated; fat binary shall "
"be empty." << std::endl;
}
else {
std::copy_n(
(*it)->get_data(),
(*it)->get_size(),
std::ostreambuf_iterator<char>{out});
}
}
inline
void generate_fat_binary(
const std::vector<std::string>& sources,
const std::vector<std::string>& targets,
const std::string& flags,
const std::string& output)
{
static const auto d = [](const std::string* f) { remove(f->c_str()); };
std::unique_ptr<const std::string, decltype(d)> tmp{&output, d};
redi::ipstream hipcc{
make_hipcc_call(sources, targets, flags, *tmp),
redi::pstream::pstderr};
if (!hipcc.is_open()) {
throw std::runtime_error{"Compiler invocation failed."};
}
std::string log;
while (std::getline(hipcc, log)) std::cout << log << '\n';
hipcc.close();
if (hipcc.rdbuf()->exited() &&
hipcc.rdbuf()->status() != EXIT_SUCCESS) {
throw std::runtime_error{"Compilation failed."};
}
copy_kernel_section_to_fat_binary(*tmp, output);
}
inline
bool file_exists(const std::string& path_to)
{
return static_cast<bool>(std::ifstream{path_to});
}
inline
bool hipcc_and_lpl_colocated()
{
if (path_to_self().empty()) return false;
return file_exists(path_to_hipcc());
}
inline
std::vector<std::string> tokenize_targets(const std::string& x)
{ // TODO: move to regular expressions once we clarify the need to support
// ancient standard library implementations.
if (x.empty()) return {};
static constexpr const char valid_characters[] = "gfx0123456789,";
if (x.find_first_not_of(valid_characters) != std::string::npos) {
throw std::runtime_error{"Invalid target string: " + x};
}
std::vector<std::string> r;
auto it = x.cbegin();
do {
auto it1 = std::find(it, x.cend(), ',');
r.emplace_back(it, it1);
if (it1 == x.cend()) break;
it = ++it1;
} while (true);
return r;
}
inline
void validate_targets(const std::vector<std::string>& x)
{
assert(!x.empty());
for (auto&& t : x) {
static const std::string digits{"0123456789"};
static const std::string pre{"gfx"};
if (t.find(pre) != 0 ||
t.find_first_not_of(digits, pre.size()) != std::string::npos) {
throw std::runtime_error{"Invalid target: " + t};
}
if (amdgpu_targets().find(t) == amdgpu_targets().cend()) {
std::cerr << "Warning: target " << t
<< " has not been validated yet; it may be invalid."
<< std::endl;
}
}
}
inline
clara::Parser cmdline_parser(
bool& help,
std::vector<std::string>& sources,
std::string& targets,
std::string& flags,
std::string& output)
{
return
clara::Opt{flags, "\"-v -DMACRO etc.\""}
["-f"]["--flags"](
"flags for compilation; must be valid for hipcc.") |
clara::Help{help} |
clara::Opt{output, "filename"}
["-o"]["--output"](
"name of fat-binary output file; the binary format of the "
"file is documented at: https://reviews.llvm.org/D13909.") |
clara::Arg{sources, "a.cpp b.cpp etc."}(
"inputs for compilation; must contain valid C++ code.") |
clara::Opt{targets, "gfx803,gfx900 etc."}
["-t"]["--targets"](
"targets for AMDGPU lowering; must be one of the processors"
" with ROCm support from "
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
}
}
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+2 -4
Просмотреть файл
@@ -93,8 +93,6 @@ if ($HIP_PLATFORM eq "hcc") {
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";
$ROCM_TARGET=$ENV{'ROCM_TARGET'} // "gfx803";
# HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below)
$HCC = "$HCC_HOME/bin/hcc";
$HCCFLAGS = "-hc -D__HIPCC__ -I$HCC_HOME/include ";
@@ -128,7 +126,7 @@ if ($HIP_PLATFORM eq "hcc") {
# Force -stdlib=libc++ on UB14.04
$HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`;
if (($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"")
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
or ($HOST_OSNAME eq "\"rhel\"" and $HOST_OSVER eq "\"7.4\"")) {
$HIPCXXFLAGS .= " -stdlib=libc++";
$setStdLib = 1;
@@ -202,7 +200,7 @@ if ($verbose & 0x4) {
# Handle code object generation
my $ISACMD="";
if($HIP_PLATFORM eq "hcc"){
$ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh ";
$ISACMD .= "$HIP_PATH/bin/lpl ";
if($ARGV[0] eq "--genco"){
foreach $isaarg (@ARGV[1..$#ARGV]){
$ISACMD .= " ";
+26 -24
Просмотреть файл
@@ -37,17 +37,16 @@ namespace hip_impl
hsa_isa_t triple_to_hsa_isa(const std::string& triple);
struct Bundled_code {
union {
union Header {
struct {
std::uint64_t offset;
std::uint64_t bundle_sz;
std::uint64_t triple_sz;
};
std::uint8_t cbuf[
sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
};
char cbuf[sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
} header;
std::string triple;
std::vector<std::uint8_t> blob;
std::vector<char> blob;
};
class Bundled_code_header {
@@ -57,14 +56,13 @@ namespace hip_impl
static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1;
// DATA
union {
union Header_ {
struct {
std::uint8_t bundler_magic_string_[magic_string_sz_];
char bundler_magic_string_[magic_string_sz_];
std::uint64_t bundle_cnt_;
};
std::uint8_t cbuf_[
sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
};
char cbuf_[sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
} header_;
std::vector<Bundled_code> bundles_;
// FRIENDS - MANIPULATORS
@@ -78,22 +76,24 @@ namespace hip_impl
{
if (f == l) return false;
std::copy_n(f, sizeof(x.cbuf_), x.cbuf_);
std::copy_n(f, sizeof(x.header_.cbuf_), x.header_.cbuf_);
if (valid(x)) {
x.bundles_.resize(x.bundle_cnt_);
x.bundles_.resize(x.header_.bundle_cnt_);
auto it = f + sizeof(x.cbuf_);
auto it = f + sizeof(x.header_.cbuf_);
for (auto&& y : x.bundles_) {
std::copy_n(it, sizeof(y.cbuf), y.cbuf);
it += sizeof(y.cbuf);
std::copy_n(it, sizeof(y.header.cbuf), y.header.cbuf);
it += sizeof(y.header.cbuf);
y.triple.insert(y.triple.cend(), it, it + y.triple_sz);
y.triple.assign(it, it + y.header.triple_sz);
std::copy_n(
f + y.offset, y.bundle_sz, std::back_inserter(y.blob));
f + y.header.offset,
y.header.bundle_sz,
std::back_inserter(y.blob));
it += y.triple_sz;
it += y.header.triple_sz;
}
return true;
@@ -103,7 +103,7 @@ namespace hip_impl
}
friend
inline
bool read(const std::vector<std::uint8_t>& blob, Bundled_code_header& x)
bool read(const std::vector<char>& blob, Bundled_code_header& x)
{
return read(blob.cbegin(), blob.cend(), x);
}
@@ -111,7 +111,7 @@ namespace hip_impl
inline
bool read(std::istream& is, Bundled_code_header& x)
{
return read(std::vector<std::uint8_t>{
return read(std::vector<char>{
std::istreambuf_iterator<char>{is},
std::istreambuf_iterator<char>{}},
x);
@@ -123,9 +123,9 @@ namespace hip_impl
bool valid(const Bundled_code_header& x)
{
return std::equal(
x.bundler_magic_string_,
x.bundler_magic_string_ + magic_string_sz_,
x.magic_string_);
magic_string_,
magic_string_ + magic_string_sz_,
x.header_.bundler_magic_string_);
}
friend
inline
@@ -139,7 +139,9 @@ namespace hip_impl
template<typename RandomAccessIterator>
Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l);
explicit
Bundled_code_header(const std::vector<std::uint8_t>& blob);
Bundled_code_header(const std::vector<char>& blob);
explicit
Bundled_code_header(const void* maybe_blob);
Bundled_code_header(const Bundled_code_header&) = default;
Bundled_code_header(Bundled_code_header&&) = default;
~Bundled_code_header() = default;
+1 -2
Просмотреть файл
@@ -35,7 +35,7 @@ THE SOFTWARE.
#define LEN 64
#define SIZE LEN<<2
#define fileName "vcpy_kernel.code"
#define fileName "vcpy_kernel.code.adipose"
#define kernel_name "hello_world"
#define HIP_CHECK(status) \
@@ -72,7 +72,6 @@ int main(){
uint32_t one = 1;
struct {
uint32_t _hidden[6];
void * _Ad;
void * _Bd;
} args;
+1 -2
Просмотреть файл
@@ -30,7 +30,7 @@ THE SOFTWARE.
#define LEN 64
#define SIZE LEN<<2
#define fileName "vcpy_kernel.code"
#define fileName "vcpy_kernel.code.adipose"
#define kernel_name "hello_world"
#define HIP_CHECK(status) \
@@ -68,7 +68,6 @@ int main(){
uint32_t one = 1;
struct {
uint32_t _hidden[6];
void * _Ad;
void * _Bd;
} args;
+1 -1
Просмотреть файл
@@ -22,7 +22,7 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
extern "C" __global__ void hello_world(float *a, float *b)
{
int tx = hipThreadIdx_x;
b[tx] = a[tx];
+3 -4
Просмотреть файл
@@ -30,7 +30,7 @@ THE SOFTWARE.
#define LEN 64
#define SIZE LEN*sizeof(float)
#define fileName "vcpy_kernel.code"
#define fileName "vcpy_kernel.code.adipose"
float myDeviceGlobal;
float myDeviceGlobalArray[16];
#define HIP_CHECK(cmd) \
@@ -80,7 +80,6 @@ int main(){
uint32_t one = 1;
struct {
uint32_t _hidden[6]; // genco path + wrapper-gen pass use hidden arguments.
void * _Ad;
void * _Bd;
} args;
@@ -111,7 +110,7 @@ int main(){
HIP_LAUNCH_PARAM_END
};
{
{
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
@@ -136,7 +135,7 @@ int main(){
};
}
{
{
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
+2 -2
Просмотреть файл
@@ -27,13 +27,13 @@ THE SOFTWARE.
extern float myDeviceGlobal;
extern float myDeviceGlobalArray[16];;
extern "C" __global__ void hello_world(hipLaunchParm lp, const float *a, float *b)
extern "C" __global__ void hello_world(const float *a, float *b)
{
int tx = hipThreadIdx_x;
b[tx] = a[tx];
}
extern "C" __global__ void test_globals(hipLaunchParm lp, const float *a, float *b)
extern "C" __global__ void test_globals(const float *a, float *b)
{
int tx = hipThreadIdx_x;
b[tx] = a[tx] + myDeviceGlobal+ myDeviceGlobalArray[tx%ARRAY_SIZE] ;
+34 -6
Просмотреть файл
@@ -2,22 +2,26 @@
#include <hsa/hsa.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
using namespace std;
hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
{
static constexpr const char prefix[] = "hcc-amdgcn--amdhsa-gfx";
static constexpr std::size_t prefix_sz = sizeof(prefix) - 1;
static constexpr size_t prefix_sz = sizeof(prefix) - 1;
hsa_isa_t r = {};
auto idx = triple.find(prefix);
if (idx != std::string::npos) {
if (idx != string::npos) {
idx += prefix_sz;
std::string tmp = "AMD:AMDGPU";
string tmp = "AMD:AMDGPU";
while (idx != triple.size()) {
tmp.push_back(':');
tmp.push_back(triple[idx++]);
@@ -33,7 +37,31 @@ hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
constexpr const char hip_impl::Bundled_code_header::magic_string_[];
// CREATORS
hip_impl::Bundled_code_header::Bundled_code_header(
const std::vector<std::uint8_t>& x)
hip_impl::Bundled_code_header::Bundled_code_header(const vector<char>& x)
: Bundled_code_header{x.cbegin(), x.cend()}
{}
{}
hip_impl::Bundled_code_header::Bundled_code_header(const void* p)
{ // This is a pretty terrible interface, useful only because
// hipLoadModuleData is so poorly specified (for no fault of its own).
if (!p) return;
auto ph = static_cast<const Header_*>(p);
if (!equal(
magic_string_,
magic_string_ + magic_string_sz_,
ph->bundler_magic_string_)) {
return;
}
size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header);
auto pb = static_cast<const char*>(p) + sizeof(Header_);
auto n = ph->bundle_cnt_;
while (n--) {
sz += reinterpret_cast<const Bundled_code::Header*>(pb)->bundle_sz;
pb += sizeof(Bundled_code::Header);
}
read(static_cast<const char*>(p), static_cast<const char*>(p) + sz, *this);
}
+44 -18
Просмотреть файл
@@ -96,23 +96,6 @@ if (hsaStatus != HSA_STATUS_SUCCESS) {\
return ihipLogStatus(hipStatus);\
}
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
{
HIP_INIT_API(module, fname);
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
ifstream file{fname};
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
vector<char> tmp{
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
return hipModuleLoadData(module, tmp.data());
}
hipError_t hipModuleUnload(hipModule_t hmod)
{
HIP_INIT_API(hmod);
@@ -473,6 +456,29 @@ namespace
return string{s, s + sz};
}
string code_object_blob_for_agent(
const void* maybe_bundled_code, hsa_agent_t agent)
{
if (!maybe_bundled_code) return {};
Bundled_code_header tmp{maybe_bundled_code};
if (!valid(tmp)) return {};
const auto agent_isa = isa(agent);
const auto it = find_if(
bundles(tmp).cbegin(),
bundles(tmp).cend(),
[=](const Bundled_code& x) {
return agent_isa == triple_to_hsa_isa(x.triple);;
});
if (it == bundles(tmp).cend()) return {};
return string{it->blob.cbegin(), it->blob.cend()};
}
} // Anonymous namespace, internal linkage.
hipError_t ihipModuleGetFunction(
@@ -526,6 +532,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
return ihipLogStatus(r);
}
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
{
HIP_INIT_API(module, fname);
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
ifstream file{fname};
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
vector<char> tmp{
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
return hipModuleLoadData(module, tmp.data());
}
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
{
HIP_INIT_API(module, image);
@@ -543,8 +565,12 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
nullptr,
&(*module)->executable);
auto tmp = code_object_blob_for_agent(image, this_agent());
(*module)->executable = hip_impl::load_executable(
read_elf_file_as_string(image), (*module)->executable, this_agent());
tmp.empty() ? read_elf_file_as_string(image) : tmp,
(*module)->executable,
this_agent());
return ihipLogStatus(
(*module)->executable.handle ? hipSuccess : hipErrorUnknown);
+32
Просмотреть файл
@@ -24,8 +24,27 @@ THE SOFTWARE.
#include <hsa/hsa.h>
#include <cstdint>
#include <functional>
#include <string>
inline
constexpr
bool operator==(hsa_isa_t x, hsa_isa_t y)
{
return x.handle == y.handle;
}
namespace std
{
template<>
struct hash<hsa_isa_t> {
size_t operator()(hsa_isa_t x) const
{
return hash<decltype(x.handle)>{}(x.handle);
}
};
}
namespace hip_impl
{
inline
@@ -57,6 +76,19 @@ namespace hip_impl
return r;
}
inline
hsa_isa_t isa(hsa_agent_t x)
{
hsa_isa_t r = {};
hsa_agent_iterate_isas(x, [](hsa_isa_t i, void* o) {
*static_cast<hsa_isa_t*>(o) = i; // Pick the first.
return HSA_STATUS_INFO_BREAK;
}, &r);
return r;
}
inline
std::uint64_t kernel_object(hsa_executable_symbol_t x)
{
+12 -30
Просмотреть файл
@@ -30,34 +30,16 @@ using namespace ELFIO;
using namespace hip_impl;
using namespace std;
namespace std
{
template<>
struct hash<hsa_isa_t> {
size_t operator()(hsa_isa_t x) const
{
return hash<decltype(x.handle)>{}(x.handle);
}
};
}
inline
constexpr
bool operator==(hsa_isa_t x, hsa_isa_t y)
{
return x.handle == y.handle;
}
namespace
{
struct Symbol {
std::string name;
string name;
ELFIO::Elf64_Addr value = 0;
ELFIO::Elf_Xword size = 0;
ELFIO::Elf_Half sect_idx = 0;
std::uint8_t bind = 0;
std::uint8_t type = 0;
std::uint8_t other = 0;
Elf_Xword size = 0;
Elf_Half sect_idx = 0;
uint8_t bind = 0;
uint8_t type = 0;
uint8_t other = 0;
};
inline
@@ -185,7 +167,7 @@ namespace
}
}
vector<uint8_t> code_object_blob_for_process()
vector<char> code_object_blob_for_process()
{
static constexpr const char self[] = "/proc/self/exe";
static constexpr const char kernel_section[] = ".kernel";
@@ -200,7 +182,7 @@ namespace
return x->get_name() == kernel_section;
});
vector<uint8_t> r;
vector<char> r;
if (kernels) {
r.insert(
r.end(),
@@ -211,13 +193,13 @@ namespace
return r;
}
const unordered_map<hsa_isa_t, vector<vector<uint8_t>>>& code_object_blobs()
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs()
{
static unordered_map<hsa_isa_t, vector<vector<uint8_t>>> r;
static unordered_map<hsa_isa_t, vector<vector<char>>> r;
static once_flag f;
call_once(f, []() {
static vector<vector<uint8_t>> blobs{
static vector<vector<char>> blobs{
code_object_blob_for_process()};
dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) {
@@ -481,7 +463,7 @@ namespace hip_impl
const auto code_object_dynsym =
find_section_if(reader, [](const ELFIO::section* x) {
return x->get_type() == SHT_DYNSYM;
return x->get_type() == SHT_DYNSYM;
});
associate_code_object_symbols_with_host_allocation(
-1
Просмотреть файл
@@ -69,7 +69,6 @@ void MemcpyFunction::load(const char *fileName, const char *functionName)
void MemcpyFunction::launch(int * dst, const int * src, size_t numElements, hipStream_t s)
{
struct {
uint32_t _hidden[6];
int* _dst;
const int* _src;
size_t _numElements;