This introduces LipoProteinLipase (lpl), a simple tool for creating fat binaries. It represents a direct replacement of the creaky hccgenco.sh script, which had various issues. The format it uses is that of a code object bundle, generated by the Clang Offload Bundler. The output is always suffixed with the ".adipose" extension. It is shared with HCC. The hipcc script and associated tests are modified to use lpl. Help can be obtained by invoking lpl --help. A more computer-sciency / corporate friendly name is likely to be beneficial, which is a reason for choosing easily searchable/replaceable names such as lpl or adipose.
[ROCm/hip commit: b842394957]
This commit is contained in:
@@ -0,0 +1,12 @@
|
||||
add_executable(lpl lpl.cpp)
|
||||
set_target_properties(
|
||||
lpl PROPERTIES
|
||||
CXX_STANDARD 11
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
CXX_EXTENSIONS OFF
|
||||
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(lpl PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||
# Install LPL if platform is hcc
|
||||
if (HIP_PLATFORM STREQUAL "hcc")
|
||||
install(TARGETS lpl RUNTIME DESTINATION bin)
|
||||
endif ()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,56 @@
|
||||
#include "lpl.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace clara;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try {
|
||||
if (!hipcc_and_lpl_colocated()) {
|
||||
throw runtime_error{
|
||||
"The LPL executable and hipcc must be in the same directory."};
|
||||
}
|
||||
|
||||
bool help = false;
|
||||
string flags;
|
||||
string output;
|
||||
vector<string> sources;
|
||||
string targets;
|
||||
|
||||
auto cmd = cmdline_parser(help, sources, targets, flags, output);
|
||||
|
||||
const auto r = cmd.parse(Args{argc, argv});
|
||||
|
||||
if (!r) throw runtime_error{r.errorMessage()};
|
||||
|
||||
if (help) cout << cmd << endl;
|
||||
else {
|
||||
if (sources.empty()) throw runtime_error{"No inputs specified."};
|
||||
|
||||
auto tmp = tokenize_targets(targets);
|
||||
if (tmp.empty()) {
|
||||
tmp.assign(amdgpu_targets().cbegin(), amdgpu_targets().cend());
|
||||
}
|
||||
else validate_targets(tmp);
|
||||
|
||||
if (output.empty()) for (auto&& x : tmp) output += x;
|
||||
|
||||
generate_fat_binary(sources, tmp, flags, output);
|
||||
}
|
||||
}
|
||||
catch (const exception& ex) {
|
||||
cerr << ex.what() << endl;
|
||||
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,246 @@
|
||||
#include "clara/clara.hpp"
|
||||
#include "pstreams/pstream.h"
|
||||
#include "../src/elfio/elfio.hpp"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
const std::unordered_set<std::string>& amdgpu_targets()
|
||||
{ // The evolving list lives at:
|
||||
// https://www.llvm.org/docs/AMDGPUUsage.html#processors.
|
||||
static const std::unordered_set<std::string> r{
|
||||
"gfx701", "gfx801", "gfx802", "gfx803", "gfx900"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& fat_binary_extension()
|
||||
{
|
||||
static const std::string r{".adipose"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& kernel_section()
|
||||
{
|
||||
static const std::string r{".kernel"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& path_to_self()
|
||||
{
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
|
||||
static std::string r(PATH_MAX, '\0');
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
decltype(readlink(self, &r.front(), r.size())) read_cnt;
|
||||
do {
|
||||
read_cnt = readlink(self, &r.front(), r.size());
|
||||
} while (read_cnt == -1);
|
||||
|
||||
r.resize(read_cnt);
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& path_to_hipcc()
|
||||
{
|
||||
assert(!path_to_self().empty());
|
||||
|
||||
static const auto r = path_to_self().substr(
|
||||
0, path_to_self().find_last_of('/')) += "/hipcc";
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string make_hipcc_call(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& targets,
|
||||
const std::string& flags,
|
||||
const std::string& hipcc_output)
|
||||
{
|
||||
assert(!sources.empty() && !targets.empty() && !hipcc_output.empty());
|
||||
|
||||
std::string r{path_to_hipcc() + ' '};
|
||||
|
||||
for (auto&& x : sources) r += x + ' ';
|
||||
r += "-o " + hipcc_output + ' ';
|
||||
for (auto&& x : targets) r += "--amdgpu-target=" + x + ' ';
|
||||
r += flags + " -fPIC -shared";
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void copy_kernel_section_to_fat_binary(
|
||||
const std::string& tmp, const std::string& output)
|
||||
{
|
||||
ELFIO::elfio reader;
|
||||
if (!reader.load(tmp)) {
|
||||
throw std::runtime_error{
|
||||
"The result of the compilation is inaccessible."};
|
||||
}
|
||||
|
||||
const auto it = std::find_if(
|
||||
reader.sections.begin(),
|
||||
reader.sections.end(),
|
||||
[](const ELFIO::section* x) {
|
||||
return x->get_name() == kernel_section();
|
||||
});
|
||||
|
||||
std::ofstream out{output + fat_binary_extension()};
|
||||
|
||||
if (it == reader.sections.end()) {
|
||||
std::cerr << "Warning: no kernels were generated; fat binary shall "
|
||||
"be empty." << std::endl;
|
||||
}
|
||||
else {
|
||||
std::copy_n(
|
||||
(*it)->get_data(),
|
||||
(*it)->get_size(),
|
||||
std::ostreambuf_iterator<char>{out});
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void generate_fat_binary(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& targets,
|
||||
const std::string& flags,
|
||||
const std::string& output)
|
||||
{
|
||||
static const auto d = [](const std::string* f) { remove(f->c_str()); };
|
||||
|
||||
std::unique_ptr<const std::string, decltype(d)> tmp{&output, d};
|
||||
|
||||
redi::ipstream hipcc{
|
||||
make_hipcc_call(sources, targets, flags, *tmp),
|
||||
redi::pstream::pstderr};
|
||||
|
||||
if (!hipcc.is_open()) {
|
||||
throw std::runtime_error{"Compiler invocation failed."};
|
||||
}
|
||||
|
||||
std::string log;
|
||||
while (std::getline(hipcc, log)) std::cout << log << '\n';
|
||||
|
||||
hipcc.close();
|
||||
|
||||
if (hipcc.rdbuf()->exited() &&
|
||||
hipcc.rdbuf()->status() != EXIT_SUCCESS) {
|
||||
throw std::runtime_error{"Compilation failed."};
|
||||
}
|
||||
|
||||
copy_kernel_section_to_fat_binary(*tmp, output);
|
||||
}
|
||||
|
||||
inline
|
||||
bool file_exists(const std::string& path_to)
|
||||
{
|
||||
return static_cast<bool>(std::ifstream{path_to});
|
||||
}
|
||||
|
||||
inline
|
||||
bool hipcc_and_lpl_colocated()
|
||||
{
|
||||
if (path_to_self().empty()) return false;
|
||||
|
||||
return file_exists(path_to_hipcc());
|
||||
}
|
||||
|
||||
inline
|
||||
std::vector<std::string> tokenize_targets(const std::string& x)
|
||||
{ // TODO: move to regular expressions once we clarify the need to support
|
||||
// ancient standard library implementations.
|
||||
if (x.empty()) return {};
|
||||
|
||||
static constexpr const char valid_characters[] = "gfx0123456789,";
|
||||
|
||||
if (x.find_first_not_of(valid_characters) != std::string::npos) {
|
||||
throw std::runtime_error{"Invalid target string: " + x};
|
||||
}
|
||||
|
||||
std::vector<std::string> r;
|
||||
|
||||
auto it = x.cbegin();
|
||||
do {
|
||||
auto it1 = std::find(it, x.cend(), ',');
|
||||
r.emplace_back(it, it1);
|
||||
|
||||
if (it1 == x.cend()) break;
|
||||
|
||||
it = ++it1;
|
||||
} while (true);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void validate_targets(const std::vector<std::string>& x)
|
||||
{
|
||||
assert(!x.empty());
|
||||
|
||||
for (auto&& t : x) {
|
||||
static const std::string digits{"0123456789"};
|
||||
static const std::string pre{"gfx"};
|
||||
|
||||
if (t.find(pre) != 0 ||
|
||||
t.find_first_not_of(digits, pre.size()) != std::string::npos) {
|
||||
throw std::runtime_error{"Invalid target: " + t};
|
||||
}
|
||||
|
||||
if (amdgpu_targets().find(t) == amdgpu_targets().cend()) {
|
||||
std::cerr << "Warning: target " << t
|
||||
<< " has not been validated yet; it may be invalid."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
clara::Parser cmdline_parser(
|
||||
bool& help,
|
||||
std::vector<std::string>& sources,
|
||||
std::string& targets,
|
||||
std::string& flags,
|
||||
std::string& output)
|
||||
{
|
||||
return
|
||||
clara::Opt{flags, "\"-v -DMACRO etc.\""}
|
||||
["-f"]["--flags"](
|
||||
"flags for compilation; must be valid for hipcc.") |
|
||||
clara::Help{help} |
|
||||
clara::Opt{output, "filename"}
|
||||
["-o"]["--output"](
|
||||
"name of fat-binary output file; the binary format of the "
|
||||
"file is documented at: https://reviews.llvm.org/D13909.") |
|
||||
clara::Arg{sources, "a.cpp b.cpp etc."}(
|
||||
"inputs for compilation; must contain valid C++ code.") |
|
||||
clara::Opt{targets, "gfx803,gfx900 etc."}
|
||||
["-t"]["--targets"](
|
||||
"targets for AMDGPU lowering; must be one of the processors"
|
||||
" with ROCm support from "
|
||||
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -93,8 +93,6 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
|
||||
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";
|
||||
|
||||
$ROCM_TARGET=$ENV{'ROCM_TARGET'} // "gfx803";
|
||||
|
||||
# HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below)
|
||||
$HCC = "$HCC_HOME/bin/hcc";
|
||||
$HCCFLAGS = "-hc -D__HIPCC__ -I$HCC_HOME/include ";
|
||||
@@ -128,7 +126,7 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
# Force -stdlib=libc++ on UB14.04
|
||||
$HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`;
|
||||
if (($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"")
|
||||
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
|
||||
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
|
||||
or ($HOST_OSNAME eq "\"rhel\"" and $HOST_OSVER eq "\"7.4\"")) {
|
||||
$HIPCXXFLAGS .= " -stdlib=libc++";
|
||||
$setStdLib = 1;
|
||||
@@ -202,7 +200,7 @@ if ($verbose & 0x4) {
|
||||
# Handle code object generation
|
||||
my $ISACMD="";
|
||||
if($HIP_PLATFORM eq "hcc"){
|
||||
$ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh ";
|
||||
$ISACMD .= "$HIP_PATH/bin/lpl ";
|
||||
if($ARGV[0] eq "--genco"){
|
||||
foreach $isaarg (@ARGV[1..$#ARGV]){
|
||||
$ISACMD .= " ";
|
||||
|
||||
@@ -37,17 +37,16 @@ namespace hip_impl
|
||||
hsa_isa_t triple_to_hsa_isa(const std::string& triple);
|
||||
|
||||
struct Bundled_code {
|
||||
union {
|
||||
union Header {
|
||||
struct {
|
||||
std::uint64_t offset;
|
||||
std::uint64_t bundle_sz;
|
||||
std::uint64_t triple_sz;
|
||||
};
|
||||
std::uint8_t cbuf[
|
||||
sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
|
||||
};
|
||||
char cbuf[sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
|
||||
} header;
|
||||
std::string triple;
|
||||
std::vector<std::uint8_t> blob;
|
||||
std::vector<char> blob;
|
||||
};
|
||||
|
||||
class Bundled_code_header {
|
||||
@@ -57,14 +56,13 @@ namespace hip_impl
|
||||
static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1;
|
||||
|
||||
// DATA
|
||||
union {
|
||||
union Header_ {
|
||||
struct {
|
||||
std::uint8_t bundler_magic_string_[magic_string_sz_];
|
||||
char bundler_magic_string_[magic_string_sz_];
|
||||
std::uint64_t bundle_cnt_;
|
||||
};
|
||||
std::uint8_t cbuf_[
|
||||
sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
|
||||
};
|
||||
char cbuf_[sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
|
||||
} header_;
|
||||
std::vector<Bundled_code> bundles_;
|
||||
|
||||
// FRIENDS - MANIPULATORS
|
||||
@@ -78,22 +76,24 @@ namespace hip_impl
|
||||
{
|
||||
if (f == l) return false;
|
||||
|
||||
std::copy_n(f, sizeof(x.cbuf_), x.cbuf_);
|
||||
std::copy_n(f, sizeof(x.header_.cbuf_), x.header_.cbuf_);
|
||||
|
||||
if (valid(x)) {
|
||||
x.bundles_.resize(x.bundle_cnt_);
|
||||
x.bundles_.resize(x.header_.bundle_cnt_);
|
||||
|
||||
auto it = f + sizeof(x.cbuf_);
|
||||
auto it = f + sizeof(x.header_.cbuf_);
|
||||
for (auto&& y : x.bundles_) {
|
||||
std::copy_n(it, sizeof(y.cbuf), y.cbuf);
|
||||
it += sizeof(y.cbuf);
|
||||
std::copy_n(it, sizeof(y.header.cbuf), y.header.cbuf);
|
||||
it += sizeof(y.header.cbuf);
|
||||
|
||||
y.triple.insert(y.triple.cend(), it, it + y.triple_sz);
|
||||
y.triple.assign(it, it + y.header.triple_sz);
|
||||
|
||||
std::copy_n(
|
||||
f + y.offset, y.bundle_sz, std::back_inserter(y.blob));
|
||||
f + y.header.offset,
|
||||
y.header.bundle_sz,
|
||||
std::back_inserter(y.blob));
|
||||
|
||||
it += y.triple_sz;
|
||||
it += y.header.triple_sz;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -103,7 +103,7 @@ namespace hip_impl
|
||||
}
|
||||
friend
|
||||
inline
|
||||
bool read(const std::vector<std::uint8_t>& blob, Bundled_code_header& x)
|
||||
bool read(const std::vector<char>& blob, Bundled_code_header& x)
|
||||
{
|
||||
return read(blob.cbegin(), blob.cend(), x);
|
||||
}
|
||||
@@ -111,7 +111,7 @@ namespace hip_impl
|
||||
inline
|
||||
bool read(std::istream& is, Bundled_code_header& x)
|
||||
{
|
||||
return read(std::vector<std::uint8_t>{
|
||||
return read(std::vector<char>{
|
||||
std::istreambuf_iterator<char>{is},
|
||||
std::istreambuf_iterator<char>{}},
|
||||
x);
|
||||
@@ -123,9 +123,9 @@ namespace hip_impl
|
||||
bool valid(const Bundled_code_header& x)
|
||||
{
|
||||
return std::equal(
|
||||
x.bundler_magic_string_,
|
||||
x.bundler_magic_string_ + magic_string_sz_,
|
||||
x.magic_string_);
|
||||
magic_string_,
|
||||
magic_string_ + magic_string_sz_,
|
||||
x.header_.bundler_magic_string_);
|
||||
}
|
||||
friend
|
||||
inline
|
||||
@@ -139,7 +139,9 @@ namespace hip_impl
|
||||
template<typename RandomAccessIterator>
|
||||
Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l);
|
||||
explicit
|
||||
Bundled_code_header(const std::vector<std::uint8_t>& blob);
|
||||
Bundled_code_header(const std::vector<char>& blob);
|
||||
explicit
|
||||
Bundled_code_header(const void* maybe_blob);
|
||||
Bundled_code_header(const Bundled_code_header&) = default;
|
||||
Bundled_code_header(Bundled_code_header&&) = default;
|
||||
~Bundled_code_header() = default;
|
||||
|
||||
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define fileName "vcpy_kernel.code.adipose"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
@@ -72,7 +72,6 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
@@ -30,7 +30,7 @@ THE SOFTWARE.
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define fileName "vcpy_kernel.code.adipose"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
@@ -68,7 +68,6 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
|
||||
extern "C" __global__ void hello_world(float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx];
|
||||
|
||||
@@ -30,7 +30,7 @@ THE SOFTWARE.
|
||||
#define LEN 64
|
||||
#define SIZE LEN*sizeof(float)
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define fileName "vcpy_kernel.code.adipose"
|
||||
float myDeviceGlobal;
|
||||
float myDeviceGlobalArray[16];
|
||||
#define HIP_CHECK(cmd) \
|
||||
@@ -80,7 +80,6 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6]; // genco path + wrapper-gen pass use hidden arguments.
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
@@ -111,7 +110,7 @@ int main(){
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
{
|
||||
{
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
@@ -136,7 +135,7 @@ int main(){
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
{
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
@@ -27,13 +27,13 @@ THE SOFTWARE.
|
||||
extern float myDeviceGlobal;
|
||||
extern float myDeviceGlobalArray[16];;
|
||||
|
||||
extern "C" __global__ void hello_world(hipLaunchParm lp, const float *a, float *b)
|
||||
extern "C" __global__ void hello_world(const float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx];
|
||||
}
|
||||
|
||||
extern "C" __global__ void test_globals(hipLaunchParm lp, const float *a, float *b)
|
||||
extern "C" __global__ void test_globals(const float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx] + myDeviceGlobal+ myDeviceGlobalArray[tx%ARRAY_SIZE] ;
|
||||
|
||||
@@ -2,22 +2,26 @@
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
|
||||
{
|
||||
static constexpr const char prefix[] = "hcc-amdgcn--amdhsa-gfx";
|
||||
static constexpr std::size_t prefix_sz = sizeof(prefix) - 1;
|
||||
static constexpr size_t prefix_sz = sizeof(prefix) - 1;
|
||||
|
||||
hsa_isa_t r = {};
|
||||
|
||||
auto idx = triple.find(prefix);
|
||||
|
||||
if (idx != std::string::npos) {
|
||||
if (idx != string::npos) {
|
||||
idx += prefix_sz;
|
||||
std::string tmp = "AMD:AMDGPU";
|
||||
string tmp = "AMD:AMDGPU";
|
||||
while (idx != triple.size()) {
|
||||
tmp.push_back(':');
|
||||
tmp.push_back(triple[idx++]);
|
||||
@@ -33,7 +37,31 @@ hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
|
||||
constexpr const char hip_impl::Bundled_code_header::magic_string_[];
|
||||
|
||||
// CREATORS
|
||||
hip_impl::Bundled_code_header::Bundled_code_header(
|
||||
const std::vector<std::uint8_t>& x)
|
||||
hip_impl::Bundled_code_header::Bundled_code_header(const vector<char>& x)
|
||||
: Bundled_code_header{x.cbegin(), x.cend()}
|
||||
{}
|
||||
{}
|
||||
|
||||
hip_impl::Bundled_code_header::Bundled_code_header(const void* p)
|
||||
{ // This is a pretty terrible interface, useful only because
|
||||
// hipLoadModuleData is so poorly specified (for no fault of its own).
|
||||
if (!p) return;
|
||||
|
||||
auto ph = static_cast<const Header_*>(p);
|
||||
|
||||
if (!equal(
|
||||
magic_string_,
|
||||
magic_string_ + magic_string_sz_,
|
||||
ph->bundler_magic_string_)) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header);
|
||||
auto pb = static_cast<const char*>(p) + sizeof(Header_);
|
||||
auto n = ph->bundle_cnt_;
|
||||
while (n--) {
|
||||
sz += reinterpret_cast<const Bundled_code::Header*>(pb)->bundle_sz;
|
||||
pb += sizeof(Bundled_code::Header);
|
||||
}
|
||||
|
||||
read(static_cast<const char*>(p), static_cast<const char*>(p) + sz, *this);
|
||||
}
|
||||
@@ -96,23 +96,6 @@ if (hsaStatus != HSA_STATUS_SUCCESS) {\
|
||||
return ihipLogStatus(hipStatus);\
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
{
|
||||
HIP_INIT_API(module, fname);
|
||||
|
||||
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
ifstream file{fname};
|
||||
|
||||
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
vector<char> tmp{
|
||||
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
|
||||
return hipModuleLoadData(module, tmp.data());
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t hmod)
|
||||
{
|
||||
HIP_INIT_API(hmod);
|
||||
@@ -473,6 +456,29 @@ namespace
|
||||
|
||||
return string{s, s + sz};
|
||||
}
|
||||
|
||||
string code_object_blob_for_agent(
|
||||
const void* maybe_bundled_code, hsa_agent_t agent)
|
||||
{
|
||||
if (!maybe_bundled_code) return {};
|
||||
|
||||
Bundled_code_header tmp{maybe_bundled_code};
|
||||
|
||||
if (!valid(tmp)) return {};
|
||||
|
||||
const auto agent_isa = isa(agent);
|
||||
|
||||
const auto it = find_if(
|
||||
bundles(tmp).cbegin(),
|
||||
bundles(tmp).cend(),
|
||||
[=](const Bundled_code& x) {
|
||||
return agent_isa == triple_to_hsa_isa(x.triple);;
|
||||
});
|
||||
|
||||
if (it == bundles(tmp).cend()) return {};
|
||||
|
||||
return string{it->blob.cbegin(), it->blob.cend()};
|
||||
}
|
||||
} // Anonymous namespace, internal linkage.
|
||||
|
||||
hipError_t ihipModuleGetFunction(
|
||||
@@ -526,6 +532,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
return ihipLogStatus(r);
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
{
|
||||
HIP_INIT_API(module, fname);
|
||||
|
||||
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
ifstream file{fname};
|
||||
|
||||
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
vector<char> tmp{
|
||||
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
|
||||
return hipModuleLoadData(module, tmp.data());
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
HIP_INIT_API(module, image);
|
||||
@@ -543,8 +565,12 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
nullptr,
|
||||
&(*module)->executable);
|
||||
|
||||
auto tmp = code_object_blob_for_agent(image, this_agent());
|
||||
|
||||
(*module)->executable = hip_impl::load_executable(
|
||||
read_elf_file_as_string(image), (*module)->executable, this_agent());
|
||||
tmp.empty() ? read_elf_file_as_string(image) : tmp,
|
||||
(*module)->executable,
|
||||
this_agent());
|
||||
|
||||
return ihipLogStatus(
|
||||
(*module)->executable.handle ? hipSuccess : hipErrorUnknown);
|
||||
|
||||
@@ -24,8 +24,27 @@ THE SOFTWARE.
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
inline
|
||||
constexpr
|
||||
bool operator==(hsa_isa_t x, hsa_isa_t y)
|
||||
{
|
||||
return x.handle == y.handle;
|
||||
}
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<hsa_isa_t> {
|
||||
size_t operator()(hsa_isa_t x) const
|
||||
{
|
||||
return hash<decltype(x.handle)>{}(x.handle);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
@@ -57,6 +76,19 @@ namespace hip_impl
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_isa_t isa(hsa_agent_t x)
|
||||
{
|
||||
hsa_isa_t r = {};
|
||||
hsa_agent_iterate_isas(x, [](hsa_isa_t i, void* o) {
|
||||
*static_cast<hsa_isa_t*>(o) = i; // Pick the first.
|
||||
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint64_t kernel_object(hsa_executable_symbol_t x)
|
||||
{
|
||||
|
||||
@@ -30,34 +30,16 @@ using namespace ELFIO;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<hsa_isa_t> {
|
||||
size_t operator()(hsa_isa_t x) const
|
||||
{
|
||||
return hash<decltype(x.handle)>{}(x.handle);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
inline
|
||||
constexpr
|
||||
bool operator==(hsa_isa_t x, hsa_isa_t y)
|
||||
{
|
||||
return x.handle == y.handle;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct Symbol {
|
||||
std::string name;
|
||||
string name;
|
||||
ELFIO::Elf64_Addr value = 0;
|
||||
ELFIO::Elf_Xword size = 0;
|
||||
ELFIO::Elf_Half sect_idx = 0;
|
||||
std::uint8_t bind = 0;
|
||||
std::uint8_t type = 0;
|
||||
std::uint8_t other = 0;
|
||||
Elf_Xword size = 0;
|
||||
Elf_Half sect_idx = 0;
|
||||
uint8_t bind = 0;
|
||||
uint8_t type = 0;
|
||||
uint8_t other = 0;
|
||||
};
|
||||
|
||||
inline
|
||||
@@ -185,7 +167,7 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
vector<uint8_t> code_object_blob_for_process()
|
||||
vector<char> code_object_blob_for_process()
|
||||
{
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
static constexpr const char kernel_section[] = ".kernel";
|
||||
@@ -200,7 +182,7 @@ namespace
|
||||
return x->get_name() == kernel_section;
|
||||
});
|
||||
|
||||
vector<uint8_t> r;
|
||||
vector<char> r;
|
||||
if (kernels) {
|
||||
r.insert(
|
||||
r.end(),
|
||||
@@ -211,13 +193,13 @@ namespace
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<hsa_isa_t, vector<vector<uint8_t>>>& code_object_blobs()
|
||||
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs()
|
||||
{
|
||||
static unordered_map<hsa_isa_t, vector<vector<uint8_t>>> r;
|
||||
static unordered_map<hsa_isa_t, vector<vector<char>>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
static vector<vector<uint8_t>> blobs{
|
||||
static vector<vector<char>> blobs{
|
||||
code_object_blob_for_process()};
|
||||
|
||||
dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) {
|
||||
@@ -481,7 +463,7 @@ namespace hip_impl
|
||||
|
||||
const auto code_object_dynsym =
|
||||
find_section_if(reader, [](const ELFIO::section* x) {
|
||||
return x->get_type() == SHT_DYNSYM;
|
||||
return x->get_type() == SHT_DYNSYM;
|
||||
});
|
||||
|
||||
associate_code_object_symbols_with_host_allocation(
|
||||
|
||||
@@ -69,7 +69,6 @@ void MemcpyFunction::load(const char *fileName, const char *functionName)
|
||||
void MemcpyFunction::launch(int * dst, const int * src, size_t numElements, hipStream_t s)
|
||||
{
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
int* _dst;
|
||||
const int* _src;
|
||||
size_t _numElements;
|
||||
|
||||
Reference in New Issue
Block a user