Merge pull request #288 from AlexVlx/feature_purge_genco
Purge hsagenco.sh
[ROCm/clr commit: 1190a9e5d0]
This commit is contained in:
@@ -144,6 +144,13 @@ if (BUILD_HIPIFY_CLANG)
|
||||
add_subdirectory(hipify-clang)
|
||||
endif()
|
||||
|
||||
# Build LPL an CA (fat binary generation / fat binary decomposition tools) if
|
||||
# platform is hcc; do this before the ugly hijacking of the compiler, since no
|
||||
# HC code is involved.
|
||||
if (HIP_PLATFORM STREQUAL "hcc")
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
|
||||
endif ()
|
||||
|
||||
# Build hip_hcc if platform is hcc
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
|
||||
@@ -93,8 +93,6 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
|
||||
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";
|
||||
|
||||
$ROCM_TARGET=$ENV{'ROCM_TARGET'} // "gfx803";
|
||||
|
||||
# HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below)
|
||||
$HCC = "$HCC_HOME/bin/hcc";
|
||||
$HCCFLAGS = "-hc -D__HIPCC__ -I$HCC_HOME/include ";
|
||||
@@ -128,7 +126,7 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
# Force -stdlib=libc++ on UB14.04
|
||||
$HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`;
|
||||
if (($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"")
|
||||
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
|
||||
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
|
||||
or ($HOST_OSNAME eq "\"rhel\"" and $HOST_OSVER eq "\"7.4\"")) {
|
||||
$HIPCXXFLAGS .= " -stdlib=libc++";
|
||||
$setStdLib = 1;
|
||||
@@ -203,7 +201,7 @@ if ($verbose & 0x4) {
|
||||
# Handle code object generation
|
||||
my $ISACMD="";
|
||||
if($HIP_PLATFORM eq "hcc"){
|
||||
$ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh ";
|
||||
$ISACMD .= "$HIP_PATH/bin/lpl ";
|
||||
if($ARGV[0] eq "--genco"){
|
||||
foreach $isaarg (@ARGV[1..$#ARGV]){
|
||||
$ISACMD .= " ";
|
||||
@@ -218,7 +216,6 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
}
|
||||
|
||||
if(($HIP_PLATFORM eq "hcc")){
|
||||
$EXPORT_LL=" ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
|
||||
$ENV{HIP_HC_IR_FILE}="";
|
||||
}
|
||||
|
||||
@@ -37,17 +37,16 @@ namespace hip_impl
|
||||
hsa_isa_t triple_to_hsa_isa(const std::string& triple);
|
||||
|
||||
struct Bundled_code {
|
||||
union {
|
||||
union Header {
|
||||
struct {
|
||||
std::uint64_t offset;
|
||||
std::uint64_t bundle_sz;
|
||||
std::uint64_t triple_sz;
|
||||
};
|
||||
std::uint8_t cbuf[
|
||||
sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
|
||||
};
|
||||
char cbuf[sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
|
||||
} header;
|
||||
std::string triple;
|
||||
std::vector<std::uint8_t> blob;
|
||||
std::vector<char> blob;
|
||||
};
|
||||
|
||||
class Bundled_code_header {
|
||||
@@ -57,14 +56,13 @@ namespace hip_impl
|
||||
static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1;
|
||||
|
||||
// DATA
|
||||
union {
|
||||
union Header_ {
|
||||
struct {
|
||||
std::uint8_t bundler_magic_string_[magic_string_sz_];
|
||||
char bundler_magic_string_[magic_string_sz_];
|
||||
std::uint64_t bundle_cnt_;
|
||||
};
|
||||
std::uint8_t cbuf_[
|
||||
sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
|
||||
};
|
||||
char cbuf_[sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
|
||||
} header_;
|
||||
std::vector<Bundled_code> bundles_;
|
||||
|
||||
// FRIENDS - MANIPULATORS
|
||||
@@ -78,22 +76,24 @@ namespace hip_impl
|
||||
{
|
||||
if (f == l) return false;
|
||||
|
||||
std::copy_n(f, sizeof(x.cbuf_), x.cbuf_);
|
||||
std::copy_n(f, sizeof(x.header_.cbuf_), x.header_.cbuf_);
|
||||
|
||||
if (valid(x)) {
|
||||
x.bundles_.resize(x.bundle_cnt_);
|
||||
x.bundles_.resize(x.header_.bundle_cnt_);
|
||||
|
||||
auto it = f + sizeof(x.cbuf_);
|
||||
auto it = f + sizeof(x.header_.cbuf_);
|
||||
for (auto&& y : x.bundles_) {
|
||||
std::copy_n(it, sizeof(y.cbuf), y.cbuf);
|
||||
it += sizeof(y.cbuf);
|
||||
std::copy_n(it, sizeof(y.header.cbuf), y.header.cbuf);
|
||||
it += sizeof(y.header.cbuf);
|
||||
|
||||
y.triple.assign(it, it + y.triple_sz);
|
||||
y.triple.assign(it, it + y.header.triple_sz);
|
||||
|
||||
std::copy_n(
|
||||
f + y.offset, y.bundle_sz, std::back_inserter(y.blob));
|
||||
f + y.header.offset,
|
||||
y.header.bundle_sz,
|
||||
std::back_inserter(y.blob));
|
||||
|
||||
it += y.triple_sz;
|
||||
it += y.header.triple_sz;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -103,7 +103,7 @@ namespace hip_impl
|
||||
}
|
||||
friend
|
||||
inline
|
||||
bool read(const std::vector<std::uint8_t>& blob, Bundled_code_header& x)
|
||||
bool read(const std::vector<char>& blob, Bundled_code_header& x)
|
||||
{
|
||||
return read(blob.cbegin(), blob.cend(), x);
|
||||
}
|
||||
@@ -111,7 +111,7 @@ namespace hip_impl
|
||||
inline
|
||||
bool read(std::istream& is, Bundled_code_header& x)
|
||||
{
|
||||
return read(std::vector<std::uint8_t>{
|
||||
return read(std::vector<char>{
|
||||
std::istreambuf_iterator<char>{is},
|
||||
std::istreambuf_iterator<char>{}},
|
||||
x);
|
||||
@@ -123,9 +123,9 @@ namespace hip_impl
|
||||
bool valid(const Bundled_code_header& x)
|
||||
{
|
||||
return std::equal(
|
||||
x.bundler_magic_string_,
|
||||
x.bundler_magic_string_ + magic_string_sz_,
|
||||
x.magic_string_);
|
||||
magic_string_,
|
||||
magic_string_ + magic_string_sz_,
|
||||
x.header_.bundler_magic_string_);
|
||||
}
|
||||
friend
|
||||
inline
|
||||
@@ -139,7 +139,9 @@ namespace hip_impl
|
||||
template<typename RandomAccessIterator>
|
||||
Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l);
|
||||
explicit
|
||||
Bundled_code_header(const std::vector<std::uint8_t>& blob);
|
||||
Bundled_code_header(const std::vector<char>& blob);
|
||||
explicit
|
||||
Bundled_code_header(const void* maybe_blob);
|
||||
Bundled_code_header(const Bundled_code_header&) = default;
|
||||
Bundled_code_header(Bundled_code_header&&) = default;
|
||||
~Bundled_code_header() = default;
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
#-------------------------------------LPL--------------------------------------#
|
||||
add_executable(lpl lpl.cpp)
|
||||
set_target_properties(
|
||||
lpl PROPERTIES
|
||||
CXX_STANDARD 11
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
CXX_EXTENSIONS OFF
|
||||
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(lpl PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||
target_compile_options(lpl PUBLIC -Wall)
|
||||
target_link_libraries(lpl PUBLIC pthread)
|
||||
|
||||
install(TARGETS lpl RUNTIME DESTINATION bin)
|
||||
#-------------------------------------LPL--------------------------------------#
|
||||
|
||||
#-------------------------------------CA---------------------------------------#
|
||||
add_executable(ca ca.cpp ${PROJECT_SOURCE_DIR}/src/code_object_bundle.cpp)
|
||||
set_target_properties(
|
||||
ca PROPERTIES
|
||||
CXX_STANDARD 11
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
CXX_EXTENSIONS OFF
|
||||
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(ca SYSTEM PUBLIC ${HSA_PATH}/include)
|
||||
target_include_directories(ca PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||
find_library(
|
||||
hsart NAMES libhsa-runtime64.so libhsa-runtime64.so.1 HINTS ${HSA_PATH}/lib)
|
||||
target_link_libraries(ca PUBLIC ${hsart})
|
||||
target_compile_options(ca PUBLIC -Wall)
|
||||
|
||||
install(TARGETS ca RUNTIME DESTINATION bin)
|
||||
#-------------------------------------CA---------------------------------------#
|
||||
@@ -0,0 +1,49 @@
|
||||
#include "ca.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace clara;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try {
|
||||
bool help = false;
|
||||
vector<string> inputs;
|
||||
string targets;
|
||||
|
||||
auto cmd = cmdline_parser(help, inputs, targets);
|
||||
|
||||
const auto r = cmd.parse(Args{argc, argv});
|
||||
|
||||
if (!r) throw runtime_error{r.errorMessage()};
|
||||
|
||||
if (help) cout << cmd << endl;
|
||||
else {
|
||||
if (inputs.empty()) throw runtime_error{"No inputs specified."};
|
||||
|
||||
validate_inputs(inputs);
|
||||
|
||||
auto tmp = tokenize_targets(targets);
|
||||
if (tmp.empty()) {
|
||||
tmp.assign(amdgpu_targets().cbegin(), amdgpu_targets().cend());
|
||||
}
|
||||
else validate_targets(tmp);
|
||||
|
||||
extract_code_objects(inputs, tmp);
|
||||
}
|
||||
}
|
||||
catch (const exception& ex) {
|
||||
cerr << ex.what() << endl;
|
||||
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
#pragma once
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
|
||||
|
||||
#include "clara/clara.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
clara::Parser cmdline_parser(
|
||||
bool& help,
|
||||
std::vector<std::string>& inputs,
|
||||
std::string& targets)
|
||||
{
|
||||
return
|
||||
clara::Help{help} |
|
||||
clara::Arg{inputs, "a" + fat_binary_extension() + " etc."}(
|
||||
"fat binaries which contain the code objects to be extracted; "
|
||||
"the binary format of the file(s) is documented at: "
|
||||
"https://reviews.llvm.org/D13909; "
|
||||
"the code object format is documented at: "
|
||||
"https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") |
|
||||
clara::Opt{targets, "gfx803,gfx900 etc."}
|
||||
["-t"]["--targets"](
|
||||
"targets for which code objects are to be extracted from "
|
||||
"the fat binary; must be included in the set of processors "
|
||||
"with ROCm support from "
|
||||
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
|
||||
}
|
||||
|
||||
inline
|
||||
std::string make_code_object_file_name(
|
||||
const std::string& input, const std::string& target)
|
||||
{
|
||||
assert(!input.empty() && !target.empty());
|
||||
|
||||
auto r = input.substr(0, input.find(fat_binary_extension()));
|
||||
r += '_' + target + code_object_extension();
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void extract_code_objects(
|
||||
const std::vector<std::string>& inputs,
|
||||
const std::vector<std::string>& targets)
|
||||
{
|
||||
for (auto&& input : inputs) {
|
||||
std::ifstream tmp{input};
|
||||
std::vector<char> bundle{
|
||||
std::istreambuf_iterator<char>{tmp},
|
||||
std::istreambuf_iterator<char>{}};
|
||||
|
||||
Bundled_code_header tmp1{bundle};
|
||||
|
||||
if (!valid(tmp1)) {
|
||||
throw std::runtime_error{input + " is not a valid fat binary."};
|
||||
}
|
||||
|
||||
for (auto&& target : targets) {
|
||||
const auto it = std::find_if(
|
||||
bundles(tmp1).cbegin(),
|
||||
bundles(tmp1).cend(),
|
||||
[&](const Bundled_code& x) {
|
||||
return x.triple.find(target) != std::string::npos;
|
||||
});
|
||||
|
||||
if (it == bundles(tmp1).cend()) {
|
||||
std::cerr << "Warning: " << input
|
||||
<< " does not contain code for the " << target
|
||||
<< " target.";
|
||||
continue;
|
||||
}
|
||||
|
||||
std::ofstream out{make_code_object_file_name(input, target)};
|
||||
std::copy_n(
|
||||
it->blob.cbegin(),
|
||||
it->blob.size(),
|
||||
std::ostreambuf_iterator<char>{out});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void validate_inputs(const std::vector<std::string>& inputs)
|
||||
{
|
||||
const auto it = std::find_if_not(
|
||||
inputs.cbegin(), inputs.cend(), file_exists);
|
||||
|
||||
if (it != inputs.cend()) {
|
||||
throw std::runtime_error{
|
||||
"Non existent file " + *it + " passed as input."};
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
const std::unordered_set<std::string>& amdgpu_targets()
|
||||
{ // The evolving list lives at:
|
||||
// https://www.llvm.org/docs/AMDGPUUsage.html#processors.
|
||||
static const std::unordered_set<std::string> r{
|
||||
"gfx701", "gfx801", "gfx802", "gfx803", "gfx900"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& code_object_extension()
|
||||
{
|
||||
static const std::string r{".ffa"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& fat_binary_extension()
|
||||
{
|
||||
static const std::string r{".adipose"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
bool file_exists(const std::string& path_to)
|
||||
{
|
||||
return static_cast<bool>(std::ifstream{path_to});
|
||||
}
|
||||
|
||||
inline
|
||||
std::vector<std::string> tokenize_targets(const std::string& x)
|
||||
{ // TODO: move to regular expressions once we clarify the need to support
|
||||
// ancient standard library implementations.
|
||||
if (x.empty()) return {};
|
||||
|
||||
static constexpr const char valid_characters[] = "gfx0123456789,";
|
||||
|
||||
if (x.find_first_not_of(valid_characters) != std::string::npos) {
|
||||
throw std::runtime_error{"Invalid target string: " + x};
|
||||
}
|
||||
|
||||
std::vector<std::string> r;
|
||||
|
||||
auto it = x.cbegin();
|
||||
do {
|
||||
auto it1 = std::find(it, x.cend(), ',');
|
||||
r.emplace_back(it, it1);
|
||||
|
||||
if (it1 == x.cend()) break;
|
||||
|
||||
it = ++it1;
|
||||
} while (true);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void validate_targets(const std::vector<std::string>& x)
|
||||
{
|
||||
assert(!x.empty());
|
||||
|
||||
for (auto&& t : x) {
|
||||
static const std::string digits{"0123456789"};
|
||||
static const std::string pre{"gfx"};
|
||||
|
||||
if (t.find(pre) != 0 ||
|
||||
t.find_first_not_of(digits, pre.size()) != std::string::npos) {
|
||||
throw std::runtime_error{"Invalid target: " + t};
|
||||
}
|
||||
|
||||
if (amdgpu_targets().find(t) == amdgpu_targets().cend()) {
|
||||
std::cerr << "Warning: target " << t
|
||||
<< " has not been validated yet; it may be invalid."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // Namespace hip_impl.
|
||||
@@ -0,0 +1,56 @@
|
||||
#include "lpl.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace clara;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try {
|
||||
if (!hipcc_and_lpl_colocated()) {
|
||||
throw runtime_error{
|
||||
"The LPL executable and hipcc must be in the same directory."};
|
||||
}
|
||||
|
||||
bool help = false;
|
||||
string flags;
|
||||
string output;
|
||||
vector<string> sources;
|
||||
string targets;
|
||||
|
||||
auto cmd = cmdline_parser(help, sources, targets, flags, output);
|
||||
|
||||
const auto r = cmd.parse(Args{argc, argv});
|
||||
|
||||
if (!r) throw runtime_error{r.errorMessage()};
|
||||
|
||||
if (help) cout << cmd << endl;
|
||||
else {
|
||||
if (sources.empty()) throw runtime_error{"No inputs specified."};
|
||||
|
||||
auto tmp = tokenize_targets(targets);
|
||||
if (tmp.empty()) {
|
||||
tmp.assign(amdgpu_targets().cbegin(), amdgpu_targets().cend());
|
||||
}
|
||||
else validate_targets(tmp);
|
||||
|
||||
if (output.empty()) for (auto&& x : tmp) output += x;
|
||||
|
||||
generate_fat_binary(sources, tmp, flags, output);
|
||||
}
|
||||
}
|
||||
catch (const exception& ex) {
|
||||
cerr << ex.what() << endl;
|
||||
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,181 @@
|
||||
#pragma once
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
#include "clara/clara.hpp"
|
||||
#include "pstreams/pstream.h"
|
||||
#include "../src/elfio/elfio.hpp"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
const std::string& kernel_section()
|
||||
{
|
||||
static const std::string r{".kernel"};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& path_to_self()
|
||||
{
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
|
||||
static std::string r;
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
using N = decltype(readlink(self, &r.front(), r.size()));
|
||||
|
||||
constexpr decltype(r.size()) max_path_sz{PATH_MAX};
|
||||
N read_cnt;
|
||||
do {
|
||||
r.resize(std::max(2 * r.size(), max_path_sz));
|
||||
read_cnt = readlink(self, &r.front(), r.size());
|
||||
} while (read_cnt == -1 && r.size() < r.max_size());
|
||||
|
||||
r.resize(std::max(read_cnt, N{0}));
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
const std::string& path_to_hipcc()
|
||||
{
|
||||
assert(!path_to_self().empty());
|
||||
|
||||
static const auto r = path_to_self().substr(
|
||||
0, path_to_self().find_last_of('/')) += "/hipcc";
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string make_hipcc_call(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& targets,
|
||||
const std::string& flags,
|
||||
const std::string& hipcc_output)
|
||||
{
|
||||
assert(!sources.empty() && !targets.empty() && !hipcc_output.empty());
|
||||
|
||||
std::string r{path_to_hipcc() + ' '};
|
||||
|
||||
for (auto&& x : sources) r += x + ' ';
|
||||
r += "-o " + hipcc_output + ' ';
|
||||
for (auto&& x : targets) r += "--amdgpu-target=" + x + ' ';
|
||||
r += flags + " -fPIC -shared";
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void copy_kernel_section_to_fat_binary(
|
||||
const std::string& tmp, const std::string& output)
|
||||
{
|
||||
ELFIO::elfio reader;
|
||||
if (!reader.load(tmp)) {
|
||||
throw std::runtime_error{
|
||||
"The result of the compilation is inaccessible."};
|
||||
}
|
||||
|
||||
const auto it = std::find_if(
|
||||
reader.sections.begin(),
|
||||
reader.sections.end(),
|
||||
[](const ELFIO::section* x) {
|
||||
return x->get_name() == kernel_section();
|
||||
});
|
||||
|
||||
std::ofstream out{output + fat_binary_extension()};
|
||||
|
||||
if (it == reader.sections.end()) {
|
||||
std::cerr << "Warning: no kernels were generated; fat binary shall "
|
||||
"be empty." << std::endl;
|
||||
}
|
||||
else {
|
||||
std::copy_n(
|
||||
(*it)->get_data(),
|
||||
(*it)->get_size(),
|
||||
std::ostreambuf_iterator<char>{out});
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void generate_fat_binary(
|
||||
const std::vector<std::string>& sources,
|
||||
const std::vector<std::string>& targets,
|
||||
const std::string& flags,
|
||||
const std::string& output)
|
||||
{
|
||||
static const auto d = [](const std::string* f) { remove(f->c_str()); };
|
||||
|
||||
std::unique_ptr<const std::string, decltype(d)> tmp{&output, d};
|
||||
|
||||
redi::ipstream hipcc{
|
||||
make_hipcc_call(sources, targets, flags, *tmp),
|
||||
redi::pstream::pstderr};
|
||||
|
||||
if (!hipcc.is_open()) {
|
||||
throw std::runtime_error{"Compiler invocation failed."};
|
||||
}
|
||||
|
||||
std::string log;
|
||||
while (std::getline(hipcc, log)) std::cout << log << '\n';
|
||||
|
||||
hipcc.close();
|
||||
|
||||
if (hipcc.rdbuf()->exited() &&
|
||||
hipcc.rdbuf()->status() != EXIT_SUCCESS) {
|
||||
throw std::runtime_error{"Compilation failed."};
|
||||
}
|
||||
|
||||
copy_kernel_section_to_fat_binary(*tmp, output);
|
||||
}
|
||||
|
||||
inline
|
||||
bool hipcc_and_lpl_colocated()
|
||||
{
|
||||
if (path_to_self().empty()) return false;
|
||||
|
||||
return file_exists(path_to_hipcc());
|
||||
}
|
||||
|
||||
inline
|
||||
clara::Parser cmdline_parser(
|
||||
bool& help,
|
||||
std::vector<std::string>& sources,
|
||||
std::string& targets,
|
||||
std::string& flags,
|
||||
std::string& output)
|
||||
{
|
||||
return
|
||||
clara::Opt{flags, "\"-v -DMACRO etc.\""}
|
||||
["-f"]["--flags"](
|
||||
"flags for compilation; must be valid for hipcc.") |
|
||||
clara::Help{help} |
|
||||
clara::Opt{output, "filename"}
|
||||
["-o"]["--output"](
|
||||
"name of fat-binary output file; the binary format of the "
|
||||
"file is documented at: https://reviews.llvm.org/D13909.") |
|
||||
clara::Arg{sources, "a.cpp b.cpp etc."}(
|
||||
"inputs for compilation; must contain valid C++ code.") |
|
||||
clara::Opt{targets, "gfx803,gfx900 etc."}
|
||||
["-t"]["--targets"](
|
||||
"targets for AMDGPU lowering; must be included in the set "
|
||||
"of processors with ROCm support from "
|
||||
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
|
||||
}
|
||||
}
|
||||
File diff ditekan karena terlalu besar
Load Diff
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define fileName "vcpy_kernel.code.adipose"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
@@ -72,7 +72,6 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
@@ -30,7 +30,7 @@ THE SOFTWARE.
|
||||
#define LEN 64
|
||||
#define SIZE LEN<<2
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define fileName "vcpy_kernel.code.adipose"
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
#define HIP_CHECK(status) \
|
||||
@@ -68,7 +68,6 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
|
||||
extern "C" __global__ void hello_world(float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx];
|
||||
|
||||
@@ -30,7 +30,7 @@ THE SOFTWARE.
|
||||
#define LEN 64
|
||||
#define SIZE LEN*sizeof(float)
|
||||
|
||||
#define fileName "vcpy_kernel.code"
|
||||
#define fileName "vcpy_kernel.code.adipose"
|
||||
float myDeviceGlobal;
|
||||
float myDeviceGlobalArray[16];
|
||||
#define HIP_CHECK(cmd) \
|
||||
@@ -80,7 +80,6 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6]; // genco path + wrapper-gen pass use hidden arguments.
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
@@ -111,7 +110,7 @@ int main(){
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
{
|
||||
{
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
@@ -136,7 +135,7 @@ int main(){
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
{
|
||||
hipFunction_t Function;
|
||||
HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
|
||||
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
|
||||
|
||||
@@ -27,13 +27,13 @@ THE SOFTWARE.
|
||||
extern float myDeviceGlobal;
|
||||
extern float myDeviceGlobalArray[16];;
|
||||
|
||||
extern "C" __global__ void hello_world(hipLaunchParm lp, const float *a, float *b)
|
||||
extern "C" __global__ void hello_world(const float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx];
|
||||
}
|
||||
|
||||
extern "C" __global__ void test_globals(hipLaunchParm lp, const float *a, float *b)
|
||||
extern "C" __global__ void test_globals(const float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
b[tx] = a[tx] + myDeviceGlobal+ myDeviceGlobalArray[tx%ARRAY_SIZE] ;
|
||||
|
||||
@@ -2,22 +2,26 @@
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
|
||||
{
|
||||
static constexpr const char prefix[] = "hcc-amdgcn--amdhsa-gfx";
|
||||
static constexpr std::size_t prefix_sz = sizeof(prefix) - 1;
|
||||
static constexpr size_t prefix_sz = sizeof(prefix) - 1;
|
||||
|
||||
hsa_isa_t r = {};
|
||||
|
||||
auto idx = triple.find(prefix);
|
||||
|
||||
if (idx != std::string::npos) {
|
||||
if (idx != string::npos) {
|
||||
idx += prefix_sz;
|
||||
std::string tmp = "AMD:AMDGPU";
|
||||
string tmp = "AMD:AMDGPU";
|
||||
while (idx != triple.size()) {
|
||||
tmp.push_back(':');
|
||||
tmp.push_back(triple[idx++]);
|
||||
@@ -33,7 +37,31 @@ hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
|
||||
constexpr const char hip_impl::Bundled_code_header::magic_string_[];
|
||||
|
||||
// CREATORS
|
||||
hip_impl::Bundled_code_header::Bundled_code_header(
|
||||
const std::vector<std::uint8_t>& x)
|
||||
hip_impl::Bundled_code_header::Bundled_code_header(const vector<char>& x)
|
||||
: Bundled_code_header{x.cbegin(), x.cend()}
|
||||
{}
|
||||
{}
|
||||
|
||||
hip_impl::Bundled_code_header::Bundled_code_header(const void* p)
|
||||
{ // This is a pretty terrible interface, useful only because
|
||||
// hipLoadModuleData is so poorly specified (for no fault of its own).
|
||||
if (!p) return;
|
||||
|
||||
auto ph = static_cast<const Header_*>(p);
|
||||
|
||||
if (!equal(
|
||||
magic_string_,
|
||||
magic_string_ + magic_string_sz_,
|
||||
ph->bundler_magic_string_)) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header);
|
||||
auto pb = static_cast<const char*>(p) + sizeof(Header_);
|
||||
auto n = ph->bundle_cnt_;
|
||||
while (n--) {
|
||||
sz += reinterpret_cast<const Bundled_code::Header*>(pb)->bundle_sz;
|
||||
pb += sizeof(Bundled_code::Header);
|
||||
}
|
||||
|
||||
read(static_cast<const char*>(p), static_cast<const char*>(p) + sz, *this);
|
||||
}
|
||||
@@ -780,7 +780,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
|
||||
array[0]->depth = extent.depth;
|
||||
array[0]->desc = *desc;
|
||||
array[0]->isDrv = false;
|
||||
array[0]->textureType = hipTextureType3D;
|
||||
array[0]->textureType = hipTextureType3D;
|
||||
void ** ptr = &array[0]->data;
|
||||
|
||||
if (ctx) {
|
||||
@@ -1553,6 +1553,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template<
|
||||
|
||||
@@ -46,7 +46,7 @@ THE SOFTWARE.
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
|
||||
//TODO Use Pool APIs from HCC to get memory regions.
|
||||
|
||||
using namespace ELFIO;
|
||||
@@ -96,23 +96,6 @@ if (hsaStatus != HSA_STATUS_SUCCESS) {\
|
||||
return ihipLogStatus(hipStatus);\
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
{
|
||||
HIP_INIT_API(module, fname);
|
||||
|
||||
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
ifstream file{fname};
|
||||
|
||||
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
vector<char> tmp{
|
||||
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
|
||||
return hipModuleLoadData(module, tmp.data());
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t hmod)
|
||||
{
|
||||
HIP_INIT_API(hmod);
|
||||
@@ -473,6 +456,29 @@ namespace
|
||||
|
||||
return string{s, s + sz};
|
||||
}
|
||||
|
||||
string code_object_blob_for_agent(
|
||||
const void* maybe_bundled_code, hsa_agent_t agent)
|
||||
{
|
||||
if (!maybe_bundled_code) return {};
|
||||
|
||||
Bundled_code_header tmp{maybe_bundled_code};
|
||||
|
||||
if (!valid(tmp)) return {};
|
||||
|
||||
const auto agent_isa = isa(agent);
|
||||
|
||||
const auto it = find_if(
|
||||
bundles(tmp).cbegin(),
|
||||
bundles(tmp).cend(),
|
||||
[=](const Bundled_code& x) {
|
||||
return agent_isa == triple_to_hsa_isa(x.triple);;
|
||||
});
|
||||
|
||||
if (it == bundles(tmp).cend()) return {};
|
||||
|
||||
return string{it->blob.cbegin(), it->blob.cend()};
|
||||
}
|
||||
} // Anonymous namespace, internal linkage.
|
||||
|
||||
hipError_t ihipModuleGetFunction(
|
||||
@@ -526,6 +532,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
return ihipLogStatus(r);
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
{
|
||||
HIP_INIT_API(module, fname);
|
||||
|
||||
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
ifstream file{fname};
|
||||
|
||||
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
vector<char> tmp{
|
||||
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
|
||||
return hipModuleLoadData(module, tmp.data());
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
HIP_INIT_API(module, image);
|
||||
@@ -543,8 +565,12 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
nullptr,
|
||||
&(*module)->executable);
|
||||
|
||||
auto tmp = code_object_blob_for_agent(image, this_agent());
|
||||
|
||||
(*module)->executable = hip_impl::load_executable(
|
||||
read_elf_file_as_string(image), (*module)->executable, this_agent());
|
||||
tmp.empty() ? read_elf_file_as_string(image) : tmp,
|
||||
(*module)->executable,
|
||||
this_agent());
|
||||
|
||||
return ihipLogStatus(
|
||||
(*module)->executable.handle ? hipSuccess : hipErrorUnknown);
|
||||
|
||||
@@ -24,8 +24,27 @@ THE SOFTWARE.
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
inline
|
||||
constexpr
|
||||
bool operator==(hsa_isa_t x, hsa_isa_t y)
|
||||
{
|
||||
return x.handle == y.handle;
|
||||
}
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<hsa_isa_t> {
|
||||
size_t operator()(hsa_isa_t x) const
|
||||
{
|
||||
return hash<decltype(x.handle)>{}(x.handle);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
@@ -57,6 +76,19 @@ namespace hip_impl
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_isa_t isa(hsa_agent_t x)
|
||||
{
|
||||
hsa_isa_t r = {};
|
||||
hsa_agent_iterate_isas(x, [](hsa_isa_t i, void* o) {
|
||||
*static_cast<hsa_isa_t*>(o) = i; // Pick the first.
|
||||
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint64_t kernel_object(hsa_executable_symbol_t x)
|
||||
{
|
||||
|
||||
@@ -30,34 +30,16 @@ using namespace ELFIO;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<hsa_isa_t> {
|
||||
size_t operator()(hsa_isa_t x) const
|
||||
{
|
||||
return hash<decltype(x.handle)>{}(x.handle);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
inline
|
||||
constexpr
|
||||
bool operator==(hsa_isa_t x, hsa_isa_t y)
|
||||
{
|
||||
return x.handle == y.handle;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct Symbol {
|
||||
std::string name;
|
||||
string name;
|
||||
ELFIO::Elf64_Addr value = 0;
|
||||
ELFIO::Elf_Xword size = 0;
|
||||
ELFIO::Elf_Half sect_idx = 0;
|
||||
std::uint8_t bind = 0;
|
||||
std::uint8_t type = 0;
|
||||
std::uint8_t other = 0;
|
||||
Elf_Xword size = 0;
|
||||
Elf_Half sect_idx = 0;
|
||||
uint8_t bind = 0;
|
||||
uint8_t type = 0;
|
||||
uint8_t other = 0;
|
||||
};
|
||||
|
||||
inline
|
||||
@@ -184,7 +166,7 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
vector<uint8_t> code_object_blob_for_process()
|
||||
vector<char> code_object_blob_for_process()
|
||||
{
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
static constexpr const char kernel_section[] = ".kernel";
|
||||
@@ -199,7 +181,7 @@ namespace
|
||||
return x->get_name() == kernel_section;
|
||||
});
|
||||
|
||||
vector<uint8_t> r;
|
||||
vector<char> r;
|
||||
if (kernels) {
|
||||
r.insert(
|
||||
r.end(),
|
||||
@@ -210,13 +192,13 @@ namespace
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<hsa_isa_t, vector<vector<uint8_t>>>& code_object_blobs()
|
||||
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs()
|
||||
{
|
||||
static unordered_map<hsa_isa_t, vector<vector<uint8_t>>> r;
|
||||
static unordered_map<hsa_isa_t, vector<vector<char>>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
static vector<vector<uint8_t>> blobs{
|
||||
static vector<vector<char>> blobs{
|
||||
code_object_blob_for_process()};
|
||||
|
||||
dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) {
|
||||
@@ -480,7 +462,7 @@ namespace hip_impl
|
||||
|
||||
const auto code_object_dynsym =
|
||||
find_section_if(reader, [](const ELFIO::section* x) {
|
||||
return x->get_type() == SHT_DYNSYM;
|
||||
return x->get_type() == SHT_DYNSYM;
|
||||
});
|
||||
|
||||
associate_code_object_symbols_with_host_allocation(
|
||||
|
||||
@@ -69,7 +69,6 @@ void MemcpyFunction::load(const char *fileName, const char *functionName)
|
||||
void MemcpyFunction::launch(int * dst, const int * src, size_t numElements, hipStream_t s)
|
||||
{
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
int* _dst;
|
||||
const int* _src;
|
||||
size_t _numElements;
|
||||
|
||||
Reference in New Issue
Block a user