Merge pull request #288 from AlexVlx/feature_purge_genco

Purge hsagenco.sh

[ROCm/clr commit: 1190a9e5d0]
This commit is contained in:
Maneesh Gupta
2018-03-05 09:59:56 +05:30
committed by GitHub
melakukan 274c65457e
22 mengubah file dengan 4169 tambahan dan 97 penghapusan
+7
Melihat File
@@ -144,6 +144,13 @@ if (BUILD_HIPIFY_CLANG)
add_subdirectory(hipify-clang)
endif()
# Build LPL an CA (fat binary generation / fat binary decomposition tools) if
# platform is hcc; do this before the ugly hijacking of the compiler, since no
# HC code is involved.
if (HIP_PLATFORM STREQUAL "hcc")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
endif ()
# Build hip_hcc if platform is hcc
if(HIP_PLATFORM STREQUAL "hcc")
include_directories(${PROJECT_SOURCE_DIR}/include)
+2 -5
Melihat File
@@ -93,8 +93,6 @@ if ($HIP_PLATFORM eq "hcc") {
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";
$ROCM_TARGET=$ENV{'ROCM_TARGET'} // "gfx803";
# HCC* may be used to compile src/hip_hcc.o (and also feed the HIPCXXFLAGS below)
$HCC = "$HCC_HOME/bin/hcc";
$HCCFLAGS = "-hc -D__HIPCC__ -I$HCC_HOME/include ";
@@ -128,7 +126,7 @@ if ($HIP_PLATFORM eq "hcc") {
# Force -stdlib=libc++ on UB14.04
$HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`;
if (($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"")
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
or ($HOST_OSNAME eq "\"rhel\"" and $HOST_OSVER eq "\"7.4\"")) {
$HIPCXXFLAGS .= " -stdlib=libc++";
$setStdLib = 1;
@@ -203,7 +201,7 @@ if ($verbose & 0x4) {
# Handle code object generation
my $ISACMD="";
if($HIP_PLATFORM eq "hcc"){
$ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh ";
$ISACMD .= "$HIP_PATH/bin/lpl ";
if($ARGV[0] eq "--genco"){
foreach $isaarg (@ARGV[1..$#ARGV]){
$ISACMD .= " ";
@@ -218,7 +216,6 @@ if($HIP_PLATFORM eq "hcc"){
}
if(($HIP_PLATFORM eq "hcc")){
$EXPORT_LL=" ";
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
$ENV{HIP_HC_IR_FILE}="";
}
@@ -37,17 +37,16 @@ namespace hip_impl
hsa_isa_t triple_to_hsa_isa(const std::string& triple);
struct Bundled_code {
union {
union Header {
struct {
std::uint64_t offset;
std::uint64_t bundle_sz;
std::uint64_t triple_sz;
};
std::uint8_t cbuf[
sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
};
char cbuf[sizeof(offset) + sizeof(bundle_sz) + sizeof(triple_sz)];
} header;
std::string triple;
std::vector<std::uint8_t> blob;
std::vector<char> blob;
};
class Bundled_code_header {
@@ -57,14 +56,13 @@ namespace hip_impl
static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1;
// DATA
union {
union Header_ {
struct {
std::uint8_t bundler_magic_string_[magic_string_sz_];
char bundler_magic_string_[magic_string_sz_];
std::uint64_t bundle_cnt_;
};
std::uint8_t cbuf_[
sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
};
char cbuf_[sizeof(bundler_magic_string_) + sizeof(bundle_cnt_)];
} header_;
std::vector<Bundled_code> bundles_;
// FRIENDS - MANIPULATORS
@@ -78,22 +76,24 @@ namespace hip_impl
{
if (f == l) return false;
std::copy_n(f, sizeof(x.cbuf_), x.cbuf_);
std::copy_n(f, sizeof(x.header_.cbuf_), x.header_.cbuf_);
if (valid(x)) {
x.bundles_.resize(x.bundle_cnt_);
x.bundles_.resize(x.header_.bundle_cnt_);
auto it = f + sizeof(x.cbuf_);
auto it = f + sizeof(x.header_.cbuf_);
for (auto&& y : x.bundles_) {
std::copy_n(it, sizeof(y.cbuf), y.cbuf);
it += sizeof(y.cbuf);
std::copy_n(it, sizeof(y.header.cbuf), y.header.cbuf);
it += sizeof(y.header.cbuf);
y.triple.assign(it, it + y.triple_sz);
y.triple.assign(it, it + y.header.triple_sz);
std::copy_n(
f + y.offset, y.bundle_sz, std::back_inserter(y.blob));
f + y.header.offset,
y.header.bundle_sz,
std::back_inserter(y.blob));
it += y.triple_sz;
it += y.header.triple_sz;
}
return true;
@@ -103,7 +103,7 @@ namespace hip_impl
}
friend
inline
bool read(const std::vector<std::uint8_t>& blob, Bundled_code_header& x)
bool read(const std::vector<char>& blob, Bundled_code_header& x)
{
return read(blob.cbegin(), blob.cend(), x);
}
@@ -111,7 +111,7 @@ namespace hip_impl
inline
bool read(std::istream& is, Bundled_code_header& x)
{
return read(std::vector<std::uint8_t>{
return read(std::vector<char>{
std::istreambuf_iterator<char>{is},
std::istreambuf_iterator<char>{}},
x);
@@ -123,9 +123,9 @@ namespace hip_impl
bool valid(const Bundled_code_header& x)
{
return std::equal(
x.bundler_magic_string_,
x.bundler_magic_string_ + magic_string_sz_,
x.magic_string_);
magic_string_,
magic_string_ + magic_string_sz_,
x.header_.bundler_magic_string_);
}
friend
inline
@@ -139,7 +139,9 @@ namespace hip_impl
template<typename RandomAccessIterator>
Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l);
explicit
Bundled_code_header(const std::vector<std::uint8_t>& blob);
Bundled_code_header(const std::vector<char>& blob);
explicit
Bundled_code_header(const void* maybe_blob);
Bundled_code_header(const Bundled_code_header&) = default;
Bundled_code_header(Bundled_code_header&&) = default;
~Bundled_code_header() = default;
@@ -0,0 +1,32 @@
#-------------------------------------LPL--------------------------------------#
add_executable(lpl lpl.cpp)
set_target_properties(
lpl PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED ON
CXX_EXTENSIONS OFF
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
target_include_directories(lpl PUBLIC ${PROJECT_SOURCE_DIR}/src)
target_compile_options(lpl PUBLIC -Wall)
target_link_libraries(lpl PUBLIC pthread)
install(TARGETS lpl RUNTIME DESTINATION bin)
#-------------------------------------LPL--------------------------------------#
#-------------------------------------CA---------------------------------------#
add_executable(ca ca.cpp ${PROJECT_SOURCE_DIR}/src/code_object_bundle.cpp)
set_target_properties(
ca PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED ON
CXX_EXTENSIONS OFF
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
target_include_directories(ca SYSTEM PUBLIC ${HSA_PATH}/include)
target_include_directories(ca PUBLIC ${PROJECT_SOURCE_DIR}/src)
find_library(
hsart NAMES libhsa-runtime64.so libhsa-runtime64.so.1 HINTS ${HSA_PATH}/lib)
target_link_libraries(ca PUBLIC ${hsart})
target_compile_options(ca PUBLIC -Wall)
install(TARGETS ca RUNTIME DESTINATION bin)
#-------------------------------------CA---------------------------------------#
+49
Melihat File
@@ -0,0 +1,49 @@
#include "ca.hpp"
#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace clara;
using namespace hip_impl;
using namespace std;
int main(int argc, char** argv)
{
try {
bool help = false;
vector<string> inputs;
string targets;
auto cmd = cmdline_parser(help, inputs, targets);
const auto r = cmd.parse(Args{argc, argv});
if (!r) throw runtime_error{r.errorMessage()};
if (help) cout << cmd << endl;
else {
if (inputs.empty()) throw runtime_error{"No inputs specified."};
validate_inputs(inputs);
auto tmp = tokenize_targets(targets);
if (tmp.empty()) {
tmp.assign(amdgpu_targets().cbegin(), amdgpu_targets().cend());
}
else validate_targets(tmp);
extract_code_objects(inputs, tmp);
}
}
catch (const exception& ex) {
cerr << ex.what() << endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
+104
Melihat File
@@ -0,0 +1,104 @@
#pragma once
#include "common.hpp"
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
#include "clara/clara.hpp"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <stdexcept>
#include <string>
#include <vector>
namespace hip_impl
{
inline
clara::Parser cmdline_parser(
bool& help,
std::vector<std::string>& inputs,
std::string& targets)
{
return
clara::Help{help} |
clara::Arg{inputs, "a" + fat_binary_extension() + " etc."}(
"fat binaries which contain the code objects to be extracted; "
"the binary format of the file(s) is documented at: "
"https://reviews.llvm.org/D13909; "
"the code object format is documented at: "
"https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") |
clara::Opt{targets, "gfx803,gfx900 etc."}
["-t"]["--targets"](
"targets for which code objects are to be extracted from "
"the fat binary; must be included in the set of processors "
"with ROCm support from "
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
}
inline
std::string make_code_object_file_name(
const std::string& input, const std::string& target)
{
assert(!input.empty() && !target.empty());
auto r = input.substr(0, input.find(fat_binary_extension()));
r += '_' + target + code_object_extension();
return r;
}
inline
void extract_code_objects(
const std::vector<std::string>& inputs,
const std::vector<std::string>& targets)
{
for (auto&& input : inputs) {
std::ifstream tmp{input};
std::vector<char> bundle{
std::istreambuf_iterator<char>{tmp},
std::istreambuf_iterator<char>{}};
Bundled_code_header tmp1{bundle};
if (!valid(tmp1)) {
throw std::runtime_error{input + " is not a valid fat binary."};
}
for (auto&& target : targets) {
const auto it = std::find_if(
bundles(tmp1).cbegin(),
bundles(tmp1).cend(),
[&](const Bundled_code& x) {
return x.triple.find(target) != std::string::npos;
});
if (it == bundles(tmp1).cend()) {
std::cerr << "Warning: " << input
<< " does not contain code for the " << target
<< " target.";
continue;
}
std::ofstream out{make_code_object_file_name(input, target)};
std::copy_n(
it->blob.cbegin(),
it->blob.size(),
std::ostreambuf_iterator<char>{out});
}
}
}
inline
void validate_inputs(const std::vector<std::string>& inputs)
{
const auto it = std::find_if_not(
inputs.cbegin(), inputs.cend(), file_exists);
if (it != inputs.cend()) {
throw std::runtime_error{
"Non existent file " + *it + " passed as input."};
}
}
}
File diff ditekan karena terlalu besar Load Diff
+93
Melihat File
@@ -0,0 +1,93 @@
#pragma once
#include <algorithm>
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>
namespace hip_impl
{
inline
const std::unordered_set<std::string>& amdgpu_targets()
{ // The evolving list lives at:
// https://www.llvm.org/docs/AMDGPUUsage.html#processors.
static const std::unordered_set<std::string> r{
"gfx701", "gfx801", "gfx802", "gfx803", "gfx900"};
return r;
}
inline
const std::string& code_object_extension()
{
static const std::string r{".ffa"};
return r;
}
inline
const std::string& fat_binary_extension()
{
static const std::string r{".adipose"};
return r;
}
inline
bool file_exists(const std::string& path_to)
{
return static_cast<bool>(std::ifstream{path_to});
}
inline
std::vector<std::string> tokenize_targets(const std::string& x)
{ // TODO: move to regular expressions once we clarify the need to support
// ancient standard library implementations.
if (x.empty()) return {};
static constexpr const char valid_characters[] = "gfx0123456789,";
if (x.find_first_not_of(valid_characters) != std::string::npos) {
throw std::runtime_error{"Invalid target string: " + x};
}
std::vector<std::string> r;
auto it = x.cbegin();
do {
auto it1 = std::find(it, x.cend(), ',');
r.emplace_back(it, it1);
if (it1 == x.cend()) break;
it = ++it1;
} while (true);
return r;
}
inline
void validate_targets(const std::vector<std::string>& x)
{
assert(!x.empty());
for (auto&& t : x) {
static const std::string digits{"0123456789"};
static const std::string pre{"gfx"};
if (t.find(pre) != 0 ||
t.find_first_not_of(digits, pre.size()) != std::string::npos) {
throw std::runtime_error{"Invalid target: " + t};
}
if (amdgpu_targets().find(t) == amdgpu_targets().cend()) {
std::cerr << "Warning: target " << t
<< " has not been validated yet; it may be invalid."
<< std::endl;
}
}
}
} // Namespace hip_impl.
+56
Melihat File
@@ -0,0 +1,56 @@
#include "lpl.hpp"
#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace clara;
using namespace hip_impl;
using namespace std;
int main(int argc, char** argv)
{
try {
if (!hipcc_and_lpl_colocated()) {
throw runtime_error{
"The LPL executable and hipcc must be in the same directory."};
}
bool help = false;
string flags;
string output;
vector<string> sources;
string targets;
auto cmd = cmdline_parser(help, sources, targets, flags, output);
const auto r = cmd.parse(Args{argc, argv});
if (!r) throw runtime_error{r.errorMessage()};
if (help) cout << cmd << endl;
else {
if (sources.empty()) throw runtime_error{"No inputs specified."};
auto tmp = tokenize_targets(targets);
if (tmp.empty()) {
tmp.assign(amdgpu_targets().cbegin(), amdgpu_targets().cend());
}
else validate_targets(tmp);
if (output.empty()) for (auto&& x : tmp) output += x;
generate_fat_binary(sources, tmp, flags, output);
}
}
catch (const exception& ex) {
cerr << ex.what() << endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
+181
Melihat File
@@ -0,0 +1,181 @@
#pragma once
#include "common.hpp"
#include "clara/clara.hpp"
#include "pstreams/pstream.h"
#include "../src/elfio/elfio.hpp"
#include <unistd.h>
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdlib>
#include <mutex>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
namespace hip_impl
{
inline
const std::string& kernel_section()
{
static const std::string r{".kernel"};
return r;
}
inline
const std::string& path_to_self()
{
static constexpr const char self[] = "/proc/self/exe";
static std::string r;
static std::once_flag f;
std::call_once(f, []() {
using N = decltype(readlink(self, &r.front(), r.size()));
constexpr decltype(r.size()) max_path_sz{PATH_MAX};
N read_cnt;
do {
r.resize(std::max(2 * r.size(), max_path_sz));
read_cnt = readlink(self, &r.front(), r.size());
} while (read_cnt == -1 && r.size() < r.max_size());
r.resize(std::max(read_cnt, N{0}));
});
return r;
}
inline
const std::string& path_to_hipcc()
{
assert(!path_to_self().empty());
static const auto r = path_to_self().substr(
0, path_to_self().find_last_of('/')) += "/hipcc";
return r;
}
inline
std::string make_hipcc_call(
const std::vector<std::string>& sources,
const std::vector<std::string>& targets,
const std::string& flags,
const std::string& hipcc_output)
{
assert(!sources.empty() && !targets.empty() && !hipcc_output.empty());
std::string r{path_to_hipcc() + ' '};
for (auto&& x : sources) r += x + ' ';
r += "-o " + hipcc_output + ' ';
for (auto&& x : targets) r += "--amdgpu-target=" + x + ' ';
r += flags + " -fPIC -shared";
return r;
}
inline
void copy_kernel_section_to_fat_binary(
const std::string& tmp, const std::string& output)
{
ELFIO::elfio reader;
if (!reader.load(tmp)) {
throw std::runtime_error{
"The result of the compilation is inaccessible."};
}
const auto it = std::find_if(
reader.sections.begin(),
reader.sections.end(),
[](const ELFIO::section* x) {
return x->get_name() == kernel_section();
});
std::ofstream out{output + fat_binary_extension()};
if (it == reader.sections.end()) {
std::cerr << "Warning: no kernels were generated; fat binary shall "
"be empty." << std::endl;
}
else {
std::copy_n(
(*it)->get_data(),
(*it)->get_size(),
std::ostreambuf_iterator<char>{out});
}
}
inline
void generate_fat_binary(
const std::vector<std::string>& sources,
const std::vector<std::string>& targets,
const std::string& flags,
const std::string& output)
{
static const auto d = [](const std::string* f) { remove(f->c_str()); };
std::unique_ptr<const std::string, decltype(d)> tmp{&output, d};
redi::ipstream hipcc{
make_hipcc_call(sources, targets, flags, *tmp),
redi::pstream::pstderr};
if (!hipcc.is_open()) {
throw std::runtime_error{"Compiler invocation failed."};
}
std::string log;
while (std::getline(hipcc, log)) std::cout << log << '\n';
hipcc.close();
if (hipcc.rdbuf()->exited() &&
hipcc.rdbuf()->status() != EXIT_SUCCESS) {
throw std::runtime_error{"Compilation failed."};
}
copy_kernel_section_to_fat_binary(*tmp, output);
}
inline
bool hipcc_and_lpl_colocated()
{
if (path_to_self().empty()) return false;
return file_exists(path_to_hipcc());
}
inline
clara::Parser cmdline_parser(
bool& help,
std::vector<std::string>& sources,
std::string& targets,
std::string& flags,
std::string& output)
{
return
clara::Opt{flags, "\"-v -DMACRO etc.\""}
["-f"]["--flags"](
"flags for compilation; must be valid for hipcc.") |
clara::Help{help} |
clara::Opt{output, "filename"}
["-o"]["--output"](
"name of fat-binary output file; the binary format of the "
"file is documented at: https://reviews.llvm.org/D13909.") |
clara::Arg{sources, "a.cpp b.cpp etc."}(
"inputs for compilation; must contain valid C++ code.") |
clara::Opt{targets, "gfx803,gfx900 etc."}
["-t"]["--targets"](
"targets for AMDGPU lowering; must be included in the set "
"of processors with ROCm support from "
"https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
}
}
File diff ditekan karena terlalu besar Load Diff
@@ -35,7 +35,7 @@ THE SOFTWARE.
#define LEN 64
#define SIZE LEN<<2
#define fileName "vcpy_kernel.code"
#define fileName "vcpy_kernel.code.adipose"
#define kernel_name "hello_world"
#define HIP_CHECK(status) \
@@ -72,7 +72,6 @@ int main(){
uint32_t one = 1;
struct {
uint32_t _hidden[6];
void * _Ad;
void * _Bd;
} args;
@@ -30,7 +30,7 @@ THE SOFTWARE.
#define LEN 64
#define SIZE LEN<<2
#define fileName "vcpy_kernel.code"
#define fileName "vcpy_kernel.code.adipose"
#define kernel_name "hello_world"
#define HIP_CHECK(status) \
@@ -68,7 +68,6 @@ int main(){
uint32_t one = 1;
struct {
uint32_t _hidden[6];
void * _Ad;
void * _Bd;
} args;
@@ -22,7 +22,7 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
extern "C" __global__ void hello_world(float *a, float *b)
{
int tx = hipThreadIdx_x;
b[tx] = a[tx];
@@ -30,7 +30,7 @@ THE SOFTWARE.
#define LEN 64
#define SIZE LEN*sizeof(float)
#define fileName "vcpy_kernel.code"
#define fileName "vcpy_kernel.code.adipose"
float myDeviceGlobal;
float myDeviceGlobalArray[16];
#define HIP_CHECK(cmd) \
@@ -80,7 +80,6 @@ int main(){
uint32_t one = 1;
struct {
uint32_t _hidden[6]; // genco path + wrapper-gen pass use hidden arguments.
void * _Ad;
void * _Bd;
} args;
@@ -111,7 +110,7 @@ int main(){
HIP_LAUNCH_PARAM_END
};
{
{
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
@@ -136,7 +135,7 @@ int main(){
};
}
{
{
hipFunction_t Function;
HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals"));
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
@@ -27,13 +27,13 @@ THE SOFTWARE.
extern float myDeviceGlobal;
extern float myDeviceGlobalArray[16];;
extern "C" __global__ void hello_world(hipLaunchParm lp, const float *a, float *b)
extern "C" __global__ void hello_world(const float *a, float *b)
{
int tx = hipThreadIdx_x;
b[tx] = a[tx];
}
extern "C" __global__ void test_globals(hipLaunchParm lp, const float *a, float *b)
extern "C" __global__ void test_globals(const float *a, float *b)
{
int tx = hipThreadIdx_x;
b[tx] = a[tx] + myDeviceGlobal+ myDeviceGlobalArray[tx%ARRAY_SIZE] ;
@@ -2,22 +2,26 @@
#include <hsa/hsa.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
using namespace std;
hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
{
static constexpr const char prefix[] = "hcc-amdgcn--amdhsa-gfx";
static constexpr std::size_t prefix_sz = sizeof(prefix) - 1;
static constexpr size_t prefix_sz = sizeof(prefix) - 1;
hsa_isa_t r = {};
auto idx = triple.find(prefix);
if (idx != std::string::npos) {
if (idx != string::npos) {
idx += prefix_sz;
std::string tmp = "AMD:AMDGPU";
string tmp = "AMD:AMDGPU";
while (idx != triple.size()) {
tmp.push_back(':');
tmp.push_back(triple[idx++]);
@@ -33,7 +37,31 @@ hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple)
constexpr const char hip_impl::Bundled_code_header::magic_string_[];
// CREATORS
hip_impl::Bundled_code_header::Bundled_code_header(
const std::vector<std::uint8_t>& x)
hip_impl::Bundled_code_header::Bundled_code_header(const vector<char>& x)
: Bundled_code_header{x.cbegin(), x.cend()}
{}
{}
hip_impl::Bundled_code_header::Bundled_code_header(const void* p)
{ // This is a pretty terrible interface, useful only because
// hipLoadModuleData is so poorly specified (for no fault of its own).
if (!p) return;
auto ph = static_cast<const Header_*>(p);
if (!equal(
magic_string_,
magic_string_ + magic_string_sz_,
ph->bundler_magic_string_)) {
return;
}
size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header);
auto pb = static_cast<const char*>(p) + sizeof(Header_);
auto n = ph->bundle_cnt_;
while (n--) {
sz += reinterpret_cast<const Bundled_code::Header*>(pb)->bundle_sz;
pb += sizeof(Bundled_code::Header);
}
read(static_cast<const char*>(p), static_cast<const char*>(p) + sz, *this);
}
+2 -1
Melihat File
@@ -780,7 +780,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
array[0]->depth = extent.depth;
array[0]->desc = *desc;
array[0]->isDrv = false;
array[0]->textureType = hipTextureType3D;
array[0]->textureType = hipTextureType3D;
void ** ptr = &array[0]->data;
if (ctx) {
@@ -1553,6 +1553,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
}
return ihipLogStatus(e);
}
namespace
{
template<
+45 -19
Melihat File
@@ -46,7 +46,7 @@ THE SOFTWARE.
#include <unordered_map>
#include <utility>
#include <vector>
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
//TODO Use Pool APIs from HCC to get memory regions.
using namespace ELFIO;
@@ -96,23 +96,6 @@ if (hsaStatus != HSA_STATUS_SUCCESS) {\
return ihipLogStatus(hipStatus);\
}
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
{
HIP_INIT_API(module, fname);
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
ifstream file{fname};
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
vector<char> tmp{
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
return hipModuleLoadData(module, tmp.data());
}
hipError_t hipModuleUnload(hipModule_t hmod)
{
HIP_INIT_API(hmod);
@@ -473,6 +456,29 @@ namespace
return string{s, s + sz};
}
string code_object_blob_for_agent(
const void* maybe_bundled_code, hsa_agent_t agent)
{
if (!maybe_bundled_code) return {};
Bundled_code_header tmp{maybe_bundled_code};
if (!valid(tmp)) return {};
const auto agent_isa = isa(agent);
const auto it = find_if(
bundles(tmp).cbegin(),
bundles(tmp).cend(),
[=](const Bundled_code& x) {
return agent_isa == triple_to_hsa_isa(x.triple);;
});
if (it == bundles(tmp).cend()) return {};
return string{it->blob.cbegin(), it->blob.cend()};
}
} // Anonymous namespace, internal linkage.
hipError_t ihipModuleGetFunction(
@@ -526,6 +532,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
return ihipLogStatus(r);
}
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
{
HIP_INIT_API(module, fname);
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
ifstream file{fname};
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
vector<char> tmp{
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
return hipModuleLoadData(module, tmp.data());
}
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
{
HIP_INIT_API(module, image);
@@ -543,8 +565,12 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
nullptr,
&(*module)->executable);
auto tmp = code_object_blob_for_agent(image, this_agent());
(*module)->executable = hip_impl::load_executable(
read_elf_file_as_string(image), (*module)->executable, this_agent());
tmp.empty() ? read_elf_file_as_string(image) : tmp,
(*module)->executable,
this_agent());
return ihipLogStatus(
(*module)->executable.handle ? hipSuccess : hipErrorUnknown);
@@ -24,8 +24,27 @@ THE SOFTWARE.
#include <hsa/hsa.h>
#include <cstdint>
#include <functional>
#include <string>
inline
constexpr
bool operator==(hsa_isa_t x, hsa_isa_t y)
{
return x.handle == y.handle;
}
namespace std
{
template<>
struct hash<hsa_isa_t> {
size_t operator()(hsa_isa_t x) const
{
return hash<decltype(x.handle)>{}(x.handle);
}
};
}
namespace hip_impl
{
inline
@@ -57,6 +76,19 @@ namespace hip_impl
return r;
}
inline
hsa_isa_t isa(hsa_agent_t x)
{
hsa_isa_t r = {};
hsa_agent_iterate_isas(x, [](hsa_isa_t i, void* o) {
*static_cast<hsa_isa_t*>(o) = i; // Pick the first.
return HSA_STATUS_INFO_BREAK;
}, &r);
return r;
}
inline
std::uint64_t kernel_object(hsa_executable_symbol_t x)
{
+12 -30
Melihat File
@@ -30,34 +30,16 @@ using namespace ELFIO;
using namespace hip_impl;
using namespace std;
namespace std
{
template<>
struct hash<hsa_isa_t> {
size_t operator()(hsa_isa_t x) const
{
return hash<decltype(x.handle)>{}(x.handle);
}
};
}
inline
constexpr
bool operator==(hsa_isa_t x, hsa_isa_t y)
{
return x.handle == y.handle;
}
namespace
{
struct Symbol {
std::string name;
string name;
ELFIO::Elf64_Addr value = 0;
ELFIO::Elf_Xword size = 0;
ELFIO::Elf_Half sect_idx = 0;
std::uint8_t bind = 0;
std::uint8_t type = 0;
std::uint8_t other = 0;
Elf_Xword size = 0;
Elf_Half sect_idx = 0;
uint8_t bind = 0;
uint8_t type = 0;
uint8_t other = 0;
};
inline
@@ -184,7 +166,7 @@ namespace
}
}
vector<uint8_t> code_object_blob_for_process()
vector<char> code_object_blob_for_process()
{
static constexpr const char self[] = "/proc/self/exe";
static constexpr const char kernel_section[] = ".kernel";
@@ -199,7 +181,7 @@ namespace
return x->get_name() == kernel_section;
});
vector<uint8_t> r;
vector<char> r;
if (kernels) {
r.insert(
r.end(),
@@ -210,13 +192,13 @@ namespace
return r;
}
const unordered_map<hsa_isa_t, vector<vector<uint8_t>>>& code_object_blobs()
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs()
{
static unordered_map<hsa_isa_t, vector<vector<uint8_t>>> r;
static unordered_map<hsa_isa_t, vector<vector<char>>> r;
static once_flag f;
call_once(f, []() {
static vector<vector<uint8_t>> blobs{
static vector<vector<char>> blobs{
code_object_blob_for_process()};
dl_iterate_phdr([](dl_phdr_info* info, std::size_t, void*) {
@@ -480,7 +462,7 @@ namespace hip_impl
const auto code_object_dynsym =
find_section_if(reader, [](const ELFIO::section* x) {
return x->get_type() == SHT_DYNSYM;
return x->get_type() == SHT_DYNSYM;
});
associate_code_object_symbols_with_host_allocation(
@@ -69,7 +69,6 @@ void MemcpyFunction::load(const char *fileName, const char *functionName)
void MemcpyFunction::launch(int * dst, const int * src, size_t numElements, hipStream_t s)
{
struct {
uint32_t _hidden[6];
int* _dst;
const int* _src;
size_t _numElements;