Merge pull request #841 from ROCm-Developer-Tools/rebase-pr827-to-master

Fix issues with prior iteration of 731

[ROCm/hip commit: 8c5e018e16]
このコミットが含まれているのは:
Maneesh Gupta
2019-01-01 20:22:57 +05:30
committed by GitHub
コミット 72fd96c3da
3個のファイルの変更163行の追加7行の削除
+36 -7
ファイルの表示
@@ -33,6 +33,7 @@ THE SOFTWARE.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <mutex>
@@ -56,7 +57,9 @@ template <
typename... Ts,
typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
inline std::vector<std::uint8_t> make_kernarg(
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>&) {
const std::tuple<Ts...>&,
const std::vector<std::pair<std::size_t, std::size_t>>&,
std::vector<std::uint8_t> kernarg) {
return kernarg;
}
@@ -65,7 +68,9 @@ template <
typename... Ts,
typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
inline std::vector<std::uint8_t> make_kernarg(
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>& formals) {
const std::tuple<Ts...>& formals,
const std::vector<std::pair<std::size_t, std::size_t>>& size_align,
std::vector<std::uint8_t> kernarg) {
using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
static_assert(
@@ -80,24 +85,48 @@ inline std::vector<std::uint8_t> make_kernarg(
#endif
kernarg.resize(round_up_to_next_multiple_nonnegative(
kernarg.size(), alignof(T)) + sizeof(T));
kernarg.size(), size_align[n].second) + size_align[n].first);
new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get<n>(formals)};
std::memcpy(
kernarg.data() + kernarg.size() - size_align[n].first,
&std::get<n>(formals),
size_align[n].first);
return make_kernarg<n + 1>(std::move(kernarg), formals);
return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
}
template <typename... Formals, typename... Actuals>
inline std::vector<std::uint8_t> make_kernarg(
void (*)(Formals...), std::tuple<Actuals...> actuals) {
void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
static_assert(sizeof...(Formals) == sizeof...(Actuals),
"The count of formal arguments must match the count of actuals.");
if (sizeof...(Formals) == 0) return {};
auto it = function_names().find(reinterpret_cast<std::uintptr_t>(kernel));
if (it == function_names().cend()) {
it =
function_names(true).find(reinterpret_cast<std::uintptr_t>(kernel));
if (it == function_names().cend()) {
throw std::runtime_error{"Undefined __global__ function."};
}
}
auto it1 = kernargs().find(it->second);
if (it1 == kernargs().end()) {
it1 = kernargs(true).find(it->second);
if (it1 == kernargs().end()) {
throw std::runtime_error{
"Missing metadata for __global__ function: " + it->second};
}
}
std::tuple<Formals...> to_formals{std::move(actuals)};
std::vector<std::uint8_t> kernarg;
kernarg.reserve(sizeof(to_formals));
return make_kernarg<0>(std::move(kernarg), to_formals);
return make_kernarg<0>(to_formals, it1->second, std::move(kernarg));
}
void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks,
+3
ファイルの表示
@@ -99,6 +99,9 @@ const std::unordered_map<std::uintptr_t, std::vector<std::pair<hsa_agent_t, Kern
functions(bool rebuild = false);
const std::unordered_map<std::uintptr_t, std::string>& function_names(bool rebuild = false);
std::unordered_map<std::string, void*>& globals(bool rebuild = false);
const std::unordered_map<
std::string, std::vector<std::pair<std::size_t, std::size_t>>>&
kernargs(bool rebuild = false);
hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable,
hsa_agent_t agent);
+124
ファイルの表示
@@ -340,6 +340,90 @@ void load_code_object_and_freeze_executable(
code_readers.push_back(move(tmp));
}
}
size_t parse_args(
const string& metadata,
size_t f,
size_t l,
vector<pair<size_t, size_t>>& size_align) {
if (f == l) return f;
if (!size_align.empty()) return l;
do {
static constexpr size_t size_sz{5};
f = metadata.find("Size:", f) + size_sz;
if (l <= f) return f;
auto size = strtoul(&metadata[f], nullptr, 10);
static constexpr size_t align_sz{6};
f = metadata.find("Align:", f) + align_sz;
char* l{};
auto align = strtoul(&metadata[f], &l, 10);
f += (l - &metadata[f]) + 1;
size_align.emplace_back(size, align);
} while (true);
}
void read_kernarg_metadata(
elfio& reader,
unordered_map<string, vector<pair<size_t, size_t>>>& kernargs)
{ // TODO: this is inefficient.
auto it = find_section_if(
reader, [](const section* x) { return x->get_type() == SHT_NOTE; });
if (!it) return;
const note_section_accessor acc{reader, it};
for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) {
ELFIO::Elf_Word type{};
string name{};
void* desc{};
Elf_Word desc_size{};
acc.get_note(i, type, name, desc, desc_size);
if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA.
string tmp{
static_cast<char*>(desc), static_cast<char*>(desc) + desc_size};
auto dx = tmp.find("Kernels:");
if (dx == string::npos) continue;
static constexpr decltype(tmp.size()) kernels_sz{8};
dx += kernels_sz;
do {
dx = tmp.find("Name:", dx);
if (dx == string::npos) break;
static constexpr decltype(tmp.size()) name_sz{5};
dx = tmp.find_first_not_of(" '", dx + name_sz);
auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx);
dx += fn.size();
auto dx1 = tmp.find("CodeProps", dx);
dx = tmp.find("Args:", dx);
if (dx1 < dx) {
dx = dx1;
continue;
}
if (dx == string::npos) break;
static constexpr decltype(tmp.size()) args_sz{5};
dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]);
} while (true);
}
}
} // namespace
namespace hip_impl {
@@ -454,6 +538,7 @@ const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& fu
// created previously
function_names(rebuild);
kernargs(rebuild);
kernels(rebuild);
globals(rebuild);
}
@@ -501,6 +586,45 @@ unordered_map<string, void*>& globals(bool rebuild) {
return r;
}
const unordered_map<string, vector<pair<size_t, size_t>>>& kernargs(
bool rebuild) {
static unordered_map<string, vector<pair<size_t, size_t>>> r;
static once_flag f;
static const auto build_map = [](decltype(r)& x) {
for (auto&& isa_blobs : code_object_blobs()) {
for (auto&& blob : isa_blobs.second) {
stringstream tmp{std::string{blob.cbegin(), blob.cend()}};
elfio reader;
if (!reader.load(tmp)) continue;
read_kernarg_metadata(reader, x);
}
}
};
call_once(f, []() { r.reserve(function_names().size()); build_map(r); });
if (rebuild) {
static mutex mtx;
thread_local static decltype(r) tmp;
{
lock_guard<mutex> lck{mtx};
tmp.insert(r.cbegin(), r.cend()); // Should use merge in C++17.
}
build_map(tmp);
lock_guard<mutex> lck{mtx};
r.insert(tmp.cbegin(), tmp.cend());
}
return r;
}
hsa_executable_t load_executable(const string& file, hsa_executable_t executable,
hsa_agent_t agent) {
elfio reader;