Revert "Rely on code object metadat for kernarg arguments alignof and sizeof."
This reverts commit fe1e963299.
Этот коммит содержится в:
@@ -33,7 +33,6 @@ THE SOFTWARE.
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
@@ -57,9 +56,7 @@ template <
|
||||
typename... Ts,
|
||||
typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
|
||||
inline std::vector<std::uint8_t> make_kernarg(
|
||||
const std::tuple<Ts...>&,
|
||||
const std::vector<std::pair<std::size_t, std::size_t>>&,
|
||||
std::vector<std::uint8_t> kernarg) {
|
||||
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>&) {
|
||||
return kernarg;
|
||||
}
|
||||
|
||||
@@ -68,9 +65,7 @@ template <
|
||||
typename... Ts,
|
||||
typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
|
||||
inline std::vector<std::uint8_t> make_kernarg(
|
||||
const std::tuple<Ts...>& formals,
|
||||
const std::vector<std::pair<std::size_t, std::size_t>>& size_align,
|
||||
std::vector<std::uint8_t> kernarg) {
|
||||
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>& formals) {
|
||||
using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
|
||||
|
||||
static_assert(
|
||||
@@ -85,42 +80,24 @@ inline std::vector<std::uint8_t> make_kernarg(
|
||||
#endif
|
||||
|
||||
kernarg.resize(round_up_to_next_multiple_nonnegative(
|
||||
kernarg.size(), size_align[n].second) +
|
||||
size_align[n].first);
|
||||
kernarg.size(), alignof(T)) + sizeof(T));
|
||||
|
||||
std::memcpy(
|
||||
kernarg.data() + kernarg.size() - size_align[n].first,
|
||||
&std::get<n>(formals),
|
||||
size_align[n].first);
|
||||
new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get<n>(formals)};
|
||||
|
||||
return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
|
||||
return make_kernarg<n + 1>(std::move(kernarg), formals);
|
||||
}
|
||||
|
||||
template <typename... Formals, typename... Actuals>
|
||||
inline std::vector<std::uint8_t> make_kernarg(
|
||||
void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
|
||||
void (*)(Formals...), std::tuple<Actuals...> actuals) {
|
||||
static_assert(sizeof...(Formals) == sizeof...(Actuals),
|
||||
"The count of formal arguments must match the count of actuals.");
|
||||
|
||||
const auto it = function_names().find(
|
||||
reinterpret_cast<std::uintptr_t>(kernel));
|
||||
|
||||
if (it == function_names().cend()) {
|
||||
throw std::runtime_error{"Undefined __global__ function."};
|
||||
}
|
||||
|
||||
const auto it1 = kernargs().find(it->second);
|
||||
|
||||
if (it1 == kernargs().end()) {
|
||||
throw std::runtime_error{
|
||||
"Missing metadata for __global__ function: " + it->second};
|
||||
}
|
||||
|
||||
std::tuple<Formals...> to_formals{std::move(actuals)};
|
||||
std::vector<std::uint8_t> kernarg;
|
||||
kernarg.reserve(sizeof(to_formals));
|
||||
|
||||
return make_kernarg<0>(to_formals, it1->second, std::move(kernarg));
|
||||
return make_kernarg<0>(std::move(kernarg), to_formals);
|
||||
}
|
||||
|
||||
void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks,
|
||||
|
||||
@@ -99,8 +99,6 @@ const std::unordered_map<std::uintptr_t, std::vector<std::pair<hsa_agent_t, Kern
|
||||
functions(bool rebuild = false);
|
||||
const std::unordered_map<std::uintptr_t, std::string>& function_names(bool rebuild = false);
|
||||
std::unordered_map<std::string, void*>& globals(bool rebuild = false);
|
||||
std::unordered_map<
|
||||
std::string, std::vector<std::pair<std::size_t, std::size_t>>>& kernargs();
|
||||
|
||||
hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable,
|
||||
hsa_agent_t agent);
|
||||
|
||||
@@ -312,8 +312,8 @@ const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels(bool rebui
|
||||
|
||||
void load_code_object_and_freeze_executable(
|
||||
const string& file, hsa_agent_t agent,
|
||||
hsa_executable_t executable) {
|
||||
// TODO: the following sequence is inefficient, should be refactored
|
||||
hsa_executable_t
|
||||
executable) { // TODO: the following sequence is inefficient, should be refactored
|
||||
// into a single load of the file and subsequent ELFIO
|
||||
// processing.
|
||||
static const auto cor_deleter = [](hsa_code_object_reader_t* p) {
|
||||
@@ -340,85 +340,6 @@ void load_code_object_and_freeze_executable(
|
||||
code_readers.push_back(move(tmp));
|
||||
}
|
||||
}
|
||||
|
||||
size_t parse_args(
|
||||
const string& metadata,
|
||||
size_t f,
|
||||
size_t l,
|
||||
vector<pair<size_t, size_t>>& size_align) {
|
||||
if (f == l) return f;
|
||||
|
||||
do {
|
||||
static constexpr size_t size_sz{5};
|
||||
f = metadata.find("Size:", f) + size_sz;
|
||||
|
||||
if (l <= f) return f;
|
||||
|
||||
auto size = strtoul(&metadata[f], nullptr, 10);
|
||||
|
||||
static constexpr size_t align_sz{6};
|
||||
f = metadata.find("Align:", f) + align_sz;
|
||||
|
||||
char* l{};
|
||||
auto align = strtoul(&metadata[f], &l, 10);
|
||||
|
||||
f += (l - &metadata[f]) + 1;
|
||||
|
||||
size_align.emplace_back(size, align);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
void read_kernarg_metadata(
|
||||
elfio& reader,
|
||||
unordered_map<string, vector<pair<size_t, size_t>>>& kernargs)
|
||||
{ // TODO: this is inefficient.
|
||||
auto it = find_section_if(
|
||||
reader, [](const section* x) { return x->get_type() == SHT_NOTE; });
|
||||
|
||||
if (!it) return;
|
||||
|
||||
const note_section_accessor acc{reader, it};
|
||||
for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) {
|
||||
ELFIO::Elf_Word type{};
|
||||
string name{};
|
||||
void* desc{};
|
||||
Elf_Word desc_size{};
|
||||
|
||||
acc.get_note(i, type, name, desc, desc_size);
|
||||
|
||||
if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA.
|
||||
|
||||
string tmp{
|
||||
static_cast<char*>(desc), static_cast<char*>(desc) + desc_size};
|
||||
|
||||
auto dx = tmp.find("Kernels:");
|
||||
|
||||
if (dx == string::npos) continue;
|
||||
|
||||
static constexpr decltype(tmp.size()) kernels_sz{8};
|
||||
dx += kernels_sz;
|
||||
|
||||
do {
|
||||
dx = tmp.find("Name:", dx);
|
||||
|
||||
if (dx == string::npos) break;
|
||||
|
||||
static constexpr decltype(tmp.size()) name_sz{5};
|
||||
dx = tmp.find_first_not_of(' ', dx + name_sz);
|
||||
|
||||
auto fn = tmp.substr(dx, tmp.find('\n', dx) - dx);
|
||||
dx += fn.size();
|
||||
|
||||
dx = tmp.find("Args:", dx);
|
||||
|
||||
if (dx == string::npos) break;
|
||||
|
||||
static constexpr decltype(tmp.size()) args_sz{5};
|
||||
dx = parse_args(
|
||||
tmp, dx + args_sz, tmp.find("CodeProps", dx), kernargs[fn]);
|
||||
} while (true);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace hip_impl {
|
||||
@@ -580,25 +501,6 @@ unordered_map<string, void*>& globals(bool rebuild) {
|
||||
return r;
|
||||
}
|
||||
|
||||
unordered_map<string, vector<pair<size_t, size_t>>>& kernargs() {
|
||||
static unordered_map<string, vector<pair<size_t, size_t>>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
for (auto&& blob : code_object_blobs()) {
|
||||
stringstream tmp{std::string{
|
||||
blob.second.front().cbegin(), blob.second.front().cend()}};
|
||||
|
||||
elfio reader;
|
||||
if (!reader.load(tmp)) continue;
|
||||
|
||||
read_kernarg_metadata(reader, r);
|
||||
}
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
hsa_executable_t load_executable(const string& file, hsa_executable_t executable,
|
||||
hsa_agent_t agent) {
|
||||
elfio reader;
|
||||
|
||||
Ссылка в новой задаче
Block a user