load program state once per agent
This commit is contained in:
@@ -155,30 +155,17 @@ void hipLaunchKernelGGLImpl(
|
||||
std::uint32_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
void** kernarg) {
|
||||
auto it0 = functions().find(function_address);
|
||||
|
||||
if (it0 == functions().cend()) {
|
||||
auto agent = target_agent(stream);
|
||||
auto it = functions(agent).find(function_address);
|
||||
|
||||
if (it == functions(agent).cend()) {
|
||||
hip_throw(std::runtime_error{
|
||||
"No device code available for function: " +
|
||||
name(function_address)});
|
||||
}
|
||||
|
||||
auto agent = target_agent(stream);
|
||||
|
||||
const auto it1 = std::find_if(
|
||||
it0->second.cbegin(),
|
||||
it0->second.cend(),
|
||||
[=](const std::pair<hsa_agent_t, Kernel_descriptor>& x) {
|
||||
return x.first == agent;
|
||||
});
|
||||
|
||||
if (it1 == it0->second.cend()) {
|
||||
hip_throw(std::runtime_error{
|
||||
"No code available for function: " + name(function_address) +
|
||||
", for agent: " + name(agent)});
|
||||
}
|
||||
|
||||
hipModuleLaunchKernel(it1->second, numBlocks.x, numBlocks.y, numBlocks.z,
|
||||
hipModuleLaunchKernel(it->second, numBlocks.x, numBlocks.y, numBlocks.z,
|
||||
dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
|
||||
stream, nullptr, kernarg);
|
||||
}
|
||||
|
||||
@@ -2659,30 +2659,33 @@ inline
|
||||
__attribute__((visibility("hidden")))
|
||||
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
|
||||
const char* name) {
|
||||
static std::unordered_map<
|
||||
hsa_agent_t, std::vector<Agent_global>> agent_globals;
|
||||
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
|
||||
std::vector<Agent_global>>> agent_globals;
|
||||
static std::once_flag f;
|
||||
auto agent = this_agent();
|
||||
|
||||
std::call_once(f, []() {
|
||||
for (auto&& agent_executables : executables()) {
|
||||
std::vector<Agent_global> tmp0;
|
||||
for (auto&& executable : agent_executables.second) {
|
||||
auto tmp1 = read_agent_globals(agent_executables.first,
|
||||
executable);
|
||||
|
||||
tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()),
|
||||
make_move_iterator(tmp1.end()));
|
||||
}
|
||||
agent_globals.emplace(agent_executables.first, move(tmp0));
|
||||
for (auto&& agent : hip_impl::all_hsa_agents()) {
|
||||
agent_globals[agent].second.clear();
|
||||
}
|
||||
});
|
||||
|
||||
const auto it = agent_globals.find(this_agent());
|
||||
std::call_once(agent_globals[agent].first, [](hsa_agent_t agent) {
|
||||
std::vector<Agent_global> tmp0;
|
||||
for (auto&& executable : executables(agent)) {
|
||||
auto tmp1 = read_agent_globals(agent, executable);
|
||||
tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()),
|
||||
make_move_iterator(tmp1.end()));
|
||||
}
|
||||
agent_globals[agent].second = move(tmp0);
|
||||
}, agent);
|
||||
|
||||
const auto it = agent_globals.find(agent);
|
||||
|
||||
if (it == agent_globals.cend()) return hipErrorNotInitialized;
|
||||
|
||||
std::tie(*dptr, *bytes) = read_global_description(it->second.cbegin(),
|
||||
it->second.cend(), name);
|
||||
std::tie(*dptr, *bytes) = read_global_description(it->second.second.cbegin(),
|
||||
it->second.second.cend(), name);
|
||||
|
||||
return *dptr ? hipSuccess : hipErrorNotFound;
|
||||
}
|
||||
|
||||
@@ -83,6 +83,9 @@ inline constexpr bool operator==(hsa_isa_t x, hsa_isa_t y) {
|
||||
}
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
std::vector<hsa_agent_t> all_hsa_agents();
|
||||
|
||||
class Kernel_descriptor {
|
||||
std::uint64_t kernel_object_{};
|
||||
amd_kernel_code_t const* kernel_header_{nullptr};
|
||||
@@ -376,47 +379,50 @@ hsa_executable_t load_executable(const std::string& file,
|
||||
return executable;
|
||||
}
|
||||
|
||||
std::vector<hsa_agent_t> all_hsa_agents();
|
||||
|
||||
inline
|
||||
__attribute__((visibility("hidden")))
|
||||
const std::unordered_map<
|
||||
hsa_agent_t, std::vector<hsa_executable_t>>& executables() {
|
||||
static std::unordered_map<hsa_agent_t, std::vector<hsa_executable_t>> r;
|
||||
const std::vector<hsa_executable_t>& executables(hsa_agent_t agent) {
|
||||
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
|
||||
std::vector<hsa_executable_t>>> r;
|
||||
static std::once_flag f;
|
||||
|
||||
// Create placeholder for each agent in the map.
|
||||
std::call_once(f, []() {
|
||||
for (auto&& agent : hip_impl::all_hsa_agents()) {
|
||||
hsa_agent_iterate_isas(agent, [](hsa_isa_t x, void* pa) {
|
||||
const auto it = code_object_blobs().find(x);
|
||||
|
||||
if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS;
|
||||
|
||||
hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);
|
||||
|
||||
for (auto&& blob : it->second) {
|
||||
hsa_executable_t tmp = {};
|
||||
|
||||
hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr,
|
||||
&tmp);
|
||||
|
||||
// TODO: this is massively inefficient and only meant for
|
||||
// illustration.
|
||||
std::string blob_to_str{blob.cbegin(), blob.cend()};
|
||||
tmp = load_executable(blob_to_str, tmp, a);
|
||||
|
||||
if (tmp.handle) r[a].push_back(tmp);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}, &agent);
|
||||
r[agent].second.clear();
|
||||
}
|
||||
});
|
||||
|
||||
return r;
|
||||
std::call_once(r[agent].first, [](hsa_agent_t agent) {
|
||||
hsa_agent_iterate_isas(agent, [](hsa_isa_t x, void* pa) {
|
||||
const auto it = code_object_blobs().find(x);
|
||||
|
||||
if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS;
|
||||
|
||||
hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);
|
||||
|
||||
for (auto&& blob : it->second) {
|
||||
hsa_executable_t tmp = {};
|
||||
|
||||
hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr,
|
||||
&tmp);
|
||||
|
||||
// TODO: this is massively inefficient and only meant for
|
||||
// illustration.
|
||||
std::string blob_to_str{blob.cbegin(), blob.cend()};
|
||||
tmp = load_executable(blob_to_str, tmp, a);
|
||||
|
||||
if (tmp.handle) r[a].second.push_back(tmp);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}, &agent);
|
||||
}, agent);
|
||||
|
||||
return r[agent].second;
|
||||
}
|
||||
|
||||
inline
|
||||
@@ -477,55 +483,66 @@ const std::unordered_map<std::uintptr_t, std::string>& function_names() {
|
||||
inline
|
||||
__attribute__((visibility("hidden")))
|
||||
const std::unordered_map<
|
||||
std::string, std::vector<hsa_executable_symbol_t>>& kernels() {
|
||||
static std::unordered_map<
|
||||
std::string, std::vector<hsa_executable_symbol_t>> r;
|
||||
std::string, std::vector<hsa_executable_symbol_t>>& kernels(hsa_agent_t agent) {
|
||||
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
|
||||
std::unordered_map<std::string, std::vector<hsa_executable_symbol_t>>>> r;
|
||||
static std::once_flag f;
|
||||
|
||||
// Create placeholder for each agent in the map.
|
||||
std::call_once(f, []() {
|
||||
for (auto&& agent : hip_impl::all_hsa_agents()) {
|
||||
r[agent].second.clear();
|
||||
}
|
||||
});
|
||||
|
||||
std::call_once(r[agent].first, [](hsa_agent_t agent) {
|
||||
static const auto copy_kernels = [](
|
||||
hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void*) {
|
||||
if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[name(x)].push_back(x);
|
||||
hsa_executable_t, hsa_agent_t agent, hsa_executable_symbol_t x, void*) {
|
||||
if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[agent].second[name(x)].push_back(x);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
for (auto&& agent_executables : executables()) {
|
||||
for (auto&& executable : agent_executables.second) {
|
||||
hsa_executable_iterate_agent_symbols(
|
||||
executable, agent_executables.first, copy_kernels, nullptr);
|
||||
}
|
||||
for (auto&& executable : executables(agent)) {
|
||||
hsa_executable_iterate_agent_symbols(
|
||||
executable, agent, copy_kernels, nullptr);
|
||||
}
|
||||
});
|
||||
}, agent);
|
||||
|
||||
return r;
|
||||
return r[agent].second;
|
||||
}
|
||||
|
||||
inline
|
||||
__attribute__((visibility("hidden")))
|
||||
const std::unordered_map<
|
||||
std::uintptr_t,
|
||||
std::vector<std::pair<hsa_agent_t, Kernel_descriptor>>>& functions() {
|
||||
static std::unordered_map<
|
||||
std::uintptr_t,
|
||||
std::vector<std::pair<hsa_agent_t, Kernel_descriptor>>> r;
|
||||
Kernel_descriptor>& functions(hsa_agent_t agent) {
|
||||
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
|
||||
std::unordered_map<std::uintptr_t, Kernel_descriptor>>> r;
|
||||
static std::once_flag f;
|
||||
|
||||
// Create placeholder for each agent in the map.
|
||||
std::call_once(f, []() {
|
||||
for (auto&& function : function_names()) {
|
||||
const auto it = kernels().find(function.second);
|
||||
|
||||
if (it == kernels().cend()) continue;
|
||||
|
||||
for (auto&& kernel_symbol : it->second) {
|
||||
r[function.first].emplace_back(
|
||||
agent(kernel_symbol),
|
||||
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
|
||||
}
|
||||
for (auto&& agent : hip_impl::all_hsa_agents()) {
|
||||
r[agent].second.clear();
|
||||
}
|
||||
});
|
||||
|
||||
return r;
|
||||
std::call_once(r[agent].first, [](hsa_agent_t agent) {
|
||||
for (auto&& function : function_names()) {
|
||||
const auto it = kernels(agent).find(function.second);
|
||||
|
||||
if (it == kernels(agent).cend()) continue;
|
||||
|
||||
for (auto&& kernel_symbol : it->second) {
|
||||
r[agent].second.emplace(
|
||||
function.first,
|
||||
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
|
||||
}
|
||||
}
|
||||
}, agent);
|
||||
|
||||
return r[agent].second;
|
||||
}
|
||||
|
||||
inline
|
||||
|
||||
@@ -490,21 +490,12 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
||||
if (!attr) return hipErrorInvalidValue;
|
||||
if (!func) return hipErrorInvalidDeviceFunction;
|
||||
|
||||
const auto it0 = functions().find(reinterpret_cast<uintptr_t>(func));
|
||||
|
||||
if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction;
|
||||
|
||||
auto agent = this_agent();
|
||||
const auto it1 = find_if(
|
||||
it0->second.cbegin(),
|
||||
it0->second.cend(),
|
||||
[=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
|
||||
return x.first == agent;
|
||||
});
|
||||
const auto it = functions(agent).find(reinterpret_cast<uintptr_t>(func));
|
||||
|
||||
if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction;
|
||||
if (it == functions(agent).cend()) return hipErrorInvalidDeviceFunction;
|
||||
|
||||
const auto header = static_cast<hipFunction_t>(it1->second)->_header;
|
||||
const auto header = static_cast<hipFunction_t>(it->second)->_header;
|
||||
|
||||
if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user