load program state once per agent

This commit is contained in:
Jeff Daily
2019-03-27 18:19:10 +00:00
rodzic 93906a072c
commit 2845b4c4b8
4 zmienionych plików z 102 dodań i 104 usunięć
@@ -155,30 +155,17 @@ void hipLaunchKernelGGLImpl(
std::uint32_t sharedMemBytes,
hipStream_t stream,
void** kernarg) {
auto it0 = functions().find(function_address);
if (it0 == functions().cend()) {
auto agent = target_agent(stream);
auto it = functions(agent).find(function_address);
if (it == functions(agent).cend()) {
hip_throw(std::runtime_error{
"No device code available for function: " +
name(function_address)});
}
auto agent = target_agent(stream);
const auto it1 = std::find_if(
it0->second.cbegin(),
it0->second.cend(),
[=](const std::pair<hsa_agent_t, Kernel_descriptor>& x) {
return x.first == agent;
});
if (it1 == it0->second.cend()) {
hip_throw(std::runtime_error{
"No code available for function: " + name(function_address) +
", for agent: " + name(agent)});
}
hipModuleLaunchKernel(it1->second, numBlocks.x, numBlocks.y, numBlocks.z,
hipModuleLaunchKernel(it->second, numBlocks.x, numBlocks.y, numBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
stream, nullptr, kernarg);
}
@@ -2659,30 +2659,33 @@ inline
__attribute__((visibility("hidden")))
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
const char* name) {
static std::unordered_map<
hsa_agent_t, std::vector<Agent_global>> agent_globals;
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::vector<Agent_global>>> agent_globals;
static std::once_flag f;
auto agent = this_agent();
std::call_once(f, []() {
for (auto&& agent_executables : executables()) {
std::vector<Agent_global> tmp0;
for (auto&& executable : agent_executables.second) {
auto tmp1 = read_agent_globals(agent_executables.first,
executable);
tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()),
make_move_iterator(tmp1.end()));
}
agent_globals.emplace(agent_executables.first, move(tmp0));
for (auto&& agent : hip_impl::all_hsa_agents()) {
agent_globals[agent].second.clear();
}
});
const auto it = agent_globals.find(this_agent());
std::call_once(agent_globals[agent].first, [](hsa_agent_t agent) {
std::vector<Agent_global> tmp0;
for (auto&& executable : executables(agent)) {
auto tmp1 = read_agent_globals(agent, executable);
tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()),
make_move_iterator(tmp1.end()));
}
agent_globals[agent].second = move(tmp0);
}, agent);
const auto it = agent_globals.find(agent);
if (it == agent_globals.cend()) return hipErrorNotInitialized;
std::tie(*dptr, *bytes) = read_global_description(it->second.cbegin(),
it->second.cend(), name);
std::tie(*dptr, *bytes) = read_global_description(it->second.second.cbegin(),
it->second.second.cend(), name);
return *dptr ? hipSuccess : hipErrorNotFound;
}
@@ -83,6 +83,9 @@ inline constexpr bool operator==(hsa_isa_t x, hsa_isa_t y) {
}
namespace hip_impl {
std::vector<hsa_agent_t> all_hsa_agents();
class Kernel_descriptor {
std::uint64_t kernel_object_{};
amd_kernel_code_t const* kernel_header_{nullptr};
@@ -376,47 +379,50 @@ hsa_executable_t load_executable(const std::string& file,
return executable;
}
std::vector<hsa_agent_t> all_hsa_agents();
inline
__attribute__((visibility("hidden")))
const std::unordered_map<
hsa_agent_t, std::vector<hsa_executable_t>>& executables() {
static std::unordered_map<hsa_agent_t, std::vector<hsa_executable_t>> r;
const std::vector<hsa_executable_t>& executables(hsa_agent_t agent) {
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::vector<hsa_executable_t>>> r;
static std::once_flag f;
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& agent : hip_impl::all_hsa_agents()) {
hsa_agent_iterate_isas(agent, [](hsa_isa_t x, void* pa) {
const auto it = code_object_blobs().find(x);
if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS;
hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);
for (auto&& blob : it->second) {
hsa_executable_t tmp = {};
hsa_executable_create_alt(
HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&tmp);
// TODO: this is massively inefficient and only meant for
// illustration.
std::string blob_to_str{blob.cbegin(), blob.cend()};
tmp = load_executable(blob_to_str, tmp, a);
if (tmp.handle) r[a].push_back(tmp);
}
return HSA_STATUS_SUCCESS;
}, &agent);
r[agent].second.clear();
}
});
return r;
std::call_once(r[agent].first, [](hsa_agent_t agent) {
hsa_agent_iterate_isas(agent, [](hsa_isa_t x, void* pa) {
const auto it = code_object_blobs().find(x);
if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS;
hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);
for (auto&& blob : it->second) {
hsa_executable_t tmp = {};
hsa_executable_create_alt(
HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&tmp);
// TODO: this is massively inefficient and only meant for
// illustration.
std::string blob_to_str{blob.cbegin(), blob.cend()};
tmp = load_executable(blob_to_str, tmp, a);
if (tmp.handle) r[a].second.push_back(tmp);
}
return HSA_STATUS_SUCCESS;
}, &agent);
}, agent);
return r[agent].second;
}
inline
@@ -477,55 +483,66 @@ const std::unordered_map<std::uintptr_t, std::string>& function_names() {
inline
__attribute__((visibility("hidden")))
const std::unordered_map<
std::string, std::vector<hsa_executable_symbol_t>>& kernels() {
static std::unordered_map<
std::string, std::vector<hsa_executable_symbol_t>> r;
std::string, std::vector<hsa_executable_symbol_t>>& kernels(hsa_agent_t agent) {
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::unordered_map<std::string, std::vector<hsa_executable_symbol_t>>>> r;
static std::once_flag f;
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& agent : hip_impl::all_hsa_agents()) {
r[agent].second.clear();
}
});
std::call_once(r[agent].first, [](hsa_agent_t agent) {
static const auto copy_kernels = [](
hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void*) {
if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[name(x)].push_back(x);
hsa_executable_t, hsa_agent_t agent, hsa_executable_symbol_t x, void*) {
if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[agent].second[name(x)].push_back(x);
return HSA_STATUS_SUCCESS;
};
for (auto&& agent_executables : executables()) {
for (auto&& executable : agent_executables.second) {
hsa_executable_iterate_agent_symbols(
executable, agent_executables.first, copy_kernels, nullptr);
}
for (auto&& executable : executables(agent)) {
hsa_executable_iterate_agent_symbols(
executable, agent, copy_kernels, nullptr);
}
});
}, agent);
return r;
return r[agent].second;
}
inline
__attribute__((visibility("hidden")))
const std::unordered_map<
std::uintptr_t,
std::vector<std::pair<hsa_agent_t, Kernel_descriptor>>>& functions() {
static std::unordered_map<
std::uintptr_t,
std::vector<std::pair<hsa_agent_t, Kernel_descriptor>>> r;
Kernel_descriptor>& functions(hsa_agent_t agent) {
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::unordered_map<std::uintptr_t, Kernel_descriptor>>> r;
static std::once_flag f;
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& function : function_names()) {
const auto it = kernels().find(function.second);
if (it == kernels().cend()) continue;
for (auto&& kernel_symbol : it->second) {
r[function.first].emplace_back(
agent(kernel_symbol),
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
}
for (auto&& agent : hip_impl::all_hsa_agents()) {
r[agent].second.clear();
}
});
return r;
std::call_once(r[agent].first, [](hsa_agent_t agent) {
for (auto&& function : function_names()) {
const auto it = kernels(agent).find(function.second);
if (it == kernels(agent).cend()) continue;
for (auto&& kernel_symbol : it->second) {
r[agent].second.emplace(
function.first,
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
}
}
}, agent);
return r[agent].second;
}
inline
+3 -12
Wyświetl plik
@@ -490,21 +490,12 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
if (!attr) return hipErrorInvalidValue;
if (!func) return hipErrorInvalidDeviceFunction;
const auto it0 = functions().find(reinterpret_cast<uintptr_t>(func));
if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction;
auto agent = this_agent();
const auto it1 = find_if(
it0->second.cbegin(),
it0->second.cend(),
[=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
return x.first == agent;
});
const auto it = functions(agent).find(reinterpret_cast<uintptr_t>(func));
if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction;
if (it == functions(agent).cend()) return hipErrorInvalidDeviceFunction;
const auto header = static_cast<hipFunction_t>(it1->second)->_header;
const auto header = static_cast<hipFunction_t>(it->second)->_header;
if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};