Merge pull request #991 from jeffdaily/per_agent_program_state

load program state once per agent

[ROCm/hip commit: 8af327e439]
This commit is contained in:
Maneesh Gupta
2019-03-28 04:40:46 +00:00
کامیت شده توسط GitHub
کامیت 6effbfe2dd
4فایلهای تغییر یافته به همراه121 افزوده شده و 105 حذف شده
@@ -155,30 +155,18 @@ void hipLaunchKernelGGLImpl(
std::uint32_t sharedMemBytes,
hipStream_t stream,
void** kernarg) {
auto it0 = functions().find(function_address);
if (it0 == functions().cend()) {
hip_throw(std::runtime_error{
"No device code available for function: " +
name(function_address)});
}
auto agent = target_agent(stream);
auto it = functions(agent).find(function_address);
const auto it1 = std::find_if(
it0->second.cbegin(),
it0->second.cend(),
[=](const std::pair<hsa_agent_t, Kernel_descriptor>& x) {
return x.first == agent;
});
if (it1 == it0->second.cend()) {
if (it == functions(agent).cend()) {
hip_throw(std::runtime_error{
"No code available for function: " + name(function_address) +
"No device code available for function: " +
name(function_address) +
", for agent: " + name(agent)});
}
hipModuleLaunchKernel(it1->second, numBlocks.x, numBlocks.y, numBlocks.z,
hipModuleLaunchKernel(it->second, numBlocks.x, numBlocks.y, numBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
stream, nullptr, kernarg);
}
@@ -2659,30 +2659,38 @@ inline
__attribute__((visibility("hidden")))
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
const char* name) {
static std::unordered_map<
hsa_agent_t, std::vector<Agent_global>> agent_globals;
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::vector<Agent_global>>> globals;
static std::once_flag f;
auto agent = this_agent();
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& agent_executables : executables()) {
std::vector<Agent_global> tmp0;
for (auto&& executable : agent_executables.second) {
auto tmp1 = read_agent_globals(agent_executables.first,
executable);
tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()),
make_move_iterator(tmp1.end()));
}
agent_globals.emplace(agent_executables.first, move(tmp0));
for (auto&& x : hip_impl::all_hsa_agents()) {
(void)globals[x];
}
});
const auto it = agent_globals.find(this_agent());
if (globals.find(agent) == globals.cend()) {
hip_throw(std::runtime_error{"invalid agent"});
}
if (it == agent_globals.cend()) return hipErrorNotInitialized;
std::call_once(globals[agent].first, [](hsa_agent_t aa) {
std::vector<Agent_global> tmp0;
for (auto&& executable : executables(aa)) {
auto tmp1 = read_agent_globals(aa, executable);
tmp0.insert(tmp0.end(), make_move_iterator(tmp1.begin()),
make_move_iterator(tmp1.end()));
}
globals[aa].second = move(tmp0);
}, agent);
std::tie(*dptr, *bytes) = read_global_description(it->second.cbegin(),
it->second.cend(), name);
const auto it = globals.find(agent);
if (it == globals.cend()) return hipErrorNotInitialized;
std::tie(*dptr, *bytes) = read_global_description(it->second.second.cbegin(),
it->second.second.cend(), name);
return *dptr ? hipSuccess : hipErrorNotFound;
}
@@ -83,6 +83,9 @@ inline constexpr bool operator==(hsa_isa_t x, hsa_isa_t y) {
}
namespace hip_impl {
std::vector<hsa_agent_t> all_hsa_agents();
class Kernel_descriptor {
std::uint64_t kernel_object_{};
amd_kernel_code_t const* kernel_header_{nullptr};
@@ -376,47 +379,54 @@ hsa_executable_t load_executable(const std::string& file,
return executable;
}
std::vector<hsa_agent_t> all_hsa_agents();
inline
__attribute__((visibility("hidden")))
const std::unordered_map<
hsa_agent_t, std::vector<hsa_executable_t>>& executables() {
static std::unordered_map<hsa_agent_t, std::vector<hsa_executable_t>> r;
const std::vector<hsa_executable_t>& executables(hsa_agent_t agent) {
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::vector<hsa_executable_t>>> r;
static std::once_flag f;
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& agent : hip_impl::all_hsa_agents()) {
hsa_agent_iterate_isas(agent, [](hsa_isa_t x, void* pa) {
const auto it = code_object_blobs().find(x);
if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS;
hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);
for (auto&& blob : it->second) {
hsa_executable_t tmp = {};
hsa_executable_create_alt(
HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&tmp);
// TODO: this is massively inefficient and only meant for
// illustration.
std::string blob_to_str{blob.cbegin(), blob.cend()};
tmp = load_executable(blob_to_str, tmp, a);
if (tmp.handle) r[a].push_back(tmp);
}
return HSA_STATUS_SUCCESS;
}, &agent);
for (auto&& x : hip_impl::all_hsa_agents()) {
(void)r[x];
}
});
return r;
if (r.find(agent) == r.cend()) {
hip_throw(std::runtime_error{"invalid agent"});
}
std::call_once(r[agent].first, [](hsa_agent_t aa) {
hsa_agent_iterate_isas(aa, [](hsa_isa_t x, void* pa) {
const auto it = code_object_blobs().find(x);
if (it == code_object_blobs().cend()) return HSA_STATUS_SUCCESS;
hsa_agent_t a = *static_cast<hsa_agent_t*>(pa);
for (auto&& blob : it->second) {
hsa_executable_t tmp = {};
hsa_executable_create_alt(
HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&tmp);
// TODO: this is massively inefficient and only meant for
// illustration.
std::string blob_to_str{blob.cbegin(), blob.cend()};
tmp = load_executable(blob_to_str, tmp, a);
if (tmp.handle) r[a].second.push_back(tmp);
}
return HSA_STATUS_SUCCESS;
}, &aa);
}, agent);
return r[agent].second;
}
inline
@@ -477,55 +487,74 @@ const std::unordered_map<std::uintptr_t, std::string>& function_names() {
inline
__attribute__((visibility("hidden")))
const std::unordered_map<
std::string, std::vector<hsa_executable_symbol_t>>& kernels() {
static std::unordered_map<
std::string, std::vector<hsa_executable_symbol_t>> r;
std::string, std::vector<hsa_executable_symbol_t>>& kernels(hsa_agent_t agent) {
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::unordered_map<std::string, std::vector<hsa_executable_symbol_t>>>> r;
static std::once_flag f;
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& x : hip_impl::all_hsa_agents()) {
(void)r[x];
}
});
if (r.find(agent) == r.cend()) {
hip_throw(std::runtime_error{"invalid agent"});
}
std::call_once(r[agent].first, [](hsa_agent_t aa) {
static const auto copy_kernels = [](
hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void*) {
if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[name(x)].push_back(x);
hsa_executable_t, hsa_agent_t a, hsa_executable_symbol_t x, void*) {
if (type(x) == HSA_SYMBOL_KIND_KERNEL) r[a].second[name(x)].push_back(x);
return HSA_STATUS_SUCCESS;
};
for (auto&& agent_executables : executables()) {
for (auto&& executable : agent_executables.second) {
hsa_executable_iterate_agent_symbols(
executable, agent_executables.first, copy_kernels, nullptr);
}
for (auto&& executable : executables(aa)) {
hsa_executable_iterate_agent_symbols(
executable, aa, copy_kernels, nullptr);
}
});
}, agent);
return r;
return r[agent].second;
}
inline
__attribute__((visibility("hidden")))
const std::unordered_map<
std::uintptr_t,
std::vector<std::pair<hsa_agent_t, Kernel_descriptor>>>& functions() {
static std::unordered_map<
std::uintptr_t,
std::vector<std::pair<hsa_agent_t, Kernel_descriptor>>> r;
Kernel_descriptor>& functions(hsa_agent_t agent) {
static std::unordered_map<hsa_agent_t, std::pair<std::once_flag,
std::unordered_map<std::uintptr_t, Kernel_descriptor>>> r;
static std::once_flag f;
// Create placeholder for each agent in the map.
std::call_once(f, []() {
for (auto&& function : function_names()) {
const auto it = kernels().find(function.second);
if (it == kernels().cend()) continue;
for (auto&& kernel_symbol : it->second) {
r[function.first].emplace_back(
agent(kernel_symbol),
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
}
for (auto&& x : hip_impl::all_hsa_agents()) {
(void)r[x];
}
});
return r;
if (r.find(agent) == r.cend()) {
hip_throw(std::runtime_error{"invalid agent"});
}
std::call_once(r[agent].first, [](hsa_agent_t aa) {
for (auto&& function : function_names()) {
const auto it = kernels(aa).find(function.second);
if (it == kernels(aa).cend()) continue;
for (auto&& kernel_symbol : it->second) {
r[aa].second.emplace(
function.first,
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
}
}
}, agent);
return r[agent].second;
}
inline
@@ -490,21 +490,12 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
if (!attr) return hipErrorInvalidValue;
if (!func) return hipErrorInvalidDeviceFunction;
const auto it0 = functions().find(reinterpret_cast<uintptr_t>(func));
if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction;
auto agent = this_agent();
const auto it1 = find_if(
it0->second.cbegin(),
it0->second.cend(),
[=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
return x.first == agent;
});
const auto it = functions(agent).find(reinterpret_cast<uintptr_t>(func));
if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction;
if (it == functions(agent).cend()) return hipErrorInvalidDeviceFunction;
const auto header = static_cast<hipFunction_t>(it1->second)->_header;
const auto header = static_cast<hipFunction_t>(it->second)->_header;
if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};