HIP program state re-initialization logic
This commit adds support for kernels that are loaded dynamically, for example through dlopen(), after the HIP runtime has been initialized.
This commit is contained in:
+108
-23
@@ -74,11 +74,15 @@ vector<string> copy_names_of_undefined_symbols(const symbol_section_accessor& se
|
||||
}
|
||||
|
||||
const std::unordered_map<std::string, std::pair<ELFIO::Elf64_Addr, ELFIO::Elf_Xword>>&
|
||||
symbol_addresses() {
|
||||
symbol_addresses(bool rebuild = false) {
|
||||
static unordered_map<string, pair<Elf64_Addr, Elf_Xword>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
if (rebuild) {
|
||||
r.clear();
|
||||
}
|
||||
|
||||
dl_iterate_phdr(
|
||||
[](dl_phdr_info* info, size_t, void*) {
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
@@ -108,7 +112,12 @@ symbol_addresses() {
|
||||
return 0;
|
||||
},
|
||||
nullptr);
|
||||
});
|
||||
};
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -166,13 +175,18 @@ vector<char> code_object_blob_for_process() {
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs() {
|
||||
const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs(bool rebuild = false) {
|
||||
static unordered_map<hsa_isa_t, vector<vector<char>>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
static vector<vector<char>> blobs{code_object_blob_for_process()};
|
||||
|
||||
if (rebuild) {
|
||||
blobs.clear();
|
||||
blobs.push_back(code_object_blob_for_process());
|
||||
}
|
||||
|
||||
dl_iterate_phdr(
|
||||
[](dl_phdr_info* info, std::size_t, void*) {
|
||||
elfio tmp;
|
||||
@@ -194,7 +208,13 @@ const unordered_map<hsa_isa_t, vector<vector<char>>>& code_object_blobs() {
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -216,13 +236,13 @@ vector<pair<uintptr_t, string>> function_names_for(const elfio& reader, section*
|
||||
return r;
|
||||
}
|
||||
|
||||
const vector<pair<uintptr_t, string>>& function_names_for_process() {
|
||||
const vector<pair<uintptr_t, string>>& function_names_for_process(bool rebuild = false) {
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
|
||||
static vector<pair<uintptr_t, string>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
elfio reader;
|
||||
|
||||
if (!reader.load(self)) {
|
||||
@@ -233,16 +253,26 @@ const vector<pair<uintptr_t, string>>& function_names_for_process() {
|
||||
find_section_if(reader, [](const section* x) { return x->get_type() == SHT_SYMTAB; });
|
||||
|
||||
if (symtab) r = function_names_for(reader, symtab);
|
||||
});
|
||||
};
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels() {
|
||||
const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels(bool rebuild = false) {
|
||||
static unordered_map<string, vector<hsa_executable_symbol_t>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
if (rebuild) {
|
||||
r.clear();
|
||||
executables(rebuild);
|
||||
}
|
||||
|
||||
static const auto copy_kernels = [](hsa_executable_t, hsa_agent_t,
|
||||
hsa_executable_symbol_t s, void*) {
|
||||
if (type(s) == HSA_SYMBOL_KIND_KERNEL) r[name(s)].push_back(s);
|
||||
@@ -256,7 +286,12 @@ const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels() {
|
||||
copy_kernels, nullptr);
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -295,13 +330,18 @@ void load_code_object_and_freeze_executable(
|
||||
|
||||
namespace hip_impl {
|
||||
const unordered_map<hsa_agent_t, vector<hsa_executable_t>>&
|
||||
executables() { // TODO: This leaks the hsa_executable_ts, it should use RAII.
|
||||
executables(bool rebuild) { // TODO: This leaks the hsa_executable_ts, it should use RAII.
|
||||
static unordered_map<hsa_agent_t, vector<hsa_executable_t>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
static const auto accelerators = hc::accelerator::get_all();
|
||||
|
||||
if (rebuild) {
|
||||
r.clear();
|
||||
code_object_blobs(rebuild);
|
||||
}
|
||||
|
||||
for (auto&& acc : accelerators) {
|
||||
auto agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
@@ -335,17 +375,29 @@ executables() { // TODO: This leaks the hsa_executable_ts, it should use RAII.
|
||||
},
|
||||
agent);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<uintptr_t, string>& function_names() {
|
||||
const unordered_map<uintptr_t, string>& function_names(bool rebuild) {
|
||||
static unordered_map<uintptr_t, string> r{function_names_for_process().cbegin(),
|
||||
function_names_for_process().cend()};
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
if (rebuild) {
|
||||
r.clear();
|
||||
function_names_for_process(rebuild);
|
||||
r.insert(function_names_for_process().cbegin(),
|
||||
function_names_for_process().cend());
|
||||
}
|
||||
|
||||
dl_iterate_phdr(
|
||||
[](dl_phdr_info* info, size_t, void*) {
|
||||
elfio tmp;
|
||||
@@ -365,16 +417,30 @@ const unordered_map<uintptr_t, string>& function_names() {
|
||||
return 0;
|
||||
},
|
||||
nullptr);
|
||||
});
|
||||
};
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
static mutex mtx;
|
||||
lock_guard<mutex> lck{mtx};
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& functions() {
|
||||
const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& functions(bool rebuild) {
|
||||
static unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>> r;
|
||||
static once_flag f;
|
||||
|
||||
call_once(f, []() {
|
||||
auto cons = [rebuild]() {
|
||||
if (rebuild) {
|
||||
r.clear();
|
||||
function_names(rebuild);
|
||||
kernels(rebuild);
|
||||
globals(rebuild);
|
||||
}
|
||||
|
||||
for (auto&& function : function_names()) {
|
||||
const auto it = kernels().find(function.second);
|
||||
|
||||
@@ -386,15 +452,34 @@ const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& fu
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
call_once(f, cons);
|
||||
if (rebuild) {
|
||||
static mutex mtx;
|
||||
lock_guard<mutex> lck{mtx};
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
unordered_map<string, void*>& globals() {
|
||||
// Returns a reference to the function-local static map `r` (string -> void*).
// rebuild: when true, `r` is cleared, symbol_addresses(rebuild) is called, and
//          the reserve below is performed again.
// NOTE(review): this span is a rendered diff, so the inline-lambda call_once
// line below appears to be the removed pre-change line shown next to its
// replacement (`cons` + call_once(f, cons)) -- confirm against the real file.
unordered_map<string, void*>& globals(bool rebuild) {
|
||||
static unordered_map<string, void*> r;
|
||||
static once_flag f;
|
||||
call_once(f, []() { r.reserve(symbol_addresses().size()); });
|
||||
// cons: (re)initialises `r`; captures `rebuild` by value.
auto cons =[rebuild]() {
|
||||
if (rebuild) {
|
||||
r.clear();
|
||||
symbol_addresses(rebuild);
|
||||
}
|
||||
|
||||
// Pre-size `r` to the number of entries reported by symbol_addresses().
r.reserve(symbol_addresses().size());
|
||||
};
|
||||
|
||||
// NOTE(review): if the very first call passes rebuild == true, cons runs once
// here through call_once and a second time in the `if (rebuild)` below.
call_once(f, cons);
|
||||
// NOTE(review): cons() is invoked here without a lock, whereas function_names()
// and functions() in this same diff wrap their rebuild call in a lock_guard --
// confirm whether concurrent rebuilds of `r` are possible.
if (rebuild) {
|
||||
cons();
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
مرجع در شماره جدید
Block a user