Refactor the __device__ versions of memset and memcpy to be less awkward, i.e. to return the destination pointer rather than nullptr (it can only be assumed that was done for maximum confusion), and to actually unroll as they claim to. Change all of the {to, from}Symbol functions to use hipModuleGetGlobal instead of hc::accelerator::get_symbol_address, which is no longer valid with module-based dispatch.

This commit is contained in:
Alex Voicu
2017-11-21 02:40:34 +00:00
والد 1824fb7698
کامیت 9d088d2283
5 فایلهای تغییر یافته به همراه 206 افزوده شده و 113 حذف شده
+46 -61
مشاهده پرونده
@@ -31,14 +31,6 @@ using namespace std;
namespace std
{
template<>
struct hash<hsa_agent_t> {
size_t operator()(hsa_agent_t x) const
{
return hash<decltype(x.handle)>{}(x.handle);
}
};
template<>
struct hash<hsa_isa_t> {
size_t operator()(hsa_isa_t x) const
@@ -48,13 +40,6 @@ namespace std
};
}
// Equality for hsa_agent_t: two agents are equal when their `handle` members
// are equal. No other state is compared.
inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y)
{
    return x.handle == y.handle;
}
inline
constexpr
bool operator==(hsa_isa_t x, hsa_isa_t y)
@@ -242,52 +227,6 @@ namespace
return r;
}
// Returns the table mapping each HSA agent to the executables loaded for it.
// The table is filled on the first call only (guarded by call_once); every
// later call returns the same, already-populated map.
const unordered_map<hsa_agent_t, vector<hsa_executable_t>>& executables()
{
    static unordered_map<hsa_agent_t, vector<hsa_executable_t>> table;
    static once_flag populated;

    call_once(populated, []() {
        static const auto devices = hc::accelerator::get_all();

        for (auto&& device : devices) {
            auto agent = static_cast<hsa_agent_t*>(device.get_hsa_agent());

            // Accelerators that expose no HSA agent are skipped.
            if (!agent) continue;

            // The callback must stay capture-less so it converts to a plain
            // function pointer; the agent travels through the void* argument
            // and `table` is reachable because it has static storage.
            hsa_agent_iterate_isas(*agent, [](hsa_isa_t x, void* pa) {
                const auto entry = code_object_blobs().find(x);

                // No code object blobs for this ISA: carry on iterating.
                if (entry == code_object_blobs().cend()) {
                    return HSA_STATUS_SUCCESS;
                }

                hsa_agent_t owner = *static_cast<hsa_agent_t*>(pa);

                for (auto&& blob : entry->second) {
                    hsa_executable_t exe = {};
                    // NOTE(review): the returned status is not checked.
                    hsa_executable_create_alt(
                        HSA_PROFILE_FULL,
                        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
                        nullptr,
                        &exe);

                    // TODO: this is massively inefficient and only
                    //       meant for illustration.
                    string bytes{blob.cbegin(), blob.cend()};
                    stringstream stream{bytes};

                    exe = load_executable(exe, owner, stream);

                    // Only executables with a non-zero handle are recorded;
                    // presumably a zero handle signals a failed load -
                    // confirm against load_executable.
                    if (exe.handle) table[owner].push_back(exe);
                }

                return HSA_STATUS_SUCCESS;
            }, agent);
        }
    });

    return table;
}
vector<pair<uintptr_t, string>> function_names_for(
const elfio& reader, section* symtab)
{
@@ -467,6 +406,52 @@ namespace
namespace hip_impl
{
// Returns the table mapping each HSA agent to the executables loaded for it.
// The table is filled on the first call only (guarded by call_once); every
// later call returns the same, already-populated map.
const unordered_map<hsa_agent_t, vector<hsa_executable_t>>& executables()
{
    static unordered_map<hsa_agent_t, vector<hsa_executable_t>> table;
    static once_flag populated;

    call_once(populated, []() {
        static const auto devices = hc::accelerator::get_all();

        for (auto&& device : devices) {
            auto agent = static_cast<hsa_agent_t*>(device.get_hsa_agent());

            // Accelerators that expose no HSA agent are skipped.
            if (!agent) continue;

            // The callback must stay capture-less so it converts to a plain
            // function pointer; the agent travels through the void* argument
            // and `table` is reachable because it has static storage.
            hsa_agent_iterate_isas(*agent, [](hsa_isa_t x, void* pa) {
                const auto entry = code_object_blobs().find(x);

                // No code object blobs for this ISA: carry on iterating.
                if (entry == code_object_blobs().cend()) {
                    return HSA_STATUS_SUCCESS;
                }

                hsa_agent_t owner = *static_cast<hsa_agent_t*>(pa);

                for (auto&& blob : entry->second) {
                    hsa_executable_t exe = {};
                    // NOTE(review): the returned status is not checked.
                    hsa_executable_create_alt(
                        HSA_PROFILE_FULL,
                        HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
                        nullptr,
                        &exe);

                    // TODO: this is massively inefficient and only
                    //       meant for illustration.
                    string bytes{blob.cbegin(), blob.cend()};
                    stringstream stream{bytes};

                    exe = load_executable(exe, owner, stream);

                    // Only executables with a non-zero handle are recorded;
                    // presumably a zero handle signals a failed load -
                    // confirm against load_executable.
                    if (exe.handle) table[owner].push_back(exe);
                }

                return HSA_STATUS_SUCCESS;
            }, agent);
        }
    });

    return table;
}
const unordered_map<uintptr_t, string>& function_names()
{
static unordered_map<uintptr_t, string> r{