Merge pull request #279 from AlexVlx/feature_use_module_based_dispatch_instead_of_pfe
Clean up kernel retrieval code / module management
This commit is contained in:
@@ -80,5 +80,7 @@ namespace hip_impl
|
||||
std::unordered_map<std::string, RAII_global>& globals();
|
||||
|
||||
hsa_executable_t load_executable(
|
||||
hsa_executable_t executable, hsa_agent_t agent, std::istream& file);
|
||||
const std::string& file,
|
||||
hsa_executable_t executable,
|
||||
hsa_agent_t agent);
|
||||
} // Namespace hip_impl.
|
||||
+12
-12
@@ -372,16 +372,16 @@ public:
|
||||
};
|
||||
|
||||
|
||||
class ihipModule_t {
|
||||
public:
|
||||
hsa_executable_t executable;
|
||||
hsa_code_object_t object;
|
||||
struct ihipModule_t {
|
||||
std::string fileName;
|
||||
void *ptr;
|
||||
size_t size;
|
||||
std::list<hipFunction_t> funcTrack;
|
||||
std::unordered_map<std::string, uintptr_t> coGlobals;
|
||||
ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {}
|
||||
hsa_executable_t executable = {};
|
||||
hsa_code_object_reader_t coReader = {};
|
||||
|
||||
~ihipModule_t()
|
||||
{
|
||||
if (executable.handle) hsa_executable_destroy(executable);
|
||||
if (coReader.handle) hsa_code_object_reader_destroy(coReader);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -669,11 +669,11 @@ template <typename MUTEX_TYPE>
|
||||
class ihipEventCriticalBase_t : LockedBase<MUTEX_TYPE>
|
||||
{
|
||||
public:
|
||||
explicit ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) :
|
||||
explicit ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) :
|
||||
_parent(parentEvent)
|
||||
{}
|
||||
~ihipEventCriticalBase_t() {};
|
||||
|
||||
|
||||
// Keep data in structure so it can be easily copied into snapshots
|
||||
// (used to reduce lock contention and preserve correct lock order)
|
||||
ihipEventData_t _eventData;
|
||||
@@ -698,7 +698,7 @@ public:
|
||||
// Return a copy of the critical state. The critical data is locked during the copy.
|
||||
ihipEventData_t locked_copyCrit() {
|
||||
LockedAccessor_EventCrit_t crit(_criticalData);
|
||||
return _criticalData._eventData;
|
||||
return _criticalData._eventData;
|
||||
};
|
||||
|
||||
ihipEventCritical_t &criticalData() { return _criticalData; };
|
||||
|
||||
+182
-344
@@ -20,63 +20,65 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "elfio/elfio.hpp"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hcc_detail/program_state.hpp"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "hsa_helpers.hpp"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
|
||||
#include "elfio/elfio.hpp"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hcc_detail/program_state.hpp"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
//TODO Use Pool APIs from HCC to get memory regions.
|
||||
|
||||
#include <cassert>
|
||||
using namespace ELFIO;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
// Rounds Value up to the nearest number of the form k * Align + (Skew % Align).
// Align must be non-zero; with the default Skew of 0 this is a plain
// round-up-to-multiple.
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
    assert(Align != 0u && "Align can't be 0.");

    const uint64_t offset = Skew % Align;
    const uint64_t steps = (Value + Align - 1 - offset) / Align;

    return steps * Align + offset;
}
|
||||
|
||||
|
||||
struct ihipKernArgInfo{
|
||||
std::vector<uint32_t> Size;
|
||||
std::vector<uint32_t> Align;
|
||||
std::vector<std::string> ArgType;
|
||||
std::vector<std::string> ArgName;
|
||||
vector<uint32_t> Size;
|
||||
vector<uint32_t> Align;
|
||||
vector<string> ArgType;
|
||||
vector<string> ArgName;
|
||||
uint32_t totalSize;
|
||||
};
|
||||
|
||||
std::map<std::string,struct ihipKernArgInfo> kernelArguments;
|
||||
|
||||
// Three 32-bit fields, all zero-initialised by default. The field names mirror
// the header of an ELF note entry (name size, descriptor size, type).
struct MyElfNote {
    uint32_t n_namesz{0};
    uint32_t n_descsz{0};
    uint32_t n_type{0};

    MyElfNote() = default;
};
|
||||
map<string, ihipKernArgInfo> kernelArguments;
|
||||
|
||||
struct ihipModuleSymbol_t{
|
||||
uint64_t _object; // The kernel object.
|
||||
uint32_t _groupSegmentSize;
|
||||
uint32_t _privateSegmentSize;
|
||||
std::string _name; // TODO - review for performance cost. Name is just used for debug.
|
||||
uint64_t _object; // The kernel object.
|
||||
uint32_t _groupSegmentSize;
|
||||
uint32_t _privateSegmentSize;
|
||||
string _name; // TODO - review for performance cost. Name is just used for debug.
|
||||
};
|
||||
|
||||
template <>
|
||||
std::string ToString(hipFunction_t v)
|
||||
string ToString(hipFunction_t v)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
ss << "0x" << std::hex << v->_object;
|
||||
@@ -94,113 +96,20 @@ if (hsaStatus != HSA_STATUS_SUCCESS) {\
|
||||
return ihipLogStatus(hipStatus);\
|
||||
}
|
||||
|
||||
namespace hipdrv {
|
||||
|
||||
hsa_status_t findSystemRegions(hsa_region_t region, void *data){
|
||||
hsa_region_segment_t segment_id;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
|
||||
|
||||
if(segment_id != HSA_REGION_SEGMENT_GLOBAL){
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_region_global_flag_t flags;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
|
||||
|
||||
hsa_region_t *reg = (hsa_region_t*)data;
|
||||
|
||||
if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){
|
||||
*reg = region;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // End namespace hipdrv
|
||||
|
||||
uint64_t PrintSymbolSizes(const void *emi, const char *name){
|
||||
using namespace ELFIO;
|
||||
|
||||
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
|
||||
if(NULL == ehdr || EV_CURRENT != ehdr->e_version){}
|
||||
const Elf64_Shdr * shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
|
||||
for(uint16_t i=0;i<ehdr->e_shnum;++i){
|
||||
if(shdr[i].sh_type == SHT_SYMTAB){
|
||||
const Elf64_Sym *syms = (const Elf64_Sym*)((char*)emi + shdr[i].sh_offset);
|
||||
assert(syms);
|
||||
uint64_t numSyms = shdr[i].sh_size/shdr[i].sh_entsize;
|
||||
const char* strtab = (const char*)((char*)emi + shdr[shdr[i].sh_link].sh_offset);
|
||||
assert(strtab);
|
||||
for(uint64_t i=0;i<numSyms;++i){
|
||||
const char *symname = strtab + syms[i].st_name;
|
||||
assert(symname);
|
||||
uint64_t size = syms[i].st_size;
|
||||
if(strcmp(name, symname) == 0){
|
||||
return size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t ElfSize(const void *emi){
|
||||
using namespace ELFIO;
|
||||
|
||||
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
|
||||
const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
|
||||
|
||||
uint64_t max_offset = ehdr->e_shoff;
|
||||
uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum;
|
||||
|
||||
for(uint16_t i=0;i < ehdr->e_shnum;++i){
|
||||
uint64_t cur_offset = static_cast<uint64_t>(shdr[i].sh_offset);
|
||||
if(max_offset < cur_offset){
|
||||
max_offset = cur_offset;
|
||||
total_size = max_offset;
|
||||
if(SHT_NOBITS != shdr[i].sh_type){
|
||||
total_size += static_cast<uint64_t>(shdr[i].sh_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
return total_size;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
{
|
||||
HIP_INIT_API(module, fname);
|
||||
hipError_t ret = hipSuccess;
|
||||
*module = new ihipModule_t;
|
||||
|
||||
if(module == NULL){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if(ctx == nullptr){
|
||||
ret = hipErrorInvalidContext;
|
||||
ifstream file{fname};
|
||||
|
||||
}else{
|
||||
int deviceId = ctx->getDevice()->_deviceId;
|
||||
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
|
||||
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
|
||||
|
||||
hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr,
|
||||
&(*module)->executable);
|
||||
vector<char> tmp{
|
||||
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
|
||||
std::ifstream file{fname};
|
||||
|
||||
if (!file.is_open()) {
|
||||
return ihipLogStatus(hipErrorFileNotFound);
|
||||
}
|
||||
(*module)->executable = hip_impl::load_executable(
|
||||
(*module)->executable, currentDevice->_hsaAgent, file);
|
||||
ret = (*module)->executable.handle ? hipSuccess : hipErrorUnknown;
|
||||
}
|
||||
|
||||
return ihipLogStatus(ret);
|
||||
return hipModuleLoadData(module, tmp.data());
|
||||
}
|
||||
|
||||
|
||||
@@ -212,92 +121,13 @@ hipError_t hipModuleUnload(hipModule_t hmod)
|
||||
// Currently we wait for all inflight activity to complete, but don't prevent another
|
||||
// thread from launching new kernels before we finish this operation.
|
||||
ihipSynchronize();
|
||||
hipError_t ret = hipSuccess;
|
||||
hsa_status_t status = hsa_executable_destroy(hmod->executable);
|
||||
if(status != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
ret = hipErrorInvalidValue;
|
||||
}
|
||||
// status = hsa_code_object_destroy(hmod->object);
|
||||
// if(status != HSA_STATUS_SUCCESS)
|
||||
// {
|
||||
// ret = hipErrorInvalidValue;
|
||||
// }
|
||||
// status = hsa_memory_free(hmod->ptr);
|
||||
// if(status != HSA_STATUS_SUCCESS)
|
||||
// {
|
||||
// ret = hipErrorInvalidValue;
|
||||
// }
|
||||
for(auto f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) {
|
||||
delete *f;
|
||||
}
|
||||
delete hmod;
|
||||
return ihipLogStatus(ret);
|
||||
|
||||
delete hmod; // The ihipModule_t dtor will clean everything up.
|
||||
hmod = nullptr;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
// Looks up the kernel called `name` in module `hmod` and stores its handle in
// `*func`.
//
// A symbol that was already resolved for this module is reused from
// hmod->funcTrack; otherwise a new ihipModuleSymbol_t is created, filled from
// the HSA executable symbol, and appended to hmod->funcTrack, which keeps
// track of it from then on.
//
// Returns hipErrorInvalidValue for null arguments, hipErrorInvalidContext when
// there is no current context, hipErrorNotFound when the symbol or one of its
// attributes cannot be obtained, and hipSuccess otherwise.
hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char *name)
{
    if (func == nullptr || hmod == nullptr || name == nullptr) {
        return hipErrorInvalidValue;
    }

    auto ctx = ihipGetTlsDefaultCtx();
    if (ctx == nullptr) {
        return hipErrorInvalidContext;
    }

    // Hand back the symbol created by an earlier lookup of the same name.
    const std::string wanted(name);
    for (hipFunction_t known : hmod->funcTrack) {
        if (known->_name == wanted) {
            *func = known;
            return hipSuccess;
        }
    }

    int deviceId = ctx->getDevice()->_deviceId;
    ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
    hsa_agent_t gpuAgent = currentDevice->_hsaAgent;

    hsa_executable_symbol_t symbol;
    hsa_status_t status =
        hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &symbol);
    if (status != HSA_STATUS_SUCCESS) {
        return hipErrorNotFound;
    }

    // Owned locally until fully populated, so the early returns (including the
    // ones hidden inside CHECK_HSA) no longer leak the allocation.
    std::unique_ptr<ihipModuleSymbol_t> sym(new ihipModuleSymbol_t);

    status = hsa_executable_symbol_get_info(symbol,
                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                            &sym->_object);
    CHECK_HSA(status, hipErrorNotFound);

    status = hsa_executable_symbol_get_info(symbol,
                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
                                            &sym->_groupSegmentSize);
    CHECK_HSA(status, hipErrorNotFound);

    status = hsa_executable_symbol_get_info(symbol,
                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
                                            &sym->_privateSegmentSize);
    CHECK_HSA(status, hipErrorNotFound);

    sym->_name = name;

    // Ownership moves to the module's tracking list from here on.
    *func = sym.release();
    hmod->funcTrack.push_back(*func);

    return hipSuccess;
}
|
||||
|
||||
|
||||
// Public entry point: registers the API call, forwards to ihipModuleGetSymbol
// and logs the resulting status.
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
                                const char *name)
{
    HIP_INIT_API(hfunc, hmod, name);

    const hipError_t result = ihipModuleGetSymbol(hfunc, hmod, name);

    return ihipLogStatus(result);
}
|
||||
|
||||
|
||||
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
@@ -448,45 +278,11 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
namespace
|
||||
{
|
||||
struct Agent_global {
|
||||
std::string name;
|
||||
string name;
|
||||
hipDeviceptr_t address;
|
||||
std::uint32_t byte_cnt;
|
||||
uint32_t byte_cnt;
|
||||
};
|
||||
|
||||
inline
|
||||
void* address(hsa_executable_symbol_t x)
|
||||
{
|
||||
void* r = nullptr;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string name(hsa_executable_symbol_t x)
|
||||
{
|
||||
uint32_t sz = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
|
||||
|
||||
std::string r(sz, '\0');
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint32_t size(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint32_t r = 0;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void track(const Agent_global& x)
|
||||
{
|
||||
@@ -511,7 +307,7 @@ namespace
|
||||
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
|
||||
}
|
||||
|
||||
template<typename Container = std::vector<Agent_global>>
|
||||
template<typename Container = vector<Agent_global>>
|
||||
inline
|
||||
hsa_status_t copy_agent_global_variables(
|
||||
hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* out)
|
||||
@@ -536,26 +332,24 @@ namespace
|
||||
{
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (!ctx) throw std::runtime_error{"No active HIP context."};
|
||||
if (!ctx) throw runtime_error{"No active HIP context."};
|
||||
|
||||
auto device = ctx->getDevice();
|
||||
|
||||
if (!device) throw std::runtime_error{"No device available for HIP."};
|
||||
if (!device) throw runtime_error{"No device available for HIP."};
|
||||
|
||||
ihipDevice_t *currentDevice = ihipGetDevice(device->_deviceId);
|
||||
|
||||
if (!currentDevice) {
|
||||
throw std::runtime_error{"No active device for HIP"};
|
||||
}
|
||||
if (!currentDevice) throw runtime_error{"No active device for HIP."};
|
||||
|
||||
return currentDevice->_hsaAgent;
|
||||
}
|
||||
|
||||
inline
|
||||
std::vector<Agent_global> read_agent_globals(
|
||||
vector<Agent_global> read_agent_globals(
|
||||
hsa_agent_t agent, hsa_executable_t executable)
|
||||
{
|
||||
std::vector<Agent_global> r;
|
||||
vector<Agent_global> r;
|
||||
|
||||
hsa_executable_iterate_agent_symbols(
|
||||
executable, agent, copy_agent_global_variables, &r);
|
||||
@@ -564,15 +358,14 @@ namespace
|
||||
}
|
||||
|
||||
template<typename ForwardIterator>
|
||||
std::pair<hipDeviceptr_t, std::size_t> read_global_description(
|
||||
pair<hipDeviceptr_t, size_t> read_global_description(
|
||||
ForwardIterator f, ForwardIterator l, const char* name)
|
||||
{
|
||||
const auto it = std::find_if(
|
||||
f, l, [=](const Agent_global& x) { return x.name == name; });
|
||||
|
||||
return it == l ?
|
||||
std::make_pair(nullptr, 0u) :
|
||||
std::make_pair(it->address, it->byte_cnt);
|
||||
make_pair(nullptr, 0u) : make_pair(it->address, it->byte_cnt);
|
||||
}
|
||||
|
||||
hipError_t read_agent_global_from_module(
|
||||
@@ -581,13 +374,12 @@ namespace
|
||||
hipModule_t hmod,
|
||||
const char* name)
|
||||
{
|
||||
static std::unordered_map<
|
||||
hipModule_t, std::vector<Agent_global>> agent_globals;
|
||||
static unordered_map<hipModule_t, vector<Agent_global>> agent_globals;
|
||||
|
||||
// TODO: this is not particularly robust.
|
||||
if (agent_globals.count(hmod) == 0) {
|
||||
static std::mutex mtx;
|
||||
std::lock_guard<std::mutex> lck{mtx};
|
||||
static mutex mtx;
|
||||
lock_guard<mutex> lck{mtx};
|
||||
|
||||
if (agent_globals.count(hmod) == 0) {
|
||||
agent_globals.emplace(
|
||||
@@ -599,10 +391,10 @@ namespace
|
||||
// It will have to be properly fleshed out in the future.
|
||||
const auto it0 = agent_globals.find(hmod);
|
||||
if (it0 == agent_globals.cend()) {
|
||||
throw std::runtime_error{"agent_globals data structure corrupted."};
|
||||
throw runtime_error{"agent_globals data structure corrupted."};
|
||||
}
|
||||
|
||||
std::tie(*dptr, *bytes) = read_global_description(
|
||||
tie(*dptr, *bytes) = read_global_description(
|
||||
it0->second.cbegin(), it0->second.cend(), name);
|
||||
|
||||
return dptr ? hipSuccess : hipErrorNotFound;
|
||||
@@ -611,22 +403,21 @@ namespace
|
||||
hipError_t read_agent_global_from_process(
|
||||
hipDeviceptr_t *dptr, size_t* bytes, const char* name)
|
||||
{
|
||||
static std::unordered_map<
|
||||
hsa_agent_t, std::vector<Agent_global>> agent_globals;
|
||||
static unordered_map<hsa_agent_t, vector<Agent_global>> agent_globals;
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
call_once(f, []() {
|
||||
for (auto&& agent_executables : hip_impl::executables()) {
|
||||
std::vector<Agent_global> tmp0;
|
||||
vector<Agent_global> tmp0;
|
||||
for (auto&& executable : agent_executables.second) {
|
||||
auto tmp1 = read_agent_globals(
|
||||
agent_executables.first, executable);
|
||||
tmp0.insert(
|
||||
tmp0.end(),
|
||||
std::make_move_iterator(tmp1.begin()),
|
||||
std::make_move_iterator(tmp1.end()));
|
||||
make_move_iterator(tmp1.begin()),
|
||||
make_move_iterator(tmp1.end()));
|
||||
}
|
||||
agent_globals.emplace(agent_executables.first, std::move(tmp0));
|
||||
agent_globals.emplace(agent_executables.first, move(tmp0));
|
||||
}
|
||||
});
|
||||
|
||||
@@ -634,81 +425,129 @@ namespace
|
||||
|
||||
if (it == agent_globals.cend()) return hipErrorNotInitialized;
|
||||
|
||||
std::tie(*dptr, *bytes) = read_global_description(
|
||||
tie(*dptr, *bytes) = read_global_description(
|
||||
it->second.cbegin(), it->second.cend(), name);
|
||||
|
||||
return dptr ? hipSuccess : hipErrorNotFound;
|
||||
}
|
||||
|
||||
// Walks the symbols that `executable` defines for the current agent and
// returns the first kernel symbol whose name equals `kname`. When nothing
// matches, the value-initialised symbol is returned.
hsa_executable_symbol_t find_kernel_by_name(
    hsa_executable_t executable, const char* kname)
{
    using Query = pair<const char*, hsa_executable_symbol_t>;

    Query query{kname, {}};

    // Captureless so that it converts to the plain callback HSA expects; the
    // query travels through the opaque user-data pointer instead.
    const auto visit = [](
        hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t sym, void* state) {
        auto q = static_cast<Query*>(state);

        const bool matches =
            type(sym) == HSA_SYMBOL_KIND_KERNEL && name(sym) == q->first;
        if (!matches) return HSA_STATUS_SUCCESS;

        q->second = sym;

        return HSA_STATUS_INFO_BREAK; // Stop iterating, the kernel was found.
    };

    hsa_executable_iterate_agent_symbols(
        executable, this_agent(), visit, &query);

    return query.second;
}
|
||||
|
||||
// Copies the ELF image that `file` points to into a string; returns an empty
// string when `file` is null.
//
// Precondition: file points to an ELF image that was BITWISE loaded into
//               process accessible memory, and not one loaded by the loader.
//               This is because in the latter case alignment may differ,
//               which will break the size computation.
//               The image is Elf64, and matches endianness i.e. it is
//               Little Endian.
string read_elf_file_as_string(const void* file)
{
    if (!file) return {};

    auto h = static_cast<const Elf64_Ehdr*>(file);
    auto s = static_cast<const char*>(file);

    // This assumes the common case of SHT being the last part of the ELF.
    // e_shoff is already measured from the start of the image, so the header
    // size must not be added on top of it: doing so read sizeof(Elf64_Ehdr)
    // bytes past the end of the image.
    const uint64_t sht_end = h->e_shoff + h->e_shentsize * h->e_shnum;
    // An image is never smaller than its own header (covers a missing SHT).
    const uint64_t sz =
        sht_end < sizeof(Elf64_Ehdr) ? sizeof(Elf64_Ehdr) : sht_end;

    return string{s, s + sz};
}
|
||||
} // Anonymous namespace, internal linkage.
|
||||
|
||||
// Resolves the kernel called `name` in module `hmod` and stores a newly
// allocated ihipModuleSymbol_t describing it in `*func`.
//
// Returns hipErrorInvalidValue for null arguments, hipErrorInvalidContext when
// there is no current context, hipErrorNotFound when the module's executable
// has no kernel with that name (in which case `*func` is left untouched), and
// hipSuccess otherwise.
hipError_t ihipModuleGetFunction(
    hipFunction_t *func, hipModule_t hmod, const char *name)
{
    HIP_INIT_API(func, hmod, name);

    if (!func || !hmod || !name) return ihipLogStatus(hipErrorInvalidValue);

    auto ctx = ihipGetTlsDefaultCtx();

    if (!ctx) return ihipLogStatus(hipErrorInvalidContext);

    // Look the kernel up before allocating anything: allocating first leaked
    // the symbol whenever the lookup failed.
    auto kernel = find_kernel_by_name(hmod->executable, name);

    if (kernel.handle == 0u) return ihipLogStatus(hipErrorNotFound);

    // Held by a unique_ptr while it is being filled in, so that a throwing
    // string assignment cannot leak it either.
    std::unique_ptr<ihipModuleSymbol_t> sym{new ihipModuleSymbol_t};

    sym->_object = kernel_object(kernel);
    sym->_groupSegmentSize = group_size(kernel);
    sym->_privateSegmentSize = private_size(kernel);
    sym->_name = name;

    *func = sym.release();

    return ihipLogStatus(hipSuccess);
}
|
||||
|
||||
// Public entry point: registers the API call, forwards to
// ihipModuleGetFunction and logs the resulting status.
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
                                const char *name)
{
    HIP_INIT_API(hfunc, hmod, name);

    const hipError_t result = ihipModuleGetFunction(hfunc, hmod, name);

    return ihipLogStatus(result);
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char* name)
|
||||
{
|
||||
HIP_INIT_API(dptr, bytes, hmod, name);
|
||||
hipError_t ret = hipSuccess;
|
||||
if(dptr == NULL || bytes == NULL){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
if(name == NULL){
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
}
|
||||
else{
|
||||
ret = hmod ?
|
||||
read_agent_global_from_module(dptr, bytes, hmod, name) :
|
||||
read_agent_global_from_process(dptr, bytes, name);
|
||||
|
||||
return ihipLogStatus(ret);
|
||||
}
|
||||
if(!dptr || !bytes) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
if(!name) return ihipLogStatus(hipErrorNotInitialized);
|
||||
|
||||
const auto r = hmod ?
|
||||
read_agent_global_from_module(dptr, bytes, hmod, name) :
|
||||
read_agent_global_from_process(dptr, bytes, name);
|
||||
|
||||
return ihipLogStatus(r);
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
HIP_INIT_API(module, image);
|
||||
hipError_t ret = hipSuccess;
|
||||
if(image == NULL || module == NULL){
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
} else {
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
*module = new ihipModule_t;
|
||||
int deviceId = ctx->getDevice()->_deviceId;
|
||||
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
|
||||
|
||||
void *p;
|
||||
uint64_t size = ElfSize(image);
|
||||
hsa_agent_t agent = currentDevice->_hsaAgent;
|
||||
hsa_region_t sysRegion;
|
||||
hsa_status_t status = hsa_agent_iterate_regions(agent, hipdrv::findSystemRegions, &sysRegion);
|
||||
status = hsa_memory_allocate(sysRegion, size, (void**)&p);
|
||||
if (!module) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
if(status != HSA_STATUS_SUCCESS){
|
||||
return ihipLogStatus(hipErrorOutOfMemory);
|
||||
}
|
||||
*module = new ihipModule_t;
|
||||
|
||||
char *ptr = (char*)p;
|
||||
if(!ptr){
|
||||
return ihipLogStatus(hipErrorOutOfMemory);
|
||||
}
|
||||
(*module)->ptr = p;
|
||||
(*module)->size = size;
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (!ctx) return ihipLogStatus(hipErrorInvalidContext);
|
||||
|
||||
memcpy(ptr, image, size);
|
||||
hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr,
|
||||
&(*module)->executable);
|
||||
|
||||
status = hsa_code_object_deserialize(ptr, size, NULL, &(*module)->object);
|
||||
(*module)->executable = hip_impl::load_executable(
|
||||
read_elf_file_as_string(image), (*module)->executable, this_agent());
|
||||
|
||||
if(status != HSA_STATUS_SUCCESS){
|
||||
return ihipLogStatus(hipErrorSharedObjectInitFailed);
|
||||
}
|
||||
|
||||
status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &(*module)->executable);
|
||||
CHECKLOG_HSA(status, hipErrorNotInitialized);
|
||||
|
||||
status = hsa_executable_load_code_object((*module)->executable, agent, (*module)->object, NULL);
|
||||
CHECKLOG_HSA(status, hipErrorNotInitialized);
|
||||
|
||||
status = hsa_executable_freeze((*module)->executable, NULL);
|
||||
CHECKLOG_HSA(status, hipErrorNotInitialized);
|
||||
}
|
||||
return ihipLogStatus(ret);
|
||||
return ihipLogStatus(
|
||||
(*module)->executable.handle ? hipSuccess : hipErrorUnknown);
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
|
||||
@@ -716,21 +555,20 @@ hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned
|
||||
return hipModuleLoadData(module, image);
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name)
|
||||
hipError_t hipModuleGetTexRef(
|
||||
textureReference** texRef, hipModule_t hmod, const char* name)
|
||||
{
|
||||
HIP_INIT_API(texRef, hmod, name);
|
||||
|
||||
hipError_t ret = hipErrorNotFound;
|
||||
if(texRef == NULL){
|
||||
ret = hipErrorInvalidValue;
|
||||
} else {
|
||||
if(name == NULL || hmod == NULL){
|
||||
ret = hipErrorNotInitialized;
|
||||
} else{
|
||||
const auto it = hip_impl::globals().find(name);
|
||||
if (it == hip_impl::globals().end()) return ihipLogStatus(hipErrorInvalidValue);
|
||||
*texRef = reinterpret_cast<textureReference*>(it->second.get());
|
||||
ret = hipSuccess;
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(ret);
|
||||
if(!texRef) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
if(!hmod || !name) return ihipLogStatus(hipErrorNotInitialized);
|
||||
|
||||
const auto it = globals().find(name);
|
||||
if (it == globals().end()) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
*texRef = static_cast<textureReference*>(it->second.get());
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
inline
|
||||
void* address(hsa_executable_symbol_t x)
|
||||
{
|
||||
void* r = nullptr;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_agent_t agent(hsa_executable_symbol_t x)
|
||||
{
|
||||
hsa_agent_t r = {};
|
||||
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint32_t group_size(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint32_t r = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint64_t kernel_object(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint64_t r = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string name(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint32_t sz = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
|
||||
|
||||
std::string r(sz, '\0');
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint32_t private_size(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint32_t r = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint32_t size(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint32_t r = 0;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_symbol_kind_t type(hsa_executable_symbol_t x)
|
||||
{
|
||||
hsa_symbol_kind_t r = {};
|
||||
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
}
|
||||
+31
-104
@@ -3,6 +3,7 @@
|
||||
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
|
||||
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "hsa_helpers.hpp"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include "elfio/elfio.hpp"
|
||||
@@ -146,13 +147,11 @@ namespace
|
||||
|
||||
void associate_code_object_symbols_with_host_allocation(
|
||||
const elfio& reader,
|
||||
const elfio& self_reader,
|
||||
section* code_object_dynsym,
|
||||
section* process_symtab,
|
||||
hsa_agent_t agent,
|
||||
hsa_executable_t executable)
|
||||
{
|
||||
if (!code_object_dynsym || !process_symtab) return;
|
||||
if (!code_object_dynsym) return;
|
||||
|
||||
const auto undefined_symbols = copy_names_of_undefined_symbols(
|
||||
symbol_section_accessor{reader, code_object_dynsym});
|
||||
@@ -294,68 +293,6 @@ namespace
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_agent_t agent(hsa_executable_symbol_t x)
|
||||
{
|
||||
hsa_agent_t r = {};
|
||||
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
uint32_t group_size(hsa_executable_symbol_t x)
|
||||
{
|
||||
uint32_t r = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
uint64_t kernel_object(hsa_executable_symbol_t x)
|
||||
{
|
||||
uint64_t r = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
string name(hsa_executable_symbol_t x)
|
||||
{
|
||||
uint32_t sz = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
|
||||
|
||||
string r(sz, '\0');
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
uint32_t private_size(hsa_executable_symbol_t x)
|
||||
{
|
||||
uint32_t r = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_symbol_kind_t type(hsa_executable_symbol_t x)
|
||||
{
|
||||
hsa_symbol_kind_t r = {};
|
||||
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels()
|
||||
{
|
||||
static unordered_map<string, vector<hsa_executable_symbol_t>> r;
|
||||
@@ -384,42 +321,43 @@ namespace
|
||||
}
|
||||
|
||||
void load_code_object_and_freeze_executable(
|
||||
istream& file, hsa_agent_t agent, hsa_executable_t executable)
|
||||
const string& file, hsa_agent_t agent, hsa_executable_t executable)
|
||||
{ // TODO: the following sequence is inefficient, should be refactored
|
||||
// into a single load of the file and subsequent ELFIO
|
||||
// processing.
|
||||
static const auto cor_deleter = [](hsa_code_object_reader_t* p) {
|
||||
hsa_code_object_reader_destroy(*p);
|
||||
if (p) {
|
||||
hsa_code_object_reader_destroy(*p);
|
||||
delete p;
|
||||
}
|
||||
};
|
||||
|
||||
using RAII_code_reader = unique_ptr<
|
||||
hsa_code_object_reader_t, decltype(cor_deleter)>;
|
||||
|
||||
file.seekg(0);
|
||||
if (!file.empty()) {
|
||||
RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter};
|
||||
hsa_code_object_reader_create_from_memory(
|
||||
file.data(), file.size(), tmp.get());
|
||||
|
||||
vector<uint8_t> blob{
|
||||
istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter};
|
||||
hsa_code_object_reader_create_from_memory(
|
||||
blob.data(), blob.size(), tmp.get());
|
||||
hsa_executable_load_agent_code_object(
|
||||
executable, agent, *tmp, nullptr, nullptr);
|
||||
|
||||
hsa_executable_load_agent_code_object(
|
||||
executable, agent, *tmp, nullptr, nullptr);
|
||||
hsa_executable_freeze(executable, nullptr);
|
||||
|
||||
hsa_executable_freeze(executable, nullptr);
|
||||
static vector<RAII_code_reader> code_readers;
|
||||
static mutex mtx;
|
||||
|
||||
static vector<RAII_code_reader> code_readers;
|
||||
static mutex mtx;
|
||||
|
||||
lock_guard<mutex> lck{mtx};
|
||||
code_readers.push_back(move(tmp));
|
||||
lock_guard<mutex> lck{mtx};
|
||||
code_readers.push_back(move(tmp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
const unordered_map<hsa_agent_t, vector<hsa_executable_t>>& executables()
|
||||
{
|
||||
{ // TODO: This leaks the hsa_executable_ts, it should use RAII.
|
||||
static unordered_map<hsa_agent_t, vector<hsa_executable_t>> r;
|
||||
static once_flag f;
|
||||
|
||||
@@ -449,8 +387,7 @@ namespace hip_impl
|
||||
// TODO: this is massively inefficient and only
|
||||
// meant for illustration.
|
||||
string blob_to_str{blob.cbegin(), blob.cend()};
|
||||
stringstream istr{blob_to_str};
|
||||
tmp = load_executable(tmp, a, istr);
|
||||
tmp = load_executable(blob_to_str, tmp, a);
|
||||
|
||||
if (tmp.handle) r[a].push_back(tmp);
|
||||
}
|
||||
@@ -535,33 +472,23 @@ namespace hip_impl
|
||||
}
|
||||
|
||||
// Parses the code object held in `file` as ELF, associates the symbols
// referenced by its dynamic symbol table with host allocations, loads the
// code object into `executable` for `agent` and freezes the executable.
//
// file:       the raw bytes of the code object (not a path).
// executable: the executable to populate.
// agent:      the agent the code object is loaded for.
//
// Returns `executable` on success, or a value-initialized hsa_executable_t
// when ELFIO cannot parse `file`.
hsa_executable_t load_executable(
    const string& file, hsa_executable_t executable, hsa_agent_t agent)
{
    elfio reader;
    // The reader is loaded from a stream here, so wrap the in-memory bytes.
    stringstream tmp{file};

    if (!reader.load(tmp)) return hsa_executable_t{};

    const auto code_object_dynsym =
        find_section_if(reader, [](const ELFIO::section* x) {
            return x->get_type() == SHT_DYNSYM;
        });

    associate_code_object_symbols_with_host_allocation(
        reader, code_object_dynsym, agent, executable);

    load_code_object_and_freeze_executable(file, agent, executable);

    return executable;
}
|
||||
} // Namespace hip_impl.
|
||||
Referens i nytt ärende
Block a user