Arquivos
rocm-systems/runtime/hsa-runtime/loader/executable.cpp
T
Sean Keely 465a8eb40b PR from github user DiamondLovesYou.
Allow user specified profiles if the HSAIL note is not found.

Konstantin reviewed and approved.  HSAIL note is not generated by LLVM.

Change-Id: I40fbfbaedd6787b6a716507918f698d02007afe1
2019-07-16 13:55:38 -05:00

1784 linhas
57 KiB
C++

////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "executable.hpp"
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <atomic>
#include <fstream>
#include <libelf.h>
#include "amd_hsa_elf.h"
#include "amd_hsa_kernel_code.h"
#include "amd_hsa_code.hpp"
#include "amd_hsa_code_util.hpp"
#include "amd_options.hpp"
#include "AMDHSAKernelDescriptor.h"
using namespace amd::hsa;
using namespace amd::hsa::common;
namespace amd {
namespace hsa {
namespace loader {
/// @brief Options understood by the loader, bundled with the parser that
/// fills them in from an option string.
class LoaderOptions {
public:
  /// @brief Constructs the options and registers them with the parser.
  /// @param error Stream forwarded to the option parser (std::cerr by default).
  explicit LoaderOptions(std::ostream &error = std::cerr);

  /// @brief Copying is not available.
  LoaderOptions(const LoaderOptions&) = delete;
  /// @brief Assignment is not available.
  LoaderOptions& operator=(const LoaderOptions&) = delete;

  // Read-only access to the individual options.
  const amd::options::NoArgOption* Help() const { return &help; }
  const amd::options::NoArgOption* DumpCode() const { return &dump_code; }
  const amd::options::NoArgOption* DumpIsa() const { return &dump_isa; }
  const amd::options::NoArgOption* DumpExec() const { return &dump_exec; }
  const amd::options::NoArgOption* DumpAll() const { return &dump_all; }
  const amd::options::ValueOption<std::string>* DumpDir() const { return &dump_dir; }
  const amd::options::PrefixOption* Substitute() const { return &substitute; }

  /// @brief Parses @p options with the internal option parser.
  bool ParseOptions(const std::string& options);
  /// @brief Resets the internal option parser.
  void Reset();
  /// @brief Prints the parser's help text to @p out.
  void PrintHelp(std::ostream& out) const;

private:
  amd::options::NoArgOption help;
  amd::options::NoArgOption dump_code;
  amd::options::NoArgOption dump_isa;
  amd::options::NoArgOption dump_exec;
  amd::options::NoArgOption dump_all;
  amd::options::ValueOption<std::string> dump_dir;
  amd::options::PrefixOption substitute;
  amd::options::OptionParser option_parser;
};
/// @brief Builds every option with its name and help text, then hands each
/// one to the option parser.
/// @param error Stream forwarded to the option parser's constructor.
LoaderOptions::LoaderOptions(std::ostream& error) :
help("help", "print help"),
dump_code("dump-code", "Dump finalizer output code object"),
dump_isa("dump-isa", "Dump finalizer output to ISA text file"),
dump_exec("dump-exec", "Dump executable to text file"),
dump_all("dump-all", "Dump all finalizer input and output (as above)"),
dump_dir("dump-dir", "Dump directory"),
substitute("substitute", "Substitute code object with given index or index range on loading from file"),
option_parser(false, error)
{
// Hand each option to the parser.
option_parser.AddOption(&help);
option_parser.AddOption(&dump_code);
option_parser.AddOption(&dump_isa);
option_parser.AddOption(&dump_exec);
option_parser.AddOption(&dump_all);
option_parser.AddOption(&dump_dir);
option_parser.AddOption(&substitute);
}
/// @brief Feeds an option string to the internal option parser.
/// @param options Option text; handed to the parser as a C string.
/// @return The result reported by the parser.
bool LoaderOptions::ParseOptions(const std::string& options)
{
  const char *raw_options = options.c_str();
  return option_parser.ParseOptions(raw_options);
}
/// @brief Forwards to the option parser's Reset().
void LoaderOptions::Reset()
{
option_parser.Reset();
}
/// @brief Forwards to the option parser's PrintHelp().
/// @param out Stream the help text is written to.
void LoaderOptions::PrintHelp(std::ostream& out) const
{
option_parser.PrintHelp(out);
}
// Prefix passed to amd::hsa::DumpFileName() when the loader writes dump files.
static const char *LOADER_DUMP_PREFIX = "amdcode";
/// @brief Allocates a new AmdHsaCodeLoader bound to @p context.
/// @return The new loader; release it with Loader::Destroy().
Loader* Loader::Create(Context* context)
{
  Loader *created = new AmdHsaCodeLoader(context);
  return created;
}
/// @brief Deletes @p loader.
void Loader::Destroy(Loader *loader)
{
delete loader;
}
/// @brief Creates a new executable and appends it to the loader's list.
///
/// The executable's id is the size of the list before insertion, so it is
/// also the executable's index in that list. @p options is not used here.
/// @return The newly created executable.
Executable* AmdHsaCodeLoader::CreateExecutable(
    hsa_profile_t profile, const char *options, hsa_default_float_rounding_mode_t default_float_rounding_mode)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);

  const size_t new_id = executables.size();
  ExecutableImpl *created =
      new ExecutableImpl(profile, context, new_id, default_float_rounding_mode);
  executables.push_back(created);
  return executables.back();
}
/// @brief Deletes @p executable and clears its slot in the loader's list.
///
/// The slot is set to nullptr rather than erased, so the ids of the other
/// executables remain valid indices.
void AmdHsaCodeLoader::DestroyExecutable(Executable *executable)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);

  ExecutableImpl *impl = (ExecutableImpl*)executable;
  executables[impl->id()] = nullptr;
  delete executable;
}
/// @brief Invokes @p callback once for every live executable.
///
/// Slots belonging to destroyed executables are nullptr (see
/// DestroyExecutable) and are skipped, matching how the other member
/// functions walk the list.
/// @param callback Called with the executable handle and @p data; must not be
///        null.
/// @param data Opaque pointer passed through to @p callback.
/// @return The first non-success status returned by @p callback, otherwise
///         HSA_STATUS_SUCCESS.
hsa_status_t AmdHsaCodeLoader::IterateExecutables(
    hsa_status_t (*callback)(
        hsa_executable_t executable,
        void *data),
    void *data)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  assert(callback);

  for (auto &exec : executables) {
    if (exec == nullptr) {
      continue;
    }
    hsa_status_t status = callback(Executable::Handle(exec), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }
  return HSA_STATUS_SUCCESS;
}
/// @brief Reports the number of segment descriptors, or fills the caller's
/// array with them.
///
/// When *@p num_segment_descriptors is 0 and @p segment_descriptors is null,
/// the total count is written back. Otherwise the supplied count must equal
/// the total, and the array is filled executable by executable.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for a null count pointer or a
///         count/array mismatch (zero count with an array, or non-zero count
///         without one); HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS when the
///         supplied count differs from the total; HSA_STATUS_SUCCESS otherwise.
hsa_status_t AmdHsaCodeLoader::QuerySegmentDescriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors)
{
  if (!num_segment_descriptors) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // A zero count goes with a null array and a non-zero count with a real one.
  const bool size_query = (*num_segment_descriptors == 0);
  const bool no_array = (segment_descriptors == nullptr);
  if (size_query != no_array) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  this->EnableReadOnlyMode();

  size_t available = 0;
  for (auto &executable : executables) {
    if (!executable) {
      continue;
    }
    available += executable->GetNumSegmentDescriptors();
  }

  hsa_status_t result = HSA_STATUS_SUCCESS;
  if (size_query) {
    *num_segment_descriptors = available;
  } else if (*num_segment_descriptors != available) {
    result = HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
  } else {
    size_t next_slot = 0;
    for (auto &executable : executables) {
      if (!executable) {
        continue;
      }
      next_slot += executable->QuerySegmentDescriptors(segment_descriptors, available, next_slot);
    }
  }

  this->DisableReadOnlyMode();
  return result;
}
/// @brief Asks each live executable for the host address matching
/// @p device_address.
/// @return The first non-zero answer, or 0 when @p device_address is 0 or no
///         executable reports a match.
uint64_t AmdHsaCodeLoader::FindHostAddress(uint64_t device_address)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);

  if (device_address != 0) {
    for (auto &exec : executables) {
      if (exec == nullptr) {
        continue;
      }
      const uint64_t host_address = exec->FindHostAddress(device_address);
      if (host_address != 0) {
        return host_address;
      }
    }
  }
  return 0;
}
void AmdHsaCodeLoader::PrintHelp(std::ostream& out)
{
LoaderOptions().PrintHelp(out);
}
void AmdHsaCodeLoader::EnableReadOnlyMode()
{
rw_lock_.ReaderLock();
for (auto &executable : executables) {
if (executable) {
((ExecutableImpl*)executable)->EnableReadOnlyMode();
}
}
}
/// @brief Takes every live executable out of read-only mode, then releases
/// the loader's reader lock.
///
/// Locks are released in the reverse of the order EnableReadOnlyMode()
/// acquires them. The loader lock must still be held while `executables` is
/// walked, otherwise a writer could modify the list during the loop.
void AmdHsaCodeLoader::DisableReadOnlyMode()
{
  for (auto &executable : executables) {
    if (executable) {
      ((ExecutableImpl*)executable)->DisableReadOnlyMode();
    }
  }
  rw_lock_.ReaderUnlock();
}
//===----------------------------------------------------------------------===//
// SymbolImpl. //
//===----------------------------------------------------------------------===//
/// @brief Writes the requested symbol attribute into @p value.
///
/// The code-symbol and executable-symbol attribute enumerators handled here
/// are asserted to share numeric values, so one switch serves both.
/// @param symbol_info Attribute to query.
/// @param value Destination; must not be null and must be large enough for
///        the attribute. Name attributes copy exactly size() bytes and write
///        no terminating NUL.
/// @return false for an unknown attribute, or for an address/agent query on a
///         symbol that is not loaded; true otherwise.
bool SymbolImpl::GetInfo(hsa_symbol_info32_t symbol_info, void *value) {
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_TYPE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_TYPE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_NAME_LENGTH) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_NAME) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_NAME)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_MODULE_NAME) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_LINKAGE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_IS_DEFINITION) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION)),
    "attributes are not compatible"
  );

  assert(value);

  switch (symbol_info) {
    case HSA_CODE_SYMBOL_INFO_TYPE: {
      *((hsa_symbol_kind_t*)value) = kind;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_NAME_LENGTH: {
      *((uint32_t*)value) = symbol_name.size();
      break;
    }
    case HSA_CODE_SYMBOL_INFO_NAME: {
      // Exactly size() bytes are copied; no NUL terminator is appended.
      memcpy(value, symbol_name.c_str(), symbol_name.size());
      break;
    }
    case HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH: {
      *((uint32_t*)value) = module_name.size();
      break;
    }
    case HSA_CODE_SYMBOL_INFO_MODULE_NAME: {
      // Exactly size() bytes are copied; no NUL terminator is appended.
      memcpy(value, module_name.c_str(), module_name.size());
      break;
    }
    case HSA_CODE_SYMBOL_INFO_LINKAGE: {
      *((hsa_symbol_linkage_t*)value) = linkage;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_IS_DEFINITION: {
      *((bool*)value) = is_definition;
      break;
    }
    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_CALL_CONVENTION: {
      *((uint32_t*)value) = 0;
      break;
    }
    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT:
    case HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS: {
      // The address is only reported for a loaded symbol.
      if (!is_loaded) {
        return false;
      }
      *((uint64_t*)value) = address;
      break;
    }
    case HSA_EXECUTABLE_SYMBOL_INFO_AGENT: {
      // The agent is only reported for a loaded symbol.
      if (!is_loaded) {
        return false;
      }
      *((hsa_agent_t*)value) = agent;
      break;
    }
    default: {
      return false;
    }
  }
  return true;
}
//===----------------------------------------------------------------------===//
// KernelSymbol. //
//===----------------------------------------------------------------------===//
/// @brief Writes a kernel-specific attribute into @p value, deferring every
/// other attribute to SymbolImpl::GetInfo().
/// @param symbol_info Attribute to query.
/// @param value Destination for the attribute; must not be null.
/// @return true when a kernel attribute was written, otherwise the result of
///         SymbolImpl::GetInfo().
bool KernelSymbol::GetInfo(hsa_symbol_info32_t symbol_info, void *value) {
  // The code-symbol and executable-symbol enumerators must agree in value.
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK)),
    "attributes are not compatible"
  );

  assert(value);

  switch (symbol_info) {
    case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE:
      *static_cast<uint32_t*>(value) = kernarg_segment_size;
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT:
      *static_cast<uint32_t*>(value) = kernarg_segment_alignment;
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE:
      *static_cast<uint32_t*>(value) = group_segment_size;
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE:
      *static_cast<uint32_t*>(value) = private_segment_size;
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK:
      *static_cast<bool*>(value) = is_dynamic_callstack;
      return true;
    case HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE:
      *static_cast<uint32_t*>(value) = size;
      return true;
    case HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN:
      *static_cast<uint32_t*>(value) = alignment;
      return true;
    default:
      // Not kernel-specific: let the base class handle it.
      return SymbolImpl::GetInfo(symbol_info, value);
  }
}
//===----------------------------------------------------------------------===//
// VariableSymbol. //
//===----------------------------------------------------------------------===//
/// @brief Writes a variable-specific attribute into @p value, deferring every
/// other attribute to SymbolImpl::GetInfo().
/// @param symbol_info Attribute to query.
/// @param value Destination for the attribute; must not be null.
/// @return true when a variable attribute was written, otherwise the result
///         of SymbolImpl::GetInfo().
bool VariableSymbol::GetInfo(hsa_symbol_info32_t symbol_info, void *value) {
  // The code-symbol and executable-symbol enumerators must agree in value.
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST)),
    "attributes are not compatible"
  );

  // Same precondition the other GetInfo() implementations check.
  assert(value);

  switch (symbol_info) {
    case HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION: {
      *((hsa_variable_allocation_t*)value) = allocation;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT: {
      *((hsa_variable_segment_t*)value) = segment;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT: {
      *((uint32_t*)value) = alignment;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE: {
      *((uint32_t*)value) = size;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST: {
      *((bool*)value) = is_constant;
      break;
    }
    default: {
      // Not variable-specific: let the base class handle it.
      return SymbolImpl::GetInfo(symbol_info, value);
    }
  }
  return true;
}
/// @brief Writes the requested loaded-code-object attribute into @p value.
/// @param attribute ELF image (stored as a handle) or ELF image size.
/// @param value Destination; must not be null.
/// @return false for any other attribute, true otherwise.
bool LoadedCodeObjectImpl::GetInfo(amd_loaded_code_object_info_t attribute, void *value)
{
  assert(value);

  switch (attribute) {
    case AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE: {
      hsa_code_object_t *image = (hsa_code_object_t*)value;
      image->handle = reinterpret_cast<uint64_t>(elf_data);
      return true;
    }
    case AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE: {
      size_t *image_size = (size_t*)value;
      *image_size = elf_size;
      return true;
    }
    default:
      return false;
  }
}
/// @brief Invokes @p callback for each loaded segment of this code object.
/// @param callback Called with the segment handle and @p data; must not be
///        null.
/// @param data Opaque pointer passed through to @p callback.
/// @return The first non-success status returned by @p callback, otherwise
///         HSA_STATUS_SUCCESS.
hsa_status_t LoadedCodeObjectImpl::IterateLoadedSegments(
    hsa_status_t (*callback)(
        amd_loaded_segment_t loaded_segment,
        void *data),
    void *data)
{
  assert(callback);

  for (auto &segment_entry : loaded_segments) {
    const hsa_status_t rc = callback(LoadedSegment::Handle(segment_entry), data);
    if (rc == HSA_STATUS_SUCCESS) {
      continue;
    }
    return rc;
  }
  return HSA_STATUS_SUCCESS;
}
/// @brief Writes a one-line "Code Object" marker to @p out.
void LoadedCodeObjectImpl::Print(std::ostream& out)
{
out << "Code Object" << std::endl;
}
/// @brief Writes the requested loaded-segment attribute into @p value.
/// @param attribute Segment type, ELF base address, load base address or size.
/// @param value Destination; must not be null.
/// @return false for any other attribute, true otherwise.
bool Segment::GetInfo(amd_loaded_segment_info_t attribute, void *value)
{
  assert(value);

  switch (attribute) {
    case AMD_LOADED_SEGMENT_INFO_TYPE:
      *static_cast<amdgpu_hsa_elf_segment_t*>(value) = segment;
      return true;
    case AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS:
      *static_cast<uint64_t*>(value) = vaddr;
      return true;
    case AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS:
      // Address at which the segment's own start (VAddr) was loaded.
      *static_cast<uint64_t*>(value) =
          reinterpret_cast<uint64_t>(this->Address(this->VAddr()));
      return true;
    case AMD_LOADED_SEGMENT_INFO_SIZE:
      *static_cast<size_t*>(value) = size;
      return true;
    default:
      return false;
  }
}
/// @brief Converts a virtual address into an offset from the segment start.
/// @param addr Virtual address; asserted to lie inside the segment.
uint64_t Segment::Offset(uint64_t addr)
{
  assert(IsAddressInSegment(addr));
  const uint64_t from_start = addr - vaddr;
  return from_start;
}
void* Segment::Address(uint64_t addr)
{
return owner->context()->SegmentAddress(segment, agent, ptr, Offset(addr));
}
/// @brief Freezes the segment through the owner's context unless it is
/// already frozen.
/// @return true if the segment was already frozen; otherwise the result of
///         the context's SegmentFreeze(), which is also stored in `frozen`.
bool Segment::Freeze()
{
  if (frozen) {
    return true;
  }
  frozen = owner->context()->SegmentFreeze(segment, agent, ptr, size);
  return frozen;
}
/// @brief Tests whether @p addr lies in [vaddr, vaddr + size).
bool Segment::IsAddressInSegment(uint64_t addr)
{
  if (!(vaddr <= addr)) {
    return false;
  }
  return addr < vaddr + size;
}
/// @brief Copies @p size bytes from @p src into the segment at virtual
/// address @p addr through the owner's context.
///
/// Must be called before the segment is frozen. A zero-sized copy does
/// nothing.
void Segment::Copy(uint64_t addr, const void* src, size_t size)
{
  // loader must do copies before freezing.
  assert(!frozen);

  if (size == 0) {
    return;
  }
  owner->context()->SegmentCopy(segment, agent, ptr, Offset(addr), src, size);
}
/// @brief Writes the segment's type, size, virtual address and pointer to
/// @p out.
void Segment::Print(std::ostream& out)
{
  out << "Segment" << std::endl;
  out << " Type: " << AmdHsaElfSegmentToString(segment);
  out << " Size: " << size;
  out << " VAddr: " << vaddr << std::endl;
  // Switch to hex for the pointer, then restore decimal output.
  out << " Ptr: " << std::hex << ptr << std::dec;
  out << std::endl;
}
/// @brief Releases the segment's memory through the owner's context
/// (SegmentFree).
void Segment::Destroy()
{
owner->context()->SegmentFree(segment, agent, ptr, size);
}
//===----------------------------------------------------------------------===//
// ExecutableImpl. //
//===----------------------------------------------------------------------===//
/// @brief Creates an executable in the unfrozen state with no program
/// allocation segment.
/// @param _profile Profile stored in the executable.
/// @param context Context stored for later segment and ISA operations.
/// @param id Identifier stored in the executable.
/// @param default_float_rounding_mode Rounding mode stored in the executable.
ExecutableImpl::ExecutableImpl(
const hsa_profile_t &_profile,
Context *context,
size_t id,
hsa_default_float_rounding_mode_t default_float_rounding_mode)
: Executable()
, profile_(_profile)
, context_(context)
, id_(id)
, default_float_rounding_mode_(default_float_rounding_mode)
, state_(HSA_EXECUTABLE_STATE_UNFROZEN)
, program_allocation_segment(nullptr)
{
}
/// @brief Destroys and deletes every owned executable object, then deletes
/// every program and agent symbol.
ExecutableImpl::~ExecutableImpl() {
  for (ExecutableObject* owned : objects) {
    owned->Destroy();
    delete owned;
  }
  objects.clear();

  for (auto &program_entry : program_symbols_) {
    delete program_entry.second;
  }

  for (auto &agent_entry : agent_symbols_) {
    delete agent_entry.second;
  }
}
/// @brief Registers an externally defined program-allocation variable.
/// @param name Variable name; must not be null.
/// @param address Address recorded for the variable.
/// @return HSA_STATUS_ERROR_FROZEN_EXECUTABLE if the executable is frozen,
///         HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED if a program symbol with
///         that name exists, HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableImpl::DefineProgramExternalVariable(
    const char *name, void *address)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  assert(name);

  if (HSA_EXECUTABLE_STATE_FROZEN == state_) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  const bool already_defined =
      program_symbols_.find(std::string(name)) != program_symbols_.end();
  if (already_defined) {
    return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
  }

  VariableSymbol *external_variable =
      new VariableSymbol(true,
                         "", // Only program linkage symbols can be
                             // defined.
                         std::string(name),
                         HSA_SYMBOL_LINKAGE_PROGRAM,
                         true,
                         HSA_VARIABLE_ALLOCATION_PROGRAM,
                         HSA_VARIABLE_SEGMENT_GLOBAL,
                         0,     // TODO: size.
                         0,     // TODO: align.
                         false, // TODO: const.
                         true,
                         reinterpret_cast<uint64_t>(address));
  program_symbols_.insert(std::make_pair(std::string(name), external_variable));
  return HSA_STATUS_SUCCESS;
}
/// @brief Registers an externally defined agent-allocation variable.
/// @param name Variable name; must not be null.
/// @param agent Agent the variable belongs to; part of the lookup key.
/// @param segment Segment recorded for the variable.
/// @param address Address recorded for the variable.
/// @return HSA_STATUS_ERROR_FROZEN_EXECUTABLE if the executable is frozen,
///         HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED if the (name, agent)
///         pair is already present, HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableImpl::DefineAgentExternalVariable(
    const char *name,
    hsa_agent_t agent,
    hsa_variable_segment_t segment,
    void *address)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  assert(name);

  if (HSA_EXECUTABLE_STATE_FROZEN == state_) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  const bool already_defined =
      agent_symbols_.find(std::make_pair(std::string(name), agent)) != agent_symbols_.end();
  if (already_defined) {
    return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
  }

  VariableSymbol *external_variable =
      new VariableSymbol(true,
                         "", // Only program linkage symbols can be
                             // defined.
                         std::string(name),
                         HSA_SYMBOL_LINKAGE_PROGRAM,
                         true,
                         HSA_VARIABLE_ALLOCATION_AGENT,
                         segment,
                         0,     // TODO: size.
                         0,     // TODO: align.
                         false, // TODO: const.
                         true,
                         reinterpret_cast<uint64_t>(address));

  auto insert_status = agent_symbols_.insert(
      std::make_pair(std::make_pair(std::string(name), agent), external_variable));
  assert(insert_status.second);

  // Record the owning agent on the stored symbol.
  insert_status.first->second->agent = agent;
  return HSA_STATUS_SUCCESS;
}
/// @brief Tells whether a program symbol named @p symbol_name exists.
/// @param symbol_name Name to look up; must not be null.
bool ExecutableImpl::IsProgramSymbol(const char *symbol_name) {
  assert(symbol_name);
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);

  auto found = program_symbols_.find(std::string(symbol_name));
  return found != program_symbols_.end();
}
/// @brief Looks up a symbol while holding the reader lock; the lookup itself
/// is done by GetSymbolInternal().
Symbol* ExecutableImpl::GetSymbol(
    const char *symbol_name,
    const hsa_agent_t *agent)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  Symbol *found = this->GetSymbolInternal(symbol_name, agent);
  return found;
}
/// @brief Looks up a symbol by name without taking any lock.
/// @param symbol_name Name to look up; must not be null.
/// @param agent When null, the program symbols are searched; otherwise the
///        agent symbols are searched for the (name, *agent) pair.
/// @return The symbol, or nullptr if the name is empty or nothing matches.
Symbol* ExecutableImpl::GetSymbolInternal(
    const char *symbol_name,
    const hsa_agent_t *agent)
{
  assert(symbol_name);

  const std::string mangled_name(symbol_name);
  if (mangled_name.empty()) {
    return nullptr;
  }

  if (agent) {
    auto agent_hit = agent_symbols_.find(std::make_pair(mangled_name, *agent));
    if (agent_hit == agent_symbols_.end()) {
      return nullptr;
    }
    return agent_hit->second;
  }

  auto program_hit = program_symbols_.find(mangled_name);
  if (program_hit == program_symbols_.end()) {
    return nullptr;
  }
  return program_hit->second;
}
/// @brief Invokes @p callback for every program symbol, then for every agent
/// symbol.
/// @param callback Called with the executable handle, symbol handle and
///        @p data; must not be null.
/// @param data Opaque pointer passed through to @p callback.
/// @return The first non-success status returned by @p callback, otherwise
///         HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateSymbols(
    iterate_symbols_f callback, void *data)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &program_entry : program_symbols_) {
    const hsa_status_t status =
        callback(Executable::Handle(this), Symbol::Handle(program_entry.second), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  for (auto &agent_entry : agent_symbols_) {
    const hsa_status_t status =
        callback(Executable::Handle(this), Symbol::Handle(agent_entry.second), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Invokes @p callback for every agent symbol whose agent handle
/// equals that of @p agent.
/// @param agent Agent whose symbols are visited.
/// @param callback Called with the executable handle, @p agent, the symbol
///        handle and @p data; must not be null.
/// @param data Opaque pointer passed through to @p callback.
/// @return The first non-success status returned by @p callback, otherwise
///         HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateAgentSymbols(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_agent_t agent,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &agent_entry : agent_symbols_) {
    const bool same_agent = (agent_entry.second->GetAgent().handle == agent.handle);
    if (same_agent) {
      const hsa_status_t rc = callback(
          Executable::Handle(this), agent, Symbol::Handle(agent_entry.second),
          data);
      if (rc != HSA_STATUS_SUCCESS) {
        return rc;
      }
    }
  }
  return HSA_STATUS_SUCCESS;
}
/// @brief Invokes @p callback for every program symbol.
/// @param callback Called with the executable handle, symbol handle and
///        @p data; must not be null.
/// @param data Opaque pointer passed through to @p callback.
/// @return The first non-success status returned by @p callback, otherwise
///         HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateProgramSymbols(
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &program_entry : program_symbols_) {
    const hsa_status_t rc = callback(
        Executable::Handle(this), Symbol::Handle(program_entry.second), data);
    if (rc == HSA_STATUS_SUCCESS) {
      continue;
    }
    return rc;
  }
  return HSA_STATUS_SUCCESS;
}
/// @brief Invokes @p callback for every loaded code object of this
/// executable.
/// @param callback Called with the executable handle, loaded-code-object
///        handle and @p data; must not be null.
/// @param data Opaque pointer passed through to @p callback.
/// @return The first non-success status returned by @p callback, otherwise
///         HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateLoadedCodeObjects(
    hsa_status_t (*callback)(
        hsa_executable_t executable,
        hsa_loaded_code_object_t loaded_code_object,
        void *data),
    void *data)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &lco : loaded_code_objects) {
    const hsa_status_t rc =
        callback(Executable::Handle(this), LoadedCodeObject::Handle(lco), data);
    if (rc != HSA_STATUS_SUCCESS) {
      return rc;
    }
  }
  return HSA_STATUS_SUCCESS;
}
/// @brief Sums the number of loaded segments over all loaded code objects.
///
/// Takes no lock itself; the caller is expected to have enabled read-only
/// mode.
size_t ExecutableImpl::GetNumSegmentDescriptors()
{
  // assuming we are in readonly mode.
  size_t total = 0;
  for (auto &lco : loaded_code_objects) {
    total += lco->LoadedSegments().size();
  }
  return total;
}
/// @brief Fills @p segment_descriptors with one entry per loaded segment,
/// starting at index @p first_empty_segment_descriptor.
///
/// Takes no lock itself; the caller is expected to have enabled read-only
/// mode.
/// @param segment_descriptors Destination array; must not be null.
/// @param total_num_segment_descriptors Capacity of the array, used only for
///        bounds assertions.
/// @param first_empty_segment_descriptor Index of the first slot to write.
///        May equal the capacity when this executable has no segments left to
///        write, as happens when the loader visits an executable without
///        segments after the array is already full.
/// @return Number of descriptors written.
size_t ExecutableImpl::QuerySegmentDescriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t total_num_segment_descriptors,
    size_t first_empty_segment_descriptor)
{
  // assuming we are in readonly mode.
  assert(segment_descriptors);
  assert(first_empty_segment_descriptor <= total_num_segment_descriptors);

  size_t i = first_empty_segment_descriptor;
  for (auto &obj : loaded_code_objects) {
    for (auto &seg : obj->LoadedSegments()) {
      // Checked per write, so objects without segments never trip it.
      assert(i < total_num_segment_descriptors);
      segment_descriptors[i].agent = seg->Agent();
      segment_descriptors[i].executable = Executable::Handle(seg->Owner());
      segment_descriptors[i].code_object_storage_type = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY;
      segment_descriptors[i].code_object_storage_base = obj->ElfData();
      segment_descriptors[i].code_object_storage_size = obj->ElfSize();
      segment_descriptors[i].code_object_storage_offset = seg->StorageOffset();
      segment_descriptors[i].segment_base = seg->Address(seg->VAddr());
      segment_descriptors[i].segment_size = seg->Size();
      ++i;
    }
  }
  return i - first_empty_segment_descriptor;
}
/// @brief Returns the agent of the code object's single loaded segment.
hsa_agent_t LoadedCodeObjectImpl::getAgent() const {
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const hsa_agent_t segment_agent = loaded_segments.front()->Agent();
  return segment_agent;
}
/// @brief Returns the handle of the executable that owns the code object's
/// single loaded segment.
hsa_executable_t LoadedCodeObjectImpl::getExecutable() const {
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const hsa_executable_t owner_handle =
      Executable::Handle(loaded_segments.front()->Owner());
  return owner_handle;
}
/// @brief Returns the ELF data pointer as a 64-bit integer.
uint64_t LoadedCodeObjectImpl::getElfData() const {
  const uint64_t elf_address = reinterpret_cast<uint64_t>(elf_data);
  return elf_address;
}
/// @brief Returns the ELF image size widened to 64 bits.
uint64_t LoadedCodeObjectImpl::getElfSize() const {
  return static_cast<uint64_t>(elf_size);
}
/// @brief Returns the storage offset of the code object's single loaded
/// segment.
uint64_t LoadedCodeObjectImpl::getStorageOffset() const {
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const uint64_t storage_offset = (uint64_t)loaded_segments.front()->StorageOffset();
  return storage_offset;
}
/// @brief Returns, as a 64-bit integer, the address the single loaded
/// segment reports for virtual address 0.
uint64_t LoadedCodeObjectImpl::getLoadBase() const {
  // TODO Add support for code objects with 0 segments.
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  void *base = loaded_segments.front()->Address(0);
  return reinterpret_cast<uint64_t>(base);
}
/// @brief Returns the size of the code object's single loaded segment.
uint64_t LoadedCodeObjectImpl::getLoadSize() const {
  // TODO Add support for code objects with 0 or >1 segments.
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const uint64_t load_size = (uint64_t)loaded_segments.front()->Size();
  return load_size;
}
/// @brief Returns the difference between the load base and the single loaded
/// segment's virtual address.
int64_t LoadedCodeObjectImpl::getDelta() const {
  // TODO Add support for code objects with 0 segments.
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const int64_t delta = getLoadBase() - loaded_segments.front()->VAddr();
  return delta;
}
/// @brief Finds the executable that can map @p device_address to a host
/// address.
/// @return Handle of the first live executable whose FindHostAddress()
///         returns non-zero, or a zero handle when @p device_address is 0 or
///         no executable matches.
hsa_executable_t AmdHsaCodeLoader::FindExecutable(uint64_t device_address)
{
  hsa_executable_t not_found = {0};
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);

  if (device_address != 0) {
    for (auto &exec : executables) {
      if (exec == nullptr) {
        continue;
      }
      const uint64_t host_address = exec->FindHostAddress(device_address);
      if (host_address != 0) {
        return Executable::Handle(exec);
      }
    }
  }
  return not_found;
}
/// @brief Maps @p device_address to a host address using the first loaded
/// segment whose loaded range contains it.
/// @return The host address reported by the context for that segment (0 if
///         the context returns null), or 0 when no segment contains the
///         address.
uint64_t ExecutableImpl::FindHostAddress(uint64_t device_address)
{
  for (auto &obj : loaded_code_objects) {
    assert(obj);
    for (auto &seg : obj->LoadedSegments()) {
      assert(seg);
      // Address at which the segment's start was loaded.
      const uint64_t base = (uint64_t)(uintptr_t)seg->Address(seg->VAddr());
      if (device_address < base || device_address >= base + seg->Size()) {
        continue;
      }
      void *host = context_->SegmentHostAddress(
          seg->ElfSegment(), seg->Agent(), seg->Ptr(), device_address - base);
      if (host == nullptr) {
        return 0;
      }
      return (uint64_t)(uintptr_t)host;
    }
  }
  return 0;
}
/// @brief Takes this executable's reader lock; paired with
/// DisableReadOnlyMode().
void ExecutableImpl::EnableReadOnlyMode()
{
rw_lock_.ReaderLock();
}
/// @brief Releases this executable's reader lock.
void ExecutableImpl::DisableReadOnlyMode()
{
rw_lock_.ReaderUnlock();
}
// Returns `hsc` from the enclosing function (after asserting in debug builds)
// when it is not HSA_STATUS_SUCCESS. The last line deliberately has no
// trailing backslash, so the macro cannot absorb whatever source line follows.
#define HSAERRCHECK(hsc)                                                       \
  if ((hsc) != HSA_STATUS_SUCCESS) {                                           \
    assert(false);                                                             \
    return (hsc);                                                              \
  }
/// @brief Writes the requested executable attribute into @p value.
/// @param executable_info Profile, state or default float rounding mode.
/// @param value Destination; must not be null.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for any other attribute,
///         HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableImpl::GetInfo(
    hsa_executable_info_t executable_info, void *value)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(value);

  switch (executable_info) {
    case HSA_EXECUTABLE_INFO_PROFILE:
      *static_cast<hsa_profile_t*>(value) = profile_;
      return HSA_STATUS_SUCCESS;
    case HSA_EXECUTABLE_INFO_STATE:
      *static_cast<hsa_executable_state_t*>(value) = state_;
      return HSA_STATUS_SUCCESS;
    case HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
      *static_cast<hsa_default_float_rounding_mode_t*>(value) =
          default_float_rounding_mode_;
      return HSA_STATUS_SUCCESS;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
}
/// @brief Hands out consecutive code object numbers, starting at 1.
///
/// The counter is a function-local atomic, so every call gets its own value.
static uint32_t NextCodeObjectNum()
{
  static std::atomic_uint_fast32_t next_number(1);
  // fetch_add returns the value held before the increment, like post-increment.
  return next_number.fetch_add(1);
}
/// @brief Convenience overload: forwards to the five-argument
/// LoadCodeObject() with a code object size of 0.
hsa_status_t ExecutableImpl::LoadCodeObject(
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    const char *options,
    hsa_loaded_code_object_t *loaded_code_object)
{
  const size_t code_object_size = 0;
  return LoadCodeObject(agent, code_object, code_object_size, options, loaded_code_object);
}
/// @brief Loads a code object into this executable for @p agent.
///
/// Steps, in order: parse loader options (from @p options and the
/// LOADER_OPTIONS_APPEND environment variable), optionally substitute the
/// code object with a file selected by the "substitute" option, optionally
/// dump it, validate version / profile / ISA, load segments and symbols,
/// apply relocations, and optionally dump the executable.
///
/// @param agent Target agent. A zero handle is rejected for version 1 code
///        objects.
/// @param code_object Handle of the code object to load (unless substituted).
/// @param code_object_size Not referenced by this function.
/// @param options Loader option string; may be null.
/// @param loaded_code_object If non-null, receives the handle of the newly
///        loaded code object.
/// @return HSA_STATUS_SUCCESS on success; HSA_STATUS_ERROR_FROZEN_EXECUTABLE
///         if the executable is frozen; HSA_STATUS_ERROR for option errors;
///         HSA_STATUS_ERROR_INVALID_CODE_OBJECT, HSA_STATUS_ERROR_INVALID_AGENT,
///         HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS or
///         HSA_STATUS_ERROR_INVALID_ISA_NAME for validation failures; or the
///         status of the failing load/relocation step.
hsa_status_t ExecutableImpl::LoadCodeObject(
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    size_t code_object_size,
    const char *options,
    hsa_loaded_code_object_t *loaded_code_object)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  if (HSA_EXECUTABLE_STATE_FROZEN == state_) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  LoaderOptions loaderOptions;
  if (options && !loaderOptions.ParseOptions(options)) {
    return HSA_STATUS_ERROR;
  }
  const char *options_append = getenv("LOADER_OPTIONS_APPEND");
  if (options_append && !loaderOptions.ParseOptions(options_append)) {
    return HSA_STATUS_ERROR;
  }

  // Each substitute entry has the form "<n>=<file>" or "<n1>-<n2>=<file>".
  typedef std::tuple<uint32_t, uint32_t, std::string> Substitute;
  std::vector<Substitute> substitutes;
  for (const std::string& s : loaderOptions.Substitute()->values()) {
    std::string::size_type vi = s.find('=');
    if (vi == std::string::npos) { return HSA_STATUS_ERROR; }
    std::string value = s.substr(vi + 1);
    std::string range = s.substr(0, vi);
    std::string::size_type mi = range.find('-');
    uint32_t n1 = UINT32_MAX, n2 = UINT32_MAX;
    if (mi != std::string::npos) {
      std::string s1, s2;
      // Everything before the '-' is the range start. The count is `mi`, not
      // `mi - 1`, which would drop the last digit and wrap when mi == 0.
      s1 = range.substr(0, mi);
      s2 = range.substr(mi + 1);
      std::istringstream is1(s1); is1 >> n1;
      std::istringstream is2(s2); is2 >> n2;
    }
    else {
      std::istringstream is(range); is >> n1;
      n2 = n1;
    }
    substitutes.push_back(std::make_tuple(n1, n2, value));
  }

  uint32_t codeNum = NextCodeObjectNum();
  code.reset(new code::AmdHsaCode());

  // Pick the first substitute whose range contains this code object number.
  std::string substituteFileName;
  for (const Substitute& ss : substitutes) {
    if (codeNum >= std::get<0>(ss) && codeNum <= std::get<1>(ss)) {
      substituteFileName = std::get<2>(ss);
      break;
    }
  }

  std::vector<char> buffer;
  if (substituteFileName.empty()) {
    if (!code->InitAsHandle(code_object)) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }
  } else {
    if (!ReadFileIntoBuffer(substituteFileName, buffer)) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }
    // data() is well defined even if the file turned out to be empty.
    if (!code->InitAsBuffer(buffer.data(), buffer.size())) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }
  }

  if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpCode()->is_set()) {
    if (!code->SaveToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "hsaco", codeNum))) {
      // Ignore error.
    }
  }
  if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpIsa()->is_set()) {
    if (!code->PrintToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "isa", codeNum))) {
      // Ignore error.
    }
  }

  std::string codeIsa;
  if (!code->GetIsa(codeIsa)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }

  uint32_t majorVersion, minorVersion;
  if (!code->GetCodeObjectVersion(&majorVersion, &minorVersion)) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  if (majorVersion != 1 && majorVersion != 2 && majorVersion != 3) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
  if (agent.handle == 0 && majorVersion == 1) { return HSA_STATUS_ERROR_INVALID_AGENT; }

  uint32_t codeHsailMajor;
  uint32_t codeHsailMinor;
  hsa_profile_t codeProfile;
  hsa_machine_model_t codeMachineModel;
  hsa_default_float_rounding_mode_t codeRoundingMode;
  if (!code->GetNoteHsail(&codeHsailMajor, &codeHsailMinor, &codeProfile, &codeMachineModel, &codeRoundingMode)) {
    // No HSAIL note: fall back to the executable's own profile.
    codeProfile = profile_;
  }
  if (profile_ != codeProfile) {
    return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
  }

  hsa_isa_t objectsIsa = context_->IsaFromName(codeIsa.c_str());
  if (!objectsIsa.handle) { return HSA_STATUS_ERROR_INVALID_ISA_NAME; }
  if (agent.handle != 0 && !context_->IsaSupportedByAgent(agent, objectsIsa)) {
    return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
  }

  hsa_status_t status;
  objects.push_back(new LoadedCodeObjectImpl(this, agent, code->ElfData(), code->ElfSize()));
  loaded_code_objects.push_back((LoadedCodeObjectImpl*)objects.back());

  status = LoadSegments(agent, code.get(), majorVersion);
  if (status != HSA_STATUS_SUCCESS) return status;

  for (size_t i = 0; i < code->SymbolCount(); ++i) {
    // For v2+ code objects, local symbols are skipped unless they are kernels.
    if (majorVersion >= 2 &&
        code->GetSymbol(i)->elfSym()->type() != STT_AMDGPU_HSA_KERNEL &&
        code->GetSymbol(i)->elfSym()->binding() == STB_LOCAL)
      continue;
    status = LoadSymbol(agent, code->GetSymbol(i), majorVersion);
    if (status != HSA_STATUS_SUCCESS) { return status; }
  }

  status = ApplyRelocations(agent, code.get());
  if (status != HSA_STATUS_SUCCESS) { return status; }

  code.reset();

  if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpExec()->is_set()) {
    if (!PrintToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "exec", codeNum))) {
      // Ignore error.
    }
  }

  if (nullptr != loaded_code_object) { *loaded_code_object = LoadedCodeObject::Handle(loaded_code_objects.back()); }
  return HSA_STATUS_SUCCESS;
}
// Selects the segment loading strategy from the code object major version:
// major versions below 2 are handled by LoadSegmentsV1, all others by
// LoadSegmentsV2. The status of the selected loader is returned unchanged.
hsa_status_t ExecutableImpl::LoadSegments(hsa_agent_t agent,
                                          const code::AmdHsaCode *c,
                                          uint32_t majorVersion) {
  const bool usesV1Layout = majorVersion < 2;
  return usesV1Layout ? LoadSegmentsV1(agent, c) : LoadSegmentsV2(agent, c);
}
// Loads every data segment of the code object, one at a time, through
// LoadSegmentV1. Stops at and returns the first non-success status.
hsa_status_t ExecutableImpl::LoadSegmentsV1(hsa_agent_t agent,
                                            const code::AmdHsaCode *c) {
  size_t index = 0;
  while (index < c->DataSegmentCount()) {
    const hsa_status_t result = LoadSegmentV1(agent, c->DataSegment(index));
    if (result != HSA_STATUS_SUCCESS) { return result; }
    ++index;
  }
  return HSA_STATUS_SUCCESS;
}
// Loads all data segments of the code object into one agent code allocation.
//
// A single loader Segment is created whose size is the end address
// (vaddr + memSize) of the last data segment and whose base vaddr/offset come
// from the first data segment. Each data segment is then copied into it.
//
// Returns HSA_STATUS_ERROR_INVALID_CODE_OBJECT when there are no data
// segments, HSA_STATUS_ERROR_OUT_OF_RESOURCES when allocation fails, or the
// first non-success status reported by LoadSegmentV2.
hsa_status_t ExecutableImpl::LoadSegmentsV2(hsa_agent_t agent,
                                            const code::AmdHsaCode *c) {
  assert(c->Machine() == EM_AMDGPU && "Program code objects are not supported");

  const size_t segmentCount = c->DataSegmentCount();
  if (segmentCount == 0) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }

  const uint64_t baseVaddr = c->DataSegment(0)->vaddr();
  const uint64_t extent = c->DataSegment(segmentCount - 1)->vaddr() +
                          c->DataSegment(segmentCount - 1)->memSize();

  void *memory = context_->SegmentAlloc(AMDGPU_HSA_SEGMENT_CODE_AGENT, agent,
                                        extent, AMD_ISA_ALIGN_BYTES, true);
  if (nullptr == memory) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; }

  Segment *merged = new Segment(this, agent, AMDGPU_HSA_SEGMENT_CODE_AGENT,
                                memory, extent, baseVaddr,
                                c->DataSegment(0)->offset());
  if (nullptr == merged) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; }

  for (size_t idx = 0; idx != segmentCount; ++idx) {
    const hsa_status_t result = LoadSegmentV2(c->DataSegment(idx), merged);
    if (result != HSA_STATUS_SUCCESS) { return result; }
  }

  // Register the segment with the executable and with the code object that is
  // currently being loaded.
  objects.push_back(merged);
  loaded_code_objects.back()->LoadedSegments().push_back(merged);
  return HSA_STATUS_SUCCESS;
}
// Loads a single ELF data segment.
//
// Segments with a zero memory size are skipped. A global-program segment
// reuses the already recorded program allocation segment when one exists;
// otherwise memory is allocated, the segment image is copied in, and the new
// Segment is registered (and remembered as the program allocation segment if
// it is a global-program segment). In all non-skipped cases the resulting
// segment is attached to the code object currently being loaded.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when allocation fails.
hsa_status_t ExecutableImpl::LoadSegmentV1(hsa_agent_t agent,
                                           const code::Segment *s) {
  assert(s->type() < PT_LOOS + AMDGPU_HSA_SEGMENT_LAST);
  if (0 == s->memSize()) { return HSA_STATUS_SUCCESS; }

  amdgpu_hsa_elf_segment_t segment = (amdgpu_hsa_elf_segment_t)(s->type() - PT_LOOS);
  const bool isProgramSegment = (segment == AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM);

  Segment *target = nullptr;
  if (isProgramSegment && program_allocation_segment != nullptr) {
    // The program allocation already exists; share it instead of allocating.
    target = program_allocation_segment;
  } else {
    void *memory = context_->SegmentAlloc(segment, agent, s->memSize(), s->align(), true);
    if (nullptr == memory) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; }

    target = new Segment(this, agent, segment, memory, s->memSize(), s->vaddr(), s->offset());
    target->Copy(s->vaddr(), s->data(), s->imageSize());
    objects.push_back(target);

    if (isProgramSegment) { program_allocation_segment = target; }
  }

  assert(target);
  loaded_code_objects.back()->LoadedSegments().push_back(target);
  return HSA_STATUS_SUCCESS;
}
// Copies the file image of one ELF data segment into the given loader segment
// at the data segment's virtual address. Always returns HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::LoadSegmentV2(const code::Segment *data_segment,
                                           loader::Segment *load_segment) {
  assert(data_segment && load_segment);
  const auto destination = data_segment->vaddr();
  const auto image = data_segment->data();
  const auto imageBytes = data_segment->imageSize();
  load_segment->Copy(destination, image, imageBytes);
  return HSA_STATUS_SUCCESS;
}
// Loads one code object symbol: declarations are routed to
// LoadDeclarationSymbol and everything else to LoadDefinitionSymbol.
hsa_status_t ExecutableImpl::LoadSymbol(hsa_agent_t agent,
                                        code::Symbol* sym,
                                        uint32_t majorVersion)
{
  return sym->IsDeclaration()
           ? LoadDeclarationSymbol(agent, sym, majorVersion)
           : LoadDefinitionSymbol(agent, sym, majorVersion);
}
namespace {
// Returns true when `str` ends with `suf`. An empty suffix matches any string.
bool string_ends_with(const std::string &str, const std::string &suf) {
  if (str.size() < suf.size()) { return false; }
  const std::string::size_type tailStart = str.size() - suf.size();
  return 0 == str.compare(tailStart, suf.size(), suf);
}
}
// Creates the loader-side symbol for a symbol definition found in the code
// object and registers it with the executable.
//
// The symbol is registered as an agent symbol (keyed by name and agent) or as
// a program symbol (keyed by name). For code object major version >= 2 the
// choice depends only on whether an agent handle was supplied; otherwise it
// follows sym->IsAgent().
//
// Three kinds of definitions are handled:
//  - symbols whose name ends in ".kd": a kernel descriptor is read from the
//    symbol's section and a KernelSymbol is built from it;
//  - variable symbols: a VariableSymbol is built;
//  - kernel symbols: an amd_kernel_code_t is read from the symbol's section,
//    a KernelSymbol is built, and a pointer to its debug info is written back
//    into the loaded kernel code.
//
// Returns HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED when a symbol with the
// same key is already registered, HSA_STATUS_ERROR_INVALID_CODE_OBJECT when
// the descriptor cannot be read from the section or the symbol's section is
// not covered by a loaded segment, and HSA_STATUS_ERROR for any other symbol
// kind.
hsa_status_t ExecutableImpl::LoadDefinitionSymbol(hsa_agent_t agent,
                                                  code::Symbol* sym,
                                                  uint32_t majorVersion)
{
  bool isAgent = sym->IsAgent();
  if (majorVersion >= 2) {
    isAgent = agent.handle != 0;
  }
  if (isAgent) {
    auto agent_symbol = agent_symbols_.find(std::make_pair(sym->Name(), agent));
    if (agent_symbol != agent_symbols_.end()) {
      // TODO(spec): this is not spec compliant.
      return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
    }
  } else {
    auto program_symbol = program_symbols_.find(sym->Name());
    if (program_symbol != program_symbols_.end()) {
      // TODO(spec): this is not spec compliant.
      return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
    }
  }

  uint64_t address = SymbolAddress(agent, sym);

  SymbolImpl *symbol = nullptr;
  if (string_ends_with(sym->GetSymbolName(), ".kd")) {
    // V3.
    llvm::amdhsa::kernel_descriptor_t kd;
    // A failed read would leave the descriptor (partly) uninitialized, so
    // reject the code object instead of building a symbol from garbage.
    if (!sym->GetSection()->getData(sym->SectionOffset(), &kd, sizeof(kd))) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }

    uint32_t kernarg_segment_size = 0; // FIXME.
    uint32_t kernarg_segment_alignment = 0; // FIXME.
    uint32_t group_segment_size = kd.group_segment_fixed_size;
    uint32_t private_segment_size = kd.private_segment_fixed_size;
    bool is_dynamic_callstack = false;

    uint64_t size = sym->Size();

    KernelSymbol *kernel_symbol = new KernelSymbol(true,
                                    sym->GetModuleName(),
                                    sym->GetSymbolName(),
                                    sym->Linkage(),
                                    true, // sym->IsDefinition()
                                    kernarg_segment_size,
                                    kernarg_segment_alignment,
                                    group_segment_size,
                                    private_segment_size,
                                    is_dynamic_callstack,
                                    size,
                                    64,
                                    address);
    symbol = kernel_symbol;
  } else if (sym->IsVariableSymbol()) {
    symbol = new VariableSymbol(true,
                                sym->GetModuleName(),
                                sym->GetSymbolName(),
                                sym->Linkage(),
                                true, // sym->IsDefinition()
                                sym->Allocation(),
                                sym->Segment(),
                                sym->Size(),
                                sym->Alignment(),
                                sym->IsConst(),
                                false,
                                address);
  } else if (sym->IsKernelSymbol()) {
    amd_kernel_code_t akc;
    // Same reasoning as for the kernel descriptor above: do not continue with
    // a kernel code header that could not be read completely.
    if (!sym->GetSection()->getData(sym->SectionOffset(), &akc, sizeof(akc))) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }

    // The loaded segment is dereferenced below; resolve it before allocating
    // the symbol so a missing segment neither crashes nor leaks.
    Segment *kernel_segment = SymbolSegment(agent, sym);
    if (!kernel_segment) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }

    uint32_t kernarg_segment_size =
      uint32_t(akc.kernarg_segment_byte_size);
    uint32_t kernarg_segment_alignment =
      uint32_t(1 << akc.kernarg_segment_alignment);
    uint32_t group_segment_size =
      uint32_t(akc.workgroup_group_segment_byte_size);
    uint32_t private_segment_size =
      uint32_t(akc.workitem_private_segment_byte_size);
    bool is_dynamic_callstack =
      AMD_HSA_BITS_GET(akc.kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK) ? true : false;

    uint64_t size = sym->Size();

    if (!size && sym->SectionOffset() < sym->GetSection()->size()) {
      // ORCA Runtime relies on symbol size equal to size of kernel ISA. If symbol size is 0 in ELF,
      // calculate end of segment - symbol value.
      size = sym->GetSection()->size() - sym->SectionOffset();
    }
    KernelSymbol *kernel_symbol = new KernelSymbol(true,
                                    sym->GetModuleName(),
                                    sym->GetSymbolName(),
                                    sym->Linkage(),
                                    true, // sym->IsDefinition()
                                    kernarg_segment_size,
                                    kernarg_segment_alignment,
                                    group_segment_size,
                                    private_segment_size,
                                    is_dynamic_callstack,
                                    size,
                                    256,
                                    address);
    kernel_symbol->debug_info.elf_raw = code->ElfData();
    kernel_symbol->debug_info.elf_size = code->ElfSize();
    kernel_symbol->debug_info.kernel_name = kernel_symbol->full_name.c_str();
    kernel_symbol->debug_info.owning_segment = (void*)kernel_segment->Address(sym->GetSection()->addr());
    symbol = kernel_symbol;

    // \todo kzhuravl 10/15/15 This is a debugger backdoor: needs to be
    // removed.
    // Stores the address of the symbol's debug info into the
    // runtime_loader_kernel_symbol field of the loaded amd_kernel_code_t.
    uint64_t target_address = sym->GetSection()->addr() + sym->SectionOffset() + ((size_t)(&((amd_kernel_code_t*)0)->runtime_loader_kernel_symbol));
    uint64_t source_value = (uint64_t) (uintptr_t) &kernel_symbol->debug_info;
    kernel_segment->Copy(target_address, &source_value, sizeof(source_value));
  } else {
    assert(!"Unexpected symbol type in LoadDefinitionSymbol");
    return HSA_STATUS_ERROR;
  }
  assert(symbol);

  if (isAgent) {
    symbol->agent = agent;
    agent_symbols_.insert(std::make_pair(std::make_pair(sym->Name(), agent), symbol));
  } else {
    program_symbols_.insert(std::make_pair(sym->Name(), symbol));
  }
  return HSA_STATUS_SUCCESS;
}
// Checks that a declared symbol already has a definition registered in this
// executable, either as a program symbol or as an agent symbol for `agent`.
// Returns HSA_STATUS_ERROR_VARIABLE_UNDEFINED when neither exists.
hsa_status_t ExecutableImpl::LoadDeclarationSymbol(hsa_agent_t agent,
                                                   code::Symbol* sym,
                                                   uint32_t majorVersion)
{
  if (program_symbols_.find(sym->Name()) != program_symbols_.end()) {
    return HSA_STATUS_SUCCESS;
  }
  if (agent_symbols_.find(std::make_pair(sym->Name(), agent)) != agent_symbols_.end()) {
    return HSA_STATUS_SUCCESS;
  }
  // TODO(spec): this is not spec compliant.
  return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
}
// Searches the segments of the most recently added loaded code object for the
// first one that contains `vaddr`. Returns a null pointer when none does.
Segment* ExecutableImpl::VirtualAddressSegment(uint64_t vaddr)
{
  auto &&candidates = loaded_code_objects.back()->LoadedSegments();
  for (auto it = candidates.begin(); it != candidates.end(); ++it) {
    if ((*it)->IsAddressInSegment(vaddr)) {
      return *it;
    }
  }
  return nullptr;
}
// Returns the loaded address of a code object symbol, or 0 when the symbol's
// section is not covered by any loaded segment.
uint64_t ExecutableImpl::SymbolAddress(hsa_agent_t agent, code::Symbol* sym)
{
  Segment* owner = SectionSegment(agent, sym->GetSection());
  if (nullptr == owner) {
    return 0;
  }
  return (uint64_t) (uintptr_t) owner->Address(sym->VAddr());
}
// Returns the loaded address of an ELF symbol, computed from its section
// address plus the symbol value, or 0 when the symbol's section is not covered
// by any loaded segment.
uint64_t ExecutableImpl::SymbolAddress(hsa_agent_t agent, elf::Symbol* sym)
{
  elf::Section* owningSection = sym->section();
  Segment* owner = SectionSegment(agent, owningSection);
  const uint64_t symbolVaddr = owningSection->addr() + sym->value();
  if (nullptr == owner) {
    return 0;
  }
  return (uint64_t) (uintptr_t) owner->Address(symbolVaddr);
}
// Returns the loaded segment that contains the section of `sym`, as found by
// SectionSegment (which may yield a null pointer).
Segment* ExecutableImpl::SymbolSegment(hsa_agent_t agent, code::Symbol* sym)
{
  code::Section* owningSection = sym->GetSection();
  return SectionSegment(agent, owningSection);
}
// Searches the segments of the most recently added loaded code object for the
// first one that contains the start address of `sec`. Returns a null pointer
// when no segment matches.
Segment* ExecutableImpl::SectionSegment(hsa_agent_t agent, code::Section* sec)
{
  for (Segment* candidate : loaded_code_objects.back()->LoadedSegments()) {
    if (!candidate->IsAddressInSegment(sec->addr())) {
      continue;
    }
    return candidate;
  }
  return nullptr;
}
// Applies every relocation section of the code object.
//
// Sections that have a target section are treated as static relocations; the
// rest are treated as dynamic relocations, which are only accepted for code
// object version 2.1 and later. Stops at and returns the first failure.
hsa_status_t ExecutableImpl::ApplyRelocations(hsa_agent_t agent, amd::hsa::code::AmdHsaCode *c)
{
  for (size_t idx = 0; idx < c->RelocationSectionCount(); ++idx) {
    auto *relocSection = c->GetRelocationSection(idx);
    hsa_status_t result = HSA_STATUS_SUCCESS;

    if (relocSection->targetSection()) {
      result = ApplyStaticRelocationSection(agent, relocSection);
    } else {
      // Dynamic relocations are supported starting code object v2.1.
      uint32_t majorVersion, minorVersion;
      if (!c->GetCodeObjectVersion(&majorVersion, &minorVersion)) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      const bool olderThanV2_1 =
        majorVersion < 2 || (majorVersion == 2 && minorVersion < 1);
      if (olderThanV2_1) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      result = ApplyDynamicRelocationSection(agent, relocSection);
    }

    if (result != HSA_STATUS_SUCCESS) { return result; }
  }
  return HSA_STATUS_SUCCESS;
}
// Applies all relocations of one static relocation section. Sections whose
// target section lacks SHF_ALLOC are skipped. Stops at and returns the first
// failure reported by ApplyStaticRelocation.
hsa_status_t ExecutableImpl::ApplyStaticRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec)
{
  // Skip link-time relocations (if any).
  const bool targetIsAllocated = (sec->targetSection()->flags() & SHF_ALLOC) != 0;
  if (!targetIsAllocated) { return HSA_STATUS_SUCCESS; }

  size_t index = 0;
  while (index < sec->relocationCount()) {
    const hsa_status_t result = ApplyStaticRelocation(agent, sec->relocation(index));
    if (result != HSA_STATUS_SUCCESS) { return result; }
    ++index;
  }
  return HSA_STATUS_SUCCESS;
}
// Applies one static relocation to the loaded segment that backs the
// relocation's target section.
//
// Handled relocation types:
//  - R_AMDGPU_32_LOW / R_AMDGPU_32_HIGH / R_AMDGPU_64: writes the low 32 bits,
//    the high 32 bits, or all 64 bits of (symbol address + addend).
//  - R_AMDGPU_INIT_SAMPLER: creates a sampler from the descriptor stored at
//    the symbol and writes the sampler handle.
//  - R_AMDGPU_INIT_IMAGE: creates an image from the descriptor stored at the
//    symbol and writes the image handle.
// Any other relocation type is ignored.
//
// Returns HSA_STATUS_ERROR_INVALID_CODE_OBJECT for malformed input, including
// a target section that is not covered by any loaded segment;
// HSA_STATUS_ERROR_VARIABLE_UNDEFINED when an STT_COMMON symbol cannot be
// resolved; or the status reported by SamplerCreate / ImageCreate.
hsa_status_t ExecutableImpl::ApplyStaticRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel)
{
  hsa_status_t status = HSA_STATUS_SUCCESS;
  amd::elf::Symbol* sym = rel->symbol();
  code::RelocationSection* rsec = rel->section();
  code::Section* sec = rsec->targetSection();
  // May be null when no loaded segment contains the target section; checked
  // before every write below.
  Segment* rseg = SectionSegment(agent, sec);
  size_t reladdr = sec->addr() + rel->offset();
  switch (rel->type()) {
    case R_AMDGPU_32_LOW:
    case R_AMDGPU_32_HIGH:
    case R_AMDGPU_64:
    {
      uint64_t addr;
      switch (sym->type()) {
        case STT_OBJECT:
        case STT_SECTION:
        case STT_AMDGPU_HSA_KERNEL:
        case STT_AMDGPU_HSA_INDIRECT_FUNCTION:
          addr = SymbolAddress(agent, sym);
          if (!addr) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
          break;
        case STT_COMMON: {
          // Program-allocation symbols are looked up without an agent.
          hsa_agent_t *sagent = &agent;
          if (STA_AMDGPU_HSA_GLOBAL_PROGRAM == ELF64_ST_AMDGPU_ALLOCATION(sym->other())) {
            sagent = nullptr;
          }
          SymbolImpl* esym = (SymbolImpl*) GetSymbolInternal(sym->name().c_str(), sagent);
          if (!esym) { return HSA_STATUS_ERROR_VARIABLE_UNDEFINED; }
          addr = esym->address;
          break;
        }
        default:
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      addr += rel->addend();

      // Nothing to patch if the target section was never loaded.
      if (!rseg) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }

      uint32_t addr32 = 0;
      switch (rel->type()) {
        case R_AMDGPU_32_HIGH:
          addr32 = uint32_t((addr >> 32) & 0xFFFFFFFF);
          rseg->Copy(reladdr, &addr32, sizeof(addr32));
          break;
        case R_AMDGPU_32_LOW:
          addr32 = uint32_t(addr & 0xFFFFFFFF);
          rseg->Copy(reladdr, &addr32, sizeof(addr32));
          break;
        case R_AMDGPU_64:
          rseg->Copy(reladdr, &addr, sizeof(addr));
          break;
        default:
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      break;
    }
    case R_AMDGPU_INIT_SAMPLER:
    {
      if (STT_AMDGPU_HSA_METADATA != sym->type() ||
          SHT_PROGBITS != sym->section()->type() ||
          !(sym->section()->flags() & SHF_MERGE)) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      amdgpu_hsa_sampler_descriptor_t desc;
      if (!sym->section()->getData(sym->value(), &desc, sizeof(desc))) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      if (AMDGPU_HSA_METADATA_KIND_INIT_SAMP != desc.kind) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      // Checked before creating the sampler so that no sampler is created
      // when its handle could not be stored anyway.
      if (!rseg) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }

      hsa_ext_sampler_descriptor_t hsa_sampler_descriptor;
      hsa_sampler_descriptor.coordinate_mode =
        hsa_ext_sampler_coordinate_mode_t(desc.coord);
      hsa_sampler_descriptor.filter_mode =
        hsa_ext_sampler_filter_mode_t(desc.filter);
      hsa_sampler_descriptor.address_mode =
        hsa_ext_sampler_addressing_mode_t(desc.addressing);
      hsa_ext_sampler_t hsa_sampler = {0};
      status = context_->SamplerCreate(agent, &hsa_sampler_descriptor, &hsa_sampler);
      if (status != HSA_STATUS_SUCCESS) { return status; }
      assert(hsa_sampler.handle);
      rseg->Copy(reladdr, &hsa_sampler, sizeof(hsa_sampler));
      break;
    }
    case R_AMDGPU_INIT_IMAGE:
    {
      if (STT_AMDGPU_HSA_METADATA != sym->type() ||
          SHT_PROGBITS != sym->section()->type() ||
          !(sym->section()->flags() & SHF_MERGE)) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      amdgpu_hsa_image_descriptor_t desc;
      if (!sym->section()->getData(sym->value(), &desc, sizeof(desc))) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      if (AMDGPU_HSA_METADATA_KIND_INIT_ROIMG != desc.kind &&
          AMDGPU_HSA_METADATA_KIND_INIT_WOIMG != desc.kind &&
          AMDGPU_HSA_METADATA_KIND_INIT_RWIMG != desc.kind) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      // Checked before creating the image so that no image is created when
      // its handle could not be stored anyway.
      if (!rseg) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }

      hsa_ext_image_format_t hsa_image_format;
      hsa_image_format.channel_order =
        hsa_ext_image_channel_order_t(desc.channel_order);
      hsa_image_format.channel_type =
        hsa_ext_image_channel_type_t(desc.channel_type);
      hsa_ext_image_descriptor_t hsa_image_descriptor;
      hsa_image_descriptor.geometry =
        hsa_ext_image_geometry_t(desc.geometry);
      hsa_image_descriptor.width = size_t(desc.width);
      hsa_image_descriptor.height = size_t(desc.height);
      hsa_image_descriptor.depth = size_t(desc.depth);
      hsa_image_descriptor.array_size = size_t(desc.array);
      hsa_image_descriptor.format = hsa_image_format;
      hsa_access_permission_t hsa_image_permission = HSA_ACCESS_PERMISSION_RO;
      switch (desc.kind) {
        case AMDGPU_HSA_METADATA_KIND_INIT_ROIMG: {
          hsa_image_permission = HSA_ACCESS_PERMISSION_RO;
          break;
        }
        case AMDGPU_HSA_METADATA_KIND_INIT_WOIMG: {
          hsa_image_permission = HSA_ACCESS_PERMISSION_WO;
          break;
        }
        case AMDGPU_HSA_METADATA_KIND_INIT_RWIMG: {
          hsa_image_permission = HSA_ACCESS_PERMISSION_RW;
          break;
        }
        default: {
          assert(false);
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
        }
      }
      hsa_ext_image_t hsa_image = {0};
      status = context_->ImageCreate(agent, hsa_image_permission,
                                     &hsa_image_descriptor,
                                     NULL, // TODO: image_data?
                                     &hsa_image);
      if (status != HSA_STATUS_SUCCESS) { return status; }
      rseg->Copy(reladdr, &hsa_image, sizeof(hsa_image));
      break;
    }
    default:
      // Ignore.
      break;
  }
  return HSA_STATUS_SUCCESS;
}
// Applies all relocations of one dynamic relocation section, stopping at and
// returning the first failure reported by ApplyDynamicRelocation.
hsa_status_t ExecutableImpl::ApplyDynamicRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec)
{
  size_t index = 0;
  while (index < sec->relocationCount()) {
    const hsa_status_t result = ApplyDynamicRelocation(agent, sec->relocation(index));
    if (result != HSA_STATUS_SUCCESS) { return result; }
    ++index;
  }
  return HSA_STATUS_SUCCESS;
}
// Applies one dynamic relocation.
//
// The relocation offset is treated as a virtual address and resolved to the
// loaded segment that contains it. The symbol address is resolved either from
// the loaded segment containing the symbol value (STT_OBJECT,
// STT_AMDGPU_HSA_KERNEL, STT_FUNC) or from the agent symbols already defined
// in this executable (STT_NOTYPE). The relocated value is then written
// according to the relocation type.
//
// Returns HSA_STATUS_ERROR_INVALID_CODE_OBJECT for unsupported symbol or
// relocation types and when the relocation offset or symbol value is not
// covered by any loaded segment; HSA_STATUS_ERROR_VARIABLE_UNDEFINED when the
// resolved address (after adding the addend) is zero.
hsa_status_t ExecutableImpl::ApplyDynamicRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel)
{
  // May be null when no loaded segment contains the relocation offset; checked
  // before every use below.
  Segment* relSeg = VirtualAddressSegment(rel->offset());
  uint64_t symAddr = 0;
  switch (rel->symbol()->type()) {
    case STT_OBJECT:
    case STT_AMDGPU_HSA_KERNEL:
    case STT_FUNC:
    {
      Segment* symSeg = VirtualAddressSegment(rel->symbol()->value());
      // A symbol value outside every loaded segment cannot be resolved.
      if (!symSeg) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
      symAddr = reinterpret_cast<uint64_t>(symSeg->Address(rel->symbol()->value()));
      break;
    }
    // External symbols, they must be defined prior loading.
    case STT_NOTYPE:
    {
      // TODO: Only agent allocation variables are supported in v2.1. How will
      // we distinguish between program allocation and agent allocation
      // variables?
      auto agent_symbol = agent_symbols_.find(std::make_pair(rel->symbol()->name(), agent));
      if (agent_symbol != agent_symbols_.end())
        symAddr = agent_symbol->second->address;
      break;
    }
    default:
      // Only objects and kernels are supported in v2.1.
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  // NOTE(review): the addend is added before the "undefined" checks below, so
  // an unresolved symbol with a non-zero addend is not reported as undefined.
  // Left unchanged here; confirm whether that is intended.
  symAddr += rel->addend();
  switch (rel->type()) {
    case R_AMDGPU_32_HIGH:
    {
      if (!symAddr)
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      if (!relSeg)
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      uint32_t symAddr32 = uint32_t((symAddr >> 32) & 0xFFFFFFFF);
      relSeg->Copy(rel->offset(), &symAddr32, sizeof(symAddr32));
      break;
    }
    case R_AMDGPU_32_LOW:
    {
      if (!symAddr)
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      if (!relSeg)
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      uint32_t symAddr32 = uint32_t(symAddr & 0xFFFFFFFF);
      relSeg->Copy(rel->offset(), &symAddr32, sizeof(symAddr32));
      break;
    }
    case R_AMDGPU_64:
    {
      if (!symAddr)
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      if (!relSeg)
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      relSeg->Copy(rel->offset(), &symAddr, sizeof(symAddr));
      break;
    }
    case R_AMDGPU_RELATIVE64:
    {
      if (!relSeg)
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      int64_t baseDelta = reinterpret_cast<uint64_t>(relSeg->Address(0)) - relSeg->VAddr();
      uint64_t relocatedAddr = baseDelta + rel->addend();
      relSeg->Copy(rel->offset(), &relocatedAddr, sizeof(relocatedAddr));
      break;
    }
    default:
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  return HSA_STATUS_SUCCESS;
}
// Freezes the executable: under the writer lock, freezes every segment of
// every loaded code object and marks the executable as frozen.
// Returns HSA_STATUS_ERROR_FROZEN_EXECUTABLE if it is already frozen.
hsa_status_t ExecutableImpl::Freeze(const char *options) {
  amd::hsa::common::WriterLockGuard<amd::hsa::common::ReaderWriterLock> writer_lock(rw_lock_);

  const bool alreadyFrozen = (state_ == HSA_EXECUTABLE_STATE_FROZEN);
  if (alreadyFrozen) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  for (auto &codeObject : loaded_code_objects) {
    auto &&segments = codeObject->LoadedSegments();
    for (auto &segment : segments) {
      segment->Freeze();
    }
  }

  state_ = HSA_EXECUTABLE_STATE_FROZEN;
  return HSA_STATUS_SUCCESS;
}
void ExecutableImpl::Print(std::ostream& out)
{
out << "AMD Executable" << std::endl;
out << " Id: " << id()
<< " Profile: " << HsaProfileToString(profile())
<< std::endl << std::endl;
out << "Loaded Objects (total " << objects.size() << ")" << std::endl;
size_t i = 0;
for (ExecutableObject* o : objects) {
out << "Loaded Object " << i++ << ": ";
o->Print(out);
out << std::endl;
}
out << "End AMD Executable" << std::endl;
}
// Dumps the executable (see Print) into the file `filename`.
//
// Returns true when the file could be opened and the stream reports no
// failure after printing; returns false otherwise.
bool ExecutableImpl::PrintToFile(const std::string& filename)
{
  std::ofstream out(filename);
  if (out.fail()) { return false; }
  Print(out);
  // fail() is true on error, so it must be negated to report success.
  return !out.fail();
}
} // namespace loader
} // namespace hsa
} // namespace amd