Migrate amdgpu-windows-interop to rocm-systems (#808)

This commit is contained in:
Joseph Macaranas
2025-09-05 10:32:44 -04:00
committed by GitHub
szülő 3d9d35a1f8
commit 5ca7af2d30
261 fájl változott, egészen pontosan 86831 új sor hozzáadva és 2 régi sor törölve
@@ -0,0 +1,82 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
#-----------------------------------------------------------------------------
# amdhsacode library
#
# This file is expected to be included from top-level CMakeLists.txt.
#
# Dependencies:
# - Compiler definitions
# - elf library
# - hsail library
# - sp3 library
#
# Defines:
# - amdhsacode library and target include directories
set(USE_AMD_LIBELF "no" CACHE STRING "Do not use AMD LIBELF by default")
set(NO_SI_SP3 "no" CACHE STRING "Disable using SP3")
file(GLOB sources *.cpp *.hpp)
add_library(amdhsacode STATIC ${sources})
set_target_properties(amdhsacode PROPERTIES
MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
POSITION_INDEPENDENT_CODE ON
)
if(CMAKE_CXX_COMPILER_ID MATCHES "^(GNU|(Apple)?Clang)$")
target_compile_options(amdhsacode PRIVATE
-Werror
-Wno-inconsistent-missing-override
)
endif()
set(LIBELF_LIB)
if(${USE_AMD_LIBELF} STREQUAL "yes")
target_compile_definitions(amdhsacode PRIVATE AMD_LIBELF)
target_include_directories(amdhsacode PUBLIC ${HSAIL_ELFTOOLCHAIN_DIR}/common)
if(WIN32)
target_include_directories(amdhsacode PUBLIC ${HSAIL_ELFTOOLCHAIN_DIR}/common/win32)
endif()
target_include_directories(amdhsacode PUBLIC ${HSAIL_ELFTOOLCHAIN_DIR}/libelf)
set(LIBELF_LIB oclelf)
message(STATUS "Using AMD LIBELF")
else()
find_package(LibElf REQUIRED)
set(LIBELF_LIB elf)
message(STATUS "Using SYSTEM LIBELF")
endif()
target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../hsail-tools/libHSAIL)
if(${NO_SI_SP3} STREQUAL "no")
target_include_directories(amdhsacode PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../Chip/sp3)
endif()
#if defined(GFX102_BUILD)
target_compile_definitions(amdhsacode PRIVATE GFX102_BUILD)
#endif
#if defined(GFX11_BUILD)
target_compile_definitions(amdhsacode PRIVATE GFX11_BUILD)
#endif
#if defined(GFX115_BUILD)
target_compile_definitions(amdhsacode PRIVATE GFX115_BUILD)
#endif
#if defined(GFX12_BUILD)
target_compile_definitions(amdhsacode PRIVATE GFX12_BUILD)
#endif
if(${NO_SI_SP3} STREQUAL "yes")
target_compile_definitions(amdhsacode PRIVATE NO_SI_SP3)
endif()
target_link_libraries(amdhsacode PRIVATE ${LIBELF_LIB})
if(${NO_SI_SP3} STREQUAL "no")
set(SC_BUILD_SP3 ON)
if(SC_BUILD_SP3)
target_link_libraries(amdhsacode PUBLIC sp3)
endif()
endif()
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
@@ -0,0 +1,195 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_CODE_UTIL_HPP_
#define AMD_HSA_CODE_UTIL_HPP_
#include <cassert>
#include <string>
#include <vector>
#include <iostream>
#ifdef _WIN32
#include <malloc.h>
#else // _WIN32
#include <cstdlib>
#endif // _WIN32
#include "amd_hsa_kernel_code.h"
#include "amd_hsa_elf.h"
#include "hsa.h"
#include "hsa_ext_finalize.h"
#define hsa_error(e) static_cast<hsa_status_t>(e)
#define release_assert(e) \
if (!(e)) { \
std::cerr << __FILE__ << ":"; \
std::cerr << __LINE__ << ":"; \
std::cerr << " Assertion `" << #e << "' failed." << std::endl; \
std::abort(); \
} \
namespace amd {
namespace hsa {
std::string HsaSymbolKindToString(hsa_symbol_kind_t kind);
std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage);
std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation);
std::string HsaVariableSegmentToString(hsa_variable_segment_t segment);
std::string HsaProfileToString(hsa_profile_t profile);
std::string HsaMachineModelToString(hsa_machine_model_t model);
std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode);
std::string AmdMachineKindToString(amd_machine_kind16_t machine);
std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode);
std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode);
std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id);
std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size);
std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions);
std::string AmdPowerTwoToString(amd_powertwo8_t p);
amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec);
bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec);
std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg);
std::string AmdPTLoadToString(uint64_t type);
void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc);
void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1);
void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2);
void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties);
void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives);
namespace code_options {
// Space between options (not at the beginning).
std::ostream& space(std::ostream& out);
// Control directive option without value.
struct control_directive {
const char *name;
control_directive(const char* name_) : name(name_) { }
};
std::ostream& operator<<(std::ostream& out, const control_directive& d);
// Exceptions mask string.
struct exceptions_mask {
uint16_t mask;
exceptions_mask(uint16_t mask_) : mask(mask_) { }
};
std::ostream& operator<<(std::ostream& out, const exceptions_mask& e);
// Control directives options.
struct control_directives {
const hsa_ext_control_directives_t& d;
control_directives(const hsa_ext_control_directives_t& d_) : d(d_) { }
};
std::ostream& operator<<(std::ostream& out, const control_directives& cd);
}
const char* hsaerr2str(hsa_status_t status);
bool ReadFileIntoBuffer(const std::string& filename, std::vector<char>& buffer);
// Create new empty temporary file that will be deleted when closed.
int OpenTempFile(const char* prefix);
void CloseTempFile(int fd);
// Helper comment types for isa disassembler
enum DumpIsaCommentType {
COMMENT_AMD_KERNEL_CODE_T_BEGIN = 1,
COMMENT_AMD_KERNEL_CODE_T_END,
COMMENT_KERNEL_ISA_BEGIN,
};
// Callbacks to create helper comments for isa disassembler
const char * CommentTopCallBack(void *ctx, int type);
const char * CommentRightCallBack(void *ctx, int type);
// Parse disassembler instruction line to find offset
uint32_t ParseInstructionOffset(const std::string& instruction);
// Trim whitespaces from start of string
void ltrim(std::string &str);
// Helper function that allocates an aligned memory.
inline void*
alignedMalloc(size_t size, size_t alignment)
{
#if defined(_WIN32)
return ::_aligned_malloc(size, alignment);
#else
void * ptr = NULL;
alignment = (std::max)(alignment, sizeof(void*));
if (0 == ::posix_memalign(&ptr, alignment, size)) {
return ptr;
}
return NULL;
#endif
}
// Helper function that frees an aligned memory.
inline void
alignedFree(void *ptr)
{
#if defined(_WIN32)
::_aligned_free(ptr);
#else
free(ptr);
#endif
}
inline uint64_t alignUp(uint64_t num, uint64_t align)
{
assert(align);
assert((align & (align - 1)) == 0);
return (num + align - 1) & ~(align - 1);
}
inline uint32_t alignUp(uint32_t num, uint32_t align)
{
assert(align);
assert((align & (align - 1)) == 0);
return (num + align - 1) & ~(align - 1);
}
std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i = 0);
}
}
#endif // AMD_HSA_CODE_UTIL_HPP_
@@ -0,0 +1,94 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "amd_hsa_locks.hpp"
namespace amd {
namespace hsa {
namespace common {
void ReaderWriterLock::ReaderLock()
{
internal_lock_.lock();
while (0 < writers_count_) {
readers_condition_.wait(internal_lock_);
}
readers_count_ += 1;
internal_lock_.unlock();
}
void ReaderWriterLock::ReaderUnlock()
{
internal_lock_.lock();
readers_count_ -= 1;
if (0 == readers_count_ && 0 < writers_waiting_) {
writers_condition_.notify_one();
}
internal_lock_.unlock();
}
void ReaderWriterLock::WriterLock()
{
internal_lock_.lock();
writers_waiting_ += 1;
while (0 < readers_count_ || 0 < writers_count_) {
writers_condition_.wait(internal_lock_);
}
writers_count_ += 1;
writers_waiting_ -= 1;
internal_lock_.unlock();
}
void ReaderWriterLock::WriterUnlock()
{
internal_lock_.lock();
writers_count_ -= 1;
if (0 < writers_waiting_) {
writers_condition_.notify_one();
}
readers_condition_.notify_all();
internal_lock_.unlock();
}
} // namespace common
} // namespace hsa
} // namespace amd
@@ -0,0 +1,127 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_LOCKS_HPP
#define AMD_HSA_LOCKS_HPP
#include <condition_variable>
#include <cstddef>
#include <mutex>
namespace amd {
namespace hsa {
namespace common {
template<typename LockType>
class ReaderLockGuard final {
public:
explicit ReaderLockGuard(LockType &lock):
lock_(lock)
{
lock_.ReaderLock();
}
~ReaderLockGuard()
{
lock_.ReaderUnlock();
}
private:
ReaderLockGuard(const ReaderLockGuard&);
ReaderLockGuard& operator=(const ReaderLockGuard&);
LockType &lock_;
};
template<typename LockType>
class WriterLockGuard final {
public:
explicit WriterLockGuard(LockType &lock):
lock_(lock)
{
lock_.WriterLock();
}
~WriterLockGuard()
{
lock_.WriterUnlock();
}
private:
WriterLockGuard(const WriterLockGuard&);
WriterLockGuard& operator=(const WriterLockGuard&);
LockType &lock_;
};
class ReaderWriterLock final {
public:
ReaderWriterLock():
readers_count_(0), writers_count_(0), writers_waiting_(0) {}
~ReaderWriterLock() {}
void ReaderLock();
void ReaderUnlock();
void WriterLock();
void WriterUnlock();
private:
ReaderWriterLock(const ReaderWriterLock&);
ReaderWriterLock& operator=(const ReaderWriterLock&);
size_t readers_count_;
size_t writers_count_;
size_t writers_waiting_;
std::mutex internal_lock_;
std::condition_variable_any readers_condition_;
std::condition_variable_any writers_condition_;
};
} // namespace common
} // namespace hsa
} // namespace amd
#endif // AMD_HSA_LOCKS_HPP
@@ -0,0 +1,381 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "amd_options.hpp"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <list>
#include <string>
#include <cstddef>
namespace amd {
namespace options {
//===----------------------------------------------------------------------===//
// StringFactory. //
//===----------------------------------------------------------------------===//
std::string StringFactory::Flatten(const char **cstrs,
const uint32_t &cstrs_count,
const char &spacer) {
if (NULL == cstrs || 0 == cstrs_count) {
return std::string();
}
std::string flattened;
for (uint32_t i = 0; i < cstrs_count; ++i) {
if (NULL == cstrs[i]) {
return std::string();
}
flattened += cstrs[i];
if (i != (cstrs_count - 1)) {
flattened += spacer;
}
}
return flattened;
}
std::list<std::string> StringFactory::Tokenize(const char *cstr,
const char &delim) {
if (NULL == cstr) {
return std::list<std::string>();
}
const std::string str = cstr;
size_t start = 0;
size_t end = 0;
std::list<std::string> tokens;
while ((end = str.find(delim, start)) != std::string::npos) {
if (start != end) {
tokens.push_back(str.substr(start, end - start));
}
start = end + 1;
}
if (str.size() > start) {
tokens.push_back(str.substr(start));
}
return tokens;
}
std::string StringFactory::ToLower(const std::string& str) {
std::string lower(str.length(), ' ');
std::transform(str.begin(), str.end(), lower.begin(), ::tolower);
return lower;
}
std::string StringFactory::ToUpper(const std::string& str) {
std::string upper(str.length(), ' ');
std::transform(str.begin(), str.end(), upper.begin(), ::toupper);
return upper;
}
//===----------------------------------------------------------------------===//
// HelpPrinter, HelpStreambuf. //
//===----------------------------------------------------------------------===//
HelpStreambuf::HelpStreambuf(std::ostream& stream)
: basicStream_(&stream),
basicBuf_(stream.rdbuf()),
wrapWidth_(0),
indentSize_(0),
atLineStart_(true),
lineWidth_(0)
{
basicStream_->rdbuf(this);
}
HelpStreambuf::int_type HelpStreambuf::overflow(HelpStreambuf::int_type ch) {
if (atLineStart_ && ch != '\n') {
std::string indent(indentSize_, ' ');
basicBuf_->sputn(indent.data(), indent.size());
lineWidth_ = indentSize_;
atLineStart_ = false;
} else if (ch == '\n') {
atLineStart_ = true;
lineWidth_ = 0;
}
if (wrapWidth_ > 0 && lineWidth_ == wrapWidth_) {
basicBuf_->sputc('\n');
std::string indent(indentSize_, ' ');
basicBuf_->sputn(indent.data(), indent.size());
lineWidth_ = indentSize_;
atLineStart_ = false;
}
lineWidth_++;
return basicBuf_->sputc(ch);
}
HelpPrinter& HelpPrinter::PrintUsage(const std::string& usage) {
sbuf_.IndentSize(0);
sbuf_.WrapWidth(0);
Stream() << usage;
if (usage.length() < USAGE_WIDTH) {
Stream() << std::string(USAGE_WIDTH - usage.length(), ' ');
}
Stream() << std::string(PADDING_WIDTH, ' ');
return *this;
}
HelpPrinter& HelpPrinter::PrintDescription(const std::string& description) {
sbuf_.WrapWidth(USAGE_WIDTH + PADDING_WIDTH + DESCRIPTION_WIDTH);
sbuf_.IndentSize(USAGE_WIDTH + PADDING_WIDTH);
Stream() << description << std::endl;
sbuf_.IndentSize(0);
sbuf_.WrapWidth(0);
return *this;
}
//===----------------------------------------------------------------------===//
// ChoiceOptioin. //
//===----------------------------------------------------------------------===//
ChoiceOption::ChoiceOption(const std::string& name,
const std::vector<std::string>& choices,
const std::string& help,
std::ostream& error)
: OptionBase(name, help, error) {
for (const auto& choice: choices) {
choices_.insert(choice);
}
}
bool ChoiceOption::ProcessTokens(std::list<std::string> &tokens) {
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
if (2 != tokens.size()) {
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
return false;
}
tokens.pop_front();
if (0 == choices_.count(tokens.front())) {
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
return false;
}
is_set_ = true;
value_ = tokens.front();
tokens.pop_front();
return true;
}
void ChoiceOption::PrintHelp(HelpPrinter& printer) const {
std::string usage = "-" + name_ + "=[";
bool first = true;
for (const auto& choice: choices_) {
if (!first) {
usage += '|';
} else {
first = false;
}
usage += choice;
}
usage += "]";
printer.PrintUsage(usage).PrintDescription(help_);
}
//===----------------------------------------------------------------------===//
// PrefixOption. //
//===----------------------------------------------------------------------===//
bool PrefixOption::IsValid() const {
return (0 < name_.size()) && (name_.find(':') == std::string::npos);
}
std::string::size_type PrefixOption::FindPrefix(const std::string& token) const {
auto prefix = name_ + ':';
return token.find(prefix);
}
bool PrefixOption::Accept(const std::string& token) const {
return
(token.compare(0, name_.length(), name_) == 0) &&
token.length() > name_.length() &&
token[name_.length()] == ':';
}
bool PrefixOption::ProcessTokens(std::list<std::string> &tokens) {
assert(1 <= tokens.size());
assert(Accept(tokens.front()) && "option name is mismatched");
std::string value = tokens.front(); tokens.pop_front();
value = value.substr(name_.length() + 1);
for (const auto& token: tokens) {
value += '=';
value += token;
}
tokens.clear();
values_.push_back(value);
is_set_ = true;
return true;
}
void PrefixOption::PrintHelp(HelpPrinter& printer) const {
printer.PrintUsage("-" + name_ + ":[value]").PrintDescription(help_);
}
//===----------------------------------------------------------------------===//
// OptionParser. //
//===----------------------------------------------------------------------===//
std::vector<OptionBase*>::iterator
OptionParser::FindOption(const std::string& name) {
std::vector<OptionBase*>::iterator it = options_.begin();
std::vector<OptionBase*>::iterator end = options_.end();
for (; it != end; ++it) {
if ((*it)->Accept(name)) {
return it;
}
}
return end;
}
bool OptionParser::AddOption(OptionBase *option) {
if (NULL == option || !option->IsValid()) {
return false;
}
if (FindOption(option->name()) != options_.end()) {
return false;
}
options_.push_back(option);
return true;
}
const std::string& OptionParser::Unknown() const {
assert(collectUnknown_);
return unknownOptions_;
}
bool OptionParser::ParseOptions(const char *options) {
std::list<std::string> tokens_l1 = StringFactory::Tokenize(options, ' ');
if (0 == tokens_l1.size()) {
return true;
}
std::list<std::string>::iterator tokens_l1i = tokens_l1.begin();
while (tokens_l1i != tokens_l1.end()) {
if ('-' == tokens_l1i->at(0)) {
std::list<std::string>::iterator option_begin = tokens_l1i;
std::list<std::string> tokens_l2;
do {
tokens_l2.push_back(*tokens_l1i);
tokens_l1i++;
} while (tokens_l1i != tokens_l1.end() && '-' != tokens_l1i->at(0));
std::list<std::string>::iterator option_end = tokens_l1i;
tokens_l2.front().erase(0, 1);
if (1 == tokens_l2.size()) {
tokens_l2 = StringFactory::Tokenize(tokens_l2.front().c_str(), '=');
if (2 < tokens_l2.size()) {
if (collectUnknown_) {
unknownOptions_ += *tokens_l1i + " ";
continue;
} else {
error() << "error: invalid option format: \'"
<< tokens_l2.front() << '\'' << std::endl;
Reset();
return false;
}
}
}
auto find_status = FindOption(tokens_l2.front());
if (find_status == options_.end()) {
if (collectUnknown_) {
for (; option_begin != option_end; ++option_begin) {
unknownOptions_ += *option_begin + " ";
}
continue;
} else {
error() << "error: unknown option: \'"
<< tokens_l2.front() << '\'' << std::endl;
Reset();
return false;
}
}
if (!(*find_status)->ProcessTokens(tokens_l2)) {
Reset();
return false;
}
assert(0 == tokens_l2.size());
} else {
if (collectUnknown_) {
unknownOptions_ += *tokens_l1i + " ";
} else {
error() << "error: unknown option: \'"
<< *tokens_l1i << '\'' << std::endl;
Reset();
return false;
}
}
}
return true;
}
void OptionParser::PrintHelp(std::ostream& out, const std::string& addition) const {
HelpPrinter printer(out);
for (const auto& option: options_) {
option->PrintHelp(printer);
}
out << addition << std::endl;
}
void OptionParser::Reset() {
unknownOptions_.clear();
for (auto &option : options_) {
option->Reset();
}
}
} // namespace options
} // namespace amd
@@ -0,0 +1,476 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_OPTIONS_HPP
#define AMD_OPTIONS_HPP
#include <cstdlib>
#include <iostream>
#include <list>
#include <vector>
#include <cstdint>
#include <cassert>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
namespace amd {
namespace options {
//===----------------------------------------------------------------------===//
// StringFactory. //
//===----------------------------------------------------------------------===//
class StringFactory final {
public:
static std::string Flatten(const char **cstrs,
const uint32_t &cstrs_count,
const char &spacer = '\0');
static std::list<std::string> Tokenize(const char *cstr, const char &delim);
static std::string ToLower(const std::string& str);
static std::string ToUpper(const std::string& str);
};
//===----------------------------------------------------------------------===//
// HelpPrinter, HelpStreambuf. //
//===----------------------------------------------------------------------===//
class HelpStreambuf : public std::streambuf {
public:
explicit HelpStreambuf(std::ostream& stream);
virtual ~HelpStreambuf() {
basicStream_->rdbuf(basicBuf_);
}
void IndentSize(unsigned indent) {
assert(wrapWidth_ == 0 || indentSize_ < wrapWidth_);
indentSize_ = indent;
}
void WrapWidth(unsigned wrap) {
assert(wrapWidth_ == 0 || indentSize_ < wrapWidth_);
wrapWidth_ = wrap;
}
protected:
virtual int_type overflow(int_type ch) override;
private:
std::ostream* basicStream_;
std::streambuf* basicBuf_;
unsigned wrapWidth_;
unsigned indentSize_;
bool atLineStart_;
unsigned lineWidth_;
};
class HelpPrinter {
private:
static const unsigned USAGE_WIDTH = 30;
static const unsigned PADDING_WIDTH = 2;
static const unsigned DESCRIPTION_WIDTH = 50;
public:
HelpPrinter& PrintUsage(const std::string& usage);
HelpPrinter& PrintDescription(const std::string& description);
std::ostream& Stream() { return *out_; }
private:
explicit HelpPrinter(std::ostream& out = std::cout) : out_(&out), sbuf_(*out_) {}
/// @brief Not copy-constructible.
HelpPrinter(const HelpPrinter&);
/// @brief Not copy-assignable.
HelpPrinter& operator =(const HelpPrinter&);
friend class OptionParser;
std::ostream *out_;
HelpStreambuf sbuf_;
};
//===----------------------------------------------------------------------===//
// OptionBase. //
//===----------------------------------------------------------------------===//
class OptionBase {
public:
virtual ~OptionBase() {}
const std::string& name() const {
return name_;
}
const bool& is_set() const {
return is_set_;
}
virtual bool IsValid() const {
return 0 < name_.size();
}
protected:
explicit OptionBase(const std::string& name,
const std::string& help = "",
std::ostream &error = std::cerr)
: name_(name),
help_(help),
is_set_(false),
error_(&error) {}
virtual void PrintHelp(HelpPrinter& printer) const = 0;
virtual bool Accept(const std::string& name) const { return name_ == name; }
const std::string name_;
const std::string help_;
bool is_set_;
std::ostream &error() const { return *error_; }
private:
/// @brief Not copy-constructible.
OptionBase(const OptionBase &ob);
/// @brief Not copy-assignable.
OptionBase& operator=(const OptionBase &ob);
void Reset() {
is_set_ = false;
}
virtual bool ProcessTokens(std::list<std::string> &tokens) = 0;
friend class OptionParser;
mutable std::ostream *error_;
};
//===----------------------------------------------------------------------===//
// Option<T>. //
//===----------------------------------------------------------------------===//
template<typename T>
class Option final: public OptionBase {
public:
explicit Option(const std::string& name,
const std::string& help = "",
std::ostream& error = std::cerr):
OptionBase(name, help, error) {}
~Option() {}
const std::list<T>& values() const {
return values_;
}
protected:
virtual void PrintHelp(HelpPrinter& printer) const override;
private:
/// @brief Not copy-constructible.
Option(const Option &o);
/// @brief Not copy-assignable.
Option& operator=(const Option &o);
bool ProcessTokens(std::list<std::string> &tokens);
std::list<T> values_;
};
template<typename T>
bool Option<T>::ProcessTokens(std::list<std::string> &tokens) {
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
if (2 > tokens.size()) {
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
return false;
}
is_set_ = true;
tokens.pop_front();
while (!tokens.empty()) {
std::istringstream token_stream(tokens.front());
if (!token_stream.good()) {
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
return false;
}
T value;
token_stream >> value;
values_.push_back(value);
tokens.pop_front();
}
return true;
}
template<typename T>
void Option<T>::PrintHelp(HelpPrinter& printer) const {
printer.PrintUsage("-" + name_ + " [" + StringFactory::ToUpper(name_) + "s]")
.PrintDescription(help_);
}
//===----------------------------------------------------------------------===//
// ValueOption<T>. //
//===----------------------------------------------------------------------===//
template<typename T>
class ValueOption final: public OptionBase {
public:
explicit ValueOption(const std::string& name,
const std::string& help = "",
std::ostream& error = std::cerr):
OptionBase(name, help, error) {}
~ValueOption() {}
const T& value() const {
return value_;
}
protected:
void PrintHelp(HelpPrinter& printer) const override;
private:
/// @brief Not copy-constructible.
ValueOption(const ValueOption &o);
/// @brief Not copy-assignable.
ValueOption& operator=(const ValueOption &o);
bool ProcessTokens(std::list<std::string> &tokens) override;
T value_;
};
template<typename T>
bool ValueOption<T>::ProcessTokens(std::list<std::string> &tokens) {
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
if (2 != tokens.size()) {
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
return false;
}
is_set_ = true;
tokens.pop_front();
std::istringstream token_stream(tokens.front());
if (!token_stream.good()) {
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
return false;
}
token_stream >> value_;
tokens.pop_front();
return true;
}
template<typename T>
void ValueOption<T>::PrintHelp(HelpPrinter& printer) const {
printer.PrintUsage("-" + name_ + "=[VAL]")
.PrintDescription(help_);
}
//===----------------------------------------------------------------------===//
// ChoiceOptioin. //
//===----------------------------------------------------------------------===//
class ChoiceOption final: public OptionBase {
public:
ChoiceOption(const std::string& name,
const std::vector<std::string>& choices,
const std::string& help = "",
std::ostream& error = std::cerr);
~ChoiceOption() {}
const std::string& value() const {
return value_;
}
protected:
void PrintHelp(HelpPrinter& printer) const override;
private:
/// @brief Not copy-constructible.
ChoiceOption(const ChoiceOption&);
/// @brief Not copy-assignable.
ChoiceOption& operator =(const ChoiceOption&);
bool ProcessTokens(std::list<std::string> &tokens) override;
std::unordered_set<std::string> choices_;
std::string value_;
};
//===----------------------------------------------------------------------===//
// Option<void>. //
//===----------------------------------------------------------------------===//
class NoArgOption final: public OptionBase {
public:
explicit NoArgOption(const std::string& name,
const std::string& help = "",
std::ostream& error = std::cerr):
OptionBase(name, help, error) {}
~NoArgOption() {}
protected:
void PrintHelp(HelpPrinter& printer) const override {
printer.PrintUsage("-" + name_).PrintDescription(help_);
}
private:
/// @brief Not copy-constructible.
NoArgOption(const NoArgOption &o);
/// @brief Not copy-assignable.
NoArgOption& operator=(const NoArgOption &o);
bool ProcessTokens(std::list<std::string> &tokens) override {
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
if (1 == tokens.size()) {
tokens.pop_front();
is_set_ = true;
return true;
} else if (2 == tokens.size()) {
tokens.pop_front();
if (tokens.front() == "1") {
is_set_ = true;
tokens.pop_front();
return true;
} else if (tokens.front() == "0") {
is_set_ = false;
tokens.pop_front();
return true;
}
}
error() << "error: invalid option: '" << name_ << "'" << std::endl;
return false;
}
};
//===----------------------------------------------------------------------===//
// PrefixOption. //
//===----------------------------------------------------------------------===//
class PrefixOption final: public OptionBase {
public:
PrefixOption(const std::string& prefix,
const std::string& help = "",
std::ostream& error = std::cerr)
: OptionBase(prefix, help, error) {}
~PrefixOption() {}
const std::vector<std::string>& values() const {
return values_;
}
bool IsValid() const override;
protected:
void PrintHelp(HelpPrinter& printer) const override;
bool Accept(const std::string& token) const override;
private:
/// @brief Not copy-constructible.
PrefixOption(const PrefixOption&);
/// @brief Not copy-assignable.
PrefixOption& operator =(const PrefixOption&);
bool ProcessTokens(std::list<std::string> &tokens);
std::string::size_type FindPrefix(const std::string& token) const;
std::vector<std::string> values_;
};
//===----------------------------------------------------------------------===//
// OptionParser. //
//===----------------------------------------------------------------------===//
class OptionParser final {
public:
explicit OptionParser(bool collectUnknown = false, std::ostream& error = std::cerr)
: collectUnknown_(collectUnknown),
error_(&error) {}
~OptionParser() {}
bool AddOption(OptionBase *option);
bool ParseOptions(const char *options);
const std::string& Unknown() const;
void CollectUnknown(bool b) { collectUnknown_ = b; }
void PrintHelp(std::ostream& out, const std::string& addition = "") const;
void Reset();
private:
/// @brief Not copy-constructible.
OptionParser(const OptionParser &op);
/// @brief Not copy-assignable.
OptionParser& operator=(const OptionParser &op);
std::ostream& error() { return *error_; }
std::vector<OptionBase*>::iterator FindOption(const std::string& name);
std::vector<OptionBase*> options_;
std::string unknownOptions_;
bool collectUnknown_;
std::ostream *error_;
};
} // namespace options
} // namespace amd
#endif // AMD_OPTIONS_HPP
@@ -0,0 +1,263 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#include <cstddef>
#include <cstdint>
// Gets offset of specified member in specified type.
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
#endif // offsetof
// Creates enumeration entries used for packing bits into integers. Enumeration
// entries include bit shift amount, bit width, and bit mask.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
NAME ## _SHIFT = (SHIFT), \
NAME ## _WIDTH = (WIDTH), \
NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
#endif // AMDHSA_BITS_ENUM_ENTRY
// Gets bits for specified bit mask from specified source.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) ((SRC & MSK) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
// Sets bits for specified bit mask in specified destination.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL) \
DST &= ~MSK; \
DST |= ((VAL << MSK ## _SHIFT) & MSK)
#endif // AMDHSA_BITS_SET
namespace llvm {
namespace amdhsa {
// Floating point rounding modes. Must match hardware definition.
enum : uint8_t {
FLOAT_ROUND_MODE_NEAR_EVEN = 0,
FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
FLOAT_ROUND_MODE_ZERO = 3,
};
// Floating point denorm modes. Must match hardware definition.
enum : uint8_t {
FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
FLOAT_DENORM_MODE_FLUSH_DST = 1,
FLOAT_DENORM_MODE_FLUSH_SRC = 2,
FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};
// System VGPR workitem IDs. Must match hardware definition.
enum : uint8_t {
SYSTEM_VGPR_WORKITEM_ID_X = 0,
SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};
// Compute program resource register 1. Must match hardware definition.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
COMPUTE_PGM_RSRC1(PRIV, 20, 1),
COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
};
#undef COMPUTE_PGM_RSRC1
// Compute program resource register 2. Must match hardware definition.
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
};
#undef COMPUTE_PGM_RSRC2
// Compute program resource register 3 for GFX90A+. Must match hardware
// definition.
#define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
};
#undef COMPUTE_PGM_RSRC3_GFX90A
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
// Kernel code properties. Must be kept backwards compatible.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
};
#undef KERNEL_CODE_PROPERTY
// Kernel descriptor. Must be kept backwards compatible.
struct kernel_descriptor_t {
uint32_t group_segment_fixed_size;
uint32_t private_segment_fixed_size;
uint32_t kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint16_t kernel_code_properties;
uint8_t reserved2[6];
};
enum : uint32_t {
GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
KERNARG_SIZE_OFFSET = 8,
RESERVED0_OFFSET = 12,
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
RESERVED1_OFFSET = 24,
COMPUTE_PGM_RSRC3_OFFSET = 44,
COMPUTE_PGM_RSRC1_OFFSET = 48,
COMPUTE_PGM_RSRC2_OFFSET = 52,
KERNEL_CODE_PROPERTIES_OFFSET = 56,
RESERVED2_OFFSET = 58,
};
static_assert(
sizeof(kernel_descriptor_t) == 64,
"invalid size for kernel_descriptor_t");
static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
GROUP_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for group_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for private_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
KERNARG_SIZE_OFFSET,
"invalid offset for kernarg_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
"invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
"invalid offset for kernel_code_entry_byte_offset");
static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
"invalid offset for reserved1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
COMPUTE_PGM_RSRC3_OFFSET,
"invalid offset for compute_pgm_rsrc3");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
COMPUTE_PGM_RSRC1_OFFSET,
"invalid offset for compute_pgm_rsrc1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
COMPUTE_PGM_RSRC2_OFFSET,
"invalid offset for compute_pgm_rsrc2");
static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
KERNEL_CODE_PROPERTIES_OFFSET,
"invalid offset for kernel_code_properties");
static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
"invalid offset for reserved2");
} // end namespace amdhsa
} // end namespace llvm
#endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
@@ -0,0 +1,31 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
#-----------------------------------------------------------------------------
# loader library
#
# This file is expected to be included from top-level CMakeLists.txt.
#
# Dependencies:
# - Compiler definitions
# - amdhsacode library
#
# Defines:
# - amdhsaloader library and target include directories
file(GLOB sources *.cpp *.hpp)
add_library(amdhsaloader STATIC ${sources})
set_target_properties(amdhsaloader PROPERTIES
MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
POSITION_INDEPENDENT_CODE ON
)
if(CMAKE_CXX_COMPILER_ID MATCHES "^(GNU|(Apple)?Clang)$")
target_compile_options(amdhsaloader PRIVATE
-Werror
-Wno-inconsistent-missing-override
)
endif()
target_include_directories(amdhsaloader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(amdhsaloader amdhsacode)
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
@@ -0,0 +1,686 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
#define HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
#include <array>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <libelf.h>
#include <limits.h>
#include <list>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <cstring>
#include "hsa.h"
#include "hsa_ext_image.h"
#include "amd_hsa_loader.hpp"
#include "amd_hsa_code.hpp"
#include "amd_hsa_kernel_code.h"
#include "amd_hsa_locks.hpp"
#if defined(_WIN32) || defined(_WIN64)
#if _WIN64
#define __WORDSIZE 64
#else
#define __WORDSIZE 32
#endif
#endif
#define __ELF_NATIVE_CLASS __WORDSIZE
/* We use this macro to refer to ELF types independent of the native wordsize.
`ElfW(TYPE)' is used in place of `Elf32_TYPE' or `Elf64_TYPE'. */
#define ElfW(type) _ElfW (Elf, __ELF_NATIVE_CLASS, type)
#define _ElfW(e,w,t) _ElfW_1 (e, w, _##t)
#define _ElfW_1(e,w,t) e##w##t
/* Structure describing a loaded shared object. The `l_next' and `l_prev'
members form a chain of all the shared objects loaded at startup.
These data structures exist in space used by the run-time dynamic linker;
modifying them may have disastrous results. */
struct link_map
{
/* These first few members are part of the protocol with the debugger.
This is the same format used in SVR4. */
ElfW(Addr) l_addr; /* Difference between the address in the ELF
file and the addresses in memory. */
char *l_name; /* Absolute file name object was found in. */
ElfW(Dyn) *l_ld; /* Dynamic section of the shared object. */
struct link_map *l_next, *l_prev; /* Chain of loaded objects. */
};
/* The legacy rendezvous structure used by the run-time dynamic linker to
communicate details of shared object loading to the debugger. */
struct r_debug
{
/* Version number for this protocol. It should be greater than 0. */
int r_version;
struct link_map *r_map; /* Head of the chain of loaded objects. */
/* This is the address of a function internal to the run-time linker,
that will always be called when the linker begins to map in a
library or unmap it, and again when the mapping change is complete.
The debugger can set a breakpoint at this address if it wants to
notice shared object mapping changes. */
ElfW(Addr) r_brk;
enum RT
{
/* This state value describes the mapping change taking place when
the `r_brk' address is called. */
RT_CONSISTENT, /* Mapping change is complete. */
RT_ADD, /* Beginning to add a new object. */
RT_DELETE /* Beginning to remove an object mapping. */
} r_state;
ElfW(Addr) r_ldbase; /* Base address the linker is loaded at. */
};
/* This is the symbol of that structure provided by the dynamic linker. */
extern struct r_debug _r_debug;
namespace amd {
namespace hsa {
namespace loader {
class MemoryAddress;
class SymbolImpl;
class KernelSymbol;
class VariableSymbol;
class ExecutableImpl;
//===----------------------------------------------------------------------===//
// SymbolImpl. //
//===----------------------------------------------------------------------===//
typedef uint32_t symbol_attribute32_t;
class SymbolImpl: public Symbol {
public:
virtual ~SymbolImpl() {}
bool IsKernel() const {
return HSA_SYMBOL_KIND_KERNEL == kind;
}
bool IsVariable() const {
return HSA_SYMBOL_KIND_VARIABLE == kind;
}
bool is_loaded;
hsa_symbol_kind_t kind;
std::string module_name;
std::string symbol_name;
hsa_symbol_linkage_t linkage;
bool is_definition;
uint64_t address;
hsa_agent_t agent;
hsa_agent_t GetAgent() override {
return agent;
}
protected:
SymbolImpl(const bool &_is_loaded,
const hsa_symbol_kind_t &_kind,
const std::string &_module_name,
const std::string &_symbol_name,
const hsa_symbol_linkage_t &_linkage,
const bool &_is_definition,
const uint64_t &_address = 0)
: is_loaded(_is_loaded)
, kind(_kind)
, module_name(_module_name)
, symbol_name(_symbol_name)
, linkage(_linkage)
, is_definition(_is_definition)
, address(_address) {}
virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void* value) override;
private:
SymbolImpl(const SymbolImpl &s);
SymbolImpl& operator=(const SymbolImpl &s);
};
//===----------------------------------------------------------------------===//
// KernelSymbol. //
//===----------------------------------------------------------------------===//
class KernelSymbol final: public SymbolImpl {
public:
KernelSymbol(const bool &_is_loaded,
const std::string &_module_name,
const std::string &_symbol_name,
const hsa_symbol_linkage_t &_linkage,
const bool &_is_definition,
const uint32_t &_kernarg_segment_size,
const uint32_t &_kernarg_segment_alignment,
const uint32_t &_group_segment_size,
const uint32_t &_private_segment_size,
const bool &_is_dynamic_callstack,
const uint32_t &_size,
const uint32_t &_alignment,
const uint64_t &_address = 0)
: SymbolImpl(_is_loaded,
HSA_SYMBOL_KIND_KERNEL,
_module_name,
_symbol_name,
_linkage,
_is_definition,
_address)
, full_name(_module_name.empty() ? _symbol_name : _module_name + "::" + _symbol_name)
, kernarg_segment_size(_kernarg_segment_size)
, kernarg_segment_alignment(_kernarg_segment_alignment)
, group_segment_size(_group_segment_size)
, private_segment_size(_private_segment_size)
, is_dynamic_callstack(_is_dynamic_callstack)
, size(_size)
, alignment(_alignment) {}
~KernelSymbol() {}
bool GetInfo(hsa_symbol_info32_t symbol_info, void *value);
std::string full_name;
uint32_t kernarg_segment_size;
uint32_t kernarg_segment_alignment;
uint32_t group_segment_size;
uint32_t private_segment_size;
bool is_dynamic_callstack;
uint32_t size;
uint32_t alignment;
amd_runtime_loader_debug_info_t debug_info;
private:
KernelSymbol(const KernelSymbol &ks);
KernelSymbol& operator=(const KernelSymbol &ks);
};
//===----------------------------------------------------------------------===//
// VariableSymbol. //
//===----------------------------------------------------------------------===//
class VariableSymbol final: public SymbolImpl {
public:
VariableSymbol(const bool &_is_loaded,
const std::string &_module_name,
const std::string &_symbol_name,
const hsa_symbol_linkage_t &_linkage,
const bool &_is_definition,
const hsa_variable_allocation_t &_allocation,
const hsa_variable_segment_t &_segment,
const uint32_t &_size,
const uint32_t &_alignment,
const bool &_is_constant,
const bool &_is_external = false,
const uint64_t &_address = 0)
: SymbolImpl(_is_loaded,
HSA_SYMBOL_KIND_VARIABLE,
_module_name,
_symbol_name,
_linkage,
_is_definition,
_address)
, allocation(_allocation)
, segment(_segment)
, size(_size)
, alignment(_alignment)
, is_constant(_is_constant)
, is_external(_is_external) {}
~VariableSymbol() {}
bool GetInfo(hsa_symbol_info32_t symbol_info, void *value);
hsa_variable_allocation_t allocation;
hsa_variable_segment_t segment;
uint32_t size;
uint32_t alignment;
bool is_constant;
bool is_external;
private:
VariableSymbol(const VariableSymbol &vs);
VariableSymbol& operator=(const VariableSymbol &vs);
};
//===----------------------------------------------------------------------===//
// Logger. //
//===----------------------------------------------------------------------===//
class Logger final {
public:
Logger(std::ostream &Stream = std::cerr) : OutStream(Stream) {}
template <typename T>
Logger &operator<<(const T &Data) {
if (!IsLoggingEnabled())
return *this;
OutStream << Data;
std::stringstream ss;
ss << Data;
printf("=================== %s\n", ss.str().c_str());
return *this;
}
private:
Logger(const Logger &L);
Logger& operator=(const Logger &L);
bool IsLoggingEnabled() const {
const char *enable_logging = getenv("LOADER_ENABLE_LOGGING");
if (!enable_logging)
return false;
if (std::string(enable_logging) == "0")
return false;
return true;
}
std::ostream &OutStream;
};
//===----------------------------------------------------------------------===//
// Executable. //
//===----------------------------------------------------------------------===//
class ExecutableImpl;
class LoadedCodeObjectImpl;
class Segment;
class ExecutableObject {
protected:
ExecutableImpl *owner;
hsa_agent_t agent;
public:
ExecutableObject(ExecutableImpl *owner_, hsa_agent_t agent_)
: owner(owner_), agent(agent_) { }
ExecutableImpl* Owner() const { return owner; }
hsa_agent_t Agent() const { return agent; }
virtual void Print(std::ostream& out) = 0;
virtual void Destroy() = 0;
virtual ~ExecutableObject() { }
};
class LoadedCodeObjectImpl : public LoadedCodeObject, public ExecutableObject {
friend class AmdHsaCodeLoader;
private:
LoadedCodeObjectImpl(const LoadedCodeObjectImpl&);
LoadedCodeObjectImpl& operator=(const LoadedCodeObjectImpl&);
const void *elf_data;
const size_t elf_size;
std::vector<Segment*> loaded_segments;
public:
LoadedCodeObjectImpl(ExecutableImpl *owner_, hsa_agent_t agent_, const void *elf_data_, size_t elf_size_)
: ExecutableObject(owner_, agent_), elf_data(elf_data_), elf_size(elf_size_) {
memset(&r_debug_info, 0, sizeof(r_debug_info));
}
const void* ElfData() const { return elf_data; }
size_t ElfSize() const { return elf_size; }
std::vector<Segment*>& LoadedSegments() { return loaded_segments; }
bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) override;
hsa_status_t IterateLoadedSegments(
hsa_status_t (*callback)(
amd_loaded_segment_t loaded_segment,
void *data),
void *data) override;
void Print(std::ostream& out) override;
void Destroy() override {}
hsa_agent_t getAgent() const override;
hsa_executable_t getExecutable() const override;
uint64_t getElfData() const override;
uint64_t getElfSize() const override;
uint64_t getStorageOffset() const override;
uint64_t getLoadBase() const override;
uint64_t getLoadSize() const override;
int64_t getDelta() const override;
std::string getUri() const override;
link_map r_debug_info;
};
class Segment : public LoadedSegment, public ExecutableObject {
private:
amdgpu_hsa_elf_segment_t segment;
void *ptr;
size_t size;
uint64_t vaddr;
bool frozen;
size_t storage_offset;
public:
Segment(ExecutableImpl *owner_, hsa_agent_t agent_, amdgpu_hsa_elf_segment_t segment_, void* ptr_, size_t size_, uint64_t vaddr_, size_t storage_offset_)
: ExecutableObject(owner_, agent_), segment(segment_),
ptr(ptr_), size(size_), vaddr(vaddr_), frozen(false), storage_offset(storage_offset_) { }
amdgpu_hsa_elf_segment_t ElfSegment() const { return segment; }
void* Ptr() const { return ptr; }
size_t Size() const { return size; }
uint64_t VAddr() const { return vaddr; }
size_t StorageOffset() const { return storage_offset; }
bool GetInfo(amd_loaded_segment_info_t attribute, void *value) override;
uint64_t Offset(uint64_t addr); // Offset within segment. Used together with ptr with loader context functions.
void* Address(uint64_t addr); // Address in segment. Used for relocations and valid on agent.
bool Freeze();
bool IsAddressInSegment(uint64_t addr);
void Copy(uint64_t addr, const void* src, size_t size);
void Print(std::ostream& out) override;
void Destroy() override;
};
class Sampler : public ExecutableObject {
private:
hsa_ext_sampler_t samp;
public:
Sampler(ExecutableImpl *owner, hsa_agent_t agent, hsa_ext_sampler_t samp_)
: ExecutableObject(owner, agent), samp(samp_) { }
void Print(std::ostream& out) override;
void Destroy() override;
};
class Image : public ExecutableObject {
private:
hsa_ext_image_t img;
public:
Image(ExecutableImpl *owner, hsa_agent_t agent, hsa_ext_image_t img_)
: ExecutableObject(owner, agent), img(img_) { }
void Print(std::ostream& out) override;
void Destroy() override;
};
typedef std::string ProgramSymbol;
typedef std::unordered_map<ProgramSymbol, SymbolImpl*> ProgramSymbolMap;
typedef std::pair<std::string, hsa_agent_t> AgentSymbol;
struct ASC {
bool operator()(const AgentSymbol &las, const AgentSymbol &ras) const {
return las.first == ras.first && las.second.handle == ras.second.handle;
}
};
struct ASH {
size_t operator()(const AgentSymbol &as) const {
size_t h = std::hash<std::string>()(as.first);
size_t i = std::hash<uint64_t>()(as.second.handle);
return h ^ (i << 1);
}
};
typedef std::unordered_map<AgentSymbol, SymbolImpl*, ASH, ASC> AgentSymbolMap;
class ExecutableImpl final: public Executable {
friend class AmdHsaCodeLoader;
public:
const hsa_profile_t& profile() const {
return profile_;
}
const hsa_executable_state_t& state() const {
return state_;
}
ExecutableImpl(
const hsa_profile_t &_profile,
Context *context,
size_t id,
hsa_default_float_rounding_mode_t default_float_rounding_mode);
~ExecutableImpl();
hsa_status_t GetInfo(hsa_executable_info_t executable_info, void *value) override;
hsa_status_t DefineProgramExternalVariable(
const char *name, void *address) override;
hsa_status_t DefineAgentExternalVariable(
const char *name,
hsa_agent_t agent,
hsa_variable_segment_t segment,
void *address) override;
hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
const char *options,
hsa_loaded_code_object_t *loaded_code_object) override;
hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
size_t code_object_size,
const char *options,
hsa_loaded_code_object_t *loaded_code_object) override;
hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
const char *options,
const std::string &uri,
hsa_loaded_code_object_t *loaded_code_object) override;
hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
size_t code_object_size,
const char *options,
const std::string &uri,
hsa_loaded_code_object_t *loaded_code_object) override;
hsa_status_t Freeze(const char *options) override;
hsa_status_t Validate(uint32_t *result) override {
amd::hsa::common::ReaderLockGuard<amd::hsa::common::ReaderWriterLock> reader_lock(rw_lock_);
assert(result);
*result = 0;
return HSA_STATUS_SUCCESS;
}
/// @note needed for hsa v1.0.
/// @todo remove during loader refactoring.
bool IsProgramSymbol(const char *symbol_name) override;
Symbol* GetSymbol(
const char *symbol_name,
const hsa_agent_t *agent) override;
hsa_status_t IterateSymbols(
iterate_symbols_f callback, void *data) override;
/// @since hsa v1.1.
hsa_status_t IterateAgentSymbols(
hsa_agent_t agent,
hsa_status_t (*callback)(hsa_executable_t exec,
hsa_agent_t agent,
hsa_executable_symbol_t symbol,
void *data),
void *data) override;
/// @since hsa v1.1.
hsa_status_t IterateProgramSymbols(
hsa_status_t (*callback)(hsa_executable_t exec,
hsa_executable_symbol_t symbol,
void *data),
void *data) override;
hsa_status_t IterateLoadedCodeObjects(
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data) override;
size_t GetNumSegmentDescriptors() override;
size_t QuerySegmentDescriptors(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t total_num_segment_descriptors,
size_t first_empty_segment_descriptor) override;
uint64_t FindHostAddress(uint64_t device_address) override;
void EnableReadOnlyMode();
void DisableReadOnlyMode();
void Print(std::ostream& out) override;
bool PrintToFile(const std::string& filename) override;
Context* context() { return context_; }
size_t id() { return id_; }
private:
ExecutableImpl(const ExecutableImpl &e);
ExecutableImpl& operator=(const ExecutableImpl &e);
std::unique_ptr<amd::hsa::code::AmdHsaCode> code;
Symbol* GetSymbolInternal(
const char *symbol_name,
const hsa_agent_t *agent);
hsa_status_t LoadSegments(hsa_agent_t agent, const code::AmdHsaCode *c,
uint32_t majorVersion);
hsa_status_t LoadSegmentsV1(hsa_agent_t agent, const code::AmdHsaCode *c);
hsa_status_t LoadSegmentsV2(hsa_agent_t agent, const code::AmdHsaCode *c);
hsa_status_t LoadSegmentV1(hsa_agent_t agent, const code::Segment *s);
hsa_status_t LoadSegmentV2(const code::Segment *data_segment,
loader::Segment *load_segment);
hsa_status_t LoadSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
hsa_status_t LoadDefinitionSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
hsa_status_t LoadDeclarationSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
hsa_status_t ApplyRelocations(hsa_agent_t agent, amd::hsa::code::AmdHsaCode *c);
hsa_status_t ApplyStaticRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec);
hsa_status_t ApplyStaticRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel);
hsa_status_t ApplyDynamicRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec);
hsa_status_t ApplyDynamicRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel);
Segment* VirtualAddressSegment(uint64_t vaddr);
uint64_t SymbolAddress(hsa_agent_t agent, amd::hsa::code::Symbol* sym);
uint64_t SymbolAddress(hsa_agent_t agent, amd::elf::Symbol* sym);
Segment* SymbolSegment(hsa_agent_t agent, amd::hsa::code::Symbol* sym);
Segment* SectionSegment(hsa_agent_t agent, amd::hsa::code::Section* sec);
amd::hsa::common::ReaderWriterLock rw_lock_;
hsa_profile_t profile_;
Context *context_;
Logger logger_;
const size_t id_;
hsa_default_float_rounding_mode_t default_float_rounding_mode_;
hsa_executable_state_t state_;
ProgramSymbolMap program_symbols_;
AgentSymbolMap agent_symbols_;
std::vector<ExecutableObject*> objects;
Segment *program_allocation_segment;
std::vector<LoadedCodeObjectImpl*> loaded_code_objects;
};
class AmdHsaCodeLoader : public Loader {
private:
Context* context;
std::vector<Executable*> executables;
amd::hsa::common::ReaderWriterLock rw_lock_;
public:
AmdHsaCodeLoader(Context* context_)
: context(context_) { assert(context); }
Context* GetContext() const override { return context; }
Executable* CreateExecutable(
hsa_profile_t profile,
const char *options,
hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) override;
hsa_status_t FreezeExecutable(Executable *executable, const char *options) override;
void DestroyExecutable(Executable *executable) override;
hsa_status_t IterateExecutables(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data) override;
hsa_status_t QuerySegmentDescriptors(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors) override;
hsa_executable_t FindExecutable(uint64_t device_address) override;
uint64_t FindHostAddress(uint64_t device_address) override;
void PrintHelp(std::ostream& out) override;
void EnableReadOnlyMode();
void DisableReadOnlyMode();
};
} // namespace loader
} // namespace hsa
} // namespace amd
#endif // HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
@@ -0,0 +1,307 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include <cstring>
#include <cassert>
#include "loaders.hpp"
namespace amd {
namespace hsa {
namespace loader {
// Helper function that allocates an aligned memory.
static inline void*
alignedMalloc(size_t size, size_t alignment)
{
#if defined(_WIN32)
return ::_aligned_malloc(size, alignment);
#else
void * ptr = NULL;
alignment = (std::max)(alignment, sizeof(void*));
if (0 == ::posix_memalign(&ptr, alignment, size)) {
return ptr;
}
return NULL;
#endif
}
// Helper function that frees an aligned memory.
static inline void
alignedFree(void *ptr)
{
#if defined(_WIN32)
::_aligned_free(ptr);
#else
free(ptr);
#endif
}
OfflineLoaderContext::OfflineLoaderContext()
: out(std::cout)
{
invalid.handle = 0;
gfx700.handle = 700;
gfx701.handle = 701;
gfx800.handle = 800;
gfx801.handle = 801;
gfx802.handle = 802;
gfx803.handle = 803;
gfx804.handle = 804;
gfx810.handle = 810;
gfx900.handle = 900;
gfx901.handle = 901;
gfx902.handle = 902;
gfx903.handle = 903;
gfx904.handle = 904;
gfx905.handle = 905;
gfx906.handle = 906;
gfx907.handle = 907;
gfx1000.handle = 1000;
gfx1001.handle = 1001;
gfx1010.handle = 1010;
gfx1011.handle = 1011;
gfx1012.handle = 1012;
gfx1030.handle = 1030;
gfx4000.handle = 4000;
#if defined(GFX11_BUILD)
gfx1100.handle = 1100;
gfx1101.handle = 1101;
gfx1102.handle = 1102;
gfx1103.handle = 1103;
gfx1150.handle = 1150;
gfx1151.handle = 1151;
#endif // GFX11_BUILD
}
hsa_isa_t OfflineLoaderContext::IsaFromName(const char *name)
{
std::string sname(name);
if (sname == "AMD:AMDGPU:7:0:0") {
return gfx700;
} else if (sname == "AMD:AMDGPU:7:0:1") {
return gfx701;
} else if (sname == "AMD:AMDGPU:8:0:0") {
return gfx800;
} else if (sname == "AMD:AMDGPU:8:0:1") {
return gfx801;
} else if (sname == "AMD:AMDGPU:8:0:2") {
return gfx802;
} else if (sname == "AMD:AMDGPU:8:0:3") {
return gfx803;
} else if (sname == "AMD:AMDGPU:8:0:4") {
return gfx804;
} else if (sname == "AMD:AMDGPU:8:1:0") {
return gfx810;
} else if (sname == "AMD:AMDGPU:9:0:0") {
return gfx900;
} else if (sname == "AMD:AMDGPU:9:0:1") {
return gfx901;
} else if (sname == "AMD:AMDGPU:9:0:2") {
return gfx902;
} else if (sname == "AMD:AMDGPU:9:0:3") {
return gfx903;
}
else if (sname == "AMD:AMDGPU:9:0:4") {
return gfx904;
} else if (sname == "AMD:AMDGPU:9:0:5") {
return gfx905;
}
else if (sname == "AMD:AMDGPU:9:0:6") {
return gfx906;
} else if (sname == "AMD:AMDGPU:9:0:7") {
return gfx907;
}
else if (sname == "AMD:AMDGPU:10:0:0") {
return gfx1000;
} else if (sname == "AMD:AMDGPU:10:0:1") {
return gfx1001;
} else if (sname == "AMD:AMDGPU:10:1:0") {
return gfx1010;
} else if (sname == "AMD:AMDGPU:10:1:1") {
return gfx1011;
} else if (sname == "AMD:AMDGPU:10:1:2") {
return gfx1012;
}
else if (sname == "AMD:AMDGPU:10:3:0") {
return gfx1030;
}
else if (sname == "AMD:AMDGPU:40:0:0") {
return gfx4000;
}
#if defined(GFX11_BUILD)
else if (sname == "AMD:AMDGPU:11:0:0") {
return gfx1100;
} else if (sname == "AMD:AMDGPU:11:0:1") {
return gfx1101;
} else if (sname == "AMD:AMDGPU:11:0:2") {
return gfx1102;
} else if (sname == "AMD:AMDGPU:11:0:3") {
return gfx1103;
} else if (sname == "AMD:AMDGPU:11:5:0") {
return gfx1150;
} else if (sname == "AMD:AMDGPU:11:5:1") {
return gfx1151;
}
#endif // GFX11_BUILD
assert(0);
return invalid;
}
bool OfflineLoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
{
return true;
}
void* OfflineLoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero)
{
void* ptr = alignedMalloc(size, align);
if (zero) { memset(ptr, 0, size); }
out << "SegmentAlloc: " << segment << ": " << "size=" << size << " align=" << align << " zero=" << zero << " result=" << ptr << std::endl;
pointers.insert(ptr);
return ptr;
}
bool OfflineLoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size)
{
out << "SegmentCopy: " << segment << ": " << "dst=" << dst << " offset=" << offset << " src=" << src << " size=" << size << std::endl;
if (!dst || !src || dst == src) {
return false;
}
if (0 == size) {
return true;
}
memcpy((char *) dst + offset, src, size);
return true;
}
void OfflineLoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
out << "SegmentFree: " << segment << ": " << " ptr=" << seg << " size=" << size << std::endl;
pointers.erase(seg);
alignedFree(seg);
}
void* OfflineLoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
out << "SegmentAddress: " << segment << ": " << " ptr=" << seg << " offset=" << offset << std::endl;
return (char*) seg + offset;
}
void* OfflineLoaderContext::SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
out << "SegmentHostAddress: " << segment << ": " << " ptr=" << seg << " offset=" << offset << std::endl;
return (char*) seg + offset;
}
bool OfflineLoaderContext::SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
out << "SegmentFreeze: " << segment << ": " << " ptr=" << seg << " size=" << size << std::endl;
return true;
}
bool OfflineLoaderContext::ImageExtensionSupported()
{
return true;
}
hsa_status_t OfflineLoaderContext::ImageCreate(
hsa_agent_t agent,
hsa_access_permission_t image_permission,
const hsa_ext_image_descriptor_t *image_descriptor,
const void *image_data,
hsa_ext_image_t *image_handle)
{
void* ptr = alignedMalloc(256, 8);
out << "ImageCreate" << ":" <<
" permission=" << image_permission <<
" geometry=" << image_descriptor->geometry <<
" width=" << image_descriptor->width <<
" height=" << image_descriptor->height <<
" depth=" << image_descriptor->depth <<
" array_size=" << image_descriptor->array_size <<
" channel_type=" << image_descriptor->format.channel_type <<
" channel_order=" << image_descriptor->format.channel_order<<
" data=" << image_data <<
std::endl;
pointers.insert(ptr);
image_handle->handle = reinterpret_cast<uint64_t>(ptr);
return HSA_STATUS_SUCCESS;
}
hsa_status_t OfflineLoaderContext::ImageDestroy(
hsa_agent_t agent, hsa_ext_image_t image_handle)
{
void* ptr = reinterpret_cast<void*>(image_handle.handle);
pointers.erase(ptr);
alignedFree(ptr);
return HSA_STATUS_SUCCESS;
}
hsa_status_t OfflineLoaderContext::SamplerCreate(
hsa_agent_t agent,
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
hsa_ext_sampler_t *sampler_handle)
{
void* ptr = alignedMalloc(256, 8);
out << "SamplerCreate" << ":" <<
" coordinate_mode=" << sampler_descriptor->coordinate_mode <<
" filter_mode=" << sampler_descriptor->filter_mode <<
" address_mode=" << sampler_descriptor->address_mode <<
std::endl;
pointers.insert(ptr);
sampler_handle->handle = reinterpret_cast<uint64_t>(ptr);
return HSA_STATUS_SUCCESS;
}
hsa_status_t OfflineLoaderContext::SamplerDestroy(
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle)
{
void* ptr = reinterpret_cast<void*>(sampler_handle.handle);
pointers.erase(ptr);
alignedFree(ptr);
return HSA_STATUS_SUCCESS;
}
}
}
}
@@ -0,0 +1,114 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef LOADERS_HPP_
#define LOADERS_HPP_
#include "amd_hsa_loader.hpp"
#include <set>
#include <iostream>
namespace amd {
namespace hsa {
namespace loader {
class OfflineLoaderContext : public amd::hsa::loader::Context {
private:
hsa_isa_t invalid;
hsa_isa_t gfx700, gfx701, gfx800, gfx801, gfx802, gfx803, gfx804, gfx810;
hsa_isa_t gfx900, gfx901, gfx902, gfx903;
hsa_isa_t gfx904, gfx905;
hsa_isa_t gfx906, gfx907;
hsa_isa_t gfx1000, gfx1001, gfx1010, gfx1011, gfx1012;
hsa_isa_t gfx1030;
hsa_isa_t gfx4000;
#if defined(GFX11_BUILD)
hsa_isa_t gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151;
#endif // GFX11_BUILD
std::ostream& out;
typedef std::set<void*> PointerSet;
PointerSet pointers;
public:
OfflineLoaderContext();
hsa_isa_t IsaFromName(const char *name) override;
bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) override;
void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override;
bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override;
void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override;
void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;
void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;
bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override;
bool ImageExtensionSupported() override;
hsa_status_t ImageCreate(
hsa_agent_t agent,
hsa_access_permission_t image_permission,
const hsa_ext_image_descriptor_t *image_descriptor,
const void *image_data,
hsa_ext_image_t *image_handle) override;
hsa_status_t ImageDestroy(
hsa_agent_t agent, hsa_ext_image_t image_handle) override;
hsa_status_t SamplerCreate(
hsa_agent_t agent,
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
hsa_ext_sampler_t *sampler_handle) override;
hsa_status_t SamplerDestroy(
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override;
};
}
}
}
#endif // LOADERS_HPP_
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
@@ -0,0 +1,266 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
/******************************************************************************
* University of Illinois / NCSA
* Open Source License
*
* Copyright(c) 2011 - 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
* Advanced Micro Devices, Inc.
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files(the "Software"), to deal
* with the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and /
* or sell copies of the Software, and to permit persons to whom the Software
* is furnished to do so, subject to the following conditions:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimers in the documentation
* and / or other materials provided with the distribution.
*
* Neither the names of Advanced Micro Devices, Inc, nor the
mes of its
* contributors may be used to endorse or promote products derived from this
* Software without specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
* THE SOFTWARE.
******************************************************************************/
#ifndef AMD_ELF_IMAGE_HPP_
#define AMD_ELF_IMAGE_HPP_
#include <string>
#include <sstream>
#include <vector>
#include <memory>
namespace amd {
namespace elf {
class Symbol;
class SymbolTable;
class Section;
class RelocationSection;
class Segment {
public:
virtual ~Segment() { }
virtual uint64_t type() const = 0;
virtual uint64_t memSize() const = 0;
virtual uint64_t align() const = 0;
virtual uint64_t imageSize() const = 0;
virtual uint64_t vaddr() const = 0;
virtual uint64_t flags() const = 0;
virtual uint64_t offset() const = 0;
virtual const char* data() const = 0;
virtual uint16_t getSegmentIndex() = 0;
virtual bool updateAddSection(Section *section) = 0;
};
class Section {
public:
virtual ~Section() { }
virtual uint16_t getSectionIndex() const = 0;
virtual uint32_t type() const = 0;
virtual std::string Name() const = 0;
virtual uint64_t offset() const = 0;
virtual uint64_t addr() const = 0;
virtual bool updateAddr(uint64_t addr) = 0;
virtual uint64_t addralign() const = 0;
virtual uint64_t flags() const = 0;
virtual uint64_t size() const = 0;
virtual uint64_t nextDataOffset(uint64_t align) const = 0;
virtual uint64_t addData(const void *src, uint64_t size, uint64_t align) = 0;
virtual bool getData(uint64_t offset, void* dest, uint64_t size) = 0;
virtual Segment* segment() = 0;
virtual RelocationSection* asRelocationSection() = 0;
virtual bool hasRelocationSection() const = 0;
virtual RelocationSection* relocationSection(SymbolTable* symtab = 0) = 0;
virtual bool setMemSize(uint64_t s) = 0;
virtual uint64_t memSize() const = 0;
virtual bool setAlign(uint64_t a) = 0;
virtual uint64_t memAlign() const = 0;
};
class Relocation {
public:
virtual ~Relocation() { }
virtual RelocationSection* section() = 0;
virtual uint32_t type() = 0;
virtual uint32_t symbolIndex() = 0;
virtual Symbol* symbol() = 0;
virtual uint64_t offset() = 0;
virtual int64_t addend() = 0;
};
class RelocationSection : public virtual Section {
public:
virtual Relocation* addRelocation(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend) = 0;
virtual size_t relocationCount() const = 0;
virtual Relocation* relocation(size_t i) = 0;
virtual Section* targetSection() = 0;
};
class StringTable : public virtual Section {
public:
virtual const char* addString(const std::string& s) = 0;
virtual size_t addString1(const std::string& s) = 0;
virtual const char* getString(size_t ndx) = 0;
virtual size_t getStringIndex(const char* name) = 0;
};
class Symbol {
public:
virtual ~Symbol() { }
virtual uint32_t index() = 0;
virtual uint32_t type() = 0;
virtual uint32_t binding() = 0;
virtual uint64_t size() = 0;
virtual uint64_t value() = 0;
virtual unsigned char other() = 0;
virtual std::string name() = 0;
virtual Section* section() = 0;
virtual void setValue(uint64_t value) = 0;
virtual void setSize(uint64_t size) = 0;
};
class SymbolTable : public virtual Section {
public:
virtual Symbol* addSymbol(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other = 0) = 0;
virtual size_t symbolCount() = 0;
virtual Symbol* symbol(size_t i) = 0;
};
class NoteSection : public virtual Section {
public:
virtual bool addNote(const std::string& name, uint32_t type, const void* desc = 0, uint32_t desc_size = 0) = 0;
virtual bool getNote(const std::string& name, uint32_t type, void** desc, uint32_t* desc_size) = 0;
};
class Image {
public:
virtual ~Image() { }
virtual bool initNew(uint16_t machine, uint16_t type, uint8_t os_abi = 0, uint8_t abi_version = 0, uint32_t e_flags = 0) = 0;
virtual bool loadFromFile(const std::string& filename) = 0;
virtual bool saveToFile(const std::string& filename) = 0;
virtual bool initFromBuffer(const void* buffer, size_t size) = 0;
virtual bool initAsBuffer(const void* buffer, size_t size) = 0;
virtual bool writeTo(const std::string& filename) = 0;
virtual bool copyToBuffer(void** buf, size_t* size = 0) = 0; // Copy to new buffer allocated with malloc
virtual bool copyToBuffer(void* buf, size_t size) = 0; // Copy to existing buffer of given size.
virtual const char* data() = 0;
virtual uint64_t size() = 0;
virtual uint16_t Machine() = 0;
virtual uint16_t Type() = 0;
virtual uint32_t EFlags() = 0;
virtual uint32_t ABIVersion() = 0;
virtual uint32_t EClass() = 0;
virtual uint32_t OsAbi() = 0;
std::string output() { return out.str(); }
virtual bool Freeze() = 0;
virtual bool Validate() = 0;
virtual StringTable* shstrtab() = 0;
virtual StringTable* strtab() = 0;
virtual SymbolTable* symtab() = 0;
virtual SymbolTable* getSymtab(uint16_t index) = 0;
virtual StringTable* addStringTable(const std::string& name) = 0;
virtual StringTable* getStringTable(uint16_t index) = 0;
virtual SymbolTable* addSymbolTable(const std::string& name, StringTable* stab = 0) = 0;
virtual size_t segmentCount() = 0;
virtual Segment* segment(size_t i) = 0;
virtual Segment* segmentByVAddr(uint64_t vaddr) = 0;
virtual size_t sectionCount() = 0;
virtual Section* section(size_t i) = 0;
virtual Section* sectionByVAddr(uint64_t vaddr) = 0;
virtual NoteSection* note() = 0;
virtual NoteSection* addNoteSection(const std::string& name) = 0;
virtual Segment* initSegment(uint32_t type, uint32_t flags, uint64_t paddr = 0) = 0;
virtual bool addSegments() = 0;
virtual Section* addSection(const std::string &name,
uint32_t type,
uint64_t flags = 0,
uint64_t entsize = 0,
Segment* segment = 0) = 0;
virtual RelocationSection* relocationSection(Section* sec, SymbolTable* symtab = 0) = 0;
protected:
std::ostringstream out;
};
Image* NewElf32Image();
Image* NewElf64Image();
uint64_t ElfSize(const void* buffer);
std::string GetNoteString(uint32_t s_size, const char* s);
}
}
#endif // AMD_ELF_IMAGE_HPP_
@@ -0,0 +1,432 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
/******************************************************************************
* University of Illinois / NCSA
* Open Source License
*
* Copyright(c) 2011 - 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
* Advanced Micro Devices, Inc.
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files(the "Software"), to deal
* with the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and /
* or sell copies of the Software, and to permit persons to whom the Software
* is furnished to do so, subject to the following conditions:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimers in the documentation
* and / or other materials provided with the distribution.
*
* Neither the names of Advanced Micro Devices, Inc, nor the
mes of its
* contributors may be used to endorse or promote products derived from this
* Software without specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
* THE SOFTWARE.
******************************************************************************/
#ifndef AMD_HSA_CODE_HPP_
#define AMD_HSA_CODE_HPP_
#include "amd_elf_image.hpp"
#include "amd_hsa_elf.h"
#include "amd_hsa_kernel_code.h"
#include "hsa.h"
#include "hsa_ext_finalize.h"
#include <memory>
#include <sstream>
#include <cassert>
#include <unordered_map>
namespace amd {
namespace hsa {
namespace common {
template<uint64_t signature>
class Signed {
public:
static const uint64_t CT_SIGNATURE;
const uint64_t RT_SIGNATURE;
protected:
Signed(): RT_SIGNATURE(signature) {}
virtual ~Signed() {}
};
template<uint64_t signature>
const uint64_t Signed<signature>::CT_SIGNATURE = signature;
bool IsAccessibleMemoryAddress(uint64_t address);
template<typename class_type, typename member_type>
size_t OffsetOf(member_type class_type::*member)
{
return (char*)&((class_type*)nullptr->*member) - (char*)nullptr;
}
template<typename class_type>
class_type* ObjectAt(uint64_t address)
{
if (!IsAccessibleMemoryAddress(address)) {
return nullptr;
}
const uint64_t *rt_signature =
(const uint64_t*)(address + OffsetOf(&class_type::RT_SIGNATURE));
if (nullptr == rt_signature) {
return nullptr;
}
if (class_type::CT_SIGNATURE != *rt_signature) {
return nullptr;
}
return (class_type*)address;
}
}
namespace code {
typedef amd::elf::Segment Segment;
typedef amd::elf::Section Section;
typedef amd::elf::RelocationSection RelocationSection;
typedef amd::elf::Relocation Relocation;
class KernelSymbol;
class VariableSymbol;
class Symbol {
protected:
amd::elf::Symbol* elfsym;
public:
explicit Symbol(amd::elf::Symbol* elfsym_)
: elfsym(elfsym_) { }
virtual ~Symbol() { }
virtual bool IsKernelSymbol() const { return false; }
virtual KernelSymbol* AsKernelSymbol() { assert(false); return 0; }
virtual bool IsVariableSymbol() const { return false; }
virtual VariableSymbol* AsVariableSymbol() { assert(false); return 0; }
amd::elf::Symbol* elfSym() { return elfsym; }
std::string Name() const { return elfsym ? elfsym->name() : ""; }
Section* GetSection() { return elfsym->section(); }
virtual uint64_t SectionOffset() const { return elfsym->value(); }
virtual uint64_t VAddr() const { return elfsym->section()->addr() + elfsym->value(); }
uint32_t Index() const { return elfsym ? elfsym->index() : 0; }
bool IsDeclaration() const;
bool IsDefinition() const;
virtual bool IsAgent() const;
virtual hsa_symbol_kind_t Kind() const = 0;
hsa_symbol_linkage_t Linkage() const;
hsa_variable_allocation_t Allocation() const;
hsa_variable_segment_t Segment() const;
uint64_t Size() const;
uint32_t Size32() const;
uint32_t Alignment() const;
bool IsConst() const;
virtual hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value);
static hsa_code_symbol_t ToHandle(Symbol* sym);
static Symbol* FromHandle(hsa_code_symbol_t handle);
void setValue(uint64_t value) { elfsym->setValue(value); }
void setSize(uint32_t size) { elfsym->setSize(size); }
std::string GetModuleName() const;
std::string GetSymbolName() const;
};
class KernelSymbol : public Symbol {
private:
uint32_t kernarg_segment_size, kernarg_segment_alignment;
uint32_t group_segment_size, private_segment_size;
bool is_dynamic_callstack;
public:
explicit KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc);
bool IsKernelSymbol() const override { return true; }
KernelSymbol* AsKernelSymbol() override { return this; }
hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_KERNEL; }
hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override;
};
class VariableSymbol : public Symbol {
public:
explicit VariableSymbol(amd::elf::Symbol* elfsym_)
: Symbol(elfsym_) { }
bool IsVariableSymbol() const override { return true; }
VariableSymbol* AsVariableSymbol() override { return this; }
hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_VARIABLE; }
hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override;
};
class AmdHsaCode {
private:
std::ostringstream out;
std::unique_ptr<amd::elf::Image> img;
std::vector<Segment*> dataSegments;
std::vector<Section*> dataSections;
std::vector<RelocationSection*> relocationSections;
std::vector<Symbol*> symbols;
bool combineDataSegments;
Segment* hsaSegments[AMDGPU_HSA_SEGMENT_LAST][2];
Section* hsaSections[AMDGPU_HSA_SECTION_LAST];
amd::elf::Section* hsatext;
amd::elf::Section* imageInit;
amd::elf::Section* samplerInit;
amd::elf::Section* debugInfo;
amd::elf::Section* debugLine;
amd::elf::Section* debugAbbrev;
bool PullElf();
bool PullElfV1();
bool PullElfV2();
void AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size);
template <typename S>
bool GetAmdNote(uint32_t type, S** desc)
{
uint32_t desc_size;
if (!img->note()->getNote("AMD", type, (void**) desc, &desc_size)) {
out << "Failed to find note, type: " << type << std::endl;
return false;
}
if (desc_size < sizeof(S)) {
out << "Note size mismatch, type: " << type << " size: " << desc_size << " expected at least " << sizeof(S) << std::endl;
return false;
}
return true;
}
void PrintSegment(std::ostream& out, Segment* segment);
void PrintSection(std::ostream& out, Section* section);
void PrintRawData(std::ostream& out, Section* section);
void PrintRawData(std::ostream& out, const unsigned char *data, size_t size);
void PrintRelocationData(std::ostream& out, RelocationSection* section);
void PrintSymbol(std::ostream& out, Symbol* sym);
void PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset = 0);
std::string MangleSymbolName(const std::string& module_name, const std::string symbol_name);
bool ElfImageError();
public:
bool HasHsaText() const { return hsatext != 0; }
amd::elf::Section* HsaText() { assert(hsatext); return hsatext; }
const amd::elf::Section* HsaText() const { assert(hsatext); return hsatext; }
amd::elf::SymbolTable* Symtab() { assert(img); return img->symtab(); }
uint16_t Machine() const { return img->Machine(); }
uint32_t EFlags() const { return img->EFlags(); }
uint32_t EClass() const { return img->EClass(); }
uint32_t OsAbi() const { return img->OsAbi(); }
AmdHsaCode(bool combineDataSegments = true);
virtual ~AmdHsaCode();
std::string output() { return out.str(); }
bool LoadFromFile(const std::string& filename);
bool SaveToFile(const std::string& filename);
bool WriteToBuffer(void* buffer);
bool InitFromBuffer(const void* buffer, size_t size);
bool InitAsBuffer(const void* buffer, size_t size);
bool InitAsHandle(hsa_code_object_t code_handle);
bool InitNew(bool xnack = false);
bool Freeze();
hsa_code_object_t GetHandle();
const char* ElfData();
uint64_t ElfSize();
bool Validate();
void Print(std::ostream& out);
void PrintNotes(std::ostream& out);
void PrintSegments(std::ostream& out);
void PrintSections(std::ostream& out);
void PrintSymbols(std::ostream& out);
void PrintMachineCode(std::ostream& out);
void PrintMachineCode(std::ostream& out, KernelSymbol* sym);
bool PrintToFile(const std::string& filename);
void AddNoteCodeObjectVersion(uint32_t major, uint32_t minor);
bool GetNoteCodeObjectVersion(std::string& version);
void AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode);
bool GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round);
void AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping);
bool GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping);
void AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer);
bool GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name);
void AddNoteProducerOptions(const std::string& options);
void AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options);
bool GetNoteProducerOptions(std::string& options);
bool GetIsa(std::string& isaName, unsigned *genericVersion = nullptr);
bool GetCodeObjectVersion(uint32_t* major, uint32_t* minor);
hsa_status_t GetInfo(hsa_code_object_info_t attribute, void *value);
hsa_status_t GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *sym);
hsa_status_t IterateSymbols(hsa_code_object_t code_object,
hsa_status_t (*callback)(
hsa_code_object_t code_object,
hsa_code_symbol_t symbol,
void* data),
void* data);
void AddHsaTextData(const void* buffer, size_t size);
uint64_t NextKernelCodeOffset() const;
bool AddKernelCode(KernelSymbol* sym, const void* code, size_t size);
Symbol* AddKernelDefinition(const std::string& name, const void* isa, size_t isa_size);
size_t DataSegmentCount() const { return dataSegments.size(); }
Segment* DataSegment(size_t i) const { return dataSegments[i]; }
size_t DataSectionCount() { return dataSections.size(); }
Section* DataSection(size_t i) { return dataSections[i]; }
Section* AddEmptySection();
Section* AddCodeSection(Segment* segment);
Section* AddDataSection(const std::string &name,
uint32_t type,
uint64_t flags,
Segment* segment);
bool HasImageInitSection() const { return imageInit != 0; }
Section* ImageInitSection();
void AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& init);
void AddImageInitializer(Symbol* image, uint64_t destOffset,
amdgpu_hsa_metadata_kind16_t kind,
amdgpu_hsa_image_geometry8_t geometry,
amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type,
uint64_t width, uint64_t height, uint64_t depth, uint64_t array);
bool HasSamplerInitSection() const { return samplerInit != 0; }
amd::elf::Section* SamplerInitSection();
amd::elf::Section* AddSamplerInit();
void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& init);
void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset,
amdgpu_hsa_sampler_coord8_t coord,
amdgpu_hsa_sampler_filter8_t filter,
amdgpu_hsa_sampler_addressing8_t addressing);
void AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend);
void InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
bool AddHsaSegments();
Segment* HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
void InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
Section* HsaDataSection(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
Symbol* AddExecutableSymbol(const std::string &name,
unsigned char type,
unsigned char binding,
unsigned char other,
Section *section = 0);
Symbol* AddVariableSymbol(const std::string &name,
unsigned char type,
unsigned char binding,
unsigned char other,
Section *section,
uint64_t value,
uint64_t size);
void AddSectionSymbols();
size_t RelocationSectionCount() { return relocationSections.size(); }
RelocationSection* GetRelocationSection(size_t i) { return relocationSections[i]; }
size_t SymbolCount() { return symbols.size(); }
Symbol* GetSymbol(size_t i) { return symbols[i]; }
Symbol* GetSymbolByElfIndex(size_t index);
Symbol* FindSymbol(const std::string &n);
void AddData(amdgpu_hsa_elf_section_t section, const void* data = 0, size_t size = 0);
Section* DebugInfo();
Section* DebugLine();
Section* DebugAbbrev();
Section* AddHsaHlDebug(const std::string& name, const void* data, size_t size);
};
class AmdHsaCodeManager {
private:
typedef std::unordered_map<uint64_t, AmdHsaCode*> CodeMap;
CodeMap codeMap;
public:
AmdHsaCode* FromHandle(hsa_code_object_t handle);
bool Destroy(hsa_code_object_t handle);
};
class KernelSymbolV2 : public KernelSymbol {
private:
public:
explicit KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc);
bool IsAgent() const override { return true; }
uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); }
uint64_t VAddr() const override { return elfsym->value(); }
};
class VariableSymbolV2 : public VariableSymbol {
private:
public:
explicit VariableSymbolV2(amd::elf::Symbol* elfsym_) : VariableSymbol(elfsym_) { }
bool IsAgent() const override { return false; }
uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); }
uint64_t VAddr() const override { return elfsym->value(); }
};
}
}
}
#endif // AMD_HSA_CODE_HPP_
@@ -0,0 +1,91 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// The following set of header files provides definitions for AMD GPU
// Architecture:
// - amd_hsa_common.h
// - amd_hsa_elf.h
// - amd_hsa_kernel_code.h
// - amd_hsa_queue.h
// - amd_hsa_signal.h
//
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
// information.
#ifndef AMD_HSA_COMMON_H
#define AMD_HSA_COMMON_H
#include <stddef.h>
#include <stdint.h>
// Descriptive version of the HSA Application Binary Interface.
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
// Alignment attribute that specifies a minimum alignment (in bytes) for
// variables of the specified type.
#if defined(__GNUC__)
# define __ALIGNED__(x) __attribute__((aligned(x)))
#elif defined(_MSC_VER)
# define __ALIGNED__(x) __declspec(align(x))
#elif defined(RC_INVOKED)
# define __ALIGNED__(x)
#else
# error
#endif
// Creates enumeration entries for packed types. Enumeration entries include
// bit shift amount, bit width, and bit mask.
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
name##_SHIFT = (shift), \
name##_WIDTH = (width), \
name = (((1 << (width)) - 1) << (shift)) \
// Gets bits for specified mask from specified src packed instance.
#define AMD_HSA_BITS_GET(src, mask) \
((src & mask) >> mask ## _SHIFT) \
// Sets val bits for specified mask in specified dst packed instance.
#define AMD_HSA_BITS_SET(dst, mask, val) \
dst &= (~(1 << mask##_SHIFT) & ~mask); \
dst |= (((val) << mask##_SHIFT) & mask) \
#endif // AMD_HSA_COMMON_H
@@ -0,0 +1,476 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// Undefine the macro in case it is defined in the system elf.h.
#undef EM_AMDGPU
#ifndef AMD_HSA_ELF_H
#define AMD_HSA_ELF_H
#include <stdint.h>
// AMD GPU Specific ELF Header Enumeration Values.
//
// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
// code object V1 defintions which are not part of the LLVM header. Code object
// V1 was only supported by the Finalizer which is now deprecated and removed.
//
// TODO: Deprecate and remove V1 support and replace this header with using the
// LLVM header.
namespace ELF {
// Machine architectures
// See current registered ELF machine architectures at:
// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
enum {
EM_AMDGPU = 224, // AMD GPU architecture
};
// OS ABI identification.
enum {
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
};
// AMDGPU OS ABI Version identification.
enum {
// ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
// AMDGPU specific e_flags.
enum : unsigned {
// Processor selection mask for EF_AMDGPU_MACH_* values.
EF_AMDGPU_MACH = 0x0ff,
// Not specified processor.
EF_AMDGPU_MACH_NONE = 0x000,
// R600-based processors.
// Radeon HD 2000/3000 Series (R600).
EF_AMDGPU_MACH_R600_R600 = 0x001,
EF_AMDGPU_MACH_R600_R630 = 0x002,
EF_AMDGPU_MACH_R600_RS880 = 0x003,
EF_AMDGPU_MACH_R600_RV670 = 0x004,
// Radeon HD 4000 Series (R700).
EF_AMDGPU_MACH_R600_RV710 = 0x005,
EF_AMDGPU_MACH_R600_RV730 = 0x006,
EF_AMDGPU_MACH_R600_RV770 = 0x007,
// Radeon HD 5000 Series (Evergreen).
EF_AMDGPU_MACH_R600_CEDAR = 0x008,
EF_AMDGPU_MACH_R600_CYPRESS = 0x009,
EF_AMDGPU_MACH_R600_JUNIPER = 0x00a,
EF_AMDGPU_MACH_R600_REDWOOD = 0x00b,
EF_AMDGPU_MACH_R600_SUMO = 0x00c,
// Radeon HD 6000 Series (Northern Islands).
EF_AMDGPU_MACH_R600_BARTS = 0x00d,
EF_AMDGPU_MACH_R600_CAICOS = 0x00e,
EF_AMDGPU_MACH_R600_CAYMAN = 0x00f,
EF_AMDGPU_MACH_R600_TURKS = 0x010,
// Reserved for R600-based processors.
EF_AMDGPU_MACH_R600_RESERVED_FIRST = 0x011,
EF_AMDGPU_MACH_R600_RESERVED_LAST = 0x01f,
// First/last R600-based processors.
EF_AMDGPU_MACH_R600_FIRST = EF_AMDGPU_MACH_R600_R600,
EF_AMDGPU_MACH_R600_LAST = EF_AMDGPU_MACH_R600_TURKS,
// AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1000 = 0x0f1,
#if defined(GFX40_BUILD)
EF_AMDGPU_MACH_AMDGCN_GFX4000 = 0x0f8,
EF_AMDGPU_MACH_AMDGCN_GFX4010 = 0x0f9,
EF_AMDGPU_MACH_AMDGCN_GFX4020 = 0x0fe,
EF_AMDGPU_MACH_AMDGCN_GFX4030 = 0x0f6,
#endif // GFX40_BUILD
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
#if defined(GFX11_BUILD)
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
#endif // GFX11_BUILD
#if defined(GFX12_BUILD)
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
#endif // GFX12_BUILD
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED_LAST = 0x0ff,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_RESERVED_LAST,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
// Indicates if the trap handler is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
// Indicates if the "sramecc" target feature is enabled for all code
// contained in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
// ELFABIVERSION_AMDGPU_HSA_V5.
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
// XNACK is not supported.
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
// XNACK is any/default/unspecified.
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
// XNACK is off.
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
// XNACK is on.
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
// ELFABIVERSION_AMDGPU_HSA_V5.
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
// SRAMECC is not supported.
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
// SRAMECC is any/default/unspecified.
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
// SRAMECC is off.
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
// Generic target versioning. This is contained in the list byte of EFLAGS.
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
};
} // end namespace ELF
// ELF Section Header Flag Enumeration Values.
#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
//
typedef enum {
AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
AMDGPU_HSA_SEGMENT_LAST,
} amdgpu_hsa_elf_segment_t;
// ELF Program Header Type Enumeration Values.
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
// ELF Symbol Type Enumeration Values.
#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
// ELF Symbol Binding Enumeration Values.
#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
// ELF Symbol Other Information Creation/Retrieval.
#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
typedef enum {
AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
} amdgpu_hsa_symbol_allocation_t;
// ELF Symbol Allocation Enumeration Values.
#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
typedef enum {
AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
AMDGPU_HSA_SYMBOL_FLAG_LAST,
} amdgpu_hsa_symbol_flag_t;
// ELF Symbol Flag Enumeration Values.
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
// AMD GPU Relocation Type Enumeration Values.
#define R_AMDGPU_NONE 0
#define R_AMDGPU_32_LOW 1
#define R_AMDGPU_32_HIGH 2
#define R_AMDGPU_64 3
#define R_AMDGPU_INIT_SAMPLER 4
#define R_AMDGPU_INIT_IMAGE 5
#define R_AMDGPU_RELATIVE64 13
// AMD GPU Note Type Enumeration Values.
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
#define NT_AMD_HSA_HSAIL 2
#define NT_AMD_HSA_ISA_VERSION 3
#define NT_AMD_HSA_PRODUCER 4
#define NT_AMD_HSA_PRODUCER_OPTIONS 5
#define NT_AMD_HSA_EXTENSION 6
#define NT_AMD_HSA_ISA_NAME 11
#define NT_AMD_HSA_HLDEBUG_DEBUG 101
#define NT_AMD_HSA_HLDEBUG_TARGET 102
// AMD GPU Metadata Kind Enumeration Values.
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
typedef enum {
AMDGPU_HSA_METADATA_KIND_NONE = 0,
AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
} amdgpu_hsa_metadata_kind_t;
// AMD GPU Sampler Coordinate Normalization Enumeration Values.
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
typedef enum {
AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
} amdgpu_hsa_sampler_coord_t;
// AMD GPU Sampler Filter Enumeration Values.
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
typedef enum {
AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
} amdgpu_hsa_sampler_filter_t;
// AMD GPU Sampler Addressing Enumeration Values.
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
typedef enum {
AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
} amdgpu_hsa_sampler_addressing_t;
// AMD GPU Sampler Descriptor.
typedef struct amdgpu_hsa_sampler_descriptor_s {
uint16_t size;
amdgpu_hsa_metadata_kind16_t kind;
amdgpu_hsa_sampler_coord8_t coord;
amdgpu_hsa_sampler_filter8_t filter;
amdgpu_hsa_sampler_addressing8_t addressing;
uint8_t reserved1;
} amdgpu_hsa_sampler_descriptor_t;
// AMD GPU Image Geometry Enumeration Values.
typedef uint8_t amdgpu_hsa_image_geometry8_t;
typedef enum {
AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
} amdgpu_hsa_image_geometry_t;
// AMD GPU Image Channel Order Enumeration Values.
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
typedef enum {
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
} amdgpu_hsa_image_channel_order_t;
// AMD GPU Image Channel Type Enumeration Values.
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
typedef enum {
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
} amdgpu_hsa_image_channel_type_t;
// AMD GPU Image Descriptor.
typedef struct amdgpu_hsa_image_descriptor_s {
uint16_t size;
amdgpu_hsa_metadata_kind16_t kind;
amdgpu_hsa_image_geometry8_t geometry;
amdgpu_hsa_image_channel_order8_t channel_order;
amdgpu_hsa_image_channel_type8_t channel_type;
uint8_t reserved1;
uint64_t width;
uint64_t height;
uint64_t depth;
uint64_t array;
} amdgpu_hsa_image_descriptor_t;
typedef struct amdgpu_hsa_note_code_object_version_s {
uint32_t major_version;
uint32_t minor_version;
} amdgpu_hsa_note_code_object_version_t;
typedef struct amdgpu_hsa_note_hsail_s {
uint32_t hsail_major_version;
uint32_t hsail_minor_version;
uint8_t profile;
uint8_t machine_model;
uint8_t default_float_round;
} amdgpu_hsa_note_hsail_t;
typedef struct amdgpu_hsa_note_isa_s {
uint16_t vendor_name_size;
uint16_t architecture_name_size;
uint32_t major;
uint32_t minor;
uint32_t stepping;
char vendor_and_architecture_name[1];
} amdgpu_hsa_note_isa_t;
typedef struct amdgpu_hsa_note_producer_s {
uint16_t producer_name_size;
uint16_t reserved;
uint32_t producer_major_version;
uint32_t producer_minor_version;
char producer_name[1];
} amdgpu_hsa_note_producer_t;
typedef struct amdgpu_hsa_note_producer_options_s {
uint16_t producer_options_size;
char producer_options[1];
} amdgpu_hsa_note_producer_options_t;
typedef enum {
AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
AMDGPU_HSA_RODATA_GLOBAL_AGENT,
AMDGPU_HSA_RODATA_READONLY_AGENT,
AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
AMDGPU_HSA_DATA_GLOBAL_AGENT,
AMDGPU_HSA_DATA_READONLY_AGENT,
AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
AMDGPU_HSA_BSS_GLOBAL_AGENT,
AMDGPU_HSA_BSS_READONLY_AGENT,
AMDGPU_HSA_SECTION_LAST,
} amdgpu_hsa_elf_section_t;
#endif // AMD_HSA_ELF_H
@@ -0,0 +1,274 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_KERNEL_CODE_H
#define AMD_HSA_KERNEL_CODE_H
#include "amd_hsa_common.h"
#include "hsa.h"
// AMD Kernel Code Version Enumeration Values.
typedef uint32_t amd_kernel_code_version32_t;
enum amd_kernel_code_version_t {
AMD_KERNEL_CODE_VERSION_MAJOR = 1,
AMD_KERNEL_CODE_VERSION_MINOR = 1
};
// AMD Machine Kind Enumeration Values.
typedef uint16_t amd_machine_kind16_t;
enum amd_machine_kind_t {
AMD_MACHINE_KIND_UNDEFINED = 0,
AMD_MACHINE_KIND_AMDGPU = 1
};
// AMD Machine Version.
typedef uint16_t amd_machine_version16_t;
// AMD Float Round Mode Enumeration Values.
enum amd_float_round_mode_t {
AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
AMD_FLOAT_ROUND_MODE_ZERO = 3
};
// AMD Float Denorm Mode Enumeration Values.
enum amd_float_denorm_mode_t {
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
};
// AMD Compute Program Resource Register One.
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
enum amd_compute_pgm_rsrc_one_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FP16_OVFL, 26, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED0, 27, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_WGP_MODE, 29, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_MEM_ORDERED, 30, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FWD_PROGRESS, 31, 1),
};
// AMD System VGPR Workitem ID Enumeration Values.
enum amd_system_vgpr_workitem_id_t {
AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
};
// AMD Compute Program Resource Register Two.
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
enum amd_compute_pgm_rsrc_two_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
};
// AMD Element Byte Size Enumeration Values.
enum amd_element_byte_size_t {
AMD_ELEMENT_BYTE_SIZE_2 = 0,
AMD_ELEMENT_BYTE_SIZE_4 = 1,
AMD_ELEMENT_BYTE_SIZE_8 = 2,
AMD_ELEMENT_BYTE_SIZE_16 = 3
};
// AMD Kernel Code Properties.
typedef uint32_t amd_kernel_code_properties32_t;
enum amd_kernel_code_properties_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
};
// AMD Power Of Two Enumeration Values.
typedef uint8_t amd_powertwo8_t;
enum amd_powertwo_t {
AMD_POWERTWO_1 = 0,
AMD_POWERTWO_2 = 1,
AMD_POWERTWO_4 = 2,
AMD_POWERTWO_8 = 3,
AMD_POWERTWO_16 = 4,
AMD_POWERTWO_32 = 5,
AMD_POWERTWO_64 = 6,
AMD_POWERTWO_128 = 7,
AMD_POWERTWO_256 = 8
};
// AMD Enabled Control Directive Enumeration Values.
typedef uint64_t amd_enabled_control_directive64_t;
enum amd_enabled_control_directive_t {
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
};
// AMD Exception Kind Enumeration Values.
typedef uint16_t amd_exception_kind16_t;
enum amd_exception_kind_t {
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
AMD_EXCEPTION_KIND_OVERFLOW = 4,
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
AMD_EXCEPTION_KIND_INEXACT = 16
};
// AMD Control Directives.
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
amd_enabled_control_directive64_t enabled_control_directives;
uint16_t enable_break_exceptions;
uint16_t enable_detect_exceptions;
uint32_t max_dynamic_group_size;
uint64_t max_flat_grid_size;
uint32_t max_flat_workgroup_size;
uint8_t required_dim;
uint8_t reserved1[3];
uint64_t required_grid_size[3];
uint32_t required_workgroup_size[3];
uint8_t reserved2[60];
} amd_control_directives_t;
// AMD Kernel Code.
#define AMD_ISA_ALIGN_BYTES 256
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
amd_kernel_code_version32_t amd_kernel_code_version_major;
amd_kernel_code_version32_t amd_kernel_code_version_minor;
amd_machine_kind16_t amd_machine_kind;
amd_machine_version16_t amd_machine_version_major;
amd_machine_version16_t amd_machine_version_minor;
amd_machine_version16_t amd_machine_version_stepping;
int64_t kernel_code_entry_byte_offset;
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
uint64_t max_scratch_backing_memory_byte_size;
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
amd_kernel_code_properties32_t kernel_code_properties;
uint32_t workitem_private_segment_byte_size;
uint32_t workgroup_group_segment_byte_size;
uint32_t gds_segment_byte_size;
uint64_t kernarg_segment_byte_size;
uint32_t workgroup_fbarrier_count;
uint16_t wavefront_sgpr_count;
uint16_t workitem_vgpr_count;
uint16_t reserved_vgpr_first;
uint16_t reserved_vgpr_count;
uint16_t reserved_sgpr_first;
uint16_t reserved_sgpr_count;
uint16_t debug_wavefront_private_segment_offset_sgpr;
uint16_t debug_private_segment_buffer_sgpr;
amd_powertwo8_t kernarg_segment_alignment;
amd_powertwo8_t group_segment_alignment;
amd_powertwo8_t private_segment_alignment;
amd_powertwo8_t wavefront_size;
int32_t call_convention;
uint8_t reserved1[12];
uint64_t runtime_loader_kernel_symbol;
amd_control_directives_t control_directives;
} amd_kernel_code_t;
// TODO: this struct should be completely gone once debugger designs/implements
// Debugger APIs.
typedef struct amd_runtime_loader_debug_info_s {
const void* elf_raw;
size_t elf_size;
const char *kernel_name;
const void *owning_segment;
} amd_runtime_loader_debug_info_t;
#endif // AMD_HSA_KERNEL_CODE_H
@@ -0,0 +1,520 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_LOADER_HPP
#define AMD_HSA_LOADER_HPP
#include <cstddef>
#include <cstdint>
#include "hsa.h"
#include "hsa_ext_image.h"
#include "hsa_ven_amd_loader.h"
#include "amd_hsa_elf.h"
#include <string>
#include <mutex>
#include <vector>
#if defined(_WIN32) || defined(_WIN64)
#include <io.h>
#define __read__ _read
#define __lseek__ _lseek
#else
#include <unistd.h>
#define __read__ read
#define __lseek__ lseek
#endif // _WIN32 || _WIN64
/// @brief Major version of the AMD HSA Loader. Major versions are not backwards
/// compatible.
#define AMD_HSA_LOADER_VERSION_MAJOR 0
/// @brief Minor version of the AMD HSA Loader. Minor versions are backwards
/// compatible.
#define AMD_HSA_LOADER_VERSION_MINOR 5
/// @brief Descriptive version of the AMD HSA Loader.
#define AMD_HSA_LOADER_VERSION "AMD HSA Loader v0.05 (June 16, 2015)"
enum hsa_ext_symbol_info_t {
HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE = 100,
HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN = 101,
};
typedef uint32_t hsa_symbol_info32_t;
typedef hsa_executable_symbol_t hsa_symbol_t;
typedef hsa_executable_symbol_info_t hsa_symbol_info_t;
/// @brief Loaded code object attributes.
enum amd_loaded_code_object_info_t {
AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE = 0,
AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE = 1
};
/// @brief Loaded segment handle.
typedef struct amd_loaded_segment_s {
uint64_t handle;
} amd_loaded_segment_t;
/// @brief Loaded segment attributes.
enum amd_loaded_segment_info_t {
AMD_LOADED_SEGMENT_INFO_TYPE = 0,
AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS = 1,
AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS = 2,
AMD_LOADED_SEGMENT_INFO_SIZE = 3
};
namespace amd {
namespace hsa {
namespace loader {
/// @class CodeObjectReaderImpl.
/// @brief Code Object Reader Wrapper.
struct CodeObjectReaderImpl final {
public:
/// @returns Handle equivalent of @p object.
static hsa_code_object_reader_t Handle(
const CodeObjectReaderImpl *object) {
hsa_code_object_reader_t handle = {reinterpret_cast<uint64_t>(object)};
return handle;
}
/// @returns Object equivalent of @p handle.
static CodeObjectReaderImpl *Object(
const hsa_code_object_reader_t &handle) {
CodeObjectReaderImpl *object =
reinterpret_cast<CodeObjectReaderImpl*>(handle.handle);
return object;
}
/// @brief Default constructor.
CodeObjectReaderImpl() {}
/// @brief Default destructor.
~CodeObjectReaderImpl();
hsa_status_t SetFile(
hsa_file_t _code_object_file_descriptor,
size_t _code_object_offset = 0,
size_t _code_object_size = 0);
hsa_status_t SetMemory(
const void *_code_object_memory,
size_t _code_object_size);
const void *GetCodeObjectMemory() const { return code_object_memory; };
std::string GetUri() const { return uri; };
private:
const void *code_object_memory{nullptr};
size_t code_object_size{0};
std::string uri{};
bool is_mmap{false};
};
//===----------------------------------------------------------------------===//
// Context. //
//===----------------------------------------------------------------------===//
class Context {
public:
virtual ~Context() {}
virtual hsa_isa_t IsaFromName(const char *name) = 0;
// This function will be deleted in a future patch. Use the overload
// that takes a generic version instead.
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) = 0;
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa, unsigned genericVersion) { return IsaSupportedByAgent(agent, isa); }
virtual void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) = 0;
virtual bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) = 0;
virtual void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0;
virtual void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0;
virtual void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0;
virtual bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0;
virtual bool ImageExtensionSupported() = 0;
virtual hsa_status_t ImageCreate(
hsa_agent_t agent,
hsa_access_permission_t image_permission,
const hsa_ext_image_descriptor_t *image_descriptor,
const void *image_data,
hsa_ext_image_t *image_handle) = 0;
virtual hsa_status_t ImageDestroy(
hsa_agent_t agent, hsa_ext_image_t image_handle) = 0;
virtual hsa_status_t SamplerCreate(
hsa_agent_t agent,
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
hsa_ext_sampler_t *sampler_handle) = 0;
virtual hsa_status_t SamplerDestroy(
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) = 0;
protected:
Context() {}
private:
Context(const Context &c);
Context& operator=(const Context &c);
};
//===----------------------------------------------------------------------===//
// Symbol. //
//===----------------------------------------------------------------------===//
class Symbol {
public:
static hsa_symbol_t Handle(Symbol *symbol) {
hsa_symbol_t symbol_handle =
{reinterpret_cast<uint64_t>(symbol)};
return symbol_handle;
}
static Symbol* Object(hsa_symbol_t symbol_handle) {
Symbol *symbol =
reinterpret_cast<Symbol*>(symbol_handle.handle);
return symbol;
}
virtual ~Symbol() {}
virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void *value) = 0;
virtual hsa_agent_t GetAgent() = 0;
protected:
Symbol() {}
private:
Symbol(const Symbol &s);
Symbol& operator=(const Symbol &s);
};
//===----------------------------------------------------------------------===//
// LoadedCodeObject. //
//===----------------------------------------------------------------------===//
class LoadedCodeObject {
public:
static hsa_loaded_code_object_t Handle(LoadedCodeObject *object) {
hsa_loaded_code_object_t handle =
{reinterpret_cast<uint64_t>(object)};
return handle;
}
static LoadedCodeObject* Object(hsa_loaded_code_object_t handle) {
LoadedCodeObject *object =
reinterpret_cast<LoadedCodeObject*>(handle.handle);
return object;
}
virtual ~LoadedCodeObject() {}
virtual bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) = 0;
virtual hsa_status_t IterateLoadedSegments(
hsa_status_t (*callback)(
amd_loaded_segment_t loaded_segment,
void *data),
void *data) = 0;
virtual hsa_agent_t getAgent() const = 0;
virtual hsa_executable_t getExecutable() const = 0;
virtual uint64_t getElfData() const = 0;
virtual uint64_t getElfSize() const = 0;
virtual uint64_t getStorageOffset() const = 0;
virtual uint64_t getLoadBase() const = 0;
virtual uint64_t getLoadSize() const = 0;
virtual int64_t getDelta() const = 0;
virtual std::string getUri() const = 0;
protected:
LoadedCodeObject() {}
private:
LoadedCodeObject(const LoadedCodeObject&);
LoadedCodeObject& operator=(const LoadedCodeObject&);
};
//===----------------------------------------------------------------------===//
// LoadedSegment. //
//===----------------------------------------------------------------------===//
class LoadedSegment {
public:
static amd_loaded_segment_t Handle(LoadedSegment *object) {
amd_loaded_segment_t handle =
{reinterpret_cast<uint64_t>(object)};
return handle;
}
static LoadedSegment* Object(amd_loaded_segment_t handle) {
LoadedSegment *object =
reinterpret_cast<LoadedSegment*>(handle.handle);
return object;
}
virtual ~LoadedSegment() {}
virtual bool GetInfo(amd_loaded_segment_info_t attribute, void *value) = 0;
protected:
LoadedSegment() {}
private:
LoadedSegment(const LoadedSegment&);
LoadedSegment& operator=(const LoadedSegment&);
};
//===----------------------------------------------------------------------===//
// Executable. //
//===----------------------------------------------------------------------===//
class Executable {
public:
static hsa_executable_t Handle(Executable *executable) {
hsa_executable_t executable_handle =
{reinterpret_cast<uint64_t>(executable)};
return executable_handle;
}
static Executable* Object(hsa_executable_t executable_handle) {
Executable *executable =
reinterpret_cast<Executable*>(executable_handle.handle);
return executable;
}
virtual ~Executable() {}
virtual hsa_status_t GetInfo(
hsa_executable_info_t executable_info, void *value) = 0;
virtual hsa_status_t DefineProgramExternalVariable(
const char *name, void *address) = 0;
virtual hsa_status_t DefineAgentExternalVariable(
const char *name,
hsa_agent_t agent,
hsa_variable_segment_t segment,
void *address) = 0;
virtual hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
const char *options,
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
virtual hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
size_t code_object_size,
const char *options,
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
virtual hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
const char *options,
const std::string &uri,
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
virtual hsa_status_t LoadCodeObject(
hsa_agent_t agent,
hsa_code_object_t code_object,
size_t code_object_size,
const char *options,
const std::string &uri,
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
virtual hsa_status_t Freeze(const char *options) = 0;
virtual hsa_status_t Validate(uint32_t *result) = 0;
/// @note needed for hsa v1.0.
/// @todo remove during loader refactoring.
virtual bool IsProgramSymbol(const char *symbol_name) = 0;
virtual Symbol* GetSymbol(
const char *symbol_name,
const hsa_agent_t *agent) = 0;
typedef hsa_status_t (*iterate_symbols_f)(
hsa_executable_t executable,
hsa_symbol_t symbol_handle,
void *data);
virtual hsa_status_t IterateSymbols(
iterate_symbols_f callback, void *data) = 0;
/// @since hsa v1.1.
virtual hsa_status_t IterateAgentSymbols(
hsa_agent_t agent,
hsa_status_t (*callback)(hsa_executable_t exec,
hsa_agent_t agent,
hsa_executable_symbol_t symbol,
void *data),
void *data) = 0;
/// @since hsa v1.1.
virtual hsa_status_t IterateProgramSymbols(
hsa_status_t (*callback)(hsa_executable_t exec,
hsa_executable_symbol_t symbol,
void *data),
void *data) = 0;
virtual hsa_status_t IterateLoadedCodeObjects(
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data) = 0;
virtual size_t GetNumSegmentDescriptors() = 0;
virtual size_t QuerySegmentDescriptors(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t total_num_segment_descriptors,
size_t first_empty_segment_descriptor) = 0;
virtual uint64_t FindHostAddress(uint64_t device_address) = 0;
virtual void Print(std::ostream& out) = 0;
virtual bool PrintToFile(const std::string& filename) = 0;
protected:
Executable() {}
private:
Executable(const Executable &e);
Executable& operator=(const Executable &e);
static std::vector<Executable*> executables;
static std::mutex executables_mutex;
};
/// @class Loader
class Loader {
public:
/// @brief Destructor.
virtual ~Loader() {}
/// @brief Creates AMD HSA Loader with specified @p context.
///
/// @param[in] context Context. Must not be null.
///
/// @returns AMD HSA Loader on success, null on failure.
static Loader* Create(Context* context);
/// @brief Destroys AMD HSA Loader @p Loader_object.
///
/// @param[in] loader AMD HSA Loader to destroy. Must not be null.
static void Destroy(Loader *loader);
/// @returns Context associated with Loader.
virtual Context* GetContext() const = 0;
/// @brief Creates empty AMD HSA Executable with specified @p profile,
/// @p options
virtual Executable* CreateExecutable(
hsa_profile_t profile,
const char *options,
hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) = 0;
/// @brief Freezes @p executable
virtual hsa_status_t FreezeExecutable(Executable *executable, const char *options) = 0;
/// @brief Destroys @p executable
virtual void DestroyExecutable(Executable *executable) = 0;
/// @brief Invokes @p callback for each created executable
virtual hsa_status_t IterateExecutables(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data) = 0;
/// @brief same as hsa_ven_amd_loader_query_segment_descriptors.
virtual hsa_status_t QuerySegmentDescriptors(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors) = 0;
/// @brief Finds the handle of executable to which @p device_address
/// belongs. Return NULL handle if device address is invalid.
virtual hsa_executable_t FindExecutable(uint64_t device_address) = 0;
/// @brief Returns host address given @p device_address. If @p device_address
/// is already host address, returns null pointer. If @p device_address is
/// invalid address, returns null pointer.
virtual uint64_t FindHostAddress(uint64_t device_address) = 0;
/// @brief Print loader help.
virtual void PrintHelp(std::ostream& out) = 0;
protected:
/// @brief Default constructor.
Loader() {}
private:
/// @brief Copy constructor - not available.
Loader(const Loader&);
/// @brief Assignment operator - not available.
Loader& operator=(const Loader&);
};
} // namespace loader
} // namespace hsa
} // namespace amd
#endif // AMD_HSA_LOADER_HPP
@@ -0,0 +1,347 @@
//-----------------------------------------------------------------------------
// Copyright (c) 2011 - 2015 Advanced Micro Devices, Inc. All rights reserved.
//-----------------------------------------------------------------------------
/// @file amd_hsa_program.hpp
/// @author AMD HSA Finalizer Team
///
/// @brief Public AMD HSA Program Interfaces.
#ifndef AMD_HSA_PROGRAM_HPP
#define AMD_HSA_PROGRAM_HPP
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include "amd_hsa_code.hpp"
#include "Brig.h"
#include "hsa.h"
#include "hsa_ext_finalize.h"
/// @brief Descriptive version of AMD HSA Program.
#define AMD_HSA_PROGRAM_VERSION "AMD HSA Program v1.04 (August 3, 2015)"
#define AMD_HSA_PROGRAM_VERSION_MAJOR 1
#define AMD_HSA_PROGRAM_VERSION_MINOR 4
/// @brief Environment variable. If set, overrides options parameter from
/// Program::Create with contents of this environment variable.
#define ENVVAR_AMD_HSA_PROGRAM_CREATE_OPTIONS "AMD_HSA_PROGRAM_CREATE_OPTIONS"
/// @brief Environment variable. If set, concatenates options parameter from
/// Program::Create with contents of this environment variable.
#define ENVVAR_AMD_HSA_PROGRAM_CREATE_OPTIONS_APPEND "AMD_HSA_PROGRAM_CREATE_OPTIONS_APPEND"
/// @brief Environment variable. If set, overrides options parameter from
/// Program::Finalize with contents of this environment variable.
#define ENVVAR_AMD_HSA_PROGRAM_FINALIZE_OPTIONS "AMD_HSA_PROGRAM_FINALIZE_OPTIONS"
/// @brief Environment variable. If set, concatenates options parameter from
/// Program::Finalize with contents of this environment variable.
#define ENVVAR_AMD_HSA_PROGRAM_FINALIZE_OPTIONS_APPEND "AMD_HSA_PROGRAM_FINALIZE_OPTIONS_APPEND"
/// @brief AMD HSA Program attributes (in addition to hsa_ext_program_info_t,
/// which is defined in HSA Runtime Specification), enumeration values below
/// must be negative.
typedef int32_t amd_hsa_program_info32_t;
enum amd_hsa_program_info_t {
/// @brief Major version of BRIG specified when AMD HSA Program was created.
/// The type of this attribute is BrigVersion32_t.
AMD_HSA_PROGRAM_INFO_BRIG_VERSION_MAJOR = -1,
/// @brief Minor version of BRIG specified when AMD HSA Program was created.
/// The type of this attribute is BrigVersion32_t.
AMD_HSA_PROGRAM_INFO_BRIG_VERSION_MINOR = -2,
/// @brief Indicates whether or not AMD HSA Program was created with debugging
/// enabled. The type of this attribute is bool.
AMD_HSA_PROGRAM_INFO_IS_DEBUGGING_ENABLED = -3
};
namespace amd {
namespace hsa {
namespace program {
/// @class Context
class Context {
public:
/// @brief Default destructor.
virtual ~Context() {}
/// @brief Invoked when AMD HSA Program needs to allocate @p size bytes of
/// code object memory whose alignment is specified by @p align.
///
/// @param[in] size Requested allocation size in bytes.
/// @param[in] align Requested alignment.
///
/// @returns Pointer to allocated code object memory on success, null pointer
/// on failure.
virtual void* CodeObjectAlloc(size_t size, size_t align) = 0;
/// @brief Invoked when AMD HSA Program needs to copy @p size bytes from
/// memory pointed to by @p src to code object memory pointed to by @p dst.
///
/// @param[in] dst Pointer to code object memory to copy to.
/// @param[in] src Pointer to memory to copy from.
/// @param[in] size Requested copy size in bytes.
///
/// @returns True on success, false on failure.
virtual bool CodeObjectCopy(void *dst, const void *src, size_t size) = 0;
/// @brief Invoked when AMD HSA Program needs to deallocate @p size bytes of
/// code object memory pointed to by @p ptr.
///
/// @param[in] ptr Pointer to code object memory to deallocate.
/// @param[in] size Requested deallocation size in bytes.
virtual void CodeObjectFree(void *ptr, size_t size) = 0;
/// @brief Invoked when AMD HSA Finalizer and Program needs to reprot message or error
///
/// @param[in] str Message to report.
virtual void ReportMessage(const std::string& str) = 0;
protected:
/// @brief Default constructor.
Context() {}
private:
/// @brief Copy constructor - not available.
Context(const Context&);
/// @brief Assignment operator - not available.
Context& operator=(const Context&);
};
class Finalizer;
/// @class Program
class Program: public amd::hsa::common::Signed<0x71BB0A093D69DA92> {
public:
/// @brief Constant BRIG Module iterator.
typedef std::vector<hsa_ext_module_t>::const_iterator const_module_iterator;
/// @brief BRIG Module iterator.
typedef std::vector<hsa_ext_module_t>::iterator module_iterator;
/// @brief Invalid HSA Program Handle.
static const uint64_t INVALID_HANDLE = 0;
/// @brief Destructor.
virtual ~Program() {}
/// @brief Converts AMD HSA Program @p program_object to HSA Program Handle.
///
/// @param[in] program_object AMD HSA Program to convert. Can be null.
///
/// @returns HSA Program Handle on success, invalid handle on failure.
static hsa_ext_program_t Handle(Program *program_object);
/// @brief Converts HSA Program Handle @p program_handle to AMD HSA Program.
///
/// @param[in] program_handle HSA Program Handle to convert. Can be invalid.
///
/// @returns AMD HSA Program on success, null on failure.
static Program* Object(hsa_ext_program_t program_handle);
/// @returns Constant iterator to first BRIG Module in AMD HSA Program. If
/// AMD HSA Program does not contain any BRIG Modules, returned constant
/// iterator will be equal to Program::module_end().
virtual const_module_iterator module_begin() const = 0;
/// @returns Constant iterator to entity following last BRIG Module in AMD
/// HSA Program.
virtual const_module_iterator module_end() const = 0;
/// @returns Iterator to first BRIG Module in AMD HSA Program. If
/// AMD HSA Program does not contain any BRIG Modules, returned
/// iterator will be equal to Program::module_end().
virtual module_iterator module_begin() = 0;
/// @returns Iterator to entity following last BRIG Module in AMD
/// HSA Program.
virtual module_iterator module_end() = 0;
/// @returns Context associated with AMD HSA Program.
virtual Context* GetContext() const = 0;
/// @returns Finalizer associated with this AMD HSA Program.
virtual Finalizer* GetFinalizer() const = 0;
/// @brief Retrieves current value of specified AMD HSA Program's
/// @p attribute.
///
/// @param[in] attribute AMD HSA Program's attribute to retrieve. Can be
/// invalid.
/// @param[out] value Pointer to client-allocated memory to store attribute's
/// value in. Must not be null. If client-allocated memory is not large enough
/// to hold attribute's value, behaviour is undefined.
///
/// @retval HSA_STATUS_SUCCESS Function executed successfully.
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT Specified @p attribute is
/// invalid AMD HSA Program's attribute.
///
/// @note If function failed to execute successfully, details of failure
/// can be retrieved using Program::GetLog.
virtual hsa_status_t GetInfo(amd_hsa_program_info32_t attribute, void *value) const = 0;
/// @brief Adds specified BRIG Module @p module to AMD HSA Program.
///
/// @details AMD HSA Program does not perform deep copy of BRIG Module
/// upon addition, it stores pointer to BRIG Module. BRIG Module is owned by
/// the client, which has to ensure that the lifetime of BRIG Module is
/// greater than the lifetime of AMD HSA Program.
///
/// @param[in] module BRIG Module to add. Must not be null.
///
/// @retval HSA_STATUS_SUCCESS Function executed successfully.
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES Function failed to allocate
/// resources.
/// @retval HSA_EXT_STATUS_ERROR_INVALID_MODULE Specified @p module is invalid
/// BRIG module.
/// @retval HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED Specified @p module
/// is already included in AMD HSA Program.
/// @retval HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE Specified @p module is
/// incompatible with AMD HSA Program.
/// @retval HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol in specified @p module
/// is incompatible with symbol in AMD HSA Program.
///
/// @note If function failed to execute successfully, details of failure
/// can be retrieved using Program::GetLog.
virtual hsa_status_t AddModule(hsa_ext_module_t module) = 0;
/// @brief Finalizes AMD HSA Program with specified @p target,
/// @p call_convention, @p options, @p control_directives, and
/// @p code_object_type.
///
/// @details Finalizes all kernels and indirect functions that belong to
/// AMD HSA Program for specified @p target, @p call_convention,
/// @p options, @p control_directives, and @p code_object_type. Transitive
/// closure of all functions specified by call or scall must be defined.
/// All kernels and indirect functions that belong to AMD HSA Program must
/// be defined. Kernels and indirect functions that are referenced in kernels
/// and indirect functions that belong to AMD HSA Program may or may not be
/// defined, but must be declared. All global and readonly variables that
/// belong to AMD HSA Program, or referenced in kernels and indirect functions
/// that belong to AMD HSA Program may or may not be defined, but must be
/// declared.
///
/// @param[in] target Target to finalize for. Must not be null.
/// @param[in] call_convention Call convention to finalize for. Must be valid.
/// @param[in] options Options to finalize for. Can be null.
/// @param[in] control_directives Control directives to finalize for. Can be
/// invalid.
/// @param[in] code_object_type Code object type to create. Must be valid.
/// @param[out] code_object Code object generated by AMD HSA Program. Must
/// not be null.
///
/// @retval HSA_STATUS_SUCCESS Function executed successfully.
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES Function failed to allocate
/// resources.
/// @retval HSA_STATUS_ERROR_INVALID_ISA Specified @p target is invalid.
/// @retval HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH Specified
/// @p control_directives does not match control directives in
/// one of kernels or indirect functions that belong to AMD HSA Program.
/// @retval HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED AMD HSA Program failed
/// to finalize.
///
/// @note If function failed to execute successfully, details of failure
/// can be retrieved using Program::GetLog.
///
/// @deprecated @p control_directives will be included in @p options starting
/// AMD HSA Program v2.0.
virtual hsa_status_t Finalize(
const char *target,
int32_t call_convention,
const char *options,
hsa_ext_control_directives_t control_directives,
hsa_code_object_type_t code_object_type,
hsa_code_object_t *code_object) = 0;
protected:
/// @brief Default constructor.
Program() {}
private:
/// @brief Copy constructor - not available.
Program(const Program&);
/// @brief Assignment operator - not available.
Program& operator=(const Program&);
};
/// @class Finalizer
class Finalizer {
public:
/// @brief Destructor.
virtual ~Finalizer() {}
/// @brief Creates AMD HSA Finalizer with specified @p context.
///
/// @param[in] context Context. Must not be null.
///
/// @returns AMD HSA Finalizer on success, null on failure.
static Finalizer* CreateFinalizer(Context* context);
/// @brief Destroys AMD HSA Finalizer @p finalizer_object.
///
/// @param[in] finalizer_object AMD HSA Finalizer to destroy. Must not be null.
static void DestroyFinalizer(Finalizer *finalizer_object);
/// @brief Creates empty AMD HSA Program with specified @p profile,
/// @p machine_model, @p rounding_mode, @p options, @p context, @p major and
/// @p minor BRIG versions.
///
/// @param[in] profile HSA profile. Must be valid.
/// @param[in] machine_model HSA machine model. Must be valid.
/// @param[in] rounding_mode HSA rounding mode. Must be valid.
/// @param[in] options User options. Can be null.
/// @param[in] brig_major Major BRIG version. Must be valid.
/// @param[in] brig_minor Minor BRIG version. Must be valid.
///
/// @returns AMD HSA Program on success, null on failure.
virtual Program* CreateProgram(
hsa_profile_t profile,
hsa_machine_model_t machine_model,
hsa_default_float_rounding_mode_t rounding_mode,
const char *options,
BrigVersion32_t brig_major = BRIG_VERSION_BRIG_MAJOR,
BrigVersion32_t brig_minor = BRIG_VERSION_BRIG_MINOR) = 0;
/// @brief Destroys AMD HSA Program @p program_object.
///
/// @param[in] program_object AMD HSA Program to destroy. Must not be null.
virtual void DestroyProgram(Program *program_object) = 0;
/// @brief Prints available finalizer options as error and exits.
virtual void PrintFinalizerOptions() const = 0;
/// @returns Context associated with Finalizer.
virtual Context* GetContext() const = 0;
/// @brief Enables code cache optimization.
virtual void EnableCodeCache() = 0;
/// @brief Disables code cache optimization.
virtual void DisableCodeCache() = 0;
/// @returns True if code cache is enabled, false otherwise.
virtual bool IsCodeCacheEnabled() const = 0;
/// @returns List of names for supported targets.
virtual const std::vector<std::string>& GetSupportedTargets() const = 0;
protected:
/// @brief Default constructor.
Finalizer() {}
private:
/// @brief Copy constructor - not available.
Finalizer(const Finalizer&);
/// @brief Assignment operator - not available.
Finalizer& operator=(const Finalizer&);
};
} // namespace program
} // namespace hsa
} // namespace amd
#endif // AMD_HSA_PROGRAM_HPP
@@ -0,0 +1,87 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_QUEUE_H
#define AMD_HSA_QUEUE_H
#include "amd_hsa_common.h"
#include "hsa.h"
// AMD Queue Properties.
typedef uint32_t amd_queue_properties32_t;
enum amd_queue_properties_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
};
// AMD Queue.
#define AMD_QUEUE_ALIGN_BYTES 64
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
hsa_queue_t hsa_queue;
uint32_t reserved1[4];
volatile uint64_t write_dispatch_id;
uint32_t group_segment_aperture_base_hi;
uint32_t private_segment_aperture_base_hi;
uint32_t max_cu_id;
uint32_t max_wave_id;
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
volatile uint32_t legacy_doorbell_lock;
uint32_t reserved2[9];
volatile uint64_t read_dispatch_id;
uint32_t read_dispatch_id_field_base_byte_offset;
uint32_t compute_tmpring_size;
uint32_t scratch_resource_descriptor[4];
uint64_t scratch_backing_memory_location;
uint64_t scratch_backing_memory_byte_size;
uint32_t scratch_workitem_byte_size;
amd_queue_properties32_t queue_properties;
uint32_t reserved3[2];
hsa_signal_t queue_inactive_signal;
uint32_t reserved4[14];
} amd_queue_t;
#endif // AMD_HSA_QUEUE_H
@@ -0,0 +1,80 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_SIGNAL_H
#define AMD_HSA_SIGNAL_H
#include "amd_hsa_common.h"
#include "amd_hsa_queue.h"
// AMD Signal Kind Enumeration Values.
typedef int64_t amd_signal_kind64_t;
enum amd_signal_kind_t {
AMD_SIGNAL_KIND_INVALID = 0,
AMD_SIGNAL_KIND_USER = 1,
AMD_SIGNAL_KIND_DOORBELL = -1,
AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
};
// AMD Signal.
#define AMD_SIGNAL_ALIGN_BYTES 64
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
amd_signal_kind64_t kind;
union {
volatile int64_t value;
volatile uint32_t* legacy_hardware_doorbell_ptr;
volatile uint64_t* hardware_doorbell_ptr;
};
uint64_t event_mailbox_ptr;
uint32_t event_id;
uint32_t reserved1;
uint64_t start_ts;
uint64_t end_ts;
union {
amd_queue_t* queue_ptr;
uint64_t reserved2;
};
uint32_t reserved3[2];
} amd_signal_t;
#endif // AMD_HSA_SIGNAL_H
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
@@ -0,0 +1,531 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
#include "hsa.h"
#undef HSA_API
#ifdef HSA_EXPORT_FINALIZER
#define HSA_API HSA_API_EXPORT
#else
#define HSA_API HSA_API_IMPORT
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
struct BrigModuleHeader;
typedef struct BrigModuleHeader* BrigModule_t;
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
* @{
*/
/**
* @brief Enumeration constants added to ::hsa_status_t by this extension.
*/
enum {
/**
* The HSAIL program is invalid.
*/
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
/**
* The HSAIL module is invalid.
*/
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
/**
* Machine model or profile of the HSAIL module do not match the machine model
* or profile of the HSAIL program.
*/
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
/**
* The HSAIL module is already a part of the HSAIL program.
*/
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
/**
* Compatibility mismatch between symbol declaration and symbol definition.
*/
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
/**
* The finalization encountered an error while finalizing a kernel or
* indirect function.
*/
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
/**
* Mismatch between a directive in the control directive structure and in
* the HSAIL kernel.
*/
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
};
/** @} */
/** \defgroup ext-alt-finalizer-program Finalization Program
* @{
*/
/**
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
* the definition of the BrigModule_t type.
*/
typedef BrigModule_t hsa_ext_module_t;
/**
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
* modules that collectively define functions and variables used by kernels and
* indirect functions.
*/
typedef struct hsa_ext_program_s {
/**
* Opaque handle.
*/
uint64_t handle;
} hsa_ext_program_t;
/**
* @brief Create an empty HSAIL program.
*
* @param[in] machine_model Machine model used in the HSAIL program.
*
* @param[in] profile Profile used in the HSAIL program.
*
* @param[in] default_float_rounding_mode Default float rounding mode used in
* the HSAIL program.
*
* @param[in] options Vendor-specific options. May be NULL.
*
* @param[out] program Memory location where the HSA runtime stores the newly
* created HSAIL program handle.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
* resources required for the operation.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
* @p program is NULL.
*/
hsa_status_t HSA_API hsa_ext_program_create(
hsa_machine_model_t machine_model,
hsa_profile_t profile,
hsa_default_float_rounding_mode_t default_float_rounding_mode,
const char *options,
hsa_ext_program_t *program);
/**
* @brief Destroy a HSAIL program.
*
* @details The HSAIL program handle becomes invalid after it has been
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
* still valid after the HSAIL program has been destroyed, and can be used as
* intended. Resources allocated outside and associated with the HSAIL program
* (such as HSAIL modules that are added to the HSAIL program) can be released
* after the finalization program has been destroyed.
*
* @param[in] program HSAIL program.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
* invalid.
*/
hsa_status_t HSA_API hsa_ext_program_destroy(
hsa_ext_program_t program);
/**
* @brief Add a HSAIL module to an existing HSAIL program.
*
* @details The HSA runtime does not perform a deep copy of the HSAIL module
* upon addition. Instead, it stores a pointer to the HSAIL module. The
* ownership of the HSAIL module belongs to the application, which must ensure
* that @p module is not released before destroying the HSAIL program.
*
* The HSAIL module is successfully added to the HSAIL program if @p module is
* valid, if all the declarations and definitions for the same symbol are
* compatible, and if @p module specify machine model and profile that matches
* the HSAIL program.
*
* @param[in] program HSAIL program.
*
* @param[in] module HSAIL module. The application can add the same HSAIL module
* to @p program at most once. The HSAIL module must specify the same machine
* model and profile as @p program. If the floating-mode rounding mode of @p
* module is not default, then it should match that of @p program.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
* resources required for the operation.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
* module does not match machine model of @p program, or the profile of @p
* module does not match profile of @p program.
*
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
* already a part of the HSAIL program.
*
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
* definition compatibility mismatch. See the symbol compatibility rules in the
* HSA Programming Reference Manual.
*/
hsa_status_t HSA_API hsa_ext_program_add_module(
hsa_ext_program_t program,
hsa_ext_module_t module);
/**
* @brief Iterate over the HSAIL modules in a program, and invoke an
* application-defined callback on every iteration.
*
* @param[in] program HSAIL program.
*
* @param[in] callback Callback to be invoked once per HSAIL module in the
* program. The HSA runtime passes three arguments to the callback: the program,
* a HSAIL module, and the application data. If @p callback returns a status
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
* stops and ::hsa_ext_program_iterate_modules returns that status value.
*
* @param[in] data Application data that is passed to @p callback on every
* iteration. May be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
*/
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
hsa_ext_program_t program,
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
void* data),
void* data);
/**
* @brief HSAIL program attributes.
*/
typedef enum {
/**
* Machine model specified when the HSAIL program was created. The type
* of this attribute is ::hsa_machine_model_t.
*/
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
/**
* Profile specified when the HSAIL program was created. The type of
* this attribute is ::hsa_profile_t.
*/
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
/**
* Default float rounding mode specified when the HSAIL program was
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
*/
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
} hsa_ext_program_info_t;
/**
* @brief Get the current value of an attribute for a given HSAIL program.
*
* @param[in] program HSAIL program.
*
* @param[in] attribute Attribute to query.
*
* @param[out] value Pointer to an application-allocated buffer where to store
* the value of the attribute. If the buffer passed by the application is not
* large enough to hold the value of @p attribute, the behaviour is undefined.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
* HSAIL program attribute, or @p value is NULL.
*/
hsa_status_t HSA_API hsa_ext_program_get_info(
hsa_ext_program_t program,
hsa_ext_program_info_t attribute,
void *value);
/**
* @brief Finalizer-determined call convention.
*/
typedef enum {
/**
* Finalizer-determined call convention.
*/
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
} hsa_ext_finalizer_call_convention_t;
/**
* @brief Control directives specify low-level information about the
* finalization process.
*/
typedef struct hsa_ext_control_directives_s {
/**
* Bitset indicating which control directives are enabled. The bit assigned to
* a control directive is determined by the corresponding value in
* BrigControlDirective.
*
* If a control directive is disabled, its corresponding field value (if any)
* must be 0. Control directives that are only present or absent (such as
* partial workgroups) have no corresponding field as the presence of the bit
* in this mask is sufficient.
*/
uint64_t control_directives_mask;
/**
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
* assigned to an HSAIL exception is determined by the corresponding value
* in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
* control directive, the finalizer uses the union of the two masks.
*/
uint16_t break_exceptions_mask;
/**
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
* bit assigned to an HSAIL exception is determined by the corresponding value
* in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
* control directive, the finalizer uses the union of the two masks.
*/
uint16_t detect_exceptions_mask;
/**
* Maximum size (in bytes) of dynamic group memory that will be allocated by
* the application for any dispatch of the kernel. If the kernel contains a
* maxdynamicsize control directive, the two values should match.
*/
uint32_t max_dynamic_group_size;
/**
* Maximum number of grid work-items that will be used by the application to
* launch the kernel. If the kernel contains a maxflatgridsize control
* directive, the value of @a max_flat_grid_size must not be greater than the
* value of the directive, and takes precedence.
*
* The value specified for maximum absolute grid size must be greater than or
* equal to the product of the values specified by @a required_grid_size.
*
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
* control_directives_mask, this field must be greater than 0.
*/
uint64_t max_flat_grid_size;
/**
* Maximum number of work-group work-items that will be used by the
* application to launch the kernel. If the kernel contains a
* maxflatworkgroupsize control directive, the value of @a
* max_flat_workgroup_size must not be greater than the value of the
* directive, and takes precedence.
*
* The value specified for maximum absolute grid size must be greater than or
* equal to the product of the values specified by @a required_workgroup_size.
*
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
* control_directives_mask, this field must be greater than 0.
*/
uint32_t max_flat_workgroup_size;
/**
* Reserved. Must be 0.
*/
uint32_t reserved1;
/**
* Grid size that will be used by the application in any dispatch of the
* kernel. If the kernel contains a requiredgridsize control directive, the
* dimensions should match.
*
* The specified grid size must be consistent with @a required_workgroup_size
* and @a required_dim. Also, the product of the three dimensions must not
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
* only if all the corresponding control directives are enabled.
*
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
* control_directives_mask, the three dimension values must be greater than 0.
*/
uint64_t required_grid_size[3];
/**
* Work-group size that will be used by the application in any dispatch of the
* kernel. If the kernel contains a requiredworkgroupsize control directive,
* the dimensions should match.
*
* The specified work-group size must be consistent with @a required_grid_size
* and @a required_dim. Also, the product of the three dimensions must not
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
* hold only if all the corresponding control directives are enabled.
*
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
* control_directives_mask, the three dimension values must be greater than 0.
*/
hsa_dim3_t required_workgroup_size;
/**
* Number of dimensions that will be used by the application to launch the
* kernel. If the kernel contains a requireddim control directive, the two
* values should match.
*
* The specified dimensions must be consistent with @a required_grid_size and
* @a required_workgroup_size. This invariant must hold only if all the
* corresponding control directives are enabled.
*
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
* control_directives_mask, this field must be 1, 2, or 3.
*/
uint8_t required_dim;
/**
* Reserved. Must be 0.
*/
uint8_t reserved2[75];
} hsa_ext_control_directives_t;
/**
* @brief Finalize an HSAIL program for a given instruction set architecture.
*
* @details Finalize all of the kernels and indirect functions that belong to
* the same HSAIL program for a specific instruction set architecture (ISA). The
* transitive closure of all functions specified by call or scall must be
* defined. Kernels and indirect functions that are being finalized must be
* defined. Kernels and indirect functions that are referenced in kernels and
* indirect functions being finalized may or may not be defined, but must be
* declared. All the global/readonly segment variables that are referenced in
* kernels and indirect functions being finalized may or may not be defined, but
* must be declared.
*
* @param[in] program HSAIL program.
*
* @param[in] isa Instruction set architecture to finalize for.
*
* @param[in] call_convention A call convention used in a finalization. Must
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
* isa (not inclusive).
*
* @param[in] control_directives Low-level control directives that influence
* the finalization process.
*
* @param[in] options Vendor-specific options. May be NULL.
*
* @param[in] code_object_type Type of code object to produce.
*
* @param[out] code_object Code object generated by the Finalizer, which
* contains the machine code for the kernels and indirect functions in the HSAIL
* program. The code object is independent of the HSAIL module that was used to
* generate it.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
* resources required for the operation.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
* invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
* the control directive structure and in the HSAIL kernel mismatch, or if the
* same directive is used with a different value in one of the functions used by
* this kernel.
*
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
* encountered an error while compiling a kernel or an indirect function.
*/
hsa_status_t HSA_API hsa_ext_program_finalize(
hsa_ext_program_t program,
hsa_isa_t isa,
int32_t call_convention,
hsa_ext_control_directives_t control_directives,
const char *options,
hsa_code_object_type_t code_object_type,
hsa_code_object_t *code_object);
/** @} */
#define hsa_ext_finalizer_1_00
typedef struct hsa_ext_finalizer_1_00_pfn_s {
hsa_status_t (*hsa_ext_program_create)(
hsa_machine_model_t machine_model, hsa_profile_t profile,
hsa_default_float_rounding_mode_t default_float_rounding_mode,
const char *options, hsa_ext_program_t *program);
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
hsa_ext_module_t module);
hsa_status_t (*hsa_ext_program_iterate_modules)(
hsa_ext_program_t program,
hsa_status_t (*callback)(hsa_ext_program_t program,
hsa_ext_module_t module, void *data),
void *data);
hsa_status_t (*hsa_ext_program_get_info)(
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
void *value);
hsa_status_t (*hsa_ext_program_finalize)(
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
hsa_ext_control_directives_t control_directives, const char *options,
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
} hsa_ext_finalizer_1_00_pfn_t;
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy Load Diff
@@ -0,0 +1,667 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// HSA AMD extension for additional loader functionality.
#ifndef HSA_VEN_AMD_LOADER_H
#define HSA_VEN_AMD_LOADER_H
#include "hsa.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/**
* @brief Queries equivalent host address for given @p device_address, and
* records it in @p host_address.
*
*
* @details Contents of memory pointed to by @p host_address would be identical
* to contents of memory pointed to by @p device_address. Only difference
* between the two is host accessibility: @p host_address is always accessible
* from host, @p device_address might not be accessible from host.
*
* If @p device_address already points to host accessible memory, then the value
* of @p device_address is simply copied into @p host_address.
*
* The lifetime of @p host_address is the same as the lifetime of @p
* device_address, and both lifetimes are limited by the lifetime of the
* executable that is managing these addresses.
*
*
* @param[in] device_address Device address to query equivalent host address
* for.
*
* @param[out] host_address Pointer to application-allocated buffer to record
* queried equivalent host address in.
*
*
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
*
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
*
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
* null, or @p host_address is null.
*/
hsa_status_t hsa_ven_amd_loader_query_host_address(
const void *device_address,
const void **host_address);
/**
* @brief The storage type of the code object that is backing loaded memory
* segment.
*/
typedef enum {
/**
* Loaded memory segment is not backed by any code object (anonymous), as the
* case would be with BSS (uninitialized data).
*/
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
/**
* Loaded memory segment is backed by the code object that is stored in the
* file.
*/
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
/**
* Loaded memory segment is backed by the code object that is stored in the
* memory.
*/
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
} hsa_ven_amd_loader_code_object_storage_type_t;
/**
* @brief Loaded memory segment descriptor.
*
*
* @details Loaded memory segment descriptor describes underlying loaded memory
* segment. Loaded memory segment is created/allocated by the executable during
* the loading of the code object that is backing underlying memory segment.
*
* The lifetime of underlying memory segment is limited by the lifetime of the
* executable that is managing underlying memory segment.
*/
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
/**
* Agent underlying memory segment is allocated on. If the code object that is
* backing underlying memory segment is program code object, then 0.
*/
hsa_agent_t agent;
/**
* Executable that is managing this underlying memory segment.
*/
hsa_executable_t executable;
/**
* Storage type of the code object that is backing underlying memory segment.
*/
hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
/**
* If the storage type of the code object that is backing underlying memory
* segment is:
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
* filepath to the code object;
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
* accessible pointer to the first byte of the code object.
*/
const void *code_object_storage_base;
/**
* If the storage type of the code object that is backing underlying memory
* segment is:
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
* the filepath to the code object (including null-terminating character);
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
* bytes, of the memory occupied by the code object.
*/
size_t code_object_storage_size;
/**
* If the storage type of the code object that is backing underlying memory
* segment is:
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
* - other, then offset, in bytes, from the beginning of the code object to
* the first byte in the code object data is copied from.
*/
size_t code_object_storage_offset;
/**
* Starting address of the underlying memory segment.
*/
const void *segment_base;
/**
* Size, in bytes, of the underlying memory segment.
*/
size_t segment_size;
} hsa_ven_amd_loader_segment_descriptor_t;
/**
* @brief Either queries loaded memory segment descriptors, or total number of
* loaded memory segment descriptors.
*
*
* @details If @p segment_descriptors is not null and @p num_segment_descriptors
* points to number that exactly matches total number of loaded memory segment
* descriptors, then queries loaded memory segment descriptors, and records them
* in @p segment_descriptors. If @p segment_descriptors is null and @p
* num_segment_descriptors points to zero, then queries total number of loaded
* memory segment descriptors, and records it in @p num_segment_descriptors. In
* all other cases returns appropriate error code (see below).
*
* The caller of this function is responsible for the allocation/deallocation
* and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
*
* The lifetime of loaded memory segments that are described by queried loaded
* memory segment descriptors is limited by the lifetime of the executable that
* is managing loaded memory segments.
*
* Queried loaded memory segment descriptors are always self-consistent: they
* describe a complete set of loaded memory segments that are being backed by
* fully loaded code objects that are present at the time (i.e. this function
* is blocked until all executable manipulations are fully complete).
*
*
* @param[out] segment_descriptors Pointer to application-allocated buffer to
* record queried loaded memory segment descriptors in. Can be null if @p
* num_segment_descriptors points to zero.
*
* @param[in,out] num_segment_descriptors Pointer to application-allocated
* buffer that contains either total number of loaded memory segment descriptors
* or zero.
*
*
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
*
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
*
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
* while @p num_segment_descriptors points to non-zero number, @p
* segment_descriptors is not null while @p num_segment_descriptors points to
* zero, or @p num_segment_descriptors is null.
*
* @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
* does not point to number that exactly matches total number of loaded memory
* segment descriptors.
*/
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
/**
* @brief Obtains the handle of executable to which the device address belongs.
*
* @details This method should not be used to obtain executable handle by using
* a host address. The executable returned is expected to be alive until its
* destroyed by the user.
*
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
*
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
*
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
* is no exectuable found for this kernel code object.
*/
hsa_status_t hsa_ven_amd_loader_query_executable(
const void *device_address,
hsa_executable_t *executable);
//===----------------------------------------------------------------------===//
/**
* @brief Iterate over the loaded code objects in an executable, and invoke
* an application-defined callback on every iteration.
*
* @param[in] executable Executable.
*
* @param[in] callback Callback to be invoked once per loaded code object. The
* HSA runtime passes three arguments to the callback: the executable, a
* loaded code object, and the application data. If @p callback returns a
* status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
* traversal stops and
* ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
* status value.
*
* @param[in] data Application data that is passed to @p callback on every
* iteration. May be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
*/
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
/**
* @brief Loaded code object kind.
*/
typedef enum {
/**
* Program code object.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
/**
* Agent code object.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
} hsa_ven_amd_loader_loaded_code_object_kind_t;
/**
* @brief Loaded code object attributes.
*/
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
/**
* The executable in which this loaded code object is loaded. The
* type of this attribute is ::hsa_executable_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
/**
* The kind of this loaded code object. The type of this attribute is
* ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
/**
* The agent on which this loaded code object is loaded. The
* value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
* attribute is ::hsa_agent_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
/**
* The storage type of the code object reader used to load the loaded code object.
* The type of this attribute is ::uint32_t interpreted as a
* ::hsa_ven_amd_loader_code_object_storage_type_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
/**
* The memory address of the first byte of the code object that was loaaded.
* The value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
* attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
/**
* The memory size in bytes of the code object that was loaaded.
* The value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
* attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
/**
* The file descriptor of the code object that was loaaded.
* The value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
* attribute is ::int.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
/**
* The signed byte address difference of the memory address at which the code
* object is loaded minus the virtual address specified in the code object
* that is loaded. The value of this attribute is only defined if the
* executable in which the code object is loaded is froozen. The type of this
* attribute is ::int64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
/**
* The base memory address at which the code object is loaded. This is the
* base address of the allocation for the lowest addressed segment of the code
* object that is loaded. Note that any non-loaded segments before the first
* loaded segment are ignored. The value of this attribute is only defined if
* the executable in which the code object is loaded is froozen. The type of
* this attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
/**
* The byte size of the loaded code objects contiguous memory allocation. The
* value of this attribute is only defined if the executable in which the code
* object is loaded is froozen. The type of this attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
/**
* The length of the URI in bytes, not including the NUL terminator. The type
* of this attribute is uint32_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
/**
* The URI name from which the code object was loaded. The type of this
* attribute is a NUL terminated \p char* with the length equal to the value
* of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
* The URI name syntax is defined by the following BNF syntax:
*
* code_object_uri ::== file_uri | memory_uri
* file_uri ::== "file://" file_path [ range_specifier ]
* memory_uri ::== "memory://" process_id range_specifier
* range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
* file_path ::== URI_ENCODED_OS_FILE_PATH
* process_id ::== DECIMAL_NUMBER
* number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
*
* ``number`` is a C integral literal where hexadecimal values are prefixed by
* "0x" or "0X", and octal values by "0".
*
* ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
* In URI encoding, every character that is not in the regular expression
* ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
* proceeded by "%". Directories in the path are separated by "/".
*
* ``offset`` is a 0-based byte offset to the start of the code object. For a
* file URI, it is from the start of the file specified by the ``file_path``,
* and if omitted defaults to 0. For a memory URI, it is the memory address
* and is required.
*
* ``size`` is the number of bytes in the code object. For a file URI, if
* omitted it defaults to the size of the file. It is required for a memory
* URI.
*
* ``process_id`` is the identity of the process owning the memory. For Linux
* it is the C unsigned integral decimal literal for the process ID (PID).
*
* For example:
*
* file:///dir1/dir2/file1
* file:///dir3/dir4/file2#offset=0x2000&size=3000
* memory://1234#offset=0x20000&size=3000
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
} hsa_ven_amd_loader_loaded_code_object_info_t;
/**
* @brief Get the current value of an attribute for a given loaded code
* object.
*
* @param[in] loaded_code_object Loaded code object.
*
* @param[in] attribute Attribute to query.
*
* @param[out] value Pointer to an application-allocated buffer where to store
* the value of the attribute. If the buffer passed by the application is not
* large enough to hold the value of @p attribute, the behavior is undefined.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
* invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
* loaded code object attribute, or @p value is NULL.
*/
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
//===----------------------------------------------------------------------===//
/**
* @brief Create a code object reader to operate on a file with size and offset.
*
* @param[in] file File descriptor. The file must have been opened by
* application with at least read permissions prior calling this function. The
* file must contain a vendor-specific code object.
*
* The file is owned and managed by the application; the lifetime of the file
* descriptor must exceed that of any associated code object reader.
*
* @param[in] size Size of the code object embedded in @p file.
*
* @param[in] offset 0-based offset relative to the beginning of the @p file
* that denotes the beginning of the code object embedded within the @p file.
*
* @param[out] code_object_reader Memory location to store the newly created
* code object reader handle. Must not be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
* read permissions. This condition may also be reported as
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
* ::hsa_executable_load_agent_code_object function.
*
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
* do not form a valid code object. If file size is 0. Or offset > file size.
* This condition may also be reported as
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
* ::hsa_executable_load_agent_code_object function.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
* allocate the required resources.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
*/
hsa_status_t
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
hsa_file_t file,
size_t offset,
size_t size,
hsa_code_object_reader_t *code_object_reader);
//===----------------------------------------------------------------------===//
/**
* @brief Iterate over the available executables, and invoke an
* application-defined callback on every iteration. While
* ::hsa_ven_amd_loader_iterate_executables is executing any calls to
* ::hsa_executable_create, ::hsa_executable_create_alt, or
* ::hsa_executable_destroy will be blocked.
*
* @param[in] callback Callback to be invoked once per executable. The HSA
* runtime passes two arguments to the callback: the executable and the
* application data. If @p callback returns a status other than
* ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
* ::hsa_ven_amd_loader_iterate_executables returns that status value. If
* @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
* ::hsa_executable_destroy then the behavior is undefined.
*
* @param[in] data Application data that is passed to @p callback on every
* iteration. May be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
*/
hsa_status_t
hsa_ven_amd_loader_iterate_executables(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data);
//===----------------------------------------------------------------------===//
/**
* @brief Extension version.
*/
#define hsa_ven_amd_loader 001003
/**
* @brief Extension function table version 1.00.
*/
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
} hsa_ven_amd_loader_1_00_pfn_t;
/**
* @brief Extension function table version 1.01.
*/
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
} hsa_ven_amd_loader_1_01_pfn_t;
/**
* @brief Extension function table version 1.02.
*/
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
hsa_status_t
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
hsa_file_t file,
size_t offset,
size_t size,
hsa_code_object_reader_t *code_object_reader);
} hsa_ven_amd_loader_1_02_pfn_t;
/**
* @brief Extension function table version 1.03.
*/
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
hsa_status_t
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
hsa_file_t file,
size_t offset,
size_t size,
hsa_code_object_reader_t *code_object_reader);
hsa_status_t
(*hsa_ven_amd_loader_iterate_executables)(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data);
} hsa_ven_amd_loader_1_03_pfn_t;
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* HSA_VEN_AMD_LOADER_H */
@@ -0,0 +1,30 @@
/*****************************************************************************\
*
* *** NEED TO REPLACE THIS WITH OPEN SOURCE HEADER ***
*
*
* Copyright (c) 1999-2004 ATI Technologies Inc. (unpublished)
*
*
\*****************************************************************************/
#ifndef _AMDID_H
#define _AMDID_H
#define FAMILY_UNKNOWN 0
#define FAMILY_SI 110 // Southern Islands: Tahiti, Pitcairn, Cape Verde
#define FAMILY_TN 105 // Fusion Trinity: Devastator, Scrapper
#define FAMILY_CI 120 // Sea Islands: Hawaii, Bonaire
#define FAMILY_KV 125 // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi
#define FAMILY_VI 130 // Volcanic Islands: Iceland, Tonga
#define FAMILY_CZ 135 // Carrizo
#define AMD_VENDOR_ID 0x1002 // used for GPUs -- AMD also has 1022 for CPU HW
#endif // _AMDID_H
@@ -0,0 +1,46 @@
/*****************************************************************************\
*
* *** NEED TO REPLACE THIS WITH OPEN SOURCE HEADER ***
*
*
* Copyright (c) 2013 Advanced Micro Devices, Inc.
*
*
\*****************************************************************************/
#ifndef _CZ_ID_H
#define _CZ_ID_H
enum {
CARRIZO_A1 = 0x02,
CARRIZO_BRISTOL_A0 = 0x10,
STONEY_A0 = 0x61,
CZ_UNKNOWN = 0xFF
};
// for the original CARRIZO + other CARRIZO kickers
#define ASICREV_IS_CARRIZO(eChipRev) (eChipRev == CARRIZO_A1)
//
// Carrizo device IDs
//
#define DEVICE_ID_CZ_9874 0x9874
// CARRIZO ASIC internal revision number
#define INTERNAL_REV_CARRIZO_A1 0x01
// CARRIZO PCI Revision IDs
#define PRID_CZ_C4 0xC4 // Client B10
#define PRID_CZ_C5 0xC5 // Client B8
#define PRID_CZ_C6 0xC6 // Client B6
#define PRID_CZ_C7 0xC7 // Client B4
// eCARRIZO PCI Revision IDs
#define PRID_eCZ_84 0x84 // eCZ B10
#define PRID_eCZ_81 0x81 // eCZ B8/iTemp
#define PRID_eCZ_85 0x85 // eCZ B8
#define PRID_eCZ_87 0x87 // eCZ B4
#endif // _CZ_ID_H