Migrate amdgpu-windows-interop to rocm-systems (#808)
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
#-----------------------------------------------------------------------------
|
||||
# Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# amdhsacode library
|
||||
#
|
||||
# This file is expected to be included from top-level CMakeLists.txt.
|
||||
#
|
||||
# Dependencies:
|
||||
# - Compiler definitions
|
||||
# - elf library
|
||||
# - hsail library
|
||||
# - sp3 library
|
||||
#
|
||||
# Defines:
|
||||
# - amdhsacode library and target include directories
|
||||
|
||||
set(USE_AMD_LIBELF "no" CACHE STRING "Do not use AMD LIBELF by default")
|
||||
set(NO_SI_SP3 "no" CACHE STRING "Disable using SP3")
|
||||
|
||||
file(GLOB sources *.cpp *.hpp)
|
||||
|
||||
add_library(amdhsacode STATIC ${sources})
|
||||
set_target_properties(amdhsacode PROPERTIES
|
||||
MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
|
||||
POSITION_INDEPENDENT_CODE ON
|
||||
)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "^(GNU|(Apple)?Clang)$")
|
||||
target_compile_options(amdhsacode PRIVATE
|
||||
-Werror
|
||||
-Wno-inconsistent-missing-override
|
||||
)
|
||||
endif()
|
||||
|
||||
set(LIBELF_LIB)
|
||||
if(${USE_AMD_LIBELF} STREQUAL "yes")
|
||||
target_compile_definitions(amdhsacode PRIVATE AMD_LIBELF)
|
||||
target_include_directories(amdhsacode PUBLIC ${HSAIL_ELFTOOLCHAIN_DIR}/common)
|
||||
if(WIN32)
|
||||
target_include_directories(amdhsacode PUBLIC ${HSAIL_ELFTOOLCHAIN_DIR}/common/win32)
|
||||
endif()
|
||||
target_include_directories(amdhsacode PUBLIC ${HSAIL_ELFTOOLCHAIN_DIR}/libelf)
|
||||
set(LIBELF_LIB oclelf)
|
||||
message(STATUS "Using AMD LIBELF")
|
||||
else()
|
||||
find_package(LibElf REQUIRED)
|
||||
set(LIBELF_LIB elf)
|
||||
message(STATUS "Using SYSTEM LIBELF")
|
||||
endif()
|
||||
|
||||
target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../hsail-tools/libHSAIL)
|
||||
if(${NO_SI_SP3} STREQUAL "no")
|
||||
target_include_directories(amdhsacode PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../Chip/sp3)
|
||||
endif()
|
||||
#if defined(GFX102_BUILD)
|
||||
target_compile_definitions(amdhsacode PRIVATE GFX102_BUILD)
|
||||
#endif
|
||||
#if defined(GFX11_BUILD)
|
||||
target_compile_definitions(amdhsacode PRIVATE GFX11_BUILD)
|
||||
#endif
|
||||
#if defined(GFX115_BUILD)
|
||||
target_compile_definitions(amdhsacode PRIVATE GFX115_BUILD)
|
||||
#endif
|
||||
#if defined(GFX12_BUILD)
|
||||
target_compile_definitions(amdhsacode PRIVATE GFX12_BUILD)
|
||||
#endif
|
||||
|
||||
if(${NO_SI_SP3} STREQUAL "yes")
|
||||
target_compile_definitions(amdhsacode PRIVATE NO_SI_SP3)
|
||||
endif()
|
||||
|
||||
target_link_libraries(amdhsacode PRIVATE ${LIBELF_LIB})
|
||||
|
||||
if(${NO_SI_SP3} STREQUAL "no")
|
||||
set(SC_BUILD_SP3 ON)
|
||||
if(SC_BUILD_SP3)
|
||||
target_link_libraries(amdhsacode PUBLIC sp3)
|
||||
endif()
|
||||
endif()
|
||||
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
+1048
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
+195
@@ -0,0 +1,195 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_CODE_UTIL_HPP_
|
||||
#define AMD_HSA_CODE_UTIL_HPP_
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#else // _WIN32
|
||||
#include <cstdlib>
|
||||
#endif // _WIN32
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#include "amd_hsa_elf.h"
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
|
||||
#define hsa_error(e) static_cast<hsa_status_t>(e)
|
||||
|
||||
#define release_assert(e) \
|
||||
if (!(e)) { \
|
||||
std::cerr << __FILE__ << ":"; \
|
||||
std::cerr << __LINE__ << ":"; \
|
||||
std::cerr << " Assertion `" << #e << "' failed." << std::endl; \
|
||||
std::abort(); \
|
||||
} \
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
|
||||
std::string HsaSymbolKindToString(hsa_symbol_kind_t kind);
|
||||
std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage);
|
||||
std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation);
|
||||
std::string HsaVariableSegmentToString(hsa_variable_segment_t segment);
|
||||
std::string HsaProfileToString(hsa_profile_t profile);
|
||||
std::string HsaMachineModelToString(hsa_machine_model_t model);
|
||||
std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode);
|
||||
std::string AmdMachineKindToString(amd_machine_kind16_t machine);
|
||||
std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode);
|
||||
std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode);
|
||||
std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id);
|
||||
std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size);
|
||||
std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions);
|
||||
std::string AmdPowerTwoToString(amd_powertwo8_t p);
|
||||
amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec);
|
||||
bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec);
|
||||
std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg);
|
||||
std::string AmdPTLoadToString(uint64_t type);
|
||||
|
||||
void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc);
|
||||
void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1);
|
||||
void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2);
|
||||
void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties);
|
||||
void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives);
|
||||
|
||||
namespace code_options {
|
||||
// Space between options (not at the beginning).
|
||||
std::ostream& space(std::ostream& out);
|
||||
|
||||
// Control directive option without value.
|
||||
struct control_directive {
|
||||
const char *name;
|
||||
control_directive(const char* name_) : name(name_) { }
|
||||
};
|
||||
std::ostream& operator<<(std::ostream& out, const control_directive& d);
|
||||
|
||||
// Exceptions mask string.
|
||||
struct exceptions_mask {
|
||||
uint16_t mask;
|
||||
exceptions_mask(uint16_t mask_) : mask(mask_) { }
|
||||
};
|
||||
std::ostream& operator<<(std::ostream& out, const exceptions_mask& e);
|
||||
|
||||
// Control directives options.
|
||||
struct control_directives {
|
||||
const hsa_ext_control_directives_t& d;
|
||||
control_directives(const hsa_ext_control_directives_t& d_) : d(d_) { }
|
||||
};
|
||||
std::ostream& operator<<(std::ostream& out, const control_directives& cd);
|
||||
}
|
||||
|
||||
const char* hsaerr2str(hsa_status_t status);
|
||||
bool ReadFileIntoBuffer(const std::string& filename, std::vector<char>& buffer);
|
||||
|
||||
// Create new empty temporary file that will be deleted when closed.
|
||||
int OpenTempFile(const char* prefix);
|
||||
void CloseTempFile(int fd);
|
||||
|
||||
// Helper comment types for isa disassembler
|
||||
enum DumpIsaCommentType {
|
||||
COMMENT_AMD_KERNEL_CODE_T_BEGIN = 1,
|
||||
COMMENT_AMD_KERNEL_CODE_T_END,
|
||||
COMMENT_KERNEL_ISA_BEGIN,
|
||||
};
|
||||
|
||||
// Callbacks to create helper comments for isa disassembler
|
||||
const char * CommentTopCallBack(void *ctx, int type);
|
||||
const char * CommentRightCallBack(void *ctx, int type);
|
||||
|
||||
// Parse disassembler instruction line to find offset
|
||||
uint32_t ParseInstructionOffset(const std::string& instruction);
|
||||
|
||||
// Trim whitespaces from start of string
|
||||
void ltrim(std::string &str);
|
||||
|
||||
|
||||
// Helper function that allocates an aligned memory.
|
||||
inline void*
|
||||
alignedMalloc(size_t size, size_t alignment)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return ::_aligned_malloc(size, alignment);
|
||||
#else
|
||||
void * ptr = NULL;
|
||||
alignment = (std::max)(alignment, sizeof(void*));
|
||||
if (0 == ::posix_memalign(&ptr, alignment, size)) {
|
||||
return ptr;
|
||||
}
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Helper function that frees an aligned memory.
|
||||
inline void
|
||||
alignedFree(void *ptr)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
::_aligned_free(ptr);
|
||||
#else
|
||||
free(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint64_t alignUp(uint64_t num, uint64_t align)
|
||||
{
|
||||
assert(align);
|
||||
assert((align & (align - 1)) == 0);
|
||||
return (num + align - 1) & ~(align - 1);
|
||||
}
|
||||
|
||||
inline uint32_t alignUp(uint32_t num, uint32_t align)
|
||||
{
|
||||
assert(align);
|
||||
assert((align & (align - 1)) == 0);
|
||||
return (num + align - 1) & ~(align - 1);
|
||||
}
|
||||
|
||||
std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i = 0);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // AMD_HSA_CODE_UTIL_HPP_
|
||||
@@ -0,0 +1,94 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "amd_hsa_locks.hpp"
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace common {
|
||||
|
||||
void ReaderWriterLock::ReaderLock()
|
||||
{
|
||||
internal_lock_.lock();
|
||||
while (0 < writers_count_) {
|
||||
readers_condition_.wait(internal_lock_);
|
||||
}
|
||||
readers_count_ += 1;
|
||||
internal_lock_.unlock();
|
||||
}
|
||||
|
||||
void ReaderWriterLock::ReaderUnlock()
|
||||
{
|
||||
internal_lock_.lock();
|
||||
readers_count_ -= 1;
|
||||
if (0 == readers_count_ && 0 < writers_waiting_) {
|
||||
writers_condition_.notify_one();
|
||||
}
|
||||
internal_lock_.unlock();
|
||||
}
|
||||
|
||||
void ReaderWriterLock::WriterLock()
|
||||
{
|
||||
internal_lock_.lock();
|
||||
writers_waiting_ += 1;
|
||||
while (0 < readers_count_ || 0 < writers_count_) {
|
||||
writers_condition_.wait(internal_lock_);
|
||||
}
|
||||
writers_count_ += 1;
|
||||
writers_waiting_ -= 1;
|
||||
internal_lock_.unlock();
|
||||
}
|
||||
|
||||
void ReaderWriterLock::WriterUnlock()
|
||||
{
|
||||
internal_lock_.lock();
|
||||
writers_count_ -= 1;
|
||||
if (0 < writers_waiting_) {
|
||||
writers_condition_.notify_one();
|
||||
}
|
||||
readers_condition_.notify_all();
|
||||
internal_lock_.unlock();
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace hsa
|
||||
} // namespace amd
|
||||
@@ -0,0 +1,127 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_LOCKS_HPP
|
||||
#define AMD_HSA_LOCKS_HPP
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <mutex>
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace common {
|
||||
|
||||
template<typename LockType>
|
||||
class ReaderLockGuard final {
|
||||
public:
|
||||
explicit ReaderLockGuard(LockType &lock):
|
||||
lock_(lock)
|
||||
{
|
||||
lock_.ReaderLock();
|
||||
}
|
||||
|
||||
~ReaderLockGuard()
|
||||
{
|
||||
lock_.ReaderUnlock();
|
||||
}
|
||||
|
||||
private:
|
||||
ReaderLockGuard(const ReaderLockGuard&);
|
||||
ReaderLockGuard& operator=(const ReaderLockGuard&);
|
||||
|
||||
LockType &lock_;
|
||||
};
|
||||
|
||||
template<typename LockType>
|
||||
class WriterLockGuard final {
|
||||
public:
|
||||
explicit WriterLockGuard(LockType &lock):
|
||||
lock_(lock)
|
||||
{
|
||||
lock_.WriterLock();
|
||||
}
|
||||
|
||||
~WriterLockGuard()
|
||||
{
|
||||
lock_.WriterUnlock();
|
||||
}
|
||||
|
||||
private:
|
||||
WriterLockGuard(const WriterLockGuard&);
|
||||
WriterLockGuard& operator=(const WriterLockGuard&);
|
||||
|
||||
LockType &lock_;
|
||||
};
|
||||
|
||||
class ReaderWriterLock final {
|
||||
public:
|
||||
ReaderWriterLock():
|
||||
readers_count_(0), writers_count_(0), writers_waiting_(0) {}
|
||||
|
||||
~ReaderWriterLock() {}
|
||||
|
||||
void ReaderLock();
|
||||
|
||||
void ReaderUnlock();
|
||||
|
||||
void WriterLock();
|
||||
|
||||
void WriterUnlock();
|
||||
|
||||
private:
|
||||
ReaderWriterLock(const ReaderWriterLock&);
|
||||
ReaderWriterLock& operator=(const ReaderWriterLock&);
|
||||
|
||||
size_t readers_count_;
|
||||
size_t writers_count_;
|
||||
size_t writers_waiting_;
|
||||
std::mutex internal_lock_;
|
||||
std::condition_variable_any readers_condition_;
|
||||
std::condition_variable_any writers_condition_;
|
||||
};
|
||||
|
||||
} // namespace common
|
||||
} // namespace hsa
|
||||
} // namespace amd
|
||||
|
||||
#endif // AMD_HSA_LOCKS_HPP
|
||||
@@ -0,0 +1,381 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "amd_options.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdarg>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace amd {
|
||||
namespace options {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// StringFactory. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
std::string StringFactory::Flatten(const char **cstrs,
|
||||
const uint32_t &cstrs_count,
|
||||
const char &spacer) {
|
||||
if (NULL == cstrs || 0 == cstrs_count) {
|
||||
return std::string();
|
||||
}
|
||||
|
||||
std::string flattened;
|
||||
for (uint32_t i = 0; i < cstrs_count; ++i) {
|
||||
if (NULL == cstrs[i]) {
|
||||
return std::string();
|
||||
}
|
||||
flattened += cstrs[i];
|
||||
if (i != (cstrs_count - 1)) {
|
||||
flattened += spacer;
|
||||
}
|
||||
}
|
||||
return flattened;
|
||||
}
|
||||
|
||||
std::list<std::string> StringFactory::Tokenize(const char *cstr,
|
||||
const char &delim) {
|
||||
if (NULL == cstr) {
|
||||
return std::list<std::string>();
|
||||
}
|
||||
|
||||
const std::string str = cstr;
|
||||
size_t start = 0;
|
||||
size_t end = 0;
|
||||
|
||||
std::list<std::string> tokens;
|
||||
while ((end = str.find(delim, start)) != std::string::npos) {
|
||||
if (start != end) {
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
if (str.size() > start) {
|
||||
tokens.push_back(str.substr(start));
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
std::string StringFactory::ToLower(const std::string& str) {
|
||||
std::string lower(str.length(), ' ');
|
||||
std::transform(str.begin(), str.end(), lower.begin(), ::tolower);
|
||||
return lower;
|
||||
}
|
||||
|
||||
std::string StringFactory::ToUpper(const std::string& str) {
|
||||
std::string upper(str.length(), ' ');
|
||||
std::transform(str.begin(), str.end(), upper.begin(), ::toupper);
|
||||
return upper;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HelpPrinter, HelpStreambuf. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
HelpStreambuf::HelpStreambuf(std::ostream& stream)
|
||||
: basicStream_(&stream),
|
||||
basicBuf_(stream.rdbuf()),
|
||||
wrapWidth_(0),
|
||||
indentSize_(0),
|
||||
atLineStart_(true),
|
||||
lineWidth_(0)
|
||||
{
|
||||
basicStream_->rdbuf(this);
|
||||
}
|
||||
|
||||
HelpStreambuf::int_type HelpStreambuf::overflow(HelpStreambuf::int_type ch) {
|
||||
if (atLineStart_ && ch != '\n') {
|
||||
std::string indent(indentSize_, ' ');
|
||||
basicBuf_->sputn(indent.data(), indent.size());
|
||||
lineWidth_ = indentSize_;
|
||||
atLineStart_ = false;
|
||||
} else if (ch == '\n') {
|
||||
atLineStart_ = true;
|
||||
lineWidth_ = 0;
|
||||
}
|
||||
|
||||
if (wrapWidth_ > 0 && lineWidth_ == wrapWidth_) {
|
||||
basicBuf_->sputc('\n');
|
||||
std::string indent(indentSize_, ' ');
|
||||
basicBuf_->sputn(indent.data(), indent.size());
|
||||
lineWidth_ = indentSize_;
|
||||
atLineStart_ = false;
|
||||
}
|
||||
|
||||
lineWidth_++;
|
||||
return basicBuf_->sputc(ch);
|
||||
}
|
||||
|
||||
HelpPrinter& HelpPrinter::PrintUsage(const std::string& usage) {
|
||||
sbuf_.IndentSize(0);
|
||||
sbuf_.WrapWidth(0);
|
||||
Stream() << usage;
|
||||
if (usage.length() < USAGE_WIDTH) {
|
||||
Stream() << std::string(USAGE_WIDTH - usage.length(), ' ');
|
||||
}
|
||||
Stream() << std::string(PADDING_WIDTH, ' ');
|
||||
return *this;
|
||||
}
|
||||
|
||||
HelpPrinter& HelpPrinter::PrintDescription(const std::string& description) {
|
||||
sbuf_.WrapWidth(USAGE_WIDTH + PADDING_WIDTH + DESCRIPTION_WIDTH);
|
||||
sbuf_.IndentSize(USAGE_WIDTH + PADDING_WIDTH);
|
||||
Stream() << description << std::endl;
|
||||
sbuf_.IndentSize(0);
|
||||
sbuf_.WrapWidth(0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ChoiceOptioin. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
ChoiceOption::ChoiceOption(const std::string& name,
|
||||
const std::vector<std::string>& choices,
|
||||
const std::string& help,
|
||||
std::ostream& error)
|
||||
: OptionBase(name, help, error) {
|
||||
for (const auto& choice: choices) {
|
||||
choices_.insert(choice);
|
||||
}
|
||||
}
|
||||
|
||||
bool ChoiceOption::ProcessTokens(std::list<std::string> &tokens) {
|
||||
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
|
||||
if (2 != tokens.size()) {
|
||||
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
tokens.pop_front();
|
||||
|
||||
if (0 == choices_.count(tokens.front())) {
|
||||
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
is_set_ = true;
|
||||
value_ = tokens.front();
|
||||
tokens.pop_front();
|
||||
return true;
|
||||
}
|
||||
|
||||
void ChoiceOption::PrintHelp(HelpPrinter& printer) const {
|
||||
std::string usage = "-" + name_ + "=[";
|
||||
bool first = true;
|
||||
for (const auto& choice: choices_) {
|
||||
if (!first) {
|
||||
usage += '|';
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
usage += choice;
|
||||
}
|
||||
usage += "]";
|
||||
printer.PrintUsage(usage).PrintDescription(help_);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PrefixOption. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
bool PrefixOption::IsValid() const {
|
||||
return (0 < name_.size()) && (name_.find(':') == std::string::npos);
|
||||
}
|
||||
|
||||
std::string::size_type PrefixOption::FindPrefix(const std::string& token) const {
|
||||
auto prefix = name_ + ':';
|
||||
return token.find(prefix);
|
||||
}
|
||||
|
||||
bool PrefixOption::Accept(const std::string& token) const {
|
||||
return
|
||||
(token.compare(0, name_.length(), name_) == 0) &&
|
||||
token.length() > name_.length() &&
|
||||
token[name_.length()] == ':';
|
||||
}
|
||||
|
||||
bool PrefixOption::ProcessTokens(std::list<std::string> &tokens) {
|
||||
assert(1 <= tokens.size());
|
||||
assert(Accept(tokens.front()) && "option name is mismatched");
|
||||
|
||||
std::string value = tokens.front(); tokens.pop_front();
|
||||
value = value.substr(name_.length() + 1);
|
||||
|
||||
for (const auto& token: tokens) {
|
||||
value += '=';
|
||||
value += token;
|
||||
}
|
||||
tokens.clear();
|
||||
|
||||
values_.push_back(value);
|
||||
is_set_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void PrefixOption::PrintHelp(HelpPrinter& printer) const {
|
||||
printer.PrintUsage("-" + name_ + ":[value]").PrintDescription(help_);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// OptionParser. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
std::vector<OptionBase*>::iterator
|
||||
OptionParser::FindOption(const std::string& name) {
|
||||
std::vector<OptionBase*>::iterator it = options_.begin();
|
||||
std::vector<OptionBase*>::iterator end = options_.end();
|
||||
for (; it != end; ++it) {
|
||||
if ((*it)->Accept(name)) {
|
||||
return it;
|
||||
}
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
bool OptionParser::AddOption(OptionBase *option) {
|
||||
if (NULL == option || !option->IsValid()) {
|
||||
return false;
|
||||
}
|
||||
if (FindOption(option->name()) != options_.end()) {
|
||||
return false;
|
||||
}
|
||||
options_.push_back(option);
|
||||
return true;
|
||||
}
|
||||
|
||||
const std::string& OptionParser::Unknown() const {
|
||||
assert(collectUnknown_);
|
||||
return unknownOptions_;
|
||||
}
|
||||
|
||||
bool OptionParser::ParseOptions(const char *options) {
|
||||
std::list<std::string> tokens_l1 = StringFactory::Tokenize(options, ' ');
|
||||
if (0 == tokens_l1.size()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::list<std::string>::iterator tokens_l1i = tokens_l1.begin();
|
||||
while (tokens_l1i != tokens_l1.end()) {
|
||||
if ('-' == tokens_l1i->at(0)) {
|
||||
std::list<std::string>::iterator option_begin = tokens_l1i;
|
||||
std::list<std::string> tokens_l2;
|
||||
do {
|
||||
tokens_l2.push_back(*tokens_l1i);
|
||||
tokens_l1i++;
|
||||
} while (tokens_l1i != tokens_l1.end() && '-' != tokens_l1i->at(0));
|
||||
std::list<std::string>::iterator option_end = tokens_l1i;
|
||||
tokens_l2.front().erase(0, 1);
|
||||
|
||||
if (1 == tokens_l2.size()) {
|
||||
tokens_l2 = StringFactory::Tokenize(tokens_l2.front().c_str(), '=');
|
||||
if (2 < tokens_l2.size()) {
|
||||
if (collectUnknown_) {
|
||||
unknownOptions_ += *tokens_l1i + " ";
|
||||
continue;
|
||||
} else {
|
||||
error() << "error: invalid option format: \'"
|
||||
<< tokens_l2.front() << '\'' << std::endl;
|
||||
Reset();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto find_status = FindOption(tokens_l2.front());
|
||||
if (find_status == options_.end()) {
|
||||
if (collectUnknown_) {
|
||||
for (; option_begin != option_end; ++option_begin) {
|
||||
unknownOptions_ += *option_begin + " ";
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
error() << "error: unknown option: \'"
|
||||
<< tokens_l2.front() << '\'' << std::endl;
|
||||
Reset();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(*find_status)->ProcessTokens(tokens_l2)) {
|
||||
Reset();
|
||||
return false;
|
||||
}
|
||||
assert(0 == tokens_l2.size());
|
||||
} else {
|
||||
if (collectUnknown_) {
|
||||
unknownOptions_ += *tokens_l1i + " ";
|
||||
} else {
|
||||
error() << "error: unknown option: \'"
|
||||
<< *tokens_l1i << '\'' << std::endl;
|
||||
Reset();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void OptionParser::PrintHelp(std::ostream& out, const std::string& addition) const {
|
||||
HelpPrinter printer(out);
|
||||
for (const auto& option: options_) {
|
||||
option->PrintHelp(printer);
|
||||
}
|
||||
out << addition << std::endl;
|
||||
}
|
||||
|
||||
void OptionParser::Reset() {
|
||||
unknownOptions_.clear();
|
||||
for (auto &option : options_) {
|
||||
option->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace options
|
||||
} // namespace amd
|
||||
@@ -0,0 +1,476 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_OPTIONS_HPP
|
||||
#define AMD_OPTIONS_HPP
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace amd {
|
||||
namespace options {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// StringFactory. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class StringFactory final {
|
||||
public:
|
||||
static std::string Flatten(const char **cstrs,
|
||||
const uint32_t &cstrs_count,
|
||||
const char &spacer = '\0');
|
||||
|
||||
static std::list<std::string> Tokenize(const char *cstr, const char &delim);
|
||||
|
||||
static std::string ToLower(const std::string& str);
|
||||
static std::string ToUpper(const std::string& str);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HelpPrinter, HelpStreambuf. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class HelpStreambuf : public std::streambuf {
|
||||
public:
|
||||
explicit HelpStreambuf(std::ostream& stream);
|
||||
|
||||
virtual ~HelpStreambuf() {
|
||||
basicStream_->rdbuf(basicBuf_);
|
||||
}
|
||||
|
||||
void IndentSize(unsigned indent) {
|
||||
assert(wrapWidth_ == 0 || indentSize_ < wrapWidth_);
|
||||
indentSize_ = indent;
|
||||
}
|
||||
|
||||
void WrapWidth(unsigned wrap) {
|
||||
assert(wrapWidth_ == 0 || indentSize_ < wrapWidth_);
|
||||
wrapWidth_ = wrap;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual int_type overflow(int_type ch) override;
|
||||
|
||||
private:
|
||||
std::ostream* basicStream_;
|
||||
std::streambuf* basicBuf_;
|
||||
|
||||
unsigned wrapWidth_;
|
||||
unsigned indentSize_;
|
||||
|
||||
bool atLineStart_;
|
||||
unsigned lineWidth_;
|
||||
};
|
||||
|
||||
|
||||
class HelpPrinter {
|
||||
private:
|
||||
static const unsigned USAGE_WIDTH = 30;
|
||||
static const unsigned PADDING_WIDTH = 2;
|
||||
static const unsigned DESCRIPTION_WIDTH = 50;
|
||||
|
||||
public:
|
||||
HelpPrinter& PrintUsage(const std::string& usage);
|
||||
HelpPrinter& PrintDescription(const std::string& description);
|
||||
|
||||
std::ostream& Stream() { return *out_; }
|
||||
|
||||
private:
|
||||
explicit HelpPrinter(std::ostream& out = std::cout) : out_(&out), sbuf_(*out_) {}
|
||||
|
||||
/// @brief Not copy-constructible.
|
||||
HelpPrinter(const HelpPrinter&);
|
||||
/// @brief Not copy-assignable.
|
||||
HelpPrinter& operator =(const HelpPrinter&);
|
||||
|
||||
friend class OptionParser;
|
||||
|
||||
std::ostream *out_;
|
||||
HelpStreambuf sbuf_;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// OptionBase. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class OptionBase {
|
||||
public:
|
||||
virtual ~OptionBase() {}
|
||||
|
||||
const std::string& name() const {
|
||||
return name_;
|
||||
}
|
||||
const bool& is_set() const {
|
||||
return is_set_;
|
||||
}
|
||||
|
||||
virtual bool IsValid() const {
|
||||
return 0 < name_.size();
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit OptionBase(const std::string& name,
|
||||
const std::string& help = "",
|
||||
std::ostream &error = std::cerr)
|
||||
: name_(name),
|
||||
help_(help),
|
||||
is_set_(false),
|
||||
error_(&error) {}
|
||||
|
||||
virtual void PrintHelp(HelpPrinter& printer) const = 0;
|
||||
virtual bool Accept(const std::string& name) const { return name_ == name; }
|
||||
|
||||
const std::string name_;
|
||||
const std::string help_;
|
||||
bool is_set_;
|
||||
|
||||
std::ostream &error() const { return *error_; }
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
OptionBase(const OptionBase &ob);
|
||||
/// @brief Not copy-assignable.
|
||||
OptionBase& operator=(const OptionBase &ob);
|
||||
|
||||
void Reset() {
|
||||
is_set_ = false;
|
||||
}
|
||||
|
||||
virtual bool ProcessTokens(std::list<std::string> &tokens) = 0;
|
||||
|
||||
friend class OptionParser;
|
||||
|
||||
mutable std::ostream *error_;
|
||||
};
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Option<T>. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
template<typename T>
|
||||
class Option final: public OptionBase {
|
||||
public:
|
||||
explicit Option(const std::string& name,
|
||||
const std::string& help = "",
|
||||
std::ostream& error = std::cerr):
|
||||
OptionBase(name, help, error) {}
|
||||
|
||||
~Option() {}
|
||||
|
||||
const std::list<T>& values() const {
|
||||
return values_;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void PrintHelp(HelpPrinter& printer) const override;
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
Option(const Option &o);
|
||||
/// @brief Not copy-assignable.
|
||||
Option& operator=(const Option &o);
|
||||
|
||||
bool ProcessTokens(std::list<std::string> &tokens);
|
||||
|
||||
std::list<T> values_;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
bool Option<T>::ProcessTokens(std::list<std::string> &tokens) {
|
||||
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
|
||||
if (2 > tokens.size()) {
|
||||
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
is_set_ = true;
|
||||
tokens.pop_front();
|
||||
|
||||
while (!tokens.empty()) {
|
||||
std::istringstream token_stream(tokens.front());
|
||||
if (!token_stream.good()) {
|
||||
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
T value;
|
||||
token_stream >> value;
|
||||
|
||||
values_.push_back(value);
|
||||
tokens.pop_front();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void Option<T>::PrintHelp(HelpPrinter& printer) const {
|
||||
printer.PrintUsage("-" + name_ + " [" + StringFactory::ToUpper(name_) + "s]")
|
||||
.PrintDescription(help_);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ValueOption<T>. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
template<typename T>
|
||||
class ValueOption final: public OptionBase {
|
||||
public:
|
||||
explicit ValueOption(const std::string& name,
|
||||
const std::string& help = "",
|
||||
std::ostream& error = std::cerr):
|
||||
OptionBase(name, help, error) {}
|
||||
|
||||
~ValueOption() {}
|
||||
|
||||
const T& value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
protected:
|
||||
void PrintHelp(HelpPrinter& printer) const override;
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
ValueOption(const ValueOption &o);
|
||||
/// @brief Not copy-assignable.
|
||||
ValueOption& operator=(const ValueOption &o);
|
||||
|
||||
bool ProcessTokens(std::list<std::string> &tokens) override;
|
||||
|
||||
T value_;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
bool ValueOption<T>::ProcessTokens(std::list<std::string> &tokens) {
|
||||
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
|
||||
if (2 != tokens.size()) {
|
||||
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
is_set_ = true;
|
||||
tokens.pop_front();
|
||||
|
||||
std::istringstream token_stream(tokens.front());
|
||||
if (!token_stream.good()) {
|
||||
error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
|
||||
return false;
|
||||
}
|
||||
token_stream >> value_;
|
||||
tokens.pop_front();
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void ValueOption<T>::PrintHelp(HelpPrinter& printer) const {
|
||||
printer.PrintUsage("-" + name_ + "=[VAL]")
|
||||
.PrintDescription(help_);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ChoiceOptioin. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
class ChoiceOption final: public OptionBase {
|
||||
public:
|
||||
ChoiceOption(const std::string& name,
|
||||
const std::vector<std::string>& choices,
|
||||
const std::string& help = "",
|
||||
std::ostream& error = std::cerr);
|
||||
|
||||
~ChoiceOption() {}
|
||||
|
||||
const std::string& value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
protected:
|
||||
void PrintHelp(HelpPrinter& printer) const override;
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
ChoiceOption(const ChoiceOption&);
|
||||
/// @brief Not copy-assignable.
|
||||
ChoiceOption& operator =(const ChoiceOption&);
|
||||
|
||||
bool ProcessTokens(std::list<std::string> &tokens) override;
|
||||
|
||||
std::unordered_set<std::string> choices_;
|
||||
std::string value_;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Option<void>. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class NoArgOption final: public OptionBase {
|
||||
public:
|
||||
explicit NoArgOption(const std::string& name,
|
||||
const std::string& help = "",
|
||||
std::ostream& error = std::cerr):
|
||||
OptionBase(name, help, error) {}
|
||||
|
||||
~NoArgOption() {}
|
||||
|
||||
protected:
|
||||
void PrintHelp(HelpPrinter& printer) const override {
|
||||
printer.PrintUsage("-" + name_).PrintDescription(help_);
|
||||
}
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
NoArgOption(const NoArgOption &o);
|
||||
/// @brief Not copy-assignable.
|
||||
NoArgOption& operator=(const NoArgOption &o);
|
||||
|
||||
bool ProcessTokens(std::list<std::string> &tokens) override {
|
||||
assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
|
||||
if (1 == tokens.size()) {
|
||||
tokens.pop_front();
|
||||
is_set_ = true;
|
||||
return true;
|
||||
} else if (2 == tokens.size()) {
|
||||
tokens.pop_front();
|
||||
if (tokens.front() == "1") {
|
||||
is_set_ = true;
|
||||
tokens.pop_front();
|
||||
return true;
|
||||
} else if (tokens.front() == "0") {
|
||||
is_set_ = false;
|
||||
tokens.pop_front();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
error() << "error: invalid option: '" << name_ << "'" << std::endl;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PrefixOption. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
class PrefixOption final: public OptionBase {
|
||||
public:
|
||||
PrefixOption(const std::string& prefix,
|
||||
const std::string& help = "",
|
||||
std::ostream& error = std::cerr)
|
||||
: OptionBase(prefix, help, error) {}
|
||||
|
||||
~PrefixOption() {}
|
||||
|
||||
const std::vector<std::string>& values() const {
|
||||
return values_;
|
||||
}
|
||||
|
||||
bool IsValid() const override;
|
||||
|
||||
protected:
|
||||
void PrintHelp(HelpPrinter& printer) const override;
|
||||
bool Accept(const std::string& token) const override;
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
PrefixOption(const PrefixOption&);
|
||||
/// @brief Not copy-assignable.
|
||||
PrefixOption& operator =(const PrefixOption&);
|
||||
|
||||
bool ProcessTokens(std::list<std::string> &tokens);
|
||||
|
||||
std::string::size_type FindPrefix(const std::string& token) const;
|
||||
|
||||
std::vector<std::string> values_;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// OptionParser. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class OptionParser final {
|
||||
public:
|
||||
explicit OptionParser(bool collectUnknown = false, std::ostream& error = std::cerr)
|
||||
: collectUnknown_(collectUnknown),
|
||||
error_(&error) {}
|
||||
|
||||
~OptionParser() {}
|
||||
|
||||
bool AddOption(OptionBase *option);
|
||||
|
||||
bool ParseOptions(const char *options);
|
||||
|
||||
const std::string& Unknown() const;
|
||||
void CollectUnknown(bool b) { collectUnknown_ = b; }
|
||||
|
||||
void PrintHelp(std::ostream& out, const std::string& addition = "") const;
|
||||
|
||||
void Reset();
|
||||
|
||||
private:
|
||||
/// @brief Not copy-constructible.
|
||||
OptionParser(const OptionParser &op);
|
||||
/// @brief Not copy-assignable.
|
||||
OptionParser& operator=(const OptionParser &op);
|
||||
|
||||
std::ostream& error() { return *error_; }
|
||||
|
||||
std::vector<OptionBase*>::iterator FindOption(const std::string& name);
|
||||
|
||||
std::vector<OptionBase*> options_;
|
||||
|
||||
std::string unknownOptions_;
|
||||
bool collectUnknown_;
|
||||
|
||||
std::ostream *error_;
|
||||
};
|
||||
|
||||
} // namespace options
|
||||
} // namespace amd
|
||||
|
||||
#endif // AMD_OPTIONS_HPP
|
||||
@@ -0,0 +1,263 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
|
||||
#define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// Gets offset of specified member in specified type.
|
||||
#ifndef offsetof
|
||||
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
|
||||
#endif // offsetof
|
||||
|
||||
// Creates enumeration entries used for packing bits into integers. Enumeration
|
||||
// entries include bit shift amount, bit width, and bit mask.
|
||||
#ifndef AMDHSA_BITS_ENUM_ENTRY
|
||||
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
|
||||
NAME ## _SHIFT = (SHIFT), \
|
||||
NAME ## _WIDTH = (WIDTH), \
|
||||
NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
|
||||
#endif // AMDHSA_BITS_ENUM_ENTRY
|
||||
|
||||
// Gets bits for specified bit mask from specified source.
|
||||
#ifndef AMDHSA_BITS_GET
|
||||
#define AMDHSA_BITS_GET(SRC, MSK) ((SRC & MSK) >> MSK ## _SHIFT)
|
||||
#endif // AMDHSA_BITS_GET
|
||||
|
||||
// Sets bits for specified bit mask in specified destination.
|
||||
#ifndef AMDHSA_BITS_SET
|
||||
#define AMDHSA_BITS_SET(DST, MSK, VAL) \
|
||||
DST &= ~MSK; \
|
||||
DST |= ((VAL << MSK ## _SHIFT) & MSK)
|
||||
#endif // AMDHSA_BITS_SET
|
||||
|
||||
namespace llvm {
|
||||
namespace amdhsa {
|
||||
|
||||
// Floating point rounding modes. Must match hardware definition.
|
||||
enum : uint8_t {
|
||||
FLOAT_ROUND_MODE_NEAR_EVEN = 0,
|
||||
FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
|
||||
FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
|
||||
FLOAT_ROUND_MODE_ZERO = 3,
|
||||
};
|
||||
|
||||
// Floating point denorm modes. Must match hardware definition.
|
||||
enum : uint8_t {
|
||||
FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
|
||||
FLOAT_DENORM_MODE_FLUSH_DST = 1,
|
||||
FLOAT_DENORM_MODE_FLUSH_SRC = 2,
|
||||
FLOAT_DENORM_MODE_FLUSH_NONE = 3,
|
||||
};
|
||||
|
||||
// System VGPR workitem IDs. Must match hardware definition.
|
||||
enum : uint8_t {
|
||||
SYSTEM_VGPR_WORKITEM_ID_X = 0,
|
||||
SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
|
||||
SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
|
||||
SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
|
||||
};
|
||||
|
||||
// Compute program resource register 1. Must match hardware definition.
|
||||
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
|
||||
enum : int32_t {
|
||||
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
|
||||
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
|
||||
COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
|
||||
COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
|
||||
COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
|
||||
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
|
||||
COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
|
||||
COMPUTE_PGM_RSRC1(PRIV, 20, 1),
|
||||
COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
|
||||
COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
|
||||
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
|
||||
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
|
||||
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
|
||||
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
|
||||
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
|
||||
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
|
||||
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
|
||||
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
|
||||
};
|
||||
#undef COMPUTE_PGM_RSRC1
|
||||
|
||||
// Compute program resource register 2. Must match hardware definition.
|
||||
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
|
||||
enum : int32_t {
|
||||
COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
|
||||
COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
|
||||
COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
|
||||
COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
|
||||
COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
|
||||
};
|
||||
#undef COMPUTE_PGM_RSRC2
|
||||
|
||||
// Compute program resource register 3 for GFX90A+. Must match hardware
|
||||
// definition.
|
||||
#define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
|
||||
enum : int32_t {
|
||||
COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
|
||||
COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
|
||||
COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
|
||||
COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
|
||||
};
|
||||
#undef COMPUTE_PGM_RSRC3_GFX90A
|
||||
|
||||
// Compute program resource register 3 for GFX10+. Must match hardware
|
||||
// definition.
|
||||
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
|
||||
enum : int32_t {
|
||||
COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
|
||||
COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),
|
||||
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),
|
||||
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),
|
||||
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
|
||||
COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),
|
||||
};
|
||||
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
|
||||
|
||||
// Kernel code properties. Must be kept backwards compatible.
|
||||
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
|
||||
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
|
||||
enum : int32_t {
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
|
||||
KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
|
||||
KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
|
||||
KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
|
||||
KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
|
||||
};
|
||||
#undef KERNEL_CODE_PROPERTY
|
||||
|
||||
// Kernel descriptor. Must be kept backwards compatible.
|
||||
struct kernel_descriptor_t {
|
||||
uint32_t group_segment_fixed_size;
|
||||
uint32_t private_segment_fixed_size;
|
||||
uint32_t kernarg_size;
|
||||
uint8_t reserved0[4];
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
uint8_t reserved1[20];
|
||||
uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
|
||||
uint32_t compute_pgm_rsrc1;
|
||||
uint32_t compute_pgm_rsrc2;
|
||||
uint16_t kernel_code_properties;
|
||||
uint8_t reserved2[6];
|
||||
};
|
||||
|
||||
enum : uint32_t {
|
||||
GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
|
||||
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
|
||||
KERNARG_SIZE_OFFSET = 8,
|
||||
RESERVED0_OFFSET = 12,
|
||||
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
|
||||
RESERVED1_OFFSET = 24,
|
||||
COMPUTE_PGM_RSRC3_OFFSET = 44,
|
||||
COMPUTE_PGM_RSRC1_OFFSET = 48,
|
||||
COMPUTE_PGM_RSRC2_OFFSET = 52,
|
||||
KERNEL_CODE_PROPERTIES_OFFSET = 56,
|
||||
RESERVED2_OFFSET = 58,
|
||||
};
|
||||
|
||||
static_assert(
|
||||
sizeof(kernel_descriptor_t) == 64,
|
||||
"invalid size for kernel_descriptor_t");
|
||||
static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
|
||||
GROUP_SEGMENT_FIXED_SIZE_OFFSET,
|
||||
"invalid offset for group_segment_fixed_size");
|
||||
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
|
||||
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
|
||||
"invalid offset for private_segment_fixed_size");
|
||||
static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
|
||||
KERNARG_SIZE_OFFSET,
|
||||
"invalid offset for kernarg_size");
|
||||
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
|
||||
"invalid offset for reserved0");
|
||||
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
|
||||
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
|
||||
"invalid offset for kernel_code_entry_byte_offset");
|
||||
static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
|
||||
"invalid offset for reserved1");
|
||||
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
|
||||
COMPUTE_PGM_RSRC3_OFFSET,
|
||||
"invalid offset for compute_pgm_rsrc3");
|
||||
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
|
||||
COMPUTE_PGM_RSRC1_OFFSET,
|
||||
"invalid offset for compute_pgm_rsrc1");
|
||||
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
|
||||
COMPUTE_PGM_RSRC2_OFFSET,
|
||||
"invalid offset for compute_pgm_rsrc2");
|
||||
static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
|
||||
KERNEL_CODE_PROPERTIES_OFFSET,
|
||||
"invalid offset for kernel_code_properties");
|
||||
static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
|
||||
"invalid offset for reserved2");
|
||||
|
||||
} // end namespace amdhsa
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
|
||||
@@ -0,0 +1,31 @@
|
||||
#-----------------------------------------------------------------------------
|
||||
# Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# loader library
|
||||
#
|
||||
# This file is expected to be included from top-level CMakeLists.txt.
|
||||
#
|
||||
# Dependencies:
|
||||
# - Compiler definitions
|
||||
# - amdhsacode library
|
||||
#
|
||||
# Defines:
|
||||
# - amdhsaloader library and target include directories
|
||||
|
||||
file(GLOB sources *.cpp *.hpp)
|
||||
add_library(amdhsaloader STATIC ${sources})
|
||||
set_target_properties(amdhsaloader PROPERTIES
|
||||
MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
|
||||
POSITION_INDEPENDENT_CODE ON
|
||||
)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "^(GNU|(Apple)?Clang)$")
|
||||
target_compile_options(amdhsaloader PRIVATE
|
||||
-Werror
|
||||
-Wno-inconsistent-missing-override
|
||||
)
|
||||
endif()
|
||||
|
||||
target_include_directories(amdhsaloader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(amdhsaloader amdhsacode)
|
||||
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
@@ -0,0 +1,686 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
|
||||
#define HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <libelf.h>
|
||||
#include <limits.h>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_image.h"
|
||||
#include "amd_hsa_loader.hpp"
|
||||
#include "amd_hsa_code.hpp"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#include "amd_hsa_locks.hpp"
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#if _WIN64
|
||||
#define __WORDSIZE 64
|
||||
#else
|
||||
#define __WORDSIZE 32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define __ELF_NATIVE_CLASS __WORDSIZE
|
||||
|
||||
/* We use this macro to refer to ELF types independent of the native wordsize.
|
||||
`ElfW(TYPE)' is used in place of `Elf32_TYPE' or `Elf64_TYPE'. */
|
||||
#define ElfW(type) _ElfW (Elf, __ELF_NATIVE_CLASS, type)
|
||||
#define _ElfW(e,w,t) _ElfW_1 (e, w, _##t)
|
||||
#define _ElfW_1(e,w,t) e##w##t
|
||||
|
||||
/* Structure describing a loaded shared object. The `l_next' and `l_prev'
|
||||
members form a chain of all the shared objects loaded at startup.
|
||||
|
||||
These data structures exist in space used by the run-time dynamic linker;
|
||||
modifying them may have disastrous results. */
|
||||
|
||||
struct link_map
|
||||
{
|
||||
/* These first few members are part of the protocol with the debugger.
|
||||
This is the same format used in SVR4. */
|
||||
|
||||
ElfW(Addr) l_addr; /* Difference between the address in the ELF
|
||||
file and the addresses in memory. */
|
||||
char *l_name; /* Absolute file name object was found in. */
|
||||
ElfW(Dyn) *l_ld; /* Dynamic section of the shared object. */
|
||||
struct link_map *l_next, *l_prev; /* Chain of loaded objects. */
|
||||
};
|
||||
|
||||
/* The legacy rendezvous structure used by the run-time dynamic linker to
|
||||
communicate details of shared object loading to the debugger. */
|
||||
|
||||
struct r_debug
|
||||
{
|
||||
/* Version number for this protocol. It should be greater than 0. */
|
||||
int r_version;
|
||||
|
||||
struct link_map *r_map; /* Head of the chain of loaded objects. */
|
||||
|
||||
/* This is the address of a function internal to the run-time linker,
|
||||
that will always be called when the linker begins to map in a
|
||||
library or unmap it, and again when the mapping change is complete.
|
||||
The debugger can set a breakpoint at this address if it wants to
|
||||
notice shared object mapping changes. */
|
||||
ElfW(Addr) r_brk;
|
||||
enum RT
|
||||
{
|
||||
/* This state value describes the mapping change taking place when
|
||||
the `r_brk' address is called. */
|
||||
RT_CONSISTENT, /* Mapping change is complete. */
|
||||
RT_ADD, /* Beginning to add a new object. */
|
||||
RT_DELETE /* Beginning to remove an object mapping. */
|
||||
} r_state;
|
||||
|
||||
ElfW(Addr) r_ldbase; /* Base address the linker is loaded at. */
|
||||
};
|
||||
|
||||
/* This is the symbol of that structure provided by the dynamic linker. */
|
||||
extern struct r_debug _r_debug;
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
|
||||
class MemoryAddress;
|
||||
class SymbolImpl;
|
||||
class KernelSymbol;
|
||||
class VariableSymbol;
|
||||
class ExecutableImpl;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SymbolImpl. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
typedef uint32_t symbol_attribute32_t;
|
||||
|
||||
class SymbolImpl: public Symbol {
|
||||
public:
|
||||
virtual ~SymbolImpl() {}
|
||||
|
||||
bool IsKernel() const {
|
||||
return HSA_SYMBOL_KIND_KERNEL == kind;
|
||||
}
|
||||
bool IsVariable() const {
|
||||
return HSA_SYMBOL_KIND_VARIABLE == kind;
|
||||
}
|
||||
|
||||
bool is_loaded;
|
||||
hsa_symbol_kind_t kind;
|
||||
std::string module_name;
|
||||
std::string symbol_name;
|
||||
hsa_symbol_linkage_t linkage;
|
||||
bool is_definition;
|
||||
uint64_t address;
|
||||
hsa_agent_t agent;
|
||||
|
||||
hsa_agent_t GetAgent() override {
|
||||
return agent;
|
||||
}
|
||||
|
||||
protected:
|
||||
SymbolImpl(const bool &_is_loaded,
|
||||
const hsa_symbol_kind_t &_kind,
|
||||
const std::string &_module_name,
|
||||
const std::string &_symbol_name,
|
||||
const hsa_symbol_linkage_t &_linkage,
|
||||
const bool &_is_definition,
|
||||
const uint64_t &_address = 0)
|
||||
: is_loaded(_is_loaded)
|
||||
, kind(_kind)
|
||||
, module_name(_module_name)
|
||||
, symbol_name(_symbol_name)
|
||||
, linkage(_linkage)
|
||||
, is_definition(_is_definition)
|
||||
, address(_address) {}
|
||||
|
||||
virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void* value) override;
|
||||
|
||||
private:
|
||||
SymbolImpl(const SymbolImpl &s);
|
||||
SymbolImpl& operator=(const SymbolImpl &s);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// KernelSymbol. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class KernelSymbol final: public SymbolImpl {
|
||||
public:
|
||||
KernelSymbol(const bool &_is_loaded,
|
||||
const std::string &_module_name,
|
||||
const std::string &_symbol_name,
|
||||
const hsa_symbol_linkage_t &_linkage,
|
||||
const bool &_is_definition,
|
||||
const uint32_t &_kernarg_segment_size,
|
||||
const uint32_t &_kernarg_segment_alignment,
|
||||
const uint32_t &_group_segment_size,
|
||||
const uint32_t &_private_segment_size,
|
||||
const bool &_is_dynamic_callstack,
|
||||
const uint32_t &_size,
|
||||
const uint32_t &_alignment,
|
||||
const uint64_t &_address = 0)
|
||||
: SymbolImpl(_is_loaded,
|
||||
HSA_SYMBOL_KIND_KERNEL,
|
||||
_module_name,
|
||||
_symbol_name,
|
||||
_linkage,
|
||||
_is_definition,
|
||||
_address)
|
||||
, full_name(_module_name.empty() ? _symbol_name : _module_name + "::" + _symbol_name)
|
||||
, kernarg_segment_size(_kernarg_segment_size)
|
||||
, kernarg_segment_alignment(_kernarg_segment_alignment)
|
||||
, group_segment_size(_group_segment_size)
|
||||
, private_segment_size(_private_segment_size)
|
||||
, is_dynamic_callstack(_is_dynamic_callstack)
|
||||
, size(_size)
|
||||
, alignment(_alignment) {}
|
||||
|
||||
~KernelSymbol() {}
|
||||
|
||||
bool GetInfo(hsa_symbol_info32_t symbol_info, void *value);
|
||||
|
||||
std::string full_name;
|
||||
uint32_t kernarg_segment_size;
|
||||
uint32_t kernarg_segment_alignment;
|
||||
uint32_t group_segment_size;
|
||||
uint32_t private_segment_size;
|
||||
bool is_dynamic_callstack;
|
||||
uint32_t size;
|
||||
uint32_t alignment;
|
||||
amd_runtime_loader_debug_info_t debug_info;
|
||||
|
||||
private:
|
||||
KernelSymbol(const KernelSymbol &ks);
|
||||
KernelSymbol& operator=(const KernelSymbol &ks);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VariableSymbol. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class VariableSymbol final: public SymbolImpl {
|
||||
public:
|
||||
VariableSymbol(const bool &_is_loaded,
|
||||
const std::string &_module_name,
|
||||
const std::string &_symbol_name,
|
||||
const hsa_symbol_linkage_t &_linkage,
|
||||
const bool &_is_definition,
|
||||
const hsa_variable_allocation_t &_allocation,
|
||||
const hsa_variable_segment_t &_segment,
|
||||
const uint32_t &_size,
|
||||
const uint32_t &_alignment,
|
||||
const bool &_is_constant,
|
||||
const bool &_is_external = false,
|
||||
const uint64_t &_address = 0)
|
||||
: SymbolImpl(_is_loaded,
|
||||
HSA_SYMBOL_KIND_VARIABLE,
|
||||
_module_name,
|
||||
_symbol_name,
|
||||
_linkage,
|
||||
_is_definition,
|
||||
_address)
|
||||
, allocation(_allocation)
|
||||
, segment(_segment)
|
||||
, size(_size)
|
||||
, alignment(_alignment)
|
||||
, is_constant(_is_constant)
|
||||
, is_external(_is_external) {}
|
||||
|
||||
~VariableSymbol() {}
|
||||
|
||||
bool GetInfo(hsa_symbol_info32_t symbol_info, void *value);
|
||||
|
||||
hsa_variable_allocation_t allocation;
|
||||
hsa_variable_segment_t segment;
|
||||
uint32_t size;
|
||||
uint32_t alignment;
|
||||
bool is_constant;
|
||||
bool is_external;
|
||||
|
||||
private:
|
||||
VariableSymbol(const VariableSymbol &vs);
|
||||
VariableSymbol& operator=(const VariableSymbol &vs);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Logger. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Logger final {
|
||||
public:
|
||||
Logger(std::ostream &Stream = std::cerr) : OutStream(Stream) {}
|
||||
|
||||
template <typename T>
|
||||
Logger &operator<<(const T &Data) {
|
||||
if (!IsLoggingEnabled())
|
||||
return *this;
|
||||
OutStream << Data;
|
||||
std::stringstream ss;
|
||||
ss << Data;
|
||||
printf("=================== %s\n", ss.str().c_str());
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
Logger(const Logger &L);
|
||||
Logger& operator=(const Logger &L);
|
||||
|
||||
bool IsLoggingEnabled() const {
|
||||
const char *enable_logging = getenv("LOADER_ENABLE_LOGGING");
|
||||
if (!enable_logging)
|
||||
return false;
|
||||
if (std::string(enable_logging) == "0")
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream &OutStream;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Executable. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class ExecutableImpl;
|
||||
class LoadedCodeObjectImpl;
|
||||
class Segment;
|
||||
|
||||
class ExecutableObject {
|
||||
protected:
|
||||
ExecutableImpl *owner;
|
||||
hsa_agent_t agent;
|
||||
|
||||
public:
|
||||
ExecutableObject(ExecutableImpl *owner_, hsa_agent_t agent_)
|
||||
: owner(owner_), agent(agent_) { }
|
||||
|
||||
ExecutableImpl* Owner() const { return owner; }
|
||||
hsa_agent_t Agent() const { return agent; }
|
||||
virtual void Print(std::ostream& out) = 0;
|
||||
virtual void Destroy() = 0;
|
||||
|
||||
virtual ~ExecutableObject() { }
|
||||
};
|
||||
|
||||
class LoadedCodeObjectImpl : public LoadedCodeObject, public ExecutableObject {
|
||||
friend class AmdHsaCodeLoader;
|
||||
private:
|
||||
LoadedCodeObjectImpl(const LoadedCodeObjectImpl&);
|
||||
LoadedCodeObjectImpl& operator=(const LoadedCodeObjectImpl&);
|
||||
|
||||
const void *elf_data;
|
||||
const size_t elf_size;
|
||||
std::vector<Segment*> loaded_segments;
|
||||
|
||||
public:
|
||||
LoadedCodeObjectImpl(ExecutableImpl *owner_, hsa_agent_t agent_, const void *elf_data_, size_t elf_size_)
|
||||
: ExecutableObject(owner_, agent_), elf_data(elf_data_), elf_size(elf_size_) {
|
||||
memset(&r_debug_info, 0, sizeof(r_debug_info));
|
||||
}
|
||||
|
||||
const void* ElfData() const { return elf_data; }
|
||||
size_t ElfSize() const { return elf_size; }
|
||||
std::vector<Segment*>& LoadedSegments() { return loaded_segments; }
|
||||
|
||||
bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) override;
|
||||
|
||||
hsa_status_t IterateLoadedSegments(
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
void *data),
|
||||
void *data) override;
|
||||
|
||||
void Print(std::ostream& out) override;
|
||||
|
||||
void Destroy() override {}
|
||||
|
||||
hsa_agent_t getAgent() const override;
|
||||
hsa_executable_t getExecutable() const override;
|
||||
uint64_t getElfData() const override;
|
||||
uint64_t getElfSize() const override;
|
||||
uint64_t getStorageOffset() const override;
|
||||
uint64_t getLoadBase() const override;
|
||||
uint64_t getLoadSize() const override;
|
||||
int64_t getDelta() const override;
|
||||
std::string getUri() const override;
|
||||
|
||||
link_map r_debug_info;
|
||||
};
|
||||
|
||||
class Segment : public LoadedSegment, public ExecutableObject {
|
||||
private:
|
||||
amdgpu_hsa_elf_segment_t segment;
|
||||
void *ptr;
|
||||
size_t size;
|
||||
uint64_t vaddr;
|
||||
bool frozen;
|
||||
size_t storage_offset;
|
||||
|
||||
public:
|
||||
Segment(ExecutableImpl *owner_, hsa_agent_t agent_, amdgpu_hsa_elf_segment_t segment_, void* ptr_, size_t size_, uint64_t vaddr_, size_t storage_offset_)
|
||||
: ExecutableObject(owner_, agent_), segment(segment_),
|
||||
ptr(ptr_), size(size_), vaddr(vaddr_), frozen(false), storage_offset(storage_offset_) { }
|
||||
|
||||
amdgpu_hsa_elf_segment_t ElfSegment() const { return segment; }
|
||||
void* Ptr() const { return ptr; }
|
||||
size_t Size() const { return size; }
|
||||
uint64_t VAddr() const { return vaddr; }
|
||||
size_t StorageOffset() const { return storage_offset; }
|
||||
|
||||
bool GetInfo(amd_loaded_segment_info_t attribute, void *value) override;
|
||||
|
||||
uint64_t Offset(uint64_t addr); // Offset within segment. Used together with ptr with loader context functions.
|
||||
|
||||
void* Address(uint64_t addr); // Address in segment. Used for relocations and valid on agent.
|
||||
|
||||
bool Freeze();
|
||||
|
||||
bool IsAddressInSegment(uint64_t addr);
|
||||
void Copy(uint64_t addr, const void* src, size_t size);
|
||||
void Print(std::ostream& out) override;
|
||||
void Destroy() override;
|
||||
};
|
||||
|
||||
class Sampler : public ExecutableObject {
|
||||
private:
|
||||
hsa_ext_sampler_t samp;
|
||||
|
||||
public:
|
||||
Sampler(ExecutableImpl *owner, hsa_agent_t agent, hsa_ext_sampler_t samp_)
|
||||
: ExecutableObject(owner, agent), samp(samp_) { }
|
||||
void Print(std::ostream& out) override;
|
||||
void Destroy() override;
|
||||
};
|
||||
|
||||
class Image : public ExecutableObject {
|
||||
private:
|
||||
hsa_ext_image_t img;
|
||||
|
||||
public:
|
||||
Image(ExecutableImpl *owner, hsa_agent_t agent, hsa_ext_image_t img_)
|
||||
: ExecutableObject(owner, agent), img(img_) { }
|
||||
void Print(std::ostream& out) override;
|
||||
void Destroy() override;
|
||||
};
|
||||
|
||||
typedef std::string ProgramSymbol;
|
||||
typedef std::unordered_map<ProgramSymbol, SymbolImpl*> ProgramSymbolMap;
|
||||
|
||||
typedef std::pair<std::string, hsa_agent_t> AgentSymbol;
|
||||
struct ASC {
|
||||
bool operator()(const AgentSymbol &las, const AgentSymbol &ras) const {
|
||||
return las.first == ras.first && las.second.handle == ras.second.handle;
|
||||
}
|
||||
};
|
||||
struct ASH {
|
||||
size_t operator()(const AgentSymbol &as) const {
|
||||
size_t h = std::hash<std::string>()(as.first);
|
||||
size_t i = std::hash<uint64_t>()(as.second.handle);
|
||||
return h ^ (i << 1);
|
||||
}
|
||||
};
|
||||
typedef std::unordered_map<AgentSymbol, SymbolImpl*, ASH, ASC> AgentSymbolMap;
|
||||
|
||||
class ExecutableImpl final: public Executable {
|
||||
friend class AmdHsaCodeLoader;
|
||||
public:
|
||||
const hsa_profile_t& profile() const {
|
||||
return profile_;
|
||||
}
|
||||
const hsa_executable_state_t& state() const {
|
||||
return state_;
|
||||
}
|
||||
|
||||
ExecutableImpl(
|
||||
const hsa_profile_t &_profile,
|
||||
Context *context,
|
||||
size_t id,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode);
|
||||
|
||||
~ExecutableImpl();
|
||||
|
||||
hsa_status_t GetInfo(hsa_executable_info_t executable_info, void *value) override;
|
||||
|
||||
hsa_status_t DefineProgramExternalVariable(
|
||||
const char *name, void *address) override;
|
||||
|
||||
hsa_status_t DefineAgentExternalVariable(
|
||||
const char *name,
|
||||
hsa_agent_t agent,
|
||||
hsa_variable_segment_t segment,
|
||||
void *address) override;
|
||||
|
||||
hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
hsa_loaded_code_object_t *loaded_code_object) override;
|
||||
|
||||
hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
size_t code_object_size,
|
||||
const char *options,
|
||||
hsa_loaded_code_object_t *loaded_code_object) override;
|
||||
|
||||
hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
const std::string &uri,
|
||||
hsa_loaded_code_object_t *loaded_code_object) override;
|
||||
|
||||
hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
size_t code_object_size,
|
||||
const char *options,
|
||||
const std::string &uri,
|
||||
hsa_loaded_code_object_t *loaded_code_object) override;
|
||||
|
||||
hsa_status_t Freeze(const char *options) override;
|
||||
|
||||
hsa_status_t Validate(uint32_t *result) override {
|
||||
amd::hsa::common::ReaderLockGuard<amd::hsa::common::ReaderWriterLock> reader_lock(rw_lock_);
|
||||
assert(result);
|
||||
*result = 0;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/// @note needed for hsa v1.0.
|
||||
/// @todo remove during loader refactoring.
|
||||
bool IsProgramSymbol(const char *symbol_name) override;
|
||||
|
||||
Symbol* GetSymbol(
|
||||
const char *symbol_name,
|
||||
const hsa_agent_t *agent) override;
|
||||
|
||||
hsa_status_t IterateSymbols(
|
||||
iterate_symbols_f callback, void *data) override;
|
||||
|
||||
/// @since hsa v1.1.
|
||||
hsa_status_t IterateAgentSymbols(
|
||||
hsa_agent_t agent,
|
||||
hsa_status_t (*callback)(hsa_executable_t exec,
|
||||
hsa_agent_t agent,
|
||||
hsa_executable_symbol_t symbol,
|
||||
void *data),
|
||||
void *data) override;
|
||||
|
||||
/// @since hsa v1.1.
|
||||
hsa_status_t IterateProgramSymbols(
|
||||
hsa_status_t (*callback)(hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol,
|
||||
void *data),
|
||||
void *data) override;
|
||||
|
||||
hsa_status_t IterateLoadedCodeObjects(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data) override;
|
||||
|
||||
size_t GetNumSegmentDescriptors() override;
|
||||
|
||||
size_t QuerySegmentDescriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t total_num_segment_descriptors,
|
||||
size_t first_empty_segment_descriptor) override;
|
||||
|
||||
uint64_t FindHostAddress(uint64_t device_address) override;
|
||||
|
||||
void EnableReadOnlyMode();
|
||||
void DisableReadOnlyMode();
|
||||
|
||||
void Print(std::ostream& out) override;
|
||||
bool PrintToFile(const std::string& filename) override;
|
||||
|
||||
Context* context() { return context_; }
|
||||
size_t id() { return id_; }
|
||||
|
||||
private:
|
||||
ExecutableImpl(const ExecutableImpl &e);
|
||||
ExecutableImpl& operator=(const ExecutableImpl &e);
|
||||
|
||||
std::unique_ptr<amd::hsa::code::AmdHsaCode> code;
|
||||
|
||||
Symbol* GetSymbolInternal(
|
||||
const char *symbol_name,
|
||||
const hsa_agent_t *agent);
|
||||
|
||||
hsa_status_t LoadSegments(hsa_agent_t agent, const code::AmdHsaCode *c,
|
||||
uint32_t majorVersion);
|
||||
hsa_status_t LoadSegmentsV1(hsa_agent_t agent, const code::AmdHsaCode *c);
|
||||
hsa_status_t LoadSegmentsV2(hsa_agent_t agent, const code::AmdHsaCode *c);
|
||||
hsa_status_t LoadSegmentV1(hsa_agent_t agent, const code::Segment *s);
|
||||
hsa_status_t LoadSegmentV2(const code::Segment *data_segment,
|
||||
loader::Segment *load_segment);
|
||||
|
||||
hsa_status_t LoadSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
|
||||
hsa_status_t LoadDefinitionSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
|
||||
hsa_status_t LoadDeclarationSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
|
||||
|
||||
hsa_status_t ApplyRelocations(hsa_agent_t agent, amd::hsa::code::AmdHsaCode *c);
|
||||
hsa_status_t ApplyStaticRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec);
|
||||
hsa_status_t ApplyStaticRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel);
|
||||
hsa_status_t ApplyDynamicRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec);
|
||||
hsa_status_t ApplyDynamicRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel);
|
||||
|
||||
Segment* VirtualAddressSegment(uint64_t vaddr);
|
||||
uint64_t SymbolAddress(hsa_agent_t agent, amd::hsa::code::Symbol* sym);
|
||||
uint64_t SymbolAddress(hsa_agent_t agent, amd::elf::Symbol* sym);
|
||||
Segment* SymbolSegment(hsa_agent_t agent, amd::hsa::code::Symbol* sym);
|
||||
Segment* SectionSegment(hsa_agent_t agent, amd::hsa::code::Section* sec);
|
||||
|
||||
amd::hsa::common::ReaderWriterLock rw_lock_;
|
||||
hsa_profile_t profile_;
|
||||
Context *context_;
|
||||
Logger logger_;
|
||||
const size_t id_;
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode_;
|
||||
hsa_executable_state_t state_;
|
||||
|
||||
ProgramSymbolMap program_symbols_;
|
||||
AgentSymbolMap agent_symbols_;
|
||||
std::vector<ExecutableObject*> objects;
|
||||
Segment *program_allocation_segment;
|
||||
std::vector<LoadedCodeObjectImpl*> loaded_code_objects;
|
||||
};
|
||||
|
||||
class AmdHsaCodeLoader : public Loader {
|
||||
private:
|
||||
Context* context;
|
||||
std::vector<Executable*> executables;
|
||||
amd::hsa::common::ReaderWriterLock rw_lock_;
|
||||
|
||||
public:
|
||||
AmdHsaCodeLoader(Context* context_)
|
||||
: context(context_) { assert(context); }
|
||||
|
||||
Context* GetContext() const override { return context; }
|
||||
|
||||
Executable* CreateExecutable(
|
||||
hsa_profile_t profile,
|
||||
const char *options,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) override;
|
||||
|
||||
hsa_status_t FreezeExecutable(Executable *executable, const char *options) override;
|
||||
void DestroyExecutable(Executable *executable) override;
|
||||
|
||||
hsa_status_t IterateExecutables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data) override;
|
||||
|
||||
hsa_status_t QuerySegmentDescriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors) override;
|
||||
|
||||
hsa_executable_t FindExecutable(uint64_t device_address) override;
|
||||
|
||||
uint64_t FindHostAddress(uint64_t device_address) override;
|
||||
|
||||
void PrintHelp(std::ostream& out) override;
|
||||
|
||||
void EnableReadOnlyMode();
|
||||
void DisableReadOnlyMode();
|
||||
};
|
||||
|
||||
} // namespace loader
|
||||
} // namespace hsa
|
||||
} // namespace amd
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
|
||||
@@ -0,0 +1,307 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include "loaders.hpp"
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
|
||||
// Helper function that allocates an aligned memory.
|
||||
static inline void*
|
||||
alignedMalloc(size_t size, size_t alignment)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return ::_aligned_malloc(size, alignment);
|
||||
#else
|
||||
void * ptr = NULL;
|
||||
alignment = (std::max)(alignment, sizeof(void*));
|
||||
if (0 == ::posix_memalign(&ptr, alignment, size)) {
|
||||
return ptr;
|
||||
}
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Helper function that frees an aligned memory.
|
||||
static inline void
|
||||
alignedFree(void *ptr)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
::_aligned_free(ptr);
|
||||
#else
|
||||
free(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
OfflineLoaderContext::OfflineLoaderContext()
|
||||
: out(std::cout)
|
||||
{
|
||||
invalid.handle = 0;
|
||||
gfx700.handle = 700;
|
||||
gfx701.handle = 701;
|
||||
gfx800.handle = 800;
|
||||
gfx801.handle = 801;
|
||||
gfx802.handle = 802;
|
||||
gfx803.handle = 803;
|
||||
gfx804.handle = 804;
|
||||
gfx810.handle = 810;
|
||||
gfx900.handle = 900;
|
||||
gfx901.handle = 901;
|
||||
gfx902.handle = 902;
|
||||
gfx903.handle = 903;
|
||||
gfx904.handle = 904;
|
||||
gfx905.handle = 905;
|
||||
gfx906.handle = 906;
|
||||
gfx907.handle = 907;
|
||||
gfx1000.handle = 1000;
|
||||
gfx1001.handle = 1001;
|
||||
gfx1010.handle = 1010;
|
||||
gfx1011.handle = 1011;
|
||||
gfx1012.handle = 1012;
|
||||
gfx1030.handle = 1030;
|
||||
gfx4000.handle = 4000;
|
||||
#if defined(GFX11_BUILD)
|
||||
gfx1100.handle = 1100;
|
||||
gfx1101.handle = 1101;
|
||||
gfx1102.handle = 1102;
|
||||
gfx1103.handle = 1103;
|
||||
gfx1150.handle = 1150;
|
||||
gfx1151.handle = 1151;
|
||||
#endif // GFX11_BUILD
|
||||
}
|
||||
|
||||
hsa_isa_t OfflineLoaderContext::IsaFromName(const char *name)
|
||||
{
|
||||
std::string sname(name);
|
||||
if (sname == "AMD:AMDGPU:7:0:0") {
|
||||
return gfx700;
|
||||
} else if (sname == "AMD:AMDGPU:7:0:1") {
|
||||
return gfx701;
|
||||
} else if (sname == "AMD:AMDGPU:8:0:0") {
|
||||
return gfx800;
|
||||
} else if (sname == "AMD:AMDGPU:8:0:1") {
|
||||
return gfx801;
|
||||
} else if (sname == "AMD:AMDGPU:8:0:2") {
|
||||
return gfx802;
|
||||
} else if (sname == "AMD:AMDGPU:8:0:3") {
|
||||
return gfx803;
|
||||
} else if (sname == "AMD:AMDGPU:8:0:4") {
|
||||
return gfx804;
|
||||
} else if (sname == "AMD:AMDGPU:8:1:0") {
|
||||
return gfx810;
|
||||
} else if (sname == "AMD:AMDGPU:9:0:0") {
|
||||
return gfx900;
|
||||
} else if (sname == "AMD:AMDGPU:9:0:1") {
|
||||
return gfx901;
|
||||
} else if (sname == "AMD:AMDGPU:9:0:2") {
|
||||
return gfx902;
|
||||
} else if (sname == "AMD:AMDGPU:9:0:3") {
|
||||
return gfx903;
|
||||
}
|
||||
else if (sname == "AMD:AMDGPU:9:0:4") {
|
||||
return gfx904;
|
||||
} else if (sname == "AMD:AMDGPU:9:0:5") {
|
||||
return gfx905;
|
||||
}
|
||||
else if (sname == "AMD:AMDGPU:9:0:6") {
|
||||
return gfx906;
|
||||
} else if (sname == "AMD:AMDGPU:9:0:7") {
|
||||
return gfx907;
|
||||
}
|
||||
else if (sname == "AMD:AMDGPU:10:0:0") {
|
||||
return gfx1000;
|
||||
} else if (sname == "AMD:AMDGPU:10:0:1") {
|
||||
return gfx1001;
|
||||
} else if (sname == "AMD:AMDGPU:10:1:0") {
|
||||
return gfx1010;
|
||||
} else if (sname == "AMD:AMDGPU:10:1:1") {
|
||||
return gfx1011;
|
||||
} else if (sname == "AMD:AMDGPU:10:1:2") {
|
||||
return gfx1012;
|
||||
}
|
||||
else if (sname == "AMD:AMDGPU:10:3:0") {
|
||||
return gfx1030;
|
||||
}
|
||||
else if (sname == "AMD:AMDGPU:40:0:0") {
|
||||
return gfx4000;
|
||||
}
|
||||
#if defined(GFX11_BUILD)
|
||||
else if (sname == "AMD:AMDGPU:11:0:0") {
|
||||
return gfx1100;
|
||||
} else if (sname == "AMD:AMDGPU:11:0:1") {
|
||||
return gfx1101;
|
||||
} else if (sname == "AMD:AMDGPU:11:0:2") {
|
||||
return gfx1102;
|
||||
} else if (sname == "AMD:AMDGPU:11:0:3") {
|
||||
return gfx1103;
|
||||
} else if (sname == "AMD:AMDGPU:11:5:0") {
|
||||
return gfx1150;
|
||||
} else if (sname == "AMD:AMDGPU:11:5:1") {
|
||||
return gfx1151;
|
||||
}
|
||||
#endif // GFX11_BUILD
|
||||
|
||||
assert(0);
|
||||
return invalid;
|
||||
}
|
||||
|
||||
bool OfflineLoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void* OfflineLoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero)
|
||||
{
|
||||
void* ptr = alignedMalloc(size, align);
|
||||
if (zero) { memset(ptr, 0, size); }
|
||||
out << "SegmentAlloc: " << segment << ": " << "size=" << size << " align=" << align << " zero=" << zero << " result=" << ptr << std::endl;
|
||||
pointers.insert(ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
bool OfflineLoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size)
|
||||
{
|
||||
out << "SegmentCopy: " << segment << ": " << "dst=" << dst << " offset=" << offset << " src=" << src << " size=" << size << std::endl;
|
||||
if (!dst || !src || dst == src) {
|
||||
return false;
|
||||
}
|
||||
if (0 == size) {
|
||||
return true;
|
||||
}
|
||||
memcpy((char *) dst + offset, src, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void OfflineLoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
|
||||
{
|
||||
out << "SegmentFree: " << segment << ": " << " ptr=" << seg << " size=" << size << std::endl;
|
||||
pointers.erase(seg);
|
||||
alignedFree(seg);
|
||||
}
|
||||
|
||||
void* OfflineLoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
|
||||
{
|
||||
out << "SegmentAddress: " << segment << ": " << " ptr=" << seg << " offset=" << offset << std::endl;
|
||||
return (char*) seg + offset;
|
||||
}
|
||||
|
||||
void* OfflineLoaderContext::SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
|
||||
{
|
||||
out << "SegmentHostAddress: " << segment << ": " << " ptr=" << seg << " offset=" << offset << std::endl;
|
||||
return (char*) seg + offset;
|
||||
}
|
||||
|
||||
bool OfflineLoaderContext::SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
|
||||
{
|
||||
out << "SegmentFreeze: " << segment << ": " << " ptr=" << seg << " size=" << size << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OfflineLoaderContext::ImageExtensionSupported()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
hsa_status_t OfflineLoaderContext::ImageCreate(
|
||||
hsa_agent_t agent,
|
||||
hsa_access_permission_t image_permission,
|
||||
const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data,
|
||||
hsa_ext_image_t *image_handle)
|
||||
{
|
||||
void* ptr = alignedMalloc(256, 8);
|
||||
out << "ImageCreate" << ":" <<
|
||||
" permission=" << image_permission <<
|
||||
" geometry=" << image_descriptor->geometry <<
|
||||
" width=" << image_descriptor->width <<
|
||||
" height=" << image_descriptor->height <<
|
||||
" depth=" << image_descriptor->depth <<
|
||||
" array_size=" << image_descriptor->array_size <<
|
||||
" channel_type=" << image_descriptor->format.channel_type <<
|
||||
" channel_order=" << image_descriptor->format.channel_order<<
|
||||
" data=" << image_data <<
|
||||
std::endl;
|
||||
pointers.insert(ptr);
|
||||
image_handle->handle = reinterpret_cast<uint64_t>(ptr);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t OfflineLoaderContext::ImageDestroy(
|
||||
hsa_agent_t agent, hsa_ext_image_t image_handle)
|
||||
{
|
||||
void* ptr = reinterpret_cast<void*>(image_handle.handle);
|
||||
pointers.erase(ptr);
|
||||
alignedFree(ptr);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t OfflineLoaderContext::SamplerCreate(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler_handle)
|
||||
{
|
||||
void* ptr = alignedMalloc(256, 8);
|
||||
out << "SamplerCreate" << ":" <<
|
||||
" coordinate_mode=" << sampler_descriptor->coordinate_mode <<
|
||||
" filter_mode=" << sampler_descriptor->filter_mode <<
|
||||
" address_mode=" << sampler_descriptor->address_mode <<
|
||||
std::endl;
|
||||
pointers.insert(ptr);
|
||||
sampler_handle->handle = reinterpret_cast<uint64_t>(ptr);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t OfflineLoaderContext::SamplerDestroy(
|
||||
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle)
|
||||
{
|
||||
void* ptr = reinterpret_cast<void*>(sampler_handle.handle);
|
||||
pointers.erase(ptr);
|
||||
alignedFree(ptr);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef LOADERS_HPP_
|
||||
#define LOADERS_HPP_
|
||||
|
||||
#include "amd_hsa_loader.hpp"
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
|
||||
class OfflineLoaderContext : public amd::hsa::loader::Context {
|
||||
private:
|
||||
hsa_isa_t invalid;
|
||||
hsa_isa_t gfx700, gfx701, gfx800, gfx801, gfx802, gfx803, gfx804, gfx810;
|
||||
hsa_isa_t gfx900, gfx901, gfx902, gfx903;
|
||||
hsa_isa_t gfx904, gfx905;
|
||||
hsa_isa_t gfx906, gfx907;
|
||||
hsa_isa_t gfx1000, gfx1001, gfx1010, gfx1011, gfx1012;
|
||||
hsa_isa_t gfx1030;
|
||||
hsa_isa_t gfx4000;
|
||||
#if defined(GFX11_BUILD)
|
||||
hsa_isa_t gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151;
|
||||
#endif // GFX11_BUILD
|
||||
std::ostream& out;
|
||||
typedef std::set<void*> PointerSet;
|
||||
PointerSet pointers;
|
||||
|
||||
public:
|
||||
OfflineLoaderContext();
|
||||
|
||||
hsa_isa_t IsaFromName(const char *name) override;
|
||||
|
||||
bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) override;
|
||||
|
||||
void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override;
|
||||
|
||||
bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override;
|
||||
|
||||
void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override;
|
||||
|
||||
void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;
|
||||
|
||||
void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;
|
||||
|
||||
bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override;
|
||||
|
||||
bool ImageExtensionSupported() override;
|
||||
|
||||
hsa_status_t ImageCreate(
|
||||
hsa_agent_t agent,
|
||||
hsa_access_permission_t image_permission,
|
||||
const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data,
|
||||
hsa_ext_image_t *image_handle) override;
|
||||
|
||||
hsa_status_t ImageDestroy(
|
||||
hsa_agent_t agent, hsa_ext_image_t image_handle) override;
|
||||
|
||||
hsa_status_t SamplerCreate(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler_handle) override;
|
||||
|
||||
hsa_status_t SamplerDestroy(
|
||||
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // LOADERS_HPP_
|
||||
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
@@ -0,0 +1,266 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/******************************************************************************
|
||||
* University of Illinois / NCSA
|
||||
* Open Source License
|
||||
*
|
||||
* Copyright(c) 2011 - 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
* Advanced Micro Devices, Inc.
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files(the "Software"), to deal
|
||||
* with the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and /
|
||||
* or sell copies of the Software, and to permit persons to whom the Software
|
||||
* is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimers in the documentation
|
||||
* and / or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the names of Advanced Micro Devices, Inc, nor the
|
||||
mes of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* Software without specific prior written permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef AMD_ELF_IMAGE_HPP_
|
||||
#define AMD_ELF_IMAGE_HPP_
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace amd {
|
||||
namespace elf {
|
||||
class Symbol;
|
||||
class SymbolTable;
|
||||
class Section;
|
||||
class RelocationSection;
|
||||
|
||||
class Segment {
|
||||
public:
|
||||
virtual ~Segment() { }
|
||||
virtual uint64_t type() const = 0;
|
||||
virtual uint64_t memSize() const = 0;
|
||||
virtual uint64_t align() const = 0;
|
||||
virtual uint64_t imageSize() const = 0;
|
||||
virtual uint64_t vaddr() const = 0;
|
||||
virtual uint64_t flags() const = 0;
|
||||
virtual uint64_t offset() const = 0;
|
||||
virtual const char* data() const = 0;
|
||||
virtual uint16_t getSegmentIndex() = 0;
|
||||
virtual bool updateAddSection(Section *section) = 0;
|
||||
};
|
||||
|
||||
class Section {
|
||||
public:
|
||||
virtual ~Section() { }
|
||||
virtual uint16_t getSectionIndex() const = 0;
|
||||
virtual uint32_t type() const = 0;
|
||||
virtual std::string Name() const = 0;
|
||||
virtual uint64_t offset() const = 0;
|
||||
virtual uint64_t addr() const = 0;
|
||||
virtual bool updateAddr(uint64_t addr) = 0;
|
||||
virtual uint64_t addralign() const = 0;
|
||||
virtual uint64_t flags() const = 0;
|
||||
virtual uint64_t size() const = 0;
|
||||
virtual uint64_t nextDataOffset(uint64_t align) const = 0;
|
||||
virtual uint64_t addData(const void *src, uint64_t size, uint64_t align) = 0;
|
||||
virtual bool getData(uint64_t offset, void* dest, uint64_t size) = 0;
|
||||
virtual Segment* segment() = 0;
|
||||
virtual RelocationSection* asRelocationSection() = 0;
|
||||
virtual bool hasRelocationSection() const = 0;
|
||||
virtual RelocationSection* relocationSection(SymbolTable* symtab = 0) = 0;
|
||||
virtual bool setMemSize(uint64_t s) = 0;
|
||||
virtual uint64_t memSize() const = 0;
|
||||
virtual bool setAlign(uint64_t a) = 0;
|
||||
virtual uint64_t memAlign() const = 0;
|
||||
};
|
||||
|
||||
class Relocation {
|
||||
public:
|
||||
virtual ~Relocation() { }
|
||||
virtual RelocationSection* section() = 0;
|
||||
virtual uint32_t type() = 0;
|
||||
virtual uint32_t symbolIndex() = 0;
|
||||
virtual Symbol* symbol() = 0;
|
||||
virtual uint64_t offset() = 0;
|
||||
virtual int64_t addend() = 0;
|
||||
};
|
||||
|
||||
class RelocationSection : public virtual Section {
|
||||
public:
|
||||
virtual Relocation* addRelocation(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend) = 0;
|
||||
virtual size_t relocationCount() const = 0;
|
||||
virtual Relocation* relocation(size_t i) = 0;
|
||||
virtual Section* targetSection() = 0;
|
||||
};
|
||||
|
||||
class StringTable : public virtual Section {
|
||||
public:
|
||||
virtual const char* addString(const std::string& s) = 0;
|
||||
virtual size_t addString1(const std::string& s) = 0;
|
||||
virtual const char* getString(size_t ndx) = 0;
|
||||
virtual size_t getStringIndex(const char* name) = 0;
|
||||
};
|
||||
|
||||
class Symbol {
|
||||
public:
|
||||
virtual ~Symbol() { }
|
||||
virtual uint32_t index() = 0;
|
||||
virtual uint32_t type() = 0;
|
||||
virtual uint32_t binding() = 0;
|
||||
virtual uint64_t size() = 0;
|
||||
virtual uint64_t value() = 0;
|
||||
virtual unsigned char other() = 0;
|
||||
virtual std::string name() = 0;
|
||||
virtual Section* section() = 0;
|
||||
virtual void setValue(uint64_t value) = 0;
|
||||
virtual void setSize(uint64_t size) = 0;
|
||||
};
|
||||
|
||||
class SymbolTable : public virtual Section {
|
||||
public:
|
||||
virtual Symbol* addSymbol(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other = 0) = 0;
|
||||
virtual size_t symbolCount() = 0;
|
||||
virtual Symbol* symbol(size_t i) = 0;
|
||||
};
|
||||
|
||||
class NoteSection : public virtual Section {
|
||||
public:
|
||||
virtual bool addNote(const std::string& name, uint32_t type, const void* desc = 0, uint32_t desc_size = 0) = 0;
|
||||
virtual bool getNote(const std::string& name, uint32_t type, void** desc, uint32_t* desc_size) = 0;
|
||||
};
|
||||
|
||||
class Image {
|
||||
public:
|
||||
virtual ~Image() { }
|
||||
|
||||
virtual bool initNew(uint16_t machine, uint16_t type, uint8_t os_abi = 0, uint8_t abi_version = 0, uint32_t e_flags = 0) = 0;
|
||||
virtual bool loadFromFile(const std::string& filename) = 0;
|
||||
virtual bool saveToFile(const std::string& filename) = 0;
|
||||
virtual bool initFromBuffer(const void* buffer, size_t size) = 0;
|
||||
virtual bool initAsBuffer(const void* buffer, size_t size) = 0;
|
||||
virtual bool writeTo(const std::string& filename) = 0;
|
||||
virtual bool copyToBuffer(void** buf, size_t* size = 0) = 0; // Copy to new buffer allocated with malloc
|
||||
virtual bool copyToBuffer(void* buf, size_t size) = 0; // Copy to existing buffer of given size.
|
||||
|
||||
virtual const char* data() = 0;
|
||||
virtual uint64_t size() = 0;
|
||||
|
||||
virtual uint16_t Machine() = 0;
|
||||
virtual uint16_t Type() = 0;
|
||||
virtual uint32_t EFlags() = 0;
|
||||
virtual uint32_t ABIVersion() = 0;
|
||||
virtual uint32_t EClass() = 0;
|
||||
virtual uint32_t OsAbi() = 0;
|
||||
|
||||
std::string output() { return out.str(); }
|
||||
|
||||
virtual bool Freeze() = 0;
|
||||
virtual bool Validate() = 0;
|
||||
|
||||
virtual StringTable* shstrtab() = 0;
|
||||
virtual StringTable* strtab() = 0;
|
||||
virtual SymbolTable* symtab() = 0;
|
||||
virtual SymbolTable* getSymtab(uint16_t index) = 0;
|
||||
|
||||
virtual StringTable* addStringTable(const std::string& name) = 0;
|
||||
virtual StringTable* getStringTable(uint16_t index) = 0;
|
||||
|
||||
virtual SymbolTable* addSymbolTable(const std::string& name, StringTable* stab = 0) = 0;
|
||||
|
||||
virtual size_t segmentCount() = 0;
|
||||
virtual Segment* segment(size_t i) = 0;
|
||||
virtual Segment* segmentByVAddr(uint64_t vaddr) = 0;
|
||||
|
||||
virtual size_t sectionCount() = 0;
|
||||
virtual Section* section(size_t i) = 0;
|
||||
virtual Section* sectionByVAddr(uint64_t vaddr) = 0;
|
||||
|
||||
virtual NoteSection* note() = 0;
|
||||
virtual NoteSection* addNoteSection(const std::string& name) = 0;
|
||||
|
||||
virtual Segment* initSegment(uint32_t type, uint32_t flags, uint64_t paddr = 0) = 0;
|
||||
virtual bool addSegments() = 0;
|
||||
|
||||
virtual Section* addSection(const std::string &name,
|
||||
uint32_t type,
|
||||
uint64_t flags = 0,
|
||||
uint64_t entsize = 0,
|
||||
Segment* segment = 0) = 0;
|
||||
|
||||
virtual RelocationSection* relocationSection(Section* sec, SymbolTable* symtab = 0) = 0;
|
||||
|
||||
protected:
|
||||
std::ostringstream out;
|
||||
};
|
||||
|
||||
Image* NewElf32Image();
|
||||
Image* NewElf64Image();
|
||||
|
||||
uint64_t ElfSize(const void* buffer);
|
||||
|
||||
std::string GetNoteString(uint32_t s_size, const char* s);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // AMD_ELF_IMAGE_HPP_
|
||||
@@ -0,0 +1,432 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/******************************************************************************
|
||||
* University of Illinois / NCSA
|
||||
* Open Source License
|
||||
*
|
||||
* Copyright(c) 2011 - 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
* Advanced Micro Devices, Inc.
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files(the "Software"), to deal
|
||||
* with the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and /
|
||||
* or sell copies of the Software, and to permit persons to whom the Software
|
||||
* is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
*
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimers in the documentation
|
||||
* and / or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the names of Advanced Micro Devices, Inc, nor the
|
||||
mes of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* Software without specific prior written permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef AMD_HSA_CODE_HPP_
|
||||
#define AMD_HSA_CODE_HPP_
|
||||
|
||||
#include "amd_elf_image.hpp"
|
||||
#include "amd_hsa_elf.h"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace common {
|
||||
|
||||
template<uint64_t signature>
|
||||
class Signed {
|
||||
public:
|
||||
static const uint64_t CT_SIGNATURE;
|
||||
const uint64_t RT_SIGNATURE;
|
||||
|
||||
protected:
|
||||
Signed(): RT_SIGNATURE(signature) {}
|
||||
virtual ~Signed() {}
|
||||
};
|
||||
|
||||
template<uint64_t signature>
|
||||
const uint64_t Signed<signature>::CT_SIGNATURE = signature;
|
||||
|
||||
bool IsAccessibleMemoryAddress(uint64_t address);
|
||||
|
||||
template<typename class_type, typename member_type>
|
||||
size_t OffsetOf(member_type class_type::*member)
|
||||
{
|
||||
return (char*)&((class_type*)nullptr->*member) - (char*)nullptr;
|
||||
}
|
||||
|
||||
template<typename class_type>
|
||||
class_type* ObjectAt(uint64_t address)
|
||||
{
|
||||
if (!IsAccessibleMemoryAddress(address)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const uint64_t *rt_signature =
|
||||
(const uint64_t*)(address + OffsetOf(&class_type::RT_SIGNATURE));
|
||||
if (nullptr == rt_signature) {
|
||||
return nullptr;
|
||||
}
|
||||
if (class_type::CT_SIGNATURE != *rt_signature) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return (class_type*)address;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace code {
|
||||
|
||||
typedef amd::elf::Segment Segment;
|
||||
typedef amd::elf::Section Section;
|
||||
typedef amd::elf::RelocationSection RelocationSection;
|
||||
typedef amd::elf::Relocation Relocation;
|
||||
|
||||
class KernelSymbol;
|
||||
class VariableSymbol;
|
||||
|
||||
class Symbol {
|
||||
protected:
|
||||
amd::elf::Symbol* elfsym;
|
||||
|
||||
public:
|
||||
explicit Symbol(amd::elf::Symbol* elfsym_)
|
||||
: elfsym(elfsym_) { }
|
||||
virtual ~Symbol() { }
|
||||
virtual bool IsKernelSymbol() const { return false; }
|
||||
virtual KernelSymbol* AsKernelSymbol() { assert(false); return 0; }
|
||||
virtual bool IsVariableSymbol() const { return false; }
|
||||
virtual VariableSymbol* AsVariableSymbol() { assert(false); return 0; }
|
||||
amd::elf::Symbol* elfSym() { return elfsym; }
|
||||
std::string Name() const { return elfsym ? elfsym->name() : ""; }
|
||||
Section* GetSection() { return elfsym->section(); }
|
||||
virtual uint64_t SectionOffset() const { return elfsym->value(); }
|
||||
virtual uint64_t VAddr() const { return elfsym->section()->addr() + elfsym->value(); }
|
||||
uint32_t Index() const { return elfsym ? elfsym->index() : 0; }
|
||||
bool IsDeclaration() const;
|
||||
bool IsDefinition() const;
|
||||
virtual bool IsAgent() const;
|
||||
virtual hsa_symbol_kind_t Kind() const = 0;
|
||||
hsa_symbol_linkage_t Linkage() const;
|
||||
hsa_variable_allocation_t Allocation() const;
|
||||
hsa_variable_segment_t Segment() const;
|
||||
uint64_t Size() const;
|
||||
uint32_t Size32() const;
|
||||
uint32_t Alignment() const;
|
||||
bool IsConst() const;
|
||||
virtual hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value);
|
||||
static hsa_code_symbol_t ToHandle(Symbol* sym);
|
||||
static Symbol* FromHandle(hsa_code_symbol_t handle);
|
||||
void setValue(uint64_t value) { elfsym->setValue(value); }
|
||||
void setSize(uint32_t size) { elfsym->setSize(size); }
|
||||
|
||||
std::string GetModuleName() const;
|
||||
std::string GetSymbolName() const;
|
||||
};
|
||||
|
||||
class KernelSymbol : public Symbol {
|
||||
private:
|
||||
uint32_t kernarg_segment_size, kernarg_segment_alignment;
|
||||
uint32_t group_segment_size, private_segment_size;
|
||||
bool is_dynamic_callstack;
|
||||
|
||||
public:
|
||||
explicit KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc);
|
||||
bool IsKernelSymbol() const override { return true; }
|
||||
KernelSymbol* AsKernelSymbol() override { return this; }
|
||||
hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_KERNEL; }
|
||||
hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override;
|
||||
};
|
||||
|
||||
class VariableSymbol : public Symbol {
|
||||
public:
|
||||
explicit VariableSymbol(amd::elf::Symbol* elfsym_)
|
||||
: Symbol(elfsym_) { }
|
||||
bool IsVariableSymbol() const override { return true; }
|
||||
VariableSymbol* AsVariableSymbol() override { return this; }
|
||||
hsa_symbol_kind_t Kind() const override { return HSA_SYMBOL_KIND_VARIABLE; }
|
||||
hsa_status_t GetInfo(hsa_code_symbol_info_t attribute, void *value) override;
|
||||
};
|
||||
|
||||
class AmdHsaCode {
|
||||
private:
|
||||
std::ostringstream out;
|
||||
std::unique_ptr<amd::elf::Image> img;
|
||||
std::vector<Segment*> dataSegments;
|
||||
std::vector<Section*> dataSections;
|
||||
std::vector<RelocationSection*> relocationSections;
|
||||
std::vector<Symbol*> symbols;
|
||||
bool combineDataSegments;
|
||||
Segment* hsaSegments[AMDGPU_HSA_SEGMENT_LAST][2];
|
||||
Section* hsaSections[AMDGPU_HSA_SECTION_LAST];
|
||||
|
||||
amd::elf::Section* hsatext;
|
||||
amd::elf::Section* imageInit;
|
||||
amd::elf::Section* samplerInit;
|
||||
amd::elf::Section* debugInfo;
|
||||
amd::elf::Section* debugLine;
|
||||
amd::elf::Section* debugAbbrev;
|
||||
|
||||
bool PullElf();
|
||||
bool PullElfV1();
|
||||
bool PullElfV2();
|
||||
|
||||
void AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size);
|
||||
template <typename S>
|
||||
bool GetAmdNote(uint32_t type, S** desc)
|
||||
{
|
||||
uint32_t desc_size;
|
||||
if (!img->note()->getNote("AMD", type, (void**) desc, &desc_size)) {
|
||||
out << "Failed to find note, type: " << type << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (desc_size < sizeof(S)) {
|
||||
out << "Note size mismatch, type: " << type << " size: " << desc_size << " expected at least " << sizeof(S) << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void PrintSegment(std::ostream& out, Segment* segment);
|
||||
void PrintSection(std::ostream& out, Section* section);
|
||||
void PrintRawData(std::ostream& out, Section* section);
|
||||
void PrintRawData(std::ostream& out, const unsigned char *data, size_t size);
|
||||
void PrintRelocationData(std::ostream& out, RelocationSection* section);
|
||||
void PrintSymbol(std::ostream& out, Symbol* sym);
|
||||
void PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset = 0);
|
||||
std::string MangleSymbolName(const std::string& module_name, const std::string symbol_name);
|
||||
bool ElfImageError();
|
||||
|
||||
public:
|
||||
bool HasHsaText() const { return hsatext != 0; }
|
||||
amd::elf::Section* HsaText() { assert(hsatext); return hsatext; }
|
||||
const amd::elf::Section* HsaText() const { assert(hsatext); return hsatext; }
|
||||
amd::elf::SymbolTable* Symtab() { assert(img); return img->symtab(); }
|
||||
uint16_t Machine() const { return img->Machine(); }
|
||||
uint32_t EFlags() const { return img->EFlags(); }
|
||||
uint32_t EClass() const { return img->EClass(); }
|
||||
uint32_t OsAbi() const { return img->OsAbi(); }
|
||||
|
||||
AmdHsaCode(bool combineDataSegments = true);
|
||||
virtual ~AmdHsaCode();
|
||||
|
||||
std::string output() { return out.str(); }
|
||||
bool LoadFromFile(const std::string& filename);
|
||||
bool SaveToFile(const std::string& filename);
|
||||
bool WriteToBuffer(void* buffer);
|
||||
bool InitFromBuffer(const void* buffer, size_t size);
|
||||
bool InitAsBuffer(const void* buffer, size_t size);
|
||||
bool InitAsHandle(hsa_code_object_t code_handle);
|
||||
bool InitNew(bool xnack = false);
|
||||
bool Freeze();
|
||||
hsa_code_object_t GetHandle();
|
||||
const char* ElfData();
|
||||
uint64_t ElfSize();
|
||||
bool Validate();
|
||||
void Print(std::ostream& out);
|
||||
void PrintNotes(std::ostream& out);
|
||||
void PrintSegments(std::ostream& out);
|
||||
void PrintSections(std::ostream& out);
|
||||
void PrintSymbols(std::ostream& out);
|
||||
void PrintMachineCode(std::ostream& out);
|
||||
void PrintMachineCode(std::ostream& out, KernelSymbol* sym);
|
||||
bool PrintToFile(const std::string& filename);
|
||||
|
||||
void AddNoteCodeObjectVersion(uint32_t major, uint32_t minor);
|
||||
bool GetNoteCodeObjectVersion(std::string& version);
|
||||
void AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode);
|
||||
bool GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round);
|
||||
void AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping);
|
||||
bool GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping);
|
||||
void AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer);
|
||||
bool GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name);
|
||||
void AddNoteProducerOptions(const std::string& options);
|
||||
void AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options);
|
||||
bool GetNoteProducerOptions(std::string& options);
|
||||
|
||||
bool GetIsa(std::string& isaName, unsigned *genericVersion = nullptr);
|
||||
bool GetCodeObjectVersion(uint32_t* major, uint32_t* minor);
|
||||
hsa_status_t GetInfo(hsa_code_object_info_t attribute, void *value);
|
||||
hsa_status_t GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *sym);
|
||||
hsa_status_t IterateSymbols(hsa_code_object_t code_object,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_code_object_t code_object,
|
||||
hsa_code_symbol_t symbol,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
void AddHsaTextData(const void* buffer, size_t size);
|
||||
uint64_t NextKernelCodeOffset() const;
|
||||
bool AddKernelCode(KernelSymbol* sym, const void* code, size_t size);
|
||||
|
||||
Symbol* AddKernelDefinition(const std::string& name, const void* isa, size_t isa_size);
|
||||
|
||||
size_t DataSegmentCount() const { return dataSegments.size(); }
|
||||
Segment* DataSegment(size_t i) const { return dataSegments[i]; }
|
||||
|
||||
size_t DataSectionCount() { return dataSections.size(); }
|
||||
Section* DataSection(size_t i) { return dataSections[i]; }
|
||||
|
||||
Section* AddEmptySection();
|
||||
Section* AddCodeSection(Segment* segment);
|
||||
Section* AddDataSection(const std::string &name,
|
||||
uint32_t type,
|
||||
uint64_t flags,
|
||||
Segment* segment);
|
||||
|
||||
bool HasImageInitSection() const { return imageInit != 0; }
|
||||
Section* ImageInitSection();
|
||||
void AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& init);
|
||||
void AddImageInitializer(Symbol* image, uint64_t destOffset,
|
||||
amdgpu_hsa_metadata_kind16_t kind,
|
||||
amdgpu_hsa_image_geometry8_t geometry,
|
||||
amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type,
|
||||
uint64_t width, uint64_t height, uint64_t depth, uint64_t array);
|
||||
|
||||
|
||||
bool HasSamplerInitSection() const { return samplerInit != 0; }
|
||||
amd::elf::Section* SamplerInitSection();
|
||||
amd::elf::Section* AddSamplerInit();
|
||||
void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& init);
|
||||
void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset,
|
||||
amdgpu_hsa_sampler_coord8_t coord,
|
||||
amdgpu_hsa_sampler_filter8_t filter,
|
||||
amdgpu_hsa_sampler_addressing8_t addressing);
|
||||
|
||||
void AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend);
|
||||
|
||||
void InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
|
||||
bool AddHsaSegments();
|
||||
Segment* HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
|
||||
|
||||
void InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
|
||||
Section* HsaDataSection(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
|
||||
|
||||
Symbol* AddExecutableSymbol(const std::string &name,
|
||||
unsigned char type,
|
||||
unsigned char binding,
|
||||
unsigned char other,
|
||||
Section *section = 0);
|
||||
|
||||
Symbol* AddVariableSymbol(const std::string &name,
|
||||
unsigned char type,
|
||||
unsigned char binding,
|
||||
unsigned char other,
|
||||
Section *section,
|
||||
uint64_t value,
|
||||
uint64_t size);
|
||||
void AddSectionSymbols();
|
||||
|
||||
size_t RelocationSectionCount() { return relocationSections.size(); }
|
||||
RelocationSection* GetRelocationSection(size_t i) { return relocationSections[i]; }
|
||||
|
||||
size_t SymbolCount() { return symbols.size(); }
|
||||
Symbol* GetSymbol(size_t i) { return symbols[i]; }
|
||||
Symbol* GetSymbolByElfIndex(size_t index);
|
||||
Symbol* FindSymbol(const std::string &n);
|
||||
|
||||
void AddData(amdgpu_hsa_elf_section_t section, const void* data = 0, size_t size = 0);
|
||||
|
||||
Section* DebugInfo();
|
||||
Section* DebugLine();
|
||||
Section* DebugAbbrev();
|
||||
|
||||
Section* AddHsaHlDebug(const std::string& name, const void* data, size_t size);
|
||||
};
|
||||
|
||||
class AmdHsaCodeManager {
|
||||
private:
|
||||
typedef std::unordered_map<uint64_t, AmdHsaCode*> CodeMap;
|
||||
CodeMap codeMap;
|
||||
|
||||
public:
|
||||
AmdHsaCode* FromHandle(hsa_code_object_t handle);
|
||||
bool Destroy(hsa_code_object_t handle);
|
||||
};
|
||||
|
||||
class KernelSymbolV2 : public KernelSymbol {
|
||||
private:
|
||||
public:
|
||||
explicit KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc);
|
||||
bool IsAgent() const override { return true; }
|
||||
uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); }
|
||||
uint64_t VAddr() const override { return elfsym->value(); }
|
||||
};
|
||||
|
||||
class VariableSymbolV2 : public VariableSymbol {
|
||||
private:
|
||||
public:
|
||||
explicit VariableSymbolV2(amd::elf::Symbol* elfsym_) : VariableSymbol(elfsym_) { }
|
||||
bool IsAgent() const override { return false; }
|
||||
uint64_t SectionOffset() const override { return elfsym->value() - elfsym->section()->addr(); }
|
||||
uint64_t VAddr() const override { return elfsym->value(); }
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // AMD_HSA_CODE_HPP_
|
||||
@@ -0,0 +1,91 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The following set of header files provides definitions for AMD GPU
|
||||
// Architecture:
|
||||
// - amd_hsa_common.h
|
||||
// - amd_hsa_elf.h
|
||||
// - amd_hsa_kernel_code.h
|
||||
// - amd_hsa_queue.h
|
||||
// - amd_hsa_signal.h
|
||||
//
|
||||
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
|
||||
// information.
|
||||
|
||||
#ifndef AMD_HSA_COMMON_H
|
||||
#define AMD_HSA_COMMON_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Descriptive version of the HSA Application Binary Interface.
|
||||
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
|
||||
|
||||
// Alignment attribute that specifies a minimum alignment (in bytes) for
|
||||
// variables of the specified type.
|
||||
#if defined(__GNUC__)
|
||||
# define __ALIGNED__(x) __attribute__((aligned(x)))
|
||||
#elif defined(_MSC_VER)
|
||||
# define __ALIGNED__(x) __declspec(align(x))
|
||||
#elif defined(RC_INVOKED)
|
||||
# define __ALIGNED__(x)
|
||||
#else
|
||||
# error
|
||||
#endif
|
||||
|
||||
// Creates enumeration entries for packed types. Enumeration entries include
|
||||
// bit shift amount, bit width, and bit mask.
|
||||
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
|
||||
name##_SHIFT = (shift), \
|
||||
name##_WIDTH = (width), \
|
||||
name = (((1 << (width)) - 1) << (shift)) \
|
||||
|
||||
// Gets bits for specified mask from specified src packed instance.
|
||||
#define AMD_HSA_BITS_GET(src, mask) \
|
||||
((src & mask) >> mask ## _SHIFT) \
|
||||
|
||||
// Sets val bits for specified mask in specified dst packed instance.
|
||||
#define AMD_HSA_BITS_SET(dst, mask, val) \
|
||||
dst &= (~(1 << mask##_SHIFT) & ~mask); \
|
||||
dst |= (((val) << mask##_SHIFT) & mask) \
|
||||
|
||||
#endif // AMD_HSA_COMMON_H
|
||||
@@ -0,0 +1,476 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Undefine the macro in case it is defined in the system elf.h.
|
||||
#undef EM_AMDGPU
|
||||
|
||||
#ifndef AMD_HSA_ELF_H
|
||||
#define AMD_HSA_ELF_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// AMD GPU Specific ELF Header Enumeration Values.
|
||||
//
|
||||
// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
|
||||
// code object V1 defintions which are not part of the LLVM header. Code object
|
||||
// V1 was only supported by the Finalizer which is now deprecated and removed.
|
||||
//
|
||||
// TODO: Deprecate and remove V1 support and replace this header with using the
|
||||
// LLVM header.
|
||||
namespace ELF {
|
||||
|
||||
// Machine architectures
|
||||
// See current registered ELF machine architectures at:
|
||||
// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
|
||||
enum {
|
||||
EM_AMDGPU = 224, // AMD GPU architecture
|
||||
};
|
||||
|
||||
// OS ABI identification.
|
||||
enum {
|
||||
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
|
||||
};
|
||||
|
||||
// AMDGPU OS ABI Version identification.
|
||||
enum {
|
||||
// ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
|
||||
// was never defined for V1.
|
||||
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
|
||||
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
|
||||
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
|
||||
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
|
||||
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
|
||||
};
|
||||
|
||||
// AMDGPU specific e_flags.
|
||||
enum : unsigned {
|
||||
// Processor selection mask for EF_AMDGPU_MACH_* values.
|
||||
EF_AMDGPU_MACH = 0x0ff,
|
||||
// Not specified processor.
|
||||
EF_AMDGPU_MACH_NONE = 0x000,
|
||||
// R600-based processors.
|
||||
// Radeon HD 2000/3000 Series (R600).
|
||||
EF_AMDGPU_MACH_R600_R600 = 0x001,
|
||||
EF_AMDGPU_MACH_R600_R630 = 0x002,
|
||||
EF_AMDGPU_MACH_R600_RS880 = 0x003,
|
||||
EF_AMDGPU_MACH_R600_RV670 = 0x004,
|
||||
// Radeon HD 4000 Series (R700).
|
||||
EF_AMDGPU_MACH_R600_RV710 = 0x005,
|
||||
EF_AMDGPU_MACH_R600_RV730 = 0x006,
|
||||
EF_AMDGPU_MACH_R600_RV770 = 0x007,
|
||||
// Radeon HD 5000 Series (Evergreen).
|
||||
EF_AMDGPU_MACH_R600_CEDAR = 0x008,
|
||||
EF_AMDGPU_MACH_R600_CYPRESS = 0x009,
|
||||
EF_AMDGPU_MACH_R600_JUNIPER = 0x00a,
|
||||
EF_AMDGPU_MACH_R600_REDWOOD = 0x00b,
|
||||
EF_AMDGPU_MACH_R600_SUMO = 0x00c,
|
||||
// Radeon HD 6000 Series (Northern Islands).
|
||||
EF_AMDGPU_MACH_R600_BARTS = 0x00d,
|
||||
EF_AMDGPU_MACH_R600_CAICOS = 0x00e,
|
||||
EF_AMDGPU_MACH_R600_CAYMAN = 0x00f,
|
||||
EF_AMDGPU_MACH_R600_TURKS = 0x010,
|
||||
// Reserved for R600-based processors.
|
||||
EF_AMDGPU_MACH_R600_RESERVED_FIRST = 0x011,
|
||||
EF_AMDGPU_MACH_R600_RESERVED_LAST = 0x01f,
|
||||
// First/last R600-based processors.
|
||||
EF_AMDGPU_MACH_R600_FIRST = EF_AMDGPU_MACH_R600_R600,
|
||||
EF_AMDGPU_MACH_R600_LAST = EF_AMDGPU_MACH_R600_TURKS,
|
||||
// AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1000 = 0x0f1,
|
||||
#if defined(GFX40_BUILD)
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX4000 = 0x0f8,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX4010 = 0x0f9,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX4020 = 0x0fe,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX4030 = 0x0f6,
|
||||
#endif // GFX40_BUILD
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
|
||||
#if defined(GFX11_BUILD)
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
|
||||
#endif // GFX11_BUILD
|
||||
#if defined(GFX12_BUILD)
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
|
||||
#endif // GFX12_BUILD
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
|
||||
// Reserved for AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_LAST = 0x0ff,
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_RESERVED_LAST,
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
|
||||
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
|
||||
// Indicates if the trap handler is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
|
||||
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
|
||||
// Indicates if the "sramecc" target feature is enabled for all code
|
||||
// contained in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
|
||||
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
|
||||
// ELFABIVERSION_AMDGPU_HSA_V5.
|
||||
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
|
||||
// XNACK is not supported.
|
||||
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
|
||||
// XNACK is any/default/unspecified.
|
||||
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
|
||||
// XNACK is off.
|
||||
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
|
||||
// XNACK is on.
|
||||
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
|
||||
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
|
||||
// ELFABIVERSION_AMDGPU_HSA_V5.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
|
||||
// SRAMECC is not supported.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
|
||||
// SRAMECC is any/default/unspecified.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
|
||||
// SRAMECC is off.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
|
||||
// SRAMECC is on.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
|
||||
|
||||
// Generic target versioning. This is contained in the list byte of EFLAGS.
|
||||
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
|
||||
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
|
||||
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
|
||||
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
|
||||
};
|
||||
|
||||
} // end namespace ELF
|
||||
|
||||
// ELF Section Header Flag Enumeration Values.
|
||||
#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
|
||||
|
||||
//
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
|
||||
AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
|
||||
AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
|
||||
AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
|
||||
AMDGPU_HSA_SEGMENT_LAST,
|
||||
} amdgpu_hsa_elf_segment_t;
|
||||
|
||||
// ELF Program Header Type Enumeration Values.
|
||||
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
|
||||
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
|
||||
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
|
||||
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
|
||||
|
||||
// ELF Symbol Type Enumeration Values.
|
||||
#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
|
||||
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
|
||||
#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
|
||||
|
||||
// ELF Symbol Binding Enumeration Values.
|
||||
#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
|
||||
|
||||
// ELF Symbol Other Information Creation/Retrieval.
|
||||
#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
|
||||
#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
|
||||
#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
|
||||
} amdgpu_hsa_symbol_allocation_t;
|
||||
|
||||
// ELF Symbol Allocation Enumeration Values.
|
||||
#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
|
||||
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
|
||||
#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
|
||||
#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
|
||||
AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
|
||||
AMDGPU_HSA_SYMBOL_FLAG_LAST,
|
||||
} amdgpu_hsa_symbol_flag_t;
|
||||
|
||||
// ELF Symbol Flag Enumeration Values.
|
||||
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
|
||||
|
||||
// AMD GPU Relocation Type Enumeration Values.
|
||||
#define R_AMDGPU_NONE 0
|
||||
#define R_AMDGPU_32_LOW 1
|
||||
#define R_AMDGPU_32_HIGH 2
|
||||
#define R_AMDGPU_64 3
|
||||
#define R_AMDGPU_INIT_SAMPLER 4
|
||||
#define R_AMDGPU_INIT_IMAGE 5
|
||||
#define R_AMDGPU_RELATIVE64 13
|
||||
|
||||
// AMD GPU Note Type Enumeration Values.
|
||||
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
|
||||
#define NT_AMD_HSA_HSAIL 2
|
||||
#define NT_AMD_HSA_ISA_VERSION 3
|
||||
#define NT_AMD_HSA_PRODUCER 4
|
||||
#define NT_AMD_HSA_PRODUCER_OPTIONS 5
|
||||
#define NT_AMD_HSA_EXTENSION 6
|
||||
#define NT_AMD_HSA_ISA_NAME 11
|
||||
#define NT_AMD_HSA_HLDEBUG_DEBUG 101
|
||||
#define NT_AMD_HSA_HLDEBUG_TARGET 102
|
||||
|
||||
// AMD GPU Metadata Kind Enumeration Values.
|
||||
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_METADATA_KIND_NONE = 0,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
|
||||
} amdgpu_hsa_metadata_kind_t;
|
||||
|
||||
// AMD GPU Sampler Coordinate Normalization Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
|
||||
AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
|
||||
} amdgpu_hsa_sampler_coord_t;
|
||||
|
||||
// AMD GPU Sampler Filter Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
|
||||
AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
|
||||
} amdgpu_hsa_sampler_filter_t;
|
||||
|
||||
// AMD GPU Sampler Addressing Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
|
||||
} amdgpu_hsa_sampler_addressing_t;
|
||||
|
||||
// AMD GPU Sampler Descriptor.
|
||||
typedef struct amdgpu_hsa_sampler_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_sampler_coord8_t coord;
|
||||
amdgpu_hsa_sampler_filter8_t filter;
|
||||
amdgpu_hsa_sampler_addressing8_t addressing;
|
||||
uint8_t reserved1;
|
||||
} amdgpu_hsa_sampler_descriptor_t;
|
||||
|
||||
// AMD GPU Image Geometry Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_geometry8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
|
||||
} amdgpu_hsa_image_geometry_t;
|
||||
|
||||
// AMD GPU Image Channel Order Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
|
||||
} amdgpu_hsa_image_channel_order_t;
|
||||
|
||||
// AMD GPU Image Channel Type Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
|
||||
} amdgpu_hsa_image_channel_type_t;
|
||||
|
||||
// AMD GPU Image Descriptor.
|
||||
typedef struct amdgpu_hsa_image_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_image_geometry8_t geometry;
|
||||
amdgpu_hsa_image_channel_order8_t channel_order;
|
||||
amdgpu_hsa_image_channel_type8_t channel_type;
|
||||
uint8_t reserved1;
|
||||
uint64_t width;
|
||||
uint64_t height;
|
||||
uint64_t depth;
|
||||
uint64_t array;
|
||||
} amdgpu_hsa_image_descriptor_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_code_object_version_s {
|
||||
uint32_t major_version;
|
||||
uint32_t minor_version;
|
||||
} amdgpu_hsa_note_code_object_version_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_hsail_s {
|
||||
uint32_t hsail_major_version;
|
||||
uint32_t hsail_minor_version;
|
||||
uint8_t profile;
|
||||
uint8_t machine_model;
|
||||
uint8_t default_float_round;
|
||||
} amdgpu_hsa_note_hsail_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_isa_s {
|
||||
uint16_t vendor_name_size;
|
||||
uint16_t architecture_name_size;
|
||||
uint32_t major;
|
||||
uint32_t minor;
|
||||
uint32_t stepping;
|
||||
char vendor_and_architecture_name[1];
|
||||
} amdgpu_hsa_note_isa_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_producer_s {
|
||||
uint16_t producer_name_size;
|
||||
uint16_t reserved;
|
||||
uint32_t producer_major_version;
|
||||
uint32_t producer_minor_version;
|
||||
char producer_name[1];
|
||||
} amdgpu_hsa_note_producer_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_producer_options_s {
|
||||
uint16_t producer_options_size;
|
||||
char producer_options[1];
|
||||
} amdgpu_hsa_note_producer_options_t;
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
|
||||
AMDGPU_HSA_RODATA_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_RODATA_READONLY_AGENT,
|
||||
AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
|
||||
AMDGPU_HSA_DATA_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_DATA_READONLY_AGENT,
|
||||
AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
|
||||
AMDGPU_HSA_BSS_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_BSS_READONLY_AGENT,
|
||||
AMDGPU_HSA_SECTION_LAST,
|
||||
} amdgpu_hsa_elf_section_t;
|
||||
|
||||
#endif // AMD_HSA_ELF_H
|
||||
@@ -0,0 +1,274 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_KERNEL_CODE_H
|
||||
#define AMD_HSA_KERNEL_CODE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Kernel Code Version Enumeration Values.
|
||||
typedef uint32_t amd_kernel_code_version32_t;
|
||||
enum amd_kernel_code_version_t {
|
||||
AMD_KERNEL_CODE_VERSION_MAJOR = 1,
|
||||
AMD_KERNEL_CODE_VERSION_MINOR = 1
|
||||
};
|
||||
|
||||
// AMD Machine Kind Enumeration Values.
|
||||
typedef uint16_t amd_machine_kind16_t;
|
||||
enum amd_machine_kind_t {
|
||||
AMD_MACHINE_KIND_UNDEFINED = 0,
|
||||
AMD_MACHINE_KIND_AMDGPU = 1
|
||||
};
|
||||
|
||||
// AMD Machine Version.
|
||||
typedef uint16_t amd_machine_version16_t;
|
||||
|
||||
// AMD Float Round Mode Enumeration Values.
|
||||
enum amd_float_round_mode_t {
|
||||
AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
|
||||
AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
|
||||
AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
|
||||
AMD_FLOAT_ROUND_MODE_ZERO = 3
|
||||
};
|
||||
|
||||
// AMD Float Denorm Mode Enumeration Values.
|
||||
enum amd_float_denorm_mode_t {
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
|
||||
AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
|
||||
};
|
||||
|
||||
// AMD Compute Program Resource Register One.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
|
||||
enum amd_compute_pgm_rsrc_one_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FP16_OVFL, 26, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED0, 27, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_WGP_MODE, 29, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_MEM_ORDERED, 30, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FWD_PROGRESS, 31, 1),
|
||||
};
|
||||
|
||||
// AMD System VGPR Workitem ID Enumeration Values.
|
||||
enum amd_system_vgpr_workitem_id_t {
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
|
||||
};
|
||||
|
||||
// AMD Compute Program Resource Register Two.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
|
||||
enum amd_compute_pgm_rsrc_two_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
|
||||
};
|
||||
|
||||
// AMD Element Byte Size Enumeration Values.
|
||||
enum amd_element_byte_size_t {
|
||||
AMD_ELEMENT_BYTE_SIZE_2 = 0,
|
||||
AMD_ELEMENT_BYTE_SIZE_4 = 1,
|
||||
AMD_ELEMENT_BYTE_SIZE_8 = 2,
|
||||
AMD_ELEMENT_BYTE_SIZE_16 = 3
|
||||
};
|
||||
|
||||
// AMD Kernel Code Properties.
|
||||
typedef uint32_t amd_kernel_code_properties32_t;
|
||||
enum amd_kernel_code_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
|
||||
};
|
||||
|
||||
// AMD Power Of Two Enumeration Values.
|
||||
typedef uint8_t amd_powertwo8_t;
|
||||
enum amd_powertwo_t {
|
||||
AMD_POWERTWO_1 = 0,
|
||||
AMD_POWERTWO_2 = 1,
|
||||
AMD_POWERTWO_4 = 2,
|
||||
AMD_POWERTWO_8 = 3,
|
||||
AMD_POWERTWO_16 = 4,
|
||||
AMD_POWERTWO_32 = 5,
|
||||
AMD_POWERTWO_64 = 6,
|
||||
AMD_POWERTWO_128 = 7,
|
||||
AMD_POWERTWO_256 = 8
|
||||
};
|
||||
|
||||
// AMD Enabled Control Directive Enumeration Values.
|
||||
typedef uint64_t amd_enabled_control_directive64_t;
|
||||
enum amd_enabled_control_directive_t {
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
|
||||
};
|
||||
|
||||
// AMD Exception Kind Enumeration Values.
|
||||
typedef uint16_t amd_exception_kind16_t;
|
||||
enum amd_exception_kind_t {
|
||||
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
|
||||
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
|
||||
AMD_EXCEPTION_KIND_OVERFLOW = 4,
|
||||
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
|
||||
AMD_EXCEPTION_KIND_INEXACT = 16
|
||||
};
|
||||
|
||||
// AMD Control Directives.
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
|
||||
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
|
||||
amd_enabled_control_directive64_t enabled_control_directives;
|
||||
uint16_t enable_break_exceptions;
|
||||
uint16_t enable_detect_exceptions;
|
||||
uint32_t max_dynamic_group_size;
|
||||
uint64_t max_flat_grid_size;
|
||||
uint32_t max_flat_workgroup_size;
|
||||
uint8_t required_dim;
|
||||
uint8_t reserved1[3];
|
||||
uint64_t required_grid_size[3];
|
||||
uint32_t required_workgroup_size[3];
|
||||
uint8_t reserved2[60];
|
||||
} amd_control_directives_t;
|
||||
|
||||
// AMD Kernel Code.
|
||||
#define AMD_ISA_ALIGN_BYTES 256
|
||||
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
|
||||
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
|
||||
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_major;
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_minor;
|
||||
amd_machine_kind16_t amd_machine_kind;
|
||||
amd_machine_version16_t amd_machine_version_major;
|
||||
amd_machine_version16_t amd_machine_version_minor;
|
||||
amd_machine_version16_t amd_machine_version_stepping;
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
int64_t kernel_code_prefetch_byte_offset;
|
||||
uint64_t kernel_code_prefetch_byte_size;
|
||||
uint64_t max_scratch_backing_memory_byte_size;
|
||||
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
|
||||
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
|
||||
amd_kernel_code_properties32_t kernel_code_properties;
|
||||
uint32_t workitem_private_segment_byte_size;
|
||||
uint32_t workgroup_group_segment_byte_size;
|
||||
uint32_t gds_segment_byte_size;
|
||||
uint64_t kernarg_segment_byte_size;
|
||||
uint32_t workgroup_fbarrier_count;
|
||||
uint16_t wavefront_sgpr_count;
|
||||
uint16_t workitem_vgpr_count;
|
||||
uint16_t reserved_vgpr_first;
|
||||
uint16_t reserved_vgpr_count;
|
||||
uint16_t reserved_sgpr_first;
|
||||
uint16_t reserved_sgpr_count;
|
||||
uint16_t debug_wavefront_private_segment_offset_sgpr;
|
||||
uint16_t debug_private_segment_buffer_sgpr;
|
||||
amd_powertwo8_t kernarg_segment_alignment;
|
||||
amd_powertwo8_t group_segment_alignment;
|
||||
amd_powertwo8_t private_segment_alignment;
|
||||
amd_powertwo8_t wavefront_size;
|
||||
int32_t call_convention;
|
||||
uint8_t reserved1[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
amd_control_directives_t control_directives;
|
||||
} amd_kernel_code_t;
|
||||
|
||||
// TODO: this struct should be completely gone once debugger designs/implements
|
||||
// Debugger APIs.
|
||||
typedef struct amd_runtime_loader_debug_info_s {
|
||||
const void* elf_raw;
|
||||
size_t elf_size;
|
||||
const char *kernel_name;
|
||||
const void *owning_segment;
|
||||
} amd_runtime_loader_debug_info_t;
|
||||
|
||||
#endif // AMD_HSA_KERNEL_CODE_H
|
||||
@@ -0,0 +1,520 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_LOADER_HPP
|
||||
#define AMD_HSA_LOADER_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_image.h"
|
||||
#include "hsa_ven_amd_loader.h"
|
||||
#include "amd_hsa_elf.h"
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <io.h>
|
||||
#define __read__ _read
|
||||
#define __lseek__ _lseek
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#define __read__ read
|
||||
#define __lseek__ lseek
|
||||
#endif // _WIN32 || _WIN64
|
||||
|
||||
/// @brief Major version of the AMD HSA Loader. Major versions are not backwards
|
||||
/// compatible.
|
||||
#define AMD_HSA_LOADER_VERSION_MAJOR 0
|
||||
|
||||
/// @brief Minor version of the AMD HSA Loader. Minor versions are backwards
|
||||
/// compatible.
|
||||
#define AMD_HSA_LOADER_VERSION_MINOR 5
|
||||
|
||||
/// @brief Descriptive version of the AMD HSA Loader.
|
||||
#define AMD_HSA_LOADER_VERSION "AMD HSA Loader v0.05 (June 16, 2015)"
|
||||
|
||||
enum hsa_ext_symbol_info_t {
|
||||
HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE = 100,
|
||||
HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN = 101,
|
||||
};
|
||||
|
||||
typedef uint32_t hsa_symbol_info32_t;
|
||||
typedef hsa_executable_symbol_t hsa_symbol_t;
|
||||
typedef hsa_executable_symbol_info_t hsa_symbol_info_t;
|
||||
|
||||
/// @brief Loaded code object attributes.
|
||||
enum amd_loaded_code_object_info_t {
|
||||
AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE = 0,
|
||||
AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE = 1
|
||||
};
|
||||
|
||||
/// @brief Loaded segment handle.
|
||||
typedef struct amd_loaded_segment_s {
|
||||
uint64_t handle;
|
||||
} amd_loaded_segment_t;
|
||||
|
||||
/// @brief Loaded segment attributes.
|
||||
enum amd_loaded_segment_info_t {
|
||||
AMD_LOADED_SEGMENT_INFO_TYPE = 0,
|
||||
AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS = 1,
|
||||
AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS = 2,
|
||||
AMD_LOADED_SEGMENT_INFO_SIZE = 3
|
||||
};
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
|
||||
/// @class CodeObjectReaderImpl.
|
||||
/// @brief Code Object Reader Wrapper.
|
||||
struct CodeObjectReaderImpl final {
|
||||
public:
|
||||
/// @returns Handle equivalent of @p object.
|
||||
static hsa_code_object_reader_t Handle(
|
||||
const CodeObjectReaderImpl *object) {
|
||||
hsa_code_object_reader_t handle = {reinterpret_cast<uint64_t>(object)};
|
||||
return handle;
|
||||
}
|
||||
|
||||
/// @returns Object equivalent of @p handle.
|
||||
static CodeObjectReaderImpl *Object(
|
||||
const hsa_code_object_reader_t &handle) {
|
||||
CodeObjectReaderImpl *object =
|
||||
reinterpret_cast<CodeObjectReaderImpl*>(handle.handle);
|
||||
return object;
|
||||
}
|
||||
|
||||
/// @brief Default constructor.
|
||||
CodeObjectReaderImpl() {}
|
||||
|
||||
/// @brief Default destructor.
|
||||
~CodeObjectReaderImpl();
|
||||
|
||||
hsa_status_t SetFile(
|
||||
hsa_file_t _code_object_file_descriptor,
|
||||
size_t _code_object_offset = 0,
|
||||
size_t _code_object_size = 0);
|
||||
|
||||
hsa_status_t SetMemory(
|
||||
const void *_code_object_memory,
|
||||
size_t _code_object_size);
|
||||
|
||||
const void *GetCodeObjectMemory() const { return code_object_memory; };
|
||||
|
||||
std::string GetUri() const { return uri; };
|
||||
|
||||
private:
|
||||
const void *code_object_memory{nullptr};
|
||||
size_t code_object_size{0};
|
||||
std::string uri{};
|
||||
bool is_mmap{false};
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Context. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Context {
|
||||
public:
|
||||
virtual ~Context() {}
|
||||
|
||||
virtual hsa_isa_t IsaFromName(const char *name) = 0;
|
||||
|
||||
// This function will be deleted in a future patch. Use the overload
|
||||
// that takes a generic version instead.
|
||||
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) = 0;
|
||||
|
||||
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa, unsigned genericVersion) { return IsaSupportedByAgent(agent, isa); }
|
||||
|
||||
virtual void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) = 0;
|
||||
|
||||
virtual bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) = 0;
|
||||
|
||||
virtual void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0;
|
||||
|
||||
virtual void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0;
|
||||
|
||||
virtual void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) = 0;
|
||||
|
||||
virtual bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) = 0;
|
||||
|
||||
virtual bool ImageExtensionSupported() = 0;
|
||||
|
||||
virtual hsa_status_t ImageCreate(
|
||||
hsa_agent_t agent,
|
||||
hsa_access_permission_t image_permission,
|
||||
const hsa_ext_image_descriptor_t *image_descriptor,
|
||||
const void *image_data,
|
||||
hsa_ext_image_t *image_handle) = 0;
|
||||
|
||||
virtual hsa_status_t ImageDestroy(
|
||||
hsa_agent_t agent, hsa_ext_image_t image_handle) = 0;
|
||||
|
||||
virtual hsa_status_t SamplerCreate(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ext_sampler_descriptor_t *sampler_descriptor,
|
||||
hsa_ext_sampler_t *sampler_handle) = 0;
|
||||
|
||||
virtual hsa_status_t SamplerDestroy(
|
||||
hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) = 0;
|
||||
|
||||
protected:
|
||||
Context() {}
|
||||
|
||||
private:
|
||||
Context(const Context &c);
|
||||
Context& operator=(const Context &c);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Symbol. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Symbol {
|
||||
public:
|
||||
static hsa_symbol_t Handle(Symbol *symbol) {
|
||||
hsa_symbol_t symbol_handle =
|
||||
{reinterpret_cast<uint64_t>(symbol)};
|
||||
return symbol_handle;
|
||||
}
|
||||
|
||||
static Symbol* Object(hsa_symbol_t symbol_handle) {
|
||||
Symbol *symbol =
|
||||
reinterpret_cast<Symbol*>(symbol_handle.handle);
|
||||
return symbol;
|
||||
}
|
||||
|
||||
virtual ~Symbol() {}
|
||||
|
||||
virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void *value) = 0;
|
||||
|
||||
virtual hsa_agent_t GetAgent() = 0;
|
||||
|
||||
protected:
|
||||
Symbol() {}
|
||||
|
||||
private:
|
||||
Symbol(const Symbol &s);
|
||||
Symbol& operator=(const Symbol &s);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// LoadedCodeObject. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class LoadedCodeObject {
|
||||
public:
|
||||
static hsa_loaded_code_object_t Handle(LoadedCodeObject *object) {
|
||||
hsa_loaded_code_object_t handle =
|
||||
{reinterpret_cast<uint64_t>(object)};
|
||||
return handle;
|
||||
}
|
||||
|
||||
static LoadedCodeObject* Object(hsa_loaded_code_object_t handle) {
|
||||
LoadedCodeObject *object =
|
||||
reinterpret_cast<LoadedCodeObject*>(handle.handle);
|
||||
return object;
|
||||
}
|
||||
|
||||
virtual ~LoadedCodeObject() {}
|
||||
|
||||
virtual bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) = 0;
|
||||
|
||||
virtual hsa_status_t IterateLoadedSegments(
|
||||
hsa_status_t (*callback)(
|
||||
amd_loaded_segment_t loaded_segment,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
virtual hsa_agent_t getAgent() const = 0;
|
||||
virtual hsa_executable_t getExecutable() const = 0;
|
||||
virtual uint64_t getElfData() const = 0;
|
||||
virtual uint64_t getElfSize() const = 0;
|
||||
virtual uint64_t getStorageOffset() const = 0;
|
||||
virtual uint64_t getLoadBase() const = 0;
|
||||
virtual uint64_t getLoadSize() const = 0;
|
||||
virtual int64_t getDelta() const = 0;
|
||||
virtual std::string getUri() const = 0;
|
||||
|
||||
protected:
|
||||
LoadedCodeObject() {}
|
||||
|
||||
private:
|
||||
LoadedCodeObject(const LoadedCodeObject&);
|
||||
LoadedCodeObject& operator=(const LoadedCodeObject&);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// LoadedSegment. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class LoadedSegment {
|
||||
public:
|
||||
static amd_loaded_segment_t Handle(LoadedSegment *object) {
|
||||
amd_loaded_segment_t handle =
|
||||
{reinterpret_cast<uint64_t>(object)};
|
||||
return handle;
|
||||
}
|
||||
|
||||
static LoadedSegment* Object(amd_loaded_segment_t handle) {
|
||||
LoadedSegment *object =
|
||||
reinterpret_cast<LoadedSegment*>(handle.handle);
|
||||
return object;
|
||||
}
|
||||
|
||||
virtual ~LoadedSegment() {}
|
||||
|
||||
virtual bool GetInfo(amd_loaded_segment_info_t attribute, void *value) = 0;
|
||||
|
||||
protected:
|
||||
LoadedSegment() {}
|
||||
|
||||
private:
|
||||
LoadedSegment(const LoadedSegment&);
|
||||
LoadedSegment& operator=(const LoadedSegment&);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Executable. //
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Executable {
|
||||
public:
|
||||
static hsa_executable_t Handle(Executable *executable) {
|
||||
hsa_executable_t executable_handle =
|
||||
{reinterpret_cast<uint64_t>(executable)};
|
||||
return executable_handle;
|
||||
}
|
||||
|
||||
static Executable* Object(hsa_executable_t executable_handle) {
|
||||
Executable *executable =
|
||||
reinterpret_cast<Executable*>(executable_handle.handle);
|
||||
return executable;
|
||||
}
|
||||
|
||||
virtual ~Executable() {}
|
||||
|
||||
virtual hsa_status_t GetInfo(
|
||||
hsa_executable_info_t executable_info, void *value) = 0;
|
||||
|
||||
virtual hsa_status_t DefineProgramExternalVariable(
|
||||
const char *name, void *address) = 0;
|
||||
|
||||
virtual hsa_status_t DefineAgentExternalVariable(
|
||||
const char *name,
|
||||
hsa_agent_t agent,
|
||||
hsa_variable_segment_t segment,
|
||||
void *address) = 0;
|
||||
|
||||
virtual hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
|
||||
|
||||
virtual hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
size_t code_object_size,
|
||||
const char *options,
|
||||
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
|
||||
|
||||
virtual hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
const char *options,
|
||||
const std::string &uri,
|
||||
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
|
||||
|
||||
virtual hsa_status_t LoadCodeObject(
|
||||
hsa_agent_t agent,
|
||||
hsa_code_object_t code_object,
|
||||
size_t code_object_size,
|
||||
const char *options,
|
||||
const std::string &uri,
|
||||
hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;
|
||||
|
||||
virtual hsa_status_t Freeze(const char *options) = 0;
|
||||
|
||||
virtual hsa_status_t Validate(uint32_t *result) = 0;
|
||||
|
||||
/// @note needed for hsa v1.0.
|
||||
/// @todo remove during loader refactoring.
|
||||
virtual bool IsProgramSymbol(const char *symbol_name) = 0;
|
||||
|
||||
virtual Symbol* GetSymbol(
|
||||
const char *symbol_name,
|
||||
const hsa_agent_t *agent) = 0;
|
||||
|
||||
typedef hsa_status_t (*iterate_symbols_f)(
|
||||
hsa_executable_t executable,
|
||||
hsa_symbol_t symbol_handle,
|
||||
void *data);
|
||||
|
||||
virtual hsa_status_t IterateSymbols(
|
||||
iterate_symbols_f callback, void *data) = 0;
|
||||
|
||||
/// @since hsa v1.1.
|
||||
virtual hsa_status_t IterateAgentSymbols(
|
||||
hsa_agent_t agent,
|
||||
hsa_status_t (*callback)(hsa_executable_t exec,
|
||||
hsa_agent_t agent,
|
||||
hsa_executable_symbol_t symbol,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
/// @since hsa v1.1.
|
||||
virtual hsa_status_t IterateProgramSymbols(
|
||||
hsa_status_t (*callback)(hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
virtual hsa_status_t IterateLoadedCodeObjects(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
virtual size_t GetNumSegmentDescriptors() = 0;
|
||||
|
||||
virtual size_t QuerySegmentDescriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t total_num_segment_descriptors,
|
||||
size_t first_empty_segment_descriptor) = 0;
|
||||
|
||||
virtual uint64_t FindHostAddress(uint64_t device_address) = 0;
|
||||
|
||||
virtual void Print(std::ostream& out) = 0;
|
||||
virtual bool PrintToFile(const std::string& filename) = 0;
|
||||
|
||||
protected:
|
||||
Executable() {}
|
||||
|
||||
private:
|
||||
Executable(const Executable &e);
|
||||
Executable& operator=(const Executable &e);
|
||||
|
||||
static std::vector<Executable*> executables;
|
||||
static std::mutex executables_mutex;
|
||||
};
|
||||
|
||||
/// @class Loader
|
||||
class Loader {
|
||||
public:
|
||||
/// @brief Destructor.
|
||||
virtual ~Loader() {}
|
||||
|
||||
/// @brief Creates AMD HSA Loader with specified @p context.
|
||||
///
|
||||
/// @param[in] context Context. Must not be null.
|
||||
///
|
||||
/// @returns AMD HSA Loader on success, null on failure.
|
||||
static Loader* Create(Context* context);
|
||||
|
||||
/// @brief Destroys AMD HSA Loader @p Loader_object.
|
||||
///
|
||||
/// @param[in] loader AMD HSA Loader to destroy. Must not be null.
|
||||
static void Destroy(Loader *loader);
|
||||
|
||||
/// @returns Context associated with Loader.
|
||||
virtual Context* GetContext() const = 0;
|
||||
|
||||
/// @brief Creates empty AMD HSA Executable with specified @p profile,
|
||||
/// @p options
|
||||
virtual Executable* CreateExecutable(
|
||||
hsa_profile_t profile,
|
||||
const char *options,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) = 0;
|
||||
|
||||
|
||||
/// @brief Freezes @p executable
|
||||
virtual hsa_status_t FreezeExecutable(Executable *executable, const char *options) = 0;
|
||||
|
||||
/// @brief Destroys @p executable
|
||||
virtual void DestroyExecutable(Executable *executable) = 0;
|
||||
|
||||
/// @brief Invokes @p callback for each created executable
|
||||
virtual hsa_status_t IterateExecutables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data) = 0;
|
||||
|
||||
/// @brief same as hsa_ven_amd_loader_query_segment_descriptors.
|
||||
virtual hsa_status_t QuerySegmentDescriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors) = 0;
|
||||
|
||||
/// @brief Finds the handle of executable to which @p device_address
|
||||
/// belongs. Return NULL handle if device address is invalid.
|
||||
virtual hsa_executable_t FindExecutable(uint64_t device_address) = 0;
|
||||
|
||||
/// @brief Returns host address given @p device_address. If @p device_address
|
||||
/// is already host address, returns null pointer. If @p device_address is
|
||||
/// invalid address, returns null pointer.
|
||||
virtual uint64_t FindHostAddress(uint64_t device_address) = 0;
|
||||
|
||||
/// @brief Print loader help.
|
||||
virtual void PrintHelp(std::ostream& out) = 0;
|
||||
|
||||
protected:
|
||||
/// @brief Default constructor.
|
||||
Loader() {}
|
||||
|
||||
private:
|
||||
/// @brief Copy constructor - not available.
|
||||
Loader(const Loader&);
|
||||
|
||||
/// @brief Assignment operator - not available.
|
||||
Loader& operator=(const Loader&);
|
||||
};
|
||||
|
||||
|
||||
} // namespace loader
|
||||
} // namespace hsa
|
||||
} // namespace amd
|
||||
|
||||
#endif // AMD_HSA_LOADER_HPP
|
||||
@@ -0,0 +1,347 @@
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (c) 2011 - 2015 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
/// @file amd_hsa_program.hpp
|
||||
/// @author AMD HSA Finalizer Team
|
||||
///
|
||||
/// @brief Public AMD HSA Program Interfaces.
|
||||
#ifndef AMD_HSA_PROGRAM_HPP
|
||||
#define AMD_HSA_PROGRAM_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "amd_hsa_code.hpp"
|
||||
#include "Brig.h"
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
|
||||
/// @brief Descriptive version of AMD HSA Program.
|
||||
#define AMD_HSA_PROGRAM_VERSION "AMD HSA Program v1.04 (August 3, 2015)"
|
||||
#define AMD_HSA_PROGRAM_VERSION_MAJOR 1
|
||||
#define AMD_HSA_PROGRAM_VERSION_MINOR 4
|
||||
|
||||
/// @brief Environment variable. If set, overrides options parameter from
|
||||
/// Program::Create with contents of this environment variable.
|
||||
#define ENVVAR_AMD_HSA_PROGRAM_CREATE_OPTIONS "AMD_HSA_PROGRAM_CREATE_OPTIONS"
|
||||
|
||||
/// @brief Environment variable. If set, concatenates options parameter from
|
||||
/// Program::Create with contents of this environment variable.
|
||||
#define ENVVAR_AMD_HSA_PROGRAM_CREATE_OPTIONS_APPEND "AMD_HSA_PROGRAM_CREATE_OPTIONS_APPEND"
|
||||
|
||||
/// @brief Environment variable. If set, overrides options parameter from
|
||||
/// Program::Finalize with contents of this environment variable.
|
||||
#define ENVVAR_AMD_HSA_PROGRAM_FINALIZE_OPTIONS "AMD_HSA_PROGRAM_FINALIZE_OPTIONS"
|
||||
|
||||
/// @brief Environment variable. If set, concatenates options parameter from
|
||||
/// Program::Finalize with contents of this environment variable.
|
||||
#define ENVVAR_AMD_HSA_PROGRAM_FINALIZE_OPTIONS_APPEND "AMD_HSA_PROGRAM_FINALIZE_OPTIONS_APPEND"
|
||||
|
||||
/// @brief AMD HSA Program attributes (in addition to hsa_ext_program_info_t,
|
||||
/// which is defined in HSA Runtime Specification), enumeration values below
|
||||
/// must be negative.
|
||||
typedef int32_t amd_hsa_program_info32_t;
|
||||
enum amd_hsa_program_info_t {
|
||||
/// @brief Major version of BRIG specified when AMD HSA Program was created.
|
||||
/// The type of this attribute is BrigVersion32_t.
|
||||
AMD_HSA_PROGRAM_INFO_BRIG_VERSION_MAJOR = -1,
|
||||
/// @brief Minor version of BRIG specified when AMD HSA Program was created.
|
||||
/// The type of this attribute is BrigVersion32_t.
|
||||
AMD_HSA_PROGRAM_INFO_BRIG_VERSION_MINOR = -2,
|
||||
/// @brief Indicates whether or not AMD HSA Program was created with debugging
|
||||
/// enabled. The type of this attribute is bool.
|
||||
AMD_HSA_PROGRAM_INFO_IS_DEBUGGING_ENABLED = -3
|
||||
};
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace program {
|
||||
|
||||
/// @class Context
|
||||
class Context {
|
||||
public:
|
||||
/// @brief Default destructor.
|
||||
virtual ~Context() {}
|
||||
|
||||
/// @brief Invoked when AMD HSA Program needs to allocate @p size bytes of
|
||||
/// code object memory whose alignment is specified by @p align.
|
||||
///
|
||||
/// @param[in] size Requested allocation size in bytes.
|
||||
/// @param[in] align Requested alignment.
|
||||
///
|
||||
/// @returns Pointer to allocated code object memory on success, null pointer
|
||||
/// on failure.
|
||||
virtual void* CodeObjectAlloc(size_t size, size_t align) = 0;
|
||||
|
||||
/// @brief Invoked when AMD HSA Program needs to copy @p size bytes from
|
||||
/// memory pointed to by @p src to code object memory pointed to by @p dst.
|
||||
///
|
||||
/// @param[in] dst Pointer to code object memory to copy to.
|
||||
/// @param[in] src Pointer to memory to copy from.
|
||||
/// @param[in] size Requested copy size in bytes.
|
||||
///
|
||||
/// @returns True on success, false on failure.
|
||||
virtual bool CodeObjectCopy(void *dst, const void *src, size_t size) = 0;
|
||||
|
||||
/// @brief Invoked when AMD HSA Program needs to deallocate @p size bytes of
|
||||
/// code object memory pointed to by @p ptr.
|
||||
///
|
||||
/// @param[in] ptr Pointer to code object memory to deallocate.
|
||||
/// @param[in] size Requested deallocation size in bytes.
|
||||
virtual void CodeObjectFree(void *ptr, size_t size) = 0;
|
||||
|
||||
/// @brief Invoked when AMD HSA Finalizer and Program needs to reprot message or error
|
||||
///
|
||||
/// @param[in] str Message to report.
|
||||
virtual void ReportMessage(const std::string& str) = 0;
|
||||
|
||||
protected:
|
||||
/// @brief Default constructor.
|
||||
Context() {}
|
||||
|
||||
private:
|
||||
/// @brief Copy constructor - not available.
|
||||
Context(const Context&);
|
||||
|
||||
/// @brief Assignment operator - not available.
|
||||
Context& operator=(const Context&);
|
||||
};
|
||||
|
||||
class Finalizer;
|
||||
|
||||
/// @class Program
|
||||
class Program: public amd::hsa::common::Signed<0x71BB0A093D69DA92> {
|
||||
public:
|
||||
/// @brief Constant BRIG Module iterator.
|
||||
typedef std::vector<hsa_ext_module_t>::const_iterator const_module_iterator;
|
||||
|
||||
/// @brief BRIG Module iterator.
|
||||
typedef std::vector<hsa_ext_module_t>::iterator module_iterator;
|
||||
|
||||
/// @brief Invalid HSA Program Handle.
|
||||
static const uint64_t INVALID_HANDLE = 0;
|
||||
|
||||
/// @brief Destructor.
|
||||
virtual ~Program() {}
|
||||
|
||||
/// @brief Converts AMD HSA Program @p program_object to HSA Program Handle.
|
||||
///
|
||||
/// @param[in] program_object AMD HSA Program to convert. Can be null.
|
||||
///
|
||||
/// @returns HSA Program Handle on success, invalid handle on failure.
|
||||
static hsa_ext_program_t Handle(Program *program_object);
|
||||
|
||||
/// @brief Converts HSA Program Handle @p program_handle to AMD HSA Program.
|
||||
///
|
||||
/// @param[in] program_handle HSA Program Handle to convert. Can be invalid.
|
||||
///
|
||||
/// @returns AMD HSA Program on success, null on failure.
|
||||
static Program* Object(hsa_ext_program_t program_handle);
|
||||
|
||||
/// @returns Constant iterator to first BRIG Module in AMD HSA Program. If
|
||||
/// AMD HSA Program does not contain any BRIG Modules, returned constant
|
||||
/// iterator will be equal to Program::module_end().
|
||||
virtual const_module_iterator module_begin() const = 0;
|
||||
|
||||
/// @returns Constant iterator to entity following last BRIG Module in AMD
|
||||
/// HSA Program.
|
||||
virtual const_module_iterator module_end() const = 0;
|
||||
|
||||
/// @returns Iterator to first BRIG Module in AMD HSA Program. If
|
||||
/// AMD HSA Program does not contain any BRIG Modules, returned
|
||||
/// iterator will be equal to Program::module_end().
|
||||
virtual module_iterator module_begin() = 0;
|
||||
|
||||
/// @returns Iterator to entity following last BRIG Module in AMD
|
||||
/// HSA Program.
|
||||
virtual module_iterator module_end() = 0;
|
||||
|
||||
/// @returns Context associated with AMD HSA Program.
|
||||
virtual Context* GetContext() const = 0;
|
||||
|
||||
/// @returns Finalizer associated with this AMD HSA Program.
|
||||
virtual Finalizer* GetFinalizer() const = 0;
|
||||
|
||||
/// @brief Retrieves current value of specified AMD HSA Program's
|
||||
/// @p attribute.
|
||||
///
|
||||
/// @param[in] attribute AMD HSA Program's attribute to retrieve. Can be
|
||||
/// invalid.
|
||||
/// @param[out] value Pointer to client-allocated memory to store attribute's
|
||||
/// value in. Must not be null. If client-allocated memory is not large enough
|
||||
/// to hold attribute's value, behaviour is undefined.
|
||||
///
|
||||
/// @retval HSA_STATUS_SUCCESS Function executed successfully.
|
||||
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT Specified @p attribute is
|
||||
/// invalid AMD HSA Program's attribute.
|
||||
///
|
||||
/// @note If function failed to execute successfully, details of failure
|
||||
/// can be retrieved using Program::GetLog.
|
||||
virtual hsa_status_t GetInfo(amd_hsa_program_info32_t attribute, void *value) const = 0;
|
||||
|
||||
/// @brief Adds specified BRIG Module @p module to AMD HSA Program.
|
||||
///
|
||||
/// @details AMD HSA Program does not perform deep copy of BRIG Module
|
||||
/// upon addition, it stores pointer to BRIG Module. BRIG Module is owned by
|
||||
/// the client, which has to ensure that the lifetime of BRIG Module is
|
||||
/// greater than the lifetime of AMD HSA Program.
|
||||
///
|
||||
/// @param[in] module BRIG Module to add. Must not be null.
|
||||
///
|
||||
/// @retval HSA_STATUS_SUCCESS Function executed successfully.
|
||||
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES Function failed to allocate
|
||||
/// resources.
|
||||
/// @retval HSA_EXT_STATUS_ERROR_INVALID_MODULE Specified @p module is invalid
|
||||
/// BRIG module.
|
||||
/// @retval HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED Specified @p module
|
||||
/// is already included in AMD HSA Program.
|
||||
/// @retval HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE Specified @p module is
|
||||
/// incompatible with AMD HSA Program.
|
||||
/// @retval HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol in specified @p module
|
||||
/// is incompatible with symbol in AMD HSA Program.
|
||||
///
|
||||
/// @note If function failed to execute successfully, details of failure
|
||||
/// can be retrieved using Program::GetLog.
|
||||
virtual hsa_status_t AddModule(hsa_ext_module_t module) = 0;
|
||||
|
||||
/// @brief Finalizes AMD HSA Program with specified @p target,
|
||||
/// @p call_convention, @p options, @p control_directives, and
|
||||
/// @p code_object_type.
|
||||
///
|
||||
/// @details Finalizes all kernels and indirect functions that belong to
|
||||
/// AMD HSA Program for specified @p target, @p call_convention,
|
||||
/// @p options, @p control_directives, and @p code_object_type. Transitive
|
||||
/// closure of all functions specified by call or scall must be defined.
|
||||
/// All kernels and indirect functions that belong to AMD HSA Program must
|
||||
/// be defined. Kernels and indirect functions that are referenced in kernels
|
||||
/// and indirect functions that belong to AMD HSA Program may or may not be
|
||||
/// defined, but must be declared. All global and readonly variables that
|
||||
/// belong to AMD HSA Program, or referenced in kernels and indirect functions
|
||||
/// that belong to AMD HSA Program may or may not be defined, but must be
|
||||
/// declared.
|
||||
///
|
||||
/// @param[in] target Target to finalize for. Must not be null.
|
||||
/// @param[in] call_convention Call convention to finalize for. Must be valid.
|
||||
/// @param[in] options Options to finalize for. Can be null.
|
||||
/// @param[in] control_directives Control directives to finalize for. Can be
|
||||
/// invalid.
|
||||
/// @param[in] code_object_type Code object type to create. Must be valid.
|
||||
/// @param[out] code_object Code object generated by AMD HSA Program. Must
|
||||
/// not be null.
|
||||
///
|
||||
/// @retval HSA_STATUS_SUCCESS Function executed successfully.
|
||||
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES Function failed to allocate
|
||||
/// resources.
|
||||
/// @retval HSA_STATUS_ERROR_INVALID_ISA Specified @p target is invalid.
|
||||
/// @retval HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH Specified
|
||||
/// @p control_directives does not match control directives in
|
||||
/// one of kernels or indirect functions that belong to AMD HSA Program.
|
||||
/// @retval HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED AMD HSA Program failed
|
||||
/// to finalize.
|
||||
///
|
||||
/// @note If function failed to execute successfully, details of failure
|
||||
/// can be retrieved using Program::GetLog.
|
||||
///
|
||||
/// @deprecated @p control_directives will be included in @p options starting
|
||||
/// AMD HSA Program v2.0.
|
||||
virtual hsa_status_t Finalize(
|
||||
const char *target,
|
||||
int32_t call_convention,
|
||||
const char *options,
|
||||
hsa_ext_control_directives_t control_directives,
|
||||
hsa_code_object_type_t code_object_type,
|
||||
hsa_code_object_t *code_object) = 0;
|
||||
|
||||
protected:
|
||||
/// @brief Default constructor.
|
||||
Program() {}
|
||||
|
||||
private:
|
||||
/// @brief Copy constructor - not available.
|
||||
Program(const Program&);
|
||||
|
||||
/// @brief Assignment operator - not available.
|
||||
Program& operator=(const Program&);
|
||||
};
|
||||
|
||||
|
||||
/// @class Finalizer
|
||||
class Finalizer {
|
||||
public:
|
||||
/// @brief Destructor.
|
||||
virtual ~Finalizer() {}
|
||||
|
||||
/// @brief Creates AMD HSA Finalizer with specified @p context.
|
||||
///
|
||||
/// @param[in] context Context. Must not be null.
|
||||
///
|
||||
/// @returns AMD HSA Finalizer on success, null on failure.
|
||||
static Finalizer* CreateFinalizer(Context* context);
|
||||
|
||||
/// @brief Destroys AMD HSA Finalizer @p finalizer_object.
|
||||
///
|
||||
/// @param[in] finalizer_object AMD HSA Finalizer to destroy. Must not be null.
|
||||
static void DestroyFinalizer(Finalizer *finalizer_object);
|
||||
|
||||
/// @brief Creates empty AMD HSA Program with specified @p profile,
|
||||
/// @p machine_model, @p rounding_mode, @p options, @p context, @p major and
|
||||
/// @p minor BRIG versions.
|
||||
///
|
||||
/// @param[in] profile HSA profile. Must be valid.
|
||||
/// @param[in] machine_model HSA machine model. Must be valid.
|
||||
/// @param[in] rounding_mode HSA rounding mode. Must be valid.
|
||||
/// @param[in] options User options. Can be null.
|
||||
/// @param[in] brig_major Major BRIG version. Must be valid.
|
||||
/// @param[in] brig_minor Minor BRIG version. Must be valid.
|
||||
///
|
||||
/// @returns AMD HSA Program on success, null on failure.
|
||||
virtual Program* CreateProgram(
|
||||
hsa_profile_t profile,
|
||||
hsa_machine_model_t machine_model,
|
||||
hsa_default_float_rounding_mode_t rounding_mode,
|
||||
const char *options,
|
||||
BrigVersion32_t brig_major = BRIG_VERSION_BRIG_MAJOR,
|
||||
BrigVersion32_t brig_minor = BRIG_VERSION_BRIG_MINOR) = 0;
|
||||
|
||||
/// @brief Destroys AMD HSA Program @p program_object.
|
||||
///
|
||||
/// @param[in] program_object AMD HSA Program to destroy. Must not be null.
|
||||
virtual void DestroyProgram(Program *program_object) = 0;
|
||||
|
||||
/// @brief Prints available finalizer options as error and exits.
|
||||
virtual void PrintFinalizerOptions() const = 0;
|
||||
|
||||
/// @returns Context associated with Finalizer.
|
||||
virtual Context* GetContext() const = 0;
|
||||
|
||||
/// @brief Enables code cache optimization.
|
||||
virtual void EnableCodeCache() = 0;
|
||||
|
||||
/// @brief Disables code cache optimization.
|
||||
virtual void DisableCodeCache() = 0;
|
||||
|
||||
/// @returns True if code cache is enabled, false otherwise.
|
||||
virtual bool IsCodeCacheEnabled() const = 0;
|
||||
|
||||
/// @returns List of names for supported targets.
|
||||
virtual const std::vector<std::string>& GetSupportedTargets() const = 0;
|
||||
|
||||
protected:
|
||||
/// @brief Default constructor.
|
||||
Finalizer() {}
|
||||
|
||||
private:
|
||||
/// @brief Copy constructor - not available.
|
||||
Finalizer(const Finalizer&);
|
||||
|
||||
/// @brief Assignment operator - not available.
|
||||
Finalizer& operator=(const Finalizer&);
|
||||
};
|
||||
|
||||
} // namespace program
|
||||
} // namespace hsa
|
||||
} // namespace amd
|
||||
|
||||
#endif // AMD_HSA_PROGRAM_HPP
|
||||
@@ -0,0 +1,87 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_QUEUE_H
|
||||
#define AMD_HSA_QUEUE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Queue Properties.
|
||||
typedef uint32_t amd_queue_properties32_t;
|
||||
enum amd_queue_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
|
||||
};
|
||||
|
||||
// AMD Queue.
|
||||
#define AMD_QUEUE_ALIGN_BYTES 64
|
||||
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
|
||||
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
|
||||
hsa_queue_t hsa_queue;
|
||||
uint32_t reserved1[4];
|
||||
volatile uint64_t write_dispatch_id;
|
||||
uint32_t group_segment_aperture_base_hi;
|
||||
uint32_t private_segment_aperture_base_hi;
|
||||
uint32_t max_cu_id;
|
||||
uint32_t max_wave_id;
|
||||
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
|
||||
volatile uint32_t legacy_doorbell_lock;
|
||||
uint32_t reserved2[9];
|
||||
volatile uint64_t read_dispatch_id;
|
||||
uint32_t read_dispatch_id_field_base_byte_offset;
|
||||
uint32_t compute_tmpring_size;
|
||||
uint32_t scratch_resource_descriptor[4];
|
||||
uint64_t scratch_backing_memory_location;
|
||||
uint64_t scratch_backing_memory_byte_size;
|
||||
uint32_t scratch_workitem_byte_size;
|
||||
amd_queue_properties32_t queue_properties;
|
||||
uint32_t reserved3[2];
|
||||
hsa_signal_t queue_inactive_signal;
|
||||
uint32_t reserved4[14];
|
||||
} amd_queue_t;
|
||||
|
||||
#endif // AMD_HSA_QUEUE_H
|
||||
@@ -0,0 +1,80 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_SIGNAL_H
|
||||
#define AMD_HSA_SIGNAL_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "amd_hsa_queue.h"
|
||||
|
||||
// AMD Signal Kind Enumeration Values.
|
||||
typedef int64_t amd_signal_kind64_t;
|
||||
enum amd_signal_kind_t {
|
||||
AMD_SIGNAL_KIND_INVALID = 0,
|
||||
AMD_SIGNAL_KIND_USER = 1,
|
||||
AMD_SIGNAL_KIND_DOORBELL = -1,
|
||||
AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
|
||||
};
|
||||
|
||||
// AMD Signal.
|
||||
#define AMD_SIGNAL_ALIGN_BYTES 64
|
||||
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
|
||||
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
|
||||
amd_signal_kind64_t kind;
|
||||
union {
|
||||
volatile int64_t value;
|
||||
volatile uint32_t* legacy_hardware_doorbell_ptr;
|
||||
volatile uint64_t* hardware_doorbell_ptr;
|
||||
};
|
||||
uint64_t event_mailbox_ptr;
|
||||
uint32_t event_id;
|
||||
uint32_t reserved1;
|
||||
uint64_t start_ts;
|
||||
uint64_t end_ts;
|
||||
union {
|
||||
amd_queue_t* queue_ptr;
|
||||
uint64_t reserved2;
|
||||
};
|
||||
uint32_t reserved3[2];
|
||||
} amd_signal_t;
|
||||
|
||||
#endif // AMD_HSA_SIGNAL_H
|
||||
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
@@ -0,0 +1,531 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#undef HSA_API
|
||||
#ifdef HSA_EXPORT_FINALIZER
|
||||
#define HSA_API HSA_API_EXPORT
|
||||
#else
|
||||
#define HSA_API HSA_API_IMPORT
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
struct BrigModuleHeader;
|
||||
typedef struct BrigModuleHeader* BrigModule_t;
|
||||
|
||||
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Enumeration constants added to ::hsa_status_t by this extension.
|
||||
*/
|
||||
enum {
|
||||
/**
|
||||
* The HSAIL program is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
|
||||
/**
|
||||
* The HSAIL module is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
|
||||
/**
|
||||
* Machine model or profile of the HSAIL module do not match the machine model
|
||||
* or profile of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
|
||||
/**
|
||||
* The HSAIL module is already a part of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
|
||||
/**
|
||||
* Compatibility mismatch between symbol declaration and symbol definition.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
|
||||
/**
|
||||
* The finalization encountered an error while finalizing a kernel or
|
||||
* indirect function.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
|
||||
/**
|
||||
* Mismatch between a directive in the control directive structure and in
|
||||
* the HSAIL kernel.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup ext-alt-finalizer-program Finalization Program
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
|
||||
* the definition of the BrigModule_t type.
|
||||
*/
|
||||
typedef BrigModule_t hsa_ext_module_t;
|
||||
|
||||
/**
|
||||
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
|
||||
* modules that collectively define functions and variables used by kernels and
|
||||
* indirect functions.
|
||||
*/
|
||||
typedef struct hsa_ext_program_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
*/
|
||||
uint64_t handle;
|
||||
} hsa_ext_program_t;
|
||||
|
||||
/**
|
||||
* @brief Create an empty HSAIL program.
|
||||
*
|
||||
* @param[in] machine_model Machine model used in the HSAIL program.
|
||||
*
|
||||
* @param[in] profile Profile used in the HSAIL program.
|
||||
*
|
||||
* @param[in] default_float_rounding_mode Default float rounding mode used in
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[out] program Memory location where the HSA runtime stores the newly
|
||||
* created HSAIL program handle.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
|
||||
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
|
||||
* @p program is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_create(
|
||||
hsa_machine_model_t machine_model,
|
||||
hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options,
|
||||
hsa_ext_program_t *program);
|
||||
|
||||
/**
|
||||
* @brief Destroy a HSAIL program.
|
||||
*
|
||||
* @details The HSAIL program handle becomes invalid after it has been
|
||||
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
|
||||
* still valid after the HSAIL program has been destroyed, and can be used as
|
||||
* intended. Resources allocated outside and associated with the HSAIL program
|
||||
* (such as HSAIL modules that are added to the HSAIL program) can be released
|
||||
* after the finalization program has been destroyed.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_destroy(
|
||||
hsa_ext_program_t program);
|
||||
|
||||
/**
|
||||
* @brief Add a HSAIL module to an existing HSAIL program.
|
||||
*
|
||||
* @details The HSA runtime does not perform a deep copy of the HSAIL module
|
||||
* upon addition. Instead, it stores a pointer to the HSAIL module. The
|
||||
* ownership of the HSAIL module belongs to the application, which must ensure
|
||||
* that @p module is not released before destroying the HSAIL program.
|
||||
*
|
||||
* The HSAIL module is successfully added to the HSAIL program if @p module is
|
||||
* valid, if all the declarations and definitions for the same symbol are
|
||||
* compatible, and if @p module specify machine model and profile that matches
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] module HSAIL module. The application can add the same HSAIL module
|
||||
* to @p program at most once. The HSAIL module must specify the same machine
|
||||
* model and profile as @p program. If the floating-mode rounding mode of @p
|
||||
* module is not default, then it should match that of @p program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
|
||||
* module does not match machine model of @p program, or the profile of @p
|
||||
* module does not match profile of @p program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
|
||||
* already a part of the HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
|
||||
* definition compatibility mismatch. See the symbol compatibility rules in the
|
||||
* HSA Programming Reference Manual.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_add_module(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
/**
|
||||
* @brief Iterate over the HSAIL modules in a program, and invoke an
|
||||
* application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per HSAIL module in the
|
||||
* program. The HSA runtime passes three arguments to the callback: the program,
|
||||
* a HSAIL module, and the application data. If @p callback returns a status
|
||||
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
|
||||
* stops and ::hsa_ext_program_iterate_modules returns that status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
/**
|
||||
* @brief HSAIL program attributes.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Machine model specified when the HSAIL program was created. The type
|
||||
* of this attribute is ::hsa_machine_model_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
|
||||
/**
|
||||
* Profile specified when the HSAIL program was created. The type of
|
||||
* this attribute is ::hsa_profile_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
|
||||
/**
|
||||
* Default float rounding mode specified when the HSAIL program was
|
||||
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
|
||||
} hsa_ext_program_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behaviour is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* HSAIL program attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_get_info(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
/**
|
||||
* @brief Finalizer-determined call convention.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Finalizer-determined call convention.
|
||||
*/
|
||||
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
|
||||
} hsa_ext_finalizer_call_convention_t;
|
||||
|
||||
/**
|
||||
* @brief Control directives specify low-level information about the
|
||||
* finalization process.
|
||||
*/
|
||||
typedef struct hsa_ext_control_directives_s {
|
||||
/**
|
||||
* Bitset indicating which control directives are enabled. The bit assigned to
|
||||
* a control directive is determined by the corresponding value in
|
||||
* BrigControlDirective.
|
||||
*
|
||||
* If a control directive is disabled, its corresponding field value (if any)
|
||||
* must be 0. Control directives that are only present or absent (such as
|
||||
* partial workgroups) have no corresponding field as the presence of the bit
|
||||
* in this mask is sufficient.
|
||||
*/
|
||||
uint64_t control_directives_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
|
||||
* assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t break_exceptions_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
|
||||
* bit assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t detect_exceptions_mask;
|
||||
/**
|
||||
* Maximum size (in bytes) of dynamic group memory that will be allocated by
|
||||
* the application for any dispatch of the kernel. If the kernel contains a
|
||||
* maxdynamicsize control directive, the two values should match.
|
||||
*/
|
||||
uint32_t max_dynamic_group_size;
|
||||
/**
|
||||
* Maximum number of grid work-items that will be used by the application to
|
||||
* launch the kernel. If the kernel contains a maxflatgridsize control
|
||||
* directive, the value of @a max_flat_grid_size must not be greater than the
|
||||
* value of the directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_grid_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint64_t max_flat_grid_size;
|
||||
/**
|
||||
* Maximum number of work-group work-items that will be used by the
|
||||
* application to launch the kernel. If the kernel contains a
|
||||
* maxflatworkgroupsize control directive, the value of @a
|
||||
* max_flat_workgroup_size must not be greater than the value of the
|
||||
* directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_workgroup_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint32_t max_flat_workgroup_size;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint32_t reserved1;
|
||||
/**
|
||||
* Grid size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredgridsize control directive, the
|
||||
* dimensions should match.
|
||||
*
|
||||
* The specified grid size must be consistent with @a required_workgroup_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
|
||||
* only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
uint64_t required_grid_size[3];
|
||||
/**
|
||||
* Work-group size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredworkgroupsize control directive,
|
||||
* the dimensions should match.
|
||||
*
|
||||
* The specified work-group size must be consistent with @a required_grid_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
|
||||
* hold only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
hsa_dim3_t required_workgroup_size;
|
||||
/**
|
||||
* Number of dimensions that will be used by the application to launch the
|
||||
* kernel. If the kernel contains a requireddim control directive, the two
|
||||
* values should match.
|
||||
*
|
||||
* The specified dimensions must be consistent with @a required_grid_size and
|
||||
* @a required_workgroup_size. This invariant must hold only if all the
|
||||
* corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
|
||||
* control_directives_mask, this field must be 1, 2, or 3.
|
||||
*/
|
||||
uint8_t required_dim;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint8_t reserved2[75];
|
||||
} hsa_ext_control_directives_t;
|
||||
|
||||
/**
|
||||
* @brief Finalize an HSAIL program for a given instruction set architecture.
|
||||
*
|
||||
* @details Finalize all of the kernels and indirect functions that belong to
|
||||
* the same HSAIL program for a specific instruction set architecture (ISA). The
|
||||
* transitive closure of all functions specified by call or scall must be
|
||||
* defined. Kernels and indirect functions that are being finalized must be
|
||||
* defined. Kernels and indirect functions that are referenced in kernels and
|
||||
* indirect functions being finalized may or may not be defined, but must be
|
||||
* declared. All the global/readonly segment variables that are referenced in
|
||||
* kernels and indirect functions being finalized may or may not be defined, but
|
||||
* must be declared.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] isa Instruction set architecture to finalize for.
|
||||
*
|
||||
* @param[in] call_convention A call convention used in a finalization. Must
|
||||
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
|
||||
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
|
||||
* isa (not inclusive).
|
||||
*
|
||||
* @param[in] control_directives Low-level control directives that influence
|
||||
* the finalization process.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[in] code_object_type Type of code object to produce.
|
||||
*
|
||||
* @param[out] code_object Code object generated by the Finalizer, which
|
||||
* contains the machine code for the kernels and indirect functions in the HSAIL
|
||||
* program. The code object is independent of the HSAIL module that was used to
|
||||
* generate it.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
|
||||
* the control directive structure and in the HSAIL kernel mismatch, or if the
|
||||
* same directive is used with a different value in one of the functions used by
|
||||
* this kernel.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
|
||||
* encountered an error while compiling a kernel or an indirect function.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_finalize(
|
||||
hsa_ext_program_t program,
|
||||
hsa_isa_t isa,
|
||||
int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives,
|
||||
const char *options,
|
||||
hsa_code_object_type_t code_object_type,
|
||||
hsa_code_object_t *code_object);
|
||||
|
||||
/** @} */
|
||||
|
||||
#define hsa_ext_finalizer_1_00
|
||||
|
||||
typedef struct hsa_ext_finalizer_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ext_program_create)(
|
||||
hsa_machine_model_t machine_model, hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options, hsa_ext_program_t *program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_iterate_modules)(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module, void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_get_info)(
|
||||
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_finalize)(
|
||||
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives, const char *options,
|
||||
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
|
||||
} hsa_ext_finalizer_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
A különbségek nem kerülnek megjelenítésre, mivel a fájl túl nagy
Load Diff
@@ -0,0 +1,667 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA AMD extension for additional loader functionality.
|
||||
|
||||
#ifndef HSA_VEN_AMD_LOADER_H
|
||||
#define HSA_VEN_AMD_LOADER_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/**
|
||||
* @brief Queries equivalent host address for given @p device_address, and
|
||||
* records it in @p host_address.
|
||||
*
|
||||
*
|
||||
* @details Contents of memory pointed to by @p host_address would be identical
|
||||
* to contents of memory pointed to by @p device_address. Only difference
|
||||
* between the two is host accessibility: @p host_address is always accessible
|
||||
* from host, @p device_address might not be accessible from host.
|
||||
*
|
||||
* If @p device_address already points to host accessible memory, then the value
|
||||
* of @p device_address is simply copied into @p host_address.
|
||||
*
|
||||
* The lifetime of @p host_address is the same as the lifetime of @p
|
||||
* device_address, and both lifetimes are limited by the lifetime of the
|
||||
* executable that is managing these addresses.
|
||||
*
|
||||
*
|
||||
* @param[in] device_address Device address to query equivalent host address
|
||||
* for.
|
||||
*
|
||||
* @param[out] host_address Pointer to application-allocated buffer to record
|
||||
* queried equivalent host address in.
|
||||
*
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
|
||||
* null, or @p host_address is null.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_host_address(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
/**
|
||||
* @brief The storage type of the code object that is backing loaded memory
|
||||
* segment.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Loaded memory segment is not backed by any code object (anonymous), as the
|
||||
* case would be with BSS (uninitialized data).
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
|
||||
/**
|
||||
* Loaded memory segment is backed by the code object that is stored in the
|
||||
* file.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
|
||||
/**
|
||||
* Loaded memory segment is backed by the code object that is stored in the
|
||||
* memory.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
|
||||
} hsa_ven_amd_loader_code_object_storage_type_t;
|
||||
|
||||
/**
|
||||
* @brief Loaded memory segment descriptor.
|
||||
*
|
||||
*
|
||||
* @details Loaded memory segment descriptor describes underlying loaded memory
|
||||
* segment. Loaded memory segment is created/allocated by the executable during
|
||||
* the loading of the code object that is backing underlying memory segment.
|
||||
*
|
||||
* The lifetime of underlying memory segment is limited by the lifetime of the
|
||||
* executable that is managing underlying memory segment.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
|
||||
/**
|
||||
* Agent underlying memory segment is allocated on. If the code object that is
|
||||
* backing underlying memory segment is program code object, then 0.
|
||||
*/
|
||||
hsa_agent_t agent;
|
||||
/**
|
||||
* Executable that is managing this underlying memory segment.
|
||||
*/
|
||||
hsa_executable_t executable;
|
||||
/**
|
||||
* Storage type of the code object that is backing underlying memory segment.
|
||||
*/
|
||||
hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
|
||||
* filepath to the code object;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
|
||||
* accessible pointer to the first byte of the code object.
|
||||
*/
|
||||
const void *code_object_storage_base;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
|
||||
* the filepath to the code object (including null-terminating character);
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
|
||||
* bytes, of the memory occupied by the code object.
|
||||
*/
|
||||
size_t code_object_storage_size;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
||||
* - other, then offset, in bytes, from the beginning of the code object to
|
||||
* the first byte in the code object data is copied from.
|
||||
*/
|
||||
size_t code_object_storage_offset;
|
||||
/**
|
||||
* Starting address of the underlying memory segment.
|
||||
*/
|
||||
const void *segment_base;
|
||||
/**
|
||||
* Size, in bytes, of the underlying memory segment.
|
||||
*/
|
||||
size_t segment_size;
|
||||
} hsa_ven_amd_loader_segment_descriptor_t;
|
||||
|
||||
/**
|
||||
* @brief Either queries loaded memory segment descriptors, or total number of
|
||||
* loaded memory segment descriptors.
|
||||
*
|
||||
*
|
||||
* @details If @p segment_descriptors is not null and @p num_segment_descriptors
|
||||
* points to number that exactly matches total number of loaded memory segment
|
||||
* descriptors, then queries loaded memory segment descriptors, and records them
|
||||
* in @p segment_descriptors. If @p segment_descriptors is null and @p
|
||||
* num_segment_descriptors points to zero, then queries total number of loaded
|
||||
* memory segment descriptors, and records it in @p num_segment_descriptors. In
|
||||
* all other cases returns appropriate error code (see below).
|
||||
*
|
||||
* The caller of this function is responsible for the allocation/deallocation
|
||||
* and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
|
||||
*
|
||||
* The lifetime of loaded memory segments that are described by queried loaded
|
||||
* memory segment descriptors is limited by the lifetime of the executable that
|
||||
* is managing loaded memory segments.
|
||||
*
|
||||
* Queried loaded memory segment descriptors are always self-consistent: they
|
||||
* describe a complete set of loaded memory segments that are being backed by
|
||||
* fully loaded code objects that are present at the time (i.e. this function
|
||||
* is blocked until all executable manipulations are fully complete).
|
||||
*
|
||||
*
|
||||
* @param[out] segment_descriptors Pointer to application-allocated buffer to
|
||||
* record queried loaded memory segment descriptors in. Can be null if @p
|
||||
* num_segment_descriptors points to zero.
|
||||
*
|
||||
* @param[in,out] num_segment_descriptors Pointer to application-allocated
|
||||
* buffer that contains either total number of loaded memory segment descriptors
|
||||
* or zero.
|
||||
*
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
|
||||
* while @p num_segment_descriptors points to non-zero number, @p
|
||||
* segment_descriptors is not null while @p num_segment_descriptors points to
|
||||
* zero, or @p num_segment_descriptors is null.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
|
||||
* does not point to number that exactly matches total number of loaded memory
|
||||
* segment descriptors.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
/**
|
||||
* @brief Obtains the handle of executable to which the device address belongs.
|
||||
*
|
||||
* @details This method should not be used to obtain executable handle by using
|
||||
* a host address. The executable returned is expected to be alive until its
|
||||
* destroyed by the user.
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
|
||||
* is no exectuable found for this kernel code object.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_executable(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Iterate over the loaded code objects in an executable, and invoke
|
||||
* an application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] executable Executable.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per loaded code object. The
|
||||
* HSA runtime passes three arguments to the callback: the executable, a
|
||||
* loaded code object, and the application data. If @p callback returns a
|
||||
* status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
|
||||
* traversal stops and
|
||||
* ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
|
||||
* status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
/**
|
||||
* @brief Loaded code object kind.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Program code object.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
|
||||
/**
|
||||
* Agent code object.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
|
||||
} hsa_ven_amd_loader_loaded_code_object_kind_t;
|
||||
|
||||
/**
|
||||
* @brief Loaded code object attributes.
|
||||
*/
|
||||
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
|
||||
/**
|
||||
* The executable in which this loaded code object is loaded. The
|
||||
* type of this attribute is ::hsa_executable_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
|
||||
/**
|
||||
* The kind of this loaded code object. The type of this attribute is
|
||||
* ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
|
||||
/**
|
||||
* The agent on which this loaded code object is loaded. The
|
||||
* value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
|
||||
* attribute is ::hsa_agent_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
|
||||
/**
|
||||
* The storage type of the code object reader used to load the loaded code object.
|
||||
* The type of this attribute is ::uint32_t interpreted as a
|
||||
* ::hsa_ven_amd_loader_code_object_storage_type_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
|
||||
/**
|
||||
* The memory address of the first byte of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
||||
* attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
|
||||
/**
|
||||
* The memory size in bytes of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
||||
* attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
|
||||
/**
|
||||
* The file descriptor of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
|
||||
* attribute is ::int.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
|
||||
/**
|
||||
* The signed byte address difference of the memory address at which the code
|
||||
* object is loaded minus the virtual address specified in the code object
|
||||
* that is loaded. The value of this attribute is only defined if the
|
||||
* executable in which the code object is loaded is froozen. The type of this
|
||||
* attribute is ::int64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
|
||||
/**
|
||||
* The base memory address at which the code object is loaded. This is the
|
||||
* base address of the allocation for the lowest addressed segment of the code
|
||||
* object that is loaded. Note that any non-loaded segments before the first
|
||||
* loaded segment are ignored. The value of this attribute is only defined if
|
||||
* the executable in which the code object is loaded is froozen. The type of
|
||||
* this attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
|
||||
/**
|
||||
* The byte size of the loaded code objects contiguous memory allocation. The
|
||||
* value of this attribute is only defined if the executable in which the code
|
||||
* object is loaded is froozen. The type of this attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
|
||||
/**
|
||||
* The length of the URI in bytes, not including the NUL terminator. The type
|
||||
* of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
|
||||
/**
|
||||
* The URI name from which the code object was loaded. The type of this
|
||||
* attribute is a NUL terminated \p char* with the length equal to the value
|
||||
* of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
|
||||
* The URI name syntax is defined by the following BNF syntax:
|
||||
*
|
||||
* code_object_uri ::== file_uri | memory_uri
|
||||
* file_uri ::== "file://" file_path [ range_specifier ]
|
||||
* memory_uri ::== "memory://" process_id range_specifier
|
||||
* range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
|
||||
* file_path ::== URI_ENCODED_OS_FILE_PATH
|
||||
* process_id ::== DECIMAL_NUMBER
|
||||
* number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
|
||||
*
|
||||
* ``number`` is a C integral literal where hexadecimal values are prefixed by
|
||||
* "0x" or "0X", and octal values by "0".
|
||||
*
|
||||
* ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
|
||||
* In URI encoding, every character that is not in the regular expression
|
||||
* ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
|
||||
* proceeded by "%". Directories in the path are separated by "/".
|
||||
*
|
||||
* ``offset`` is a 0-based byte offset to the start of the code object. For a
|
||||
* file URI, it is from the start of the file specified by the ``file_path``,
|
||||
* and if omitted defaults to 0. For a memory URI, it is the memory address
|
||||
* and is required.
|
||||
*
|
||||
* ``size`` is the number of bytes in the code object. For a file URI, if
|
||||
* omitted it defaults to the size of the file. It is required for a memory
|
||||
* URI.
|
||||
*
|
||||
* ``process_id`` is the identity of the process owning the memory. For Linux
|
||||
* it is the C unsigned integral decimal literal for the process ID (PID).
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* file:///dir1/dir2/file1
|
||||
* file:///dir3/dir4/file2#offset=0x2000&size=3000
|
||||
* memory://1234#offset=0x20000&size=3000
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
|
||||
} hsa_ven_amd_loader_loaded_code_object_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given loaded code
|
||||
* object.
|
||||
*
|
||||
* @param[in] loaded_code_object Loaded code object.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behavior is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* loaded code object attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Create a code object reader to operate on a file with size and offset.
|
||||
*
|
||||
* @param[in] file File descriptor. The file must have been opened by
|
||||
* application with at least read permissions prior calling this function. The
|
||||
* file must contain a vendor-specific code object.
|
||||
*
|
||||
* The file is owned and managed by the application; the lifetime of the file
|
||||
* descriptor must exceed that of any associated code object reader.
|
||||
*
|
||||
* @param[in] size Size of the code object embedded in @p file.
|
||||
*
|
||||
* @param[in] offset 0-based offset relative to the beginning of the @p file
|
||||
* that denotes the beginning of the code object embedded within the @p file.
|
||||
*
|
||||
* @param[out] code_object_reader Memory location to store the newly created
|
||||
* code object reader handle. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
|
||||
* read permissions. This condition may also be reported as
|
||||
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
|
||||
* ::hsa_executable_load_agent_code_object function.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
|
||||
* do not form a valid code object. If file size is 0. Or offset > file size.
|
||||
* This condition may also be reported as
|
||||
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
|
||||
* ::hsa_executable_load_agent_code_object function.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
|
||||
* allocate the required resources.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
|
||||
*/
|
||||
hsa_status_t
|
||||
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Iterate over the available executables, and invoke an
|
||||
* application-defined callback on every iteration. While
|
||||
* ::hsa_ven_amd_loader_iterate_executables is executing any calls to
|
||||
* ::hsa_executable_create, ::hsa_executable_create_alt, or
|
||||
* ::hsa_executable_destroy will be blocked.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per executable. The HSA
|
||||
* runtime passes two arguments to the callback: the executable and the
|
||||
* application data. If @p callback returns a status other than
|
||||
* ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
|
||||
* ::hsa_ven_amd_loader_iterate_executables returns that status value. If
|
||||
* @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
|
||||
* ::hsa_executable_destroy then the behavior is undefined.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t
|
||||
hsa_ven_amd_loader_iterate_executables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Extension version.
|
||||
*/
|
||||
#define hsa_ven_amd_loader 001003
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.00.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
} hsa_ven_amd_loader_1_00_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.01.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
} hsa_ven_amd_loader_1_01_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.02.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
} hsa_ven_amd_loader_1_02_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.03.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_iterate_executables)(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
} hsa_ven_amd_loader_1_03_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* HSA_VEN_AMD_LOADER_H */
|
||||
@@ -0,0 +1,30 @@
|
||||
/*****************************************************************************\
|
||||
*
|
||||
* *** NEED TO REPLACE THIS WITH OPEN SOURCE HEADER ***
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1999-2004 ATI Technologies Inc. (unpublished)
|
||||
*
|
||||
*
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef _AMDID_H
|
||||
#define _AMDID_H
|
||||
|
||||
#define FAMILY_UNKNOWN 0
|
||||
|
||||
#define FAMILY_SI 110 // Southern Islands: Tahiti, Pitcairn, Cape Verde
|
||||
|
||||
#define FAMILY_TN 105 // Fusion Trinity: Devastator, Scrapper
|
||||
|
||||
#define FAMILY_CI 120 // Sea Islands: Hawaii, Bonaire
|
||||
|
||||
#define FAMILY_KV 125 // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi
|
||||
|
||||
#define FAMILY_VI 130 // Volcanic Islands: Iceland, Tonga
|
||||
|
||||
#define FAMILY_CZ 135 // Carrizo
|
||||
|
||||
#define AMD_VENDOR_ID 0x1002 // used for GPUs -- AMD also has 1022 for CPU HW
|
||||
|
||||
#endif // _AMDID_H
|
||||
@@ -0,0 +1,46 @@
|
||||
/*****************************************************************************\
|
||||
*
|
||||
* *** NEED TO REPLACE THIS WITH OPEN SOURCE HEADER ***
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
*
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef _CZ_ID_H
|
||||
#define _CZ_ID_H
|
||||
|
||||
enum {
|
||||
CARRIZO_A1 = 0x02,
|
||||
CARRIZO_BRISTOL_A0 = 0x10,
|
||||
STONEY_A0 = 0x61,
|
||||
CZ_UNKNOWN = 0xFF
|
||||
};
|
||||
|
||||
|
||||
// for the original CARRIZO + other CARRIZO kickers
|
||||
#define ASICREV_IS_CARRIZO(eChipRev) (eChipRev == CARRIZO_A1)
|
||||
|
||||
//
|
||||
// Carrizo device IDs
|
||||
//
|
||||
#define DEVICE_ID_CZ_9874 0x9874
|
||||
|
||||
// CARRIZO ASIC internal revision number
|
||||
#define INTERNAL_REV_CARRIZO_A1 0x01
|
||||
|
||||
// CARRIZO PCI Revision IDs
|
||||
#define PRID_CZ_C4 0xC4 // Client B10
|
||||
#define PRID_CZ_C5 0xC5 // Client B8
|
||||
#define PRID_CZ_C6 0xC6 // Client B6
|
||||
#define PRID_CZ_C7 0xC7 // Client B4
|
||||
|
||||
// eCARRIZO PCI Revision IDs
|
||||
#define PRID_eCZ_84 0x84 // eCZ B10
|
||||
#define PRID_eCZ_81 0x81 // eCZ B8/iTemp
|
||||
#define PRID_eCZ_85 0x85 // eCZ B8
|
||||
#define PRID_eCZ_87 0x87 // eCZ B4
|
||||
|
||||
#endif // _CZ_ID_H
|
||||
|
||||
Reference in New Issue
Block a user