diff --git a/rocrtst/Kernels/CMakeLists.txt b/rocrtst/Kernels/CMakeLists.txt index c65f5d79bc..5f8772fc27 100644 --- a/rocrtst/Kernels/CMakeLists.txt +++ b/rocrtst/Kernels/CMakeLists.txt @@ -117,11 +117,11 @@ function(CompileKernel KRNL_NAME TARGET_DEV) # asm files # string(CONCAT CODE_ARG_STR "-Xclang -finclude-default-header " - "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEV} -mno-code-object-v3 " + "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEV} " "${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} " "${PROJECT_SOURCE_DIR}/${CL_FILE} -o ${KERNEL_DIR}/${CODEOBJ_FILE}") string(CONCAT ASM_ARG_STR "-S -Xclang -finclude-default-header " - "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEV} -mno-code-object-v3 " + "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEV} " "${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} " "${PROJECT_SOURCE_DIR}/${CL_FILE} -o ${KERNEL_DIR}/${CODEASM_FILE}") set(ASM_ARG_LIST ${ASM_ARG_STR}) diff --git a/rocrtst/common/base_rocr_utils.cc b/rocrtst/common/base_rocr_utils.cc index f97e2f8644..848fca253c 100755 --- a/rocrtst/common/base_rocr_utils.cc +++ b/rocrtst/common/base_rocr_utils.cc @@ -314,7 +314,7 @@ hsa_status_t LoadKernelFromObjFile(BaseRocR* test, hsa_agent_t* agent) { RET_IF_HSA_UTILS_ERR(err); hsa_executable_symbol_t kern_sym; - err = hsa_executable_get_symbol(executable, NULL, kern_name.c_str(), *agent, + err = hsa_executable_get_symbol(executable, NULL, (kern_name + ".kd").c_str(), *agent, 0, &kern_sym); RET_IF_HSA_UTILS_ERR(err); diff --git a/rocrtst/samples/CMakeLists.txt b/rocrtst/samples/CMakeLists.txt index 48d23c6595..dad10c327b 100755 --- a/rocrtst/samples/CMakeLists.txt +++ b/rocrtst/samples/CMakeLists.txt @@ -213,7 +213,7 @@ function(process_sample S_NAME TARG_DEV HAS_KERNEL) if (${HAS_KERNEL}) # Build the kernel separate_arguments(CLANG_ARG_LIST UNIX_COMMAND - "-x cl -target amdgcn-amd-amdhsa -Xclang -finclude-default-header -mcpu=${TARG_DEV} -mno-code-object-v3 ${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o 
${KERNEL_DIR}/${SNAME_KERNEL}") + "-x cl -target amdgcn-amd-amdhsa -Xclang -finclude-default-header -mcpu=${TARG_DEV} ${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${KERNEL_DIR}/${SNAME_KERNEL}") add_custom_target("${TARG_NAME}" ${CLANG} ${CLANG_ARG_LIST} COMMAND ${CMAKE_COMMAND} -E create_symlink "../${SNAME_EXE}" "${KERNEL_DIR}/${SNAME_EXE}" diff --git a/rocrtst/suites/test_common/CMakeLists.txt b/rocrtst/suites/test_common/CMakeLists.txt index 9e136ad2b0..175c767c78 100755 --- a/rocrtst/suites/test_common/CMakeLists.txt +++ b/rocrtst/suites/test_common/CMakeLists.txt @@ -41,7 +41,7 @@ set(PROJECT_NAME "rocrtst64") project (${PROJECT_NAME}) -set(DEFAULT_TARGETS "gfx803;gfx701;gfx801;gfx802;gfx900;gfx902;gfx906;gfx908;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031") +set(DEFAULT_TARGETS "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx810;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031") # # Currently support for Windows platform is not present @@ -266,7 +266,7 @@ function(build_kernel S_NAME TARG_DEV) set(HSACO_TARG_LIST ${HSACO_TARG_LIST} ${TARG_NAME} CACHE INTERNAL HSA_TARG_LIST) separate_arguments(CLANG_ARG_LIST UNIX_COMMAND - "-x cl -target amdgcn-amd-amdhsa -include ${OPENCL_INC_DIR}opencl-c.h -mcpu=${TARG_DEV} -mno-code-object-v3 ${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${KERNEL_DIR}/${SNAME_KERNEL}") + "-x cl -target amdgcn-amd-amdhsa -include ${OPENCL_INC_DIR}opencl-c.h -mcpu=${TARG_DEV} ${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${KERNEL_DIR}/${SNAME_KERNEL}") add_custom_target("${TARG_NAME}" ${CLANG} ${CLANG_ARG_LIST} COMMAND ${CMAKE_COMMAND} -E create_symlink "../${ROCRTST}" "${KERNEL_DIR}/${ROCRTST}" diff --git a/runtime/hsa-runtime/README.md b/runtime/hsa-runtime/README.md index 3402a7dcac..5d15fc806c 100644 --- a/runtime/hsa-runtime/README.md +++ b/runtime/hsa-runtime/README.md @@ -3,7 +3,7 @@ This directory contains the ROC Runtime source code based on the HSA Runtime but modified to 
support AMD/ATI discrete GPUs. -#### Source & Include directories +#### Source & Include Directories core - Contains the source code for AMD's implementation of the core HSA Runtime API's. @@ -17,9 +17,10 @@ loader - Used to load code objects. utils - Utilities required to build the core runtime. -#### Build environment +#### Build Environment -CMake build framework is used to build the ROC runtime. The minimum version is 3.5.0. +CMake build framework is used to build the ROC runtime. The minimum version is +3.5.0. Obtain cmake infrastructure: http://www.cmake.org/download/ @@ -27,34 +28,34 @@ Export cmake bin into your PATH #### Package Dependencies -The following support packages are requried to succesfully build the runtime: +The following support packages are required to successfully build the runtime: * libelf-dev * g++ * libc6-dev-i386 (for libhsakmt 32bit) -#### Building the runtime +#### Building the Runtime To build the runtime a compatible version of the libhsakmt library and the hsakmt.h header file must be available. The latest version of these files can be obtained from the ROCT-Thunk-Interface repository, available here: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface - + As of ROCm release 3.7 libhsakmt development packages now include a cmake -package config file. The runtime will now locate libhsakmt via find_package if -libhsakmt is installed to a standard location. For installations that do not +package config file. The runtime will now locate libhsakmt via find_package if +libhsakmt is installed to a standard location. For installations that do not use ROCm standard paths set cmake variables CMAKE_PREFIX_PATH or hsakmt_DIR to override find_package search paths. As of ROCm release 3.7 the runtime includes an optional image support module -(previously hsa-ext-rocr-dev). By default this module is included in builds of -the runtime. The image module may be excluded the runtime by setting +(previously hsa-ext-rocr-dev). 
By default this module is included in builds of +the runtime. The image module may be excluded from the runtime by setting cmake variable IMAGE_SUPPORT to OFF. When building the optional image module additional build dependencies are -required. An amdgcn compatible clang and device library must be installed -to build the image module. The latest version of these requirements can be +required. An amdgcn compatible clang and device library must be installed +to build the image module. The latest version of these requirements can be obtained from the ROCm package repository (see: https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) The latest source for these projects may be found here: @@ -65,8 +66,8 @@ https://github.com/RadeonOpenCompute/ROCm-Device-Libs Additionally xxd must be installed. -The runtime optionally supports use of the cmake user package registry. By -default the registry is not modified. Set cmake variable +The runtime optionally supports use of the cmake user package registry. By +default the registry is not modified. Set cmake variable EXPORT_TO_USER_PACKAGE_REGISTRY to ON to enable updating the package registry. For example, to build, install, and produce packages on a system with standard @@ -81,14 +82,12 @@ ROCm packages installed, execute the following from src/: Example with a custom installation path, build dependency path, and options: - ... cmake -DIMAGE_SUPPORT=OFF \ -DEXPORT_TO_USER_PACKAGE_REGISTRY=ON \ -DCMAKE_VERBOSE_MAKEFILE=1 \ -DCMAKE_PREFIX_PATH= \ -DCMAKE_INSTALL_PATH= \ .. - ... Alternately ccmake and cmake-gui are supported: @@ -101,11 +100,11 @@ Alternately ccmake and cmake-gui are supported: press g to generate and exit make -#### Building against the runtime +#### Building Against the Runtime The runtime provides a cmake package config file, installed by default to -/opt/rocm/lib/cmake/hsa-runtime64. The runtime exports cmake target -hsa-runtime64 in namespace hsa-runtime64. 
A cmake project (Foo) using the +/opt/rocm/lib/cmake/hsa-runtime64. The runtime exports cmake target +hsa-runtime64 in namespace hsa-runtime64. A cmake project (Foo) using the runtime may locate, include, and link the runtime with the following template: Add /opt/rocm to CMAKE_PREFIX_PATH. @@ -126,86 +125,107 @@ HSA Programmer Reference Manual Specification 1.1 HSA Platform System Architecture Specification 1.1 -#### Runtime Design overview +#### Runtime Design Overview The AMD ROC runtime consists of three primary layers: -C interface adaptors -C++ interfaces classes and common functions -AMD device specific implementations -Additionally the runtime is dependent on a small utility library which provides simple common functions, limited operating system and compiler abstraction, as well as atomic operation interfaces. +* C interface adaptors +* C++ interface classes and common functions +* AMD device specific implementations -#### C interface adaptors +Additionally the runtime is dependent on a small utility library which provides +simple common functions, limited operating system and compiler abstraction, as +well as atomic operation interfaces. -Files : +#### C Interface Adaptors -hsa.h(cpp) +Files: -hsa_ext_interface.h(cpp) +* hsa.h(cpp) +* hsa_ext_interface.h(cpp) -The C interface layer provides C99 APIs as defined in the HSA Runtime Specification 1.1. The interfaces and default definitions for the standard extensions are also provided. The interface functions simply forward to a function pointer table defined here. The table is initialized to point to default definitions, which simply return an appropriate error code. If available the extension library is loaded as part of runtime initialization and the table is updated to point into the extension library. In this release the standard extensions (image support and finalizer) are implemented in a separate libraries (not open sourced), and can be obtained from the HSA-Runtime-AMD git repository. 
+The C interface layer provides C99 APIs as defined in the HSA Runtime +Specification 1.1. The interfaces and default definitions for the standard +extensions are also provided. The interface functions simply forward to a +function pointer table defined here. The table is initialized to point to +default definitions, which simply return an appropriate error code. If +available the extension library is loaded as part of runtime initialization and +the table is updated to point into the extension library. In this release the +standard extensions (image support and finalizer) are implemented in separate +libraries (not open sourced), and can be obtained from the HSA-Runtime-AMD git +repository. -#### C++ interfaces classes and common functions +#### C++ Interface Classes & Common Functions -Files : +Files: -runtime.h(cpp) +* runtime.h(cpp) +* agent.h +* queue.h +* signal.h +* memory_region.h(cpp) +* checked.h +* memory_database.h(cpp) +* default_signal.h(cpp) -agent.h - -queue.h - -signal.h - -memory_region.h(cpp) - -checked.h - -memory_database.h(cpp) - -default_signal.h(cpp) - -The C++ interface layer provides abstract interface classes encapsulating commands to HSA Signals, Agents, and Queues. This layer also contains the implementation of device independent commands, such as hsa_init and hsa_system_get_info, and a default signal and queue implementation. +The C++ interface layer provides abstract interface classes encapsulating +commands to HSA Signals, Agents, and Queues. This layer also contains the +implementation of device independent commands, such as hsa_init and +hsa_system_get_info, and a default signal and queue implementation. 
#### Device Specific Implementations Files: -amd_cpu_agent.h(cpp) +* amd_cpu_agent.h(cpp) +* amd_gpu_agent.h(cpp) +* amd_hw_aql_command_processor.h(cpp) +* amd_memory_region.h(cpp) +* amd_memory_registration.h(cpp) +* amd_topology.h(cpp) +* host_queue.h(cpp) +* interrupt_signal.h(cpp) +* hsa_ext_private_amd.h(cpp) -amd_gpu_agent.h(cpp) +The device specific layer contains implementations of the C++ interface classes +which implement HSA functionality for ROCm supported devices. -amd_hw_aql_command_processor.h(cpp) - -amd_memory_region.h(cpp) - -amd_memory_registration.h(cpp) - -amd_topology.h(cpp) - -host_queue.h(cpp) - -interrupt_signal.h(cpp) - -hsa_ext_private_amd.h(cpp) - -The device specific layer contains implementations of the C++ interface classes which implement HSA functionality for ROCm supported devices. - -#### Implemented functionality +#### Implemented Functionality * The following queries are not implemented: - ** hsa_code_symbol_get_info: HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION - ** hsa_executable_symbol_get_info: HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT, HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION + + * hsa_code_symbol_get_info: + * HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION + * hsa_executable_symbol_get_info: + * HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT + * HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION #### Known Issues * hsa_agent_get_exception_policies is not implemented. -* hsa_system_get_extension_table is not implemented for HSA_EXTENSION_AMD_PROFILER. +* hsa_system_get_extension_table is not implemented for + HSA_EXTENSION_AMD_PROFILER. #### Disclaimer -The information contained herein is for informational purposes only, and is subject to change without notice. 
While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale. +The information contained herein is for informational purposes only, and is +subject to change without notice. While every precaution has been taken in the +preparation of this document, it may contain technical inaccuracies, omissions +and typographical errors, and AMD is under no obligation to update or otherwise +correct this information. Advanced Micro Devices, Inc. makes no representations +or warranties with respect to the accuracy or completeness of the contents of +this document, and assumes no liability of any kind, including the implied +warranties of noninfringement, merchantability or fitness for particular +purposes, with respect to the operation or use of AMD hardware, software or +other products described herein. No license, including implied or arising by +estoppel, to any intellectual property rights is granted by this document. +Terms and limitations applicable to the purchase or use of AMD's products are +as set forth in a signed agreement between the parties or in AMD's Standard +Terms and Conditions of Sale. 
-AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies. +AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced +Micro Devices, Inc. Other product names used in this publication are for +identification purposes only and may be trademarks of their respective +companies. Copyright (c) 2014-2020 Advanced Micro Devices, Inc. All rights reserved. diff --git a/runtime/hsa-runtime/core/inc/amd_elf_image.hpp b/runtime/hsa-runtime/core/inc/amd_elf_image.hpp index 593cae13cb..0da61f5cf8 100644 --- a/runtime/hsa-runtime/core/inc/amd_elf_image.hpp +++ b/runtime/hsa-runtime/core/inc/amd_elf_image.hpp @@ -171,6 +171,8 @@ namespace elf { virtual uint16_t Type() = 0; virtual uint32_t EFlags() = 0; virtual uint32_t ABIVersion() = 0; + virtual uint32_t EClass() = 0; + virtual uint32_t OsAbi() = 0; std::string output() { return out.str(); } diff --git a/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp b/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp index 0cda7490f3..724100826a 100644 --- a/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp +++ b/runtime/hsa-runtime/core/inc/amd_hsa_code.hpp @@ -230,6 +230,8 @@ namespace code { amd::elf::SymbolTable* Symtab() { assert(img); return img->symtab(); } uint16_t Machine() const { return img->Machine(); } uint32_t EFlags() const { return img->EFlags(); } + uint32_t EClass() const { return img->EClass(); } + uint32_t OsAbi() const { return img->OsAbi(); } AmdHsaCode(bool combineDataSegments = true); virtual ~AmdHsaCode(); diff --git a/runtime/hsa-runtime/core/inc/isa.h b/runtime/hsa-runtime/core/inc/isa.h index 9386c24f65..818216c496 100644 --- a/runtime/hsa-runtime/core/inc/isa.h +++ b/runtime/hsa-runtime/core/inc/isa.h @@ -82,6 +82,13 @@ private: friend class Isa; }; +enum class IsaFeature : uint8_t { + Unsupported, + Any, + Disabled, + Enabled, +}; + 
/// @class Isa. /// @brief Instruction Set Architecture. class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> { @@ -103,23 +110,36 @@ class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> { return isa_object; } + /// @returns True if @p code_object_isa and @p agent_isa are compatible, + /// false otherwise. + static bool IsCompatible(const Isa &code_object_isa, const Isa &agent_isa); + /// @returns This Isa's version. const Version &version() const { return version_; } - /// @returns True if this Isa has xnack enabled, false otherwise. - const bool &xnackEnabled() const { - return xnackEnabled_; + /// @returns SRAM ECC feature status. + const IsaFeature &sramecc() const { + return sramecc_; } - /// @returns True if this Isa has sram ecc enabled, false otherwise. - const bool &sramEccEnabled() const { - return sramEcc_; + /// @returns XNACK feature status. + const IsaFeature &xnack() const { + return xnack_; } /// @returns This Isa's supported wavefront. const Wavefront &wavefront() const { return wavefront_; } + /// @returns True if SRAMECC feature is supported, false otherwise. + bool IsSrameccSupported() const { + return sramecc_ != IsaFeature::Unsupported; + } + /// @returns True if XNACK feature is supported, false otherwise. + bool IsXnackSupported() const { + return xnack_ != IsaFeature::Unsupported; + } + /// @returns This Isa's architecture. std::string GetArchitecture() const { return "amdgcn"; @@ -153,19 +173,6 @@ class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> { return &wavefront_; } - /// @returns True if this Isa is compatible with @p isa_object, false - /// otherwise. - bool IsCompatible(const Isa *isa_object) const { - assert(isa_object); - return version_ == isa_object->version_ && - xnackEnabled_ == isa_object->xnackEnabled_; - } - /// @returns True if this Isa is compatible with @p isa_handle, false - /// otherwise. 
- bool IsCompatible(const hsa_isa_t &isa_handle) const { - assert(isa_handle.handle); - return IsCompatible(Object(isa_handle)); - } /// @brief Isa is always in valid state. bool IsValid() const { return true; @@ -186,22 +193,33 @@ class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> { private: /// @brief Default constructor. - Isa(): version_(Version(-1, -1, -1)), xnackEnabled_(false), sramEcc_(false) {} + Isa() + : version_(Version(-1, -1, -1)), + sramecc_(IsaFeature::Unsupported), + xnack_(IsaFeature::Unsupported) {} /// @brief Construct from @p version. - Isa(const Version &version): version_(version), xnackEnabled_(false), sramEcc_(false) {} + Isa(const Version &version) + : version_(version), + sramecc_(IsaFeature::Unsupported), + xnack_(IsaFeature::Unsupported) {} /// @brief Construct from @p version. - Isa(const Version &version, const bool xnack, const bool ecc): version_(version), xnackEnabled_(xnack), sramEcc_(ecc) {} + Isa(const Version &version, + IsaFeature sramecc, + IsaFeature xnack) + : version_(version), + sramecc_(sramecc), + xnack_(xnack) {} /// @brief Isa's version. Version version_; - /// @brief Isa's supported xnack flag. - bool xnackEnabled_; + /// @brief SRAMECC feature. + IsaFeature sramecc_; - /// @brief Isa's sram ecc flag. - bool sramEcc_; + /// @brief XNACK feature. + IsaFeature xnack_; /// @brief Isa's supported wavefront. Wavefront wavefront_; @@ -217,7 +235,9 @@ class IsaRegistry final { /// @returns Isa for requested @p full_name, null pointer if not supported. static const Isa *GetIsa(const std::string &full_name); /// @returns Isa for requested @p version, null pointer if not supported. - static const Isa *GetIsa(const Isa::Version &version, bool xnack, bool ecc); + static const Isa *GetIsa(const Isa::Version &version, + IsaFeature sramecc = IsaFeature::Any, + IsaFeature xnack = IsaFeature::Any); private: /// @brief IsaRegistry's map type. 
diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index a41890467f..89f3d5e900 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -100,11 +100,34 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) historical_clock_ratio_ = 0.0; assert(err == HSAKMT_STATUS_SUCCESS && "hsaGetClockCounters error"); + const core::Isa *isa_base = core::IsaRegistry::GetIsa( + core::Isa::Version(node_props.EngineId.ui32.Major, + node_props.EngineId.ui32.Minor, + node_props.EngineId.ui32.Stepping)); + if (!isa_base) { + throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ISA, "Agent creation failed.\nThe GPU node has an unrecognized id.\n"); + } + + rocr::core::IsaFeature sramecc = rocr::core::IsaFeature::Unsupported; + if (isa_base->IsSrameccSupported()) { + sramecc = node_props.Capability.ui32.SRAM_EDCSupport == 1 + ? core::IsaFeature::Enabled + : core::IsaFeature::Disabled; + } + + rocr::core::IsaFeature xnack = rocr::core::IsaFeature::Unsupported; + if (isa_base->IsXnackSupported()) { + // TODO: This needs to be obtained from KFD once HMM is implemented. + xnack = profile_ == HSA_PROFILE_FULL ? core::IsaFeature::Enabled + : core::IsaFeature::Disabled; + } + // Set instruction set architecture via node property, only on GPU device. isa_ = (core::Isa*)core::IsaRegistry::GetIsa( core::Isa::Version(node_props.EngineId.ui32.Major, node_props.EngineId.ui32.Minor, - node_props.EngineId.ui32.Stepping), - profile_ == HSA_PROFILE_FULL, false);//node_props.Capability.ui32.SRAM_EDCSupport == 1); + node_props.EngineId.ui32.Stepping), sramecc, xnack); + + assert(isa_ != nullptr && "ISA registry inconsistency."); // Check if the device is Kaveri, only on GPU device. 
if (isa_->GetMajorVersion() == 7 && isa_->GetMinorVersion() == 0 && diff --git a/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp b/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp index abcca0d132..2e7418c80c 100644 --- a/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp @@ -371,32 +371,6 @@ bool RegionMemory::Freeze() { return true; } -hsa_status_t IsIsaEquivalent(hsa_isa_t isa, void *data) { - assert(data); - - std::pair *data_pair = (std::pair*)data; - assert(data_pair); - assert(data_pair->first.handle != 0); - assert(data_pair->second != true); - - const core::Isa *agent_isa = core::Isa::Object(isa); - assert(agent_isa); - const core::Isa *code_object_isa = core::Isa::Object(data_pair->first); - assert(code_object_isa); - - // SRAM ECC enabled code may run on a system without ECC - // but a system which has ECC enabled requires ECC enabled code. - if (agent_isa->sramEccEnabled() && !code_object_isa->sramEccEnabled()) - return HSA_STATUS_SUCCESS; - - if (agent_isa->version() == code_object_isa->version()) { - data_pair->second = true; - return HSA_STATUS_INFO_BREAK; - } - - return HSA_STATUS_SUCCESS; -} - } // namespace anonymous namespace amd { @@ -419,14 +393,29 @@ hsa_isa_t LoaderContext::IsaFromName(const char *name) { bool LoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa) { - assert(agent.handle != 0); + std::pair comparison_data(code_object_isa, false); + auto IsIsaEquivalent = [](hsa_isa_t agent_isa_h, void *data) { + assert(data); - std::pair data(code_object_isa, false); - hsa_status_t status = HSA::hsa_agent_iterate_isas(agent, IsIsaEquivalent, &data); + std::pair *data_pair = + reinterpret_cast(data); + assert(data_pair); + assert(data_pair->second != true); + + const core::Isa *agent_isa = core::Isa::Object(agent_isa_h); + assert(agent_isa); + const core::Isa *code_object_isa = core::Isa::Object(data_pair->first); + 
assert(code_object_isa); + + data_pair->second = core::Isa::IsCompatible(*code_object_isa, *agent_isa); + return data_pair->second ? HSA_STATUS_INFO_BREAK : HSA_STATUS_SUCCESS; + }; + + hsa_status_t status = HSA::hsa_agent_iterate_isas(agent, IsIsaEquivalent, &comparison_data); if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { return false; } - return data.second; + return comparison_data.second; } void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index e6619fec9b..b1cf8e061c 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -80,13 +80,26 @@ CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { } GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { + GpuAgent* gpu = nullptr; if (node_prop.NumFComputeCores == 0) { - return nullptr; + // Ignore non GPUs. + return nullptr; + } + try { + gpu = new GpuAgent(node_id, node_prop); + } catch (const hsa_exception& e) { + if(e.error_code() == HSA_STATUS_ERROR_INVALID_ISA) { + ifdebug { + if (!strIsEmpty(e.what())) debug_print("Warning: %s\n", e.what()); + } + // Ignore unrecognized GPUs. + return nullptr; + } else { + // Rethrow remaining exceptions. + throw; + } } - - GpuAgent* gpu = new GpuAgent(node_id, node_prop); core::Runtime::runtime_singleton_->RegisterAgent(gpu); - return gpu; } @@ -180,8 +193,8 @@ static void SurfaceGpuList(std::vector& gpu_list) { // Instantiate a Gpu device. 
The IO links // of this node have already been registered - const GpuAgent* gpu = DiscoverGpu(gpu_list[idx], node_prop); - assert((node_prop.NumFComputeCores != 0) && (gpu != nullptr) && "GPU device failed discovery."); + assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery."); + DiscoverGpu(gpu_list[idx], node_prop); } } diff --git a/runtime/hsa-runtime/core/runtime/hsa.cpp b/runtime/hsa-runtime/core/runtime/hsa.cpp index c7cccd797b..89e85636ed 100644 --- a/runtime/hsa-runtime/core/runtime/hsa.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa.cpp @@ -1681,7 +1681,7 @@ hsa_status_t hsa_isa_compatible( const Isa *agent_isa_object = Isa::Object(agent_isa); IS_VALID(agent_isa_object); - *result = code_object_isa_object->IsCompatible(agent_isa_object); + *result = Isa::IsCompatible(*code_object_isa_object, *agent_isa_object); return HSA_STATUS_SUCCESS; CATCH; } @@ -1843,6 +1843,7 @@ hsa_status_t hsa_code_object_destroy( static std::string ConvertOldTargetNameToNew( const std::string &OldName, bool IsFinalizer, uint32_t EFlags) { std::string NewName = ""; + bool xnack_supported = false; // FIXME #1: Should 9:0:3 be completely (loader, sc, etc.) removed? // FIXME #2: What does PAL do with respect to boltzmann/usual fiji/tonga? 
@@ -1858,60 +1859,78 @@ static std::string ConvertOldTargetNameToNew( NewName = "amdgcn-amd-amdhsa--gfx704"; else if (OldName == "AMD:AMDGPU:8:0:0") NewName = "amdgcn-amd-amdhsa--gfx800"; - else if (OldName == "AMD:AMDGPU:8:0:1") + else if (OldName == "AMD:AMDGPU:8:0:1") { NewName = "amdgcn-amd-amdhsa--gfx801"; + xnack_supported = true; + } else if (OldName == "AMD:AMDGPU:8:0:2") NewName = "amdgcn-amd-amdhsa--gfx802"; else if (OldName == "AMD:AMDGPU:8:0:3") NewName = "amdgcn-amd-amdhsa--gfx803"; else if (OldName == "AMD:AMDGPU:8:0:4") NewName = "amdgcn-amd-amdhsa--gfx804"; - else if (OldName == "AMD:AMDGPU:8:1:0") + else if (OldName == "AMD:AMDGPU:8:1:0") { NewName = "amdgcn-amd-amdhsa--gfx810"; - else if (OldName == "AMD:AMDGPU:9:0:0") + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:0") { NewName = "amdgcn-amd-amdhsa--gfx900"; - else if (OldName == "AMD:AMDGPU:9:0:1") + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:1") { NewName = "amdgcn-amd-amdhsa--gfx900"; - else if (OldName == "AMD:AMDGPU:9:0:2") + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:2") { NewName = "amdgcn-amd-amdhsa--gfx902"; - else if (OldName == "AMD:AMDGPU:9:0:3") + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:3") { NewName = "amdgcn-amd-amdhsa--gfx902"; - else if (OldName == "AMD:AMDGPU:9:0:4") + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:4") { NewName = "amdgcn-amd-amdhsa--gfx904"; - else if (OldName == "AMD:AMDGPU:9:0:6") + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:5") { + NewName = "amdgcn-amd-amdhsa--gfx904"; + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:6") { NewName = "amdgcn-amd-amdhsa--gfx906"; - else if (OldName == "AMD:AMDGPU:9:0:8") - NewName = "amdgcn-amd-amdhsa--gfx908"; - else if (OldName == "AMD:AMDGPU:10:1:0") - NewName = "amdgcn-amd-amdhsa--gfx1010"; - else if (OldName == "AMD:AMDGPU:10:1:1") - NewName = 
"amdgcn-amd-amdhsa--gfx1011"; - else if (OldName == "AMD:AMDGPU:10:1:2") - NewName = "amdgcn-amd-amdhsa--gfx1012"; - else if (OldName == "AMD:AMDGPU:10:3:0") - NewName = "amdgcn-amd-amdhsa--gfx1030"; - else if (OldName == "AMD:AMDGPU:10:3:1") - NewName = "amdgcn-amd-amdhsa--gfx1031"; - else - assert(false && "Unhandled target"); + xnack_supported = true; + } + else if (OldName == "AMD:AMDGPU:9:0:7") { + NewName = "amdgcn-amd-amdhsa--gfx906"; + xnack_supported = true; + } + else { + // Code object v2 only supports asics up to gfx906. Do NOT add handling + // of new asics into this if-else-if* block. + return ""; + } - if (IsFinalizer && (EFlags & EF_AMDGPU_XNACK)) { - NewName = NewName + "+xnack"; + if (IsFinalizer) { + if (EFlags & ELF::EF_AMDGPU_FEATURE_XNACK_V2) + NewName = NewName + ":xnack+"; + else if (xnack_supported) + NewName = NewName + ":xnack-"; } else { - if (EFlags != 0 && (EFlags & EF_AMDGPU_XNACK_LC)) { - NewName = NewName + "+xnack"; - } else { - if (OldName == "AMD:AMDGPU:8:0:1") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:8:1:0") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:9:0:1") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:9:0:2") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:9:0:3") - NewName = NewName + "+xnack"; - } + if (OldName == "AMD:AMDGPU:8:0:1") + NewName = NewName + ":xnack+"; + else if (OldName == "AMD:AMDGPU:8:1:0") + NewName = NewName + ":xnack+"; + else if (OldName == "AMD:AMDGPU:9:0:1") + NewName = NewName + ":xnack+"; + else if (OldName == "AMD:AMDGPU:9:0:3") + NewName = NewName + ":xnack+"; + else if (OldName == "AMD:AMDGPU:9:0:5") + NewName = NewName + ":xnack+"; + else if (OldName == "AMD:AMDGPU:9:0:7") + NewName = NewName + ":xnack+"; + else if (xnack_supported) + NewName = NewName + ":xnack-"; } return NewName; diff --git a/runtime/hsa-runtime/core/runtime/isa.cpp b/runtime/hsa-runtime/core/runtime/isa.cpp index ca23abb01b..e348ac89e8 
100755 --- a/runtime/hsa-runtime/core/runtime/isa.cpp +++ b/runtime/hsa-runtime/core/runtime/isa.cpp @@ -67,17 +67,54 @@ bool Wavefront::GetInfo( } } +/* static */ +bool Isa::IsCompatible(const Isa &code_object_isa, + const Isa &agent_isa) { + if (code_object_isa.version() != agent_isa.version()) + return false; + + assert(code_object_isa.IsSrameccSupported() == agent_isa.IsSrameccSupported() && agent_isa.sramecc() != IsaFeature::Any); + if ((code_object_isa.sramecc() == IsaFeature::Enabled || + code_object_isa.sramecc() == IsaFeature::Disabled) && + code_object_isa.sramecc() != agent_isa.sramecc()) + return false; + + assert(code_object_isa.IsXnackSupported() == agent_isa.IsXnackSupported() && agent_isa.xnack() != IsaFeature::Any); + if ((code_object_isa.xnack() == IsaFeature::Enabled || + code_object_isa.xnack() == IsaFeature::Disabled) && + code_object_isa.xnack() != agent_isa.xnack()) + return false; + + return true; +} + std::string Isa::GetFullName() const { std::stringstream full_name; full_name << GetArchitecture() << "-" << GetVendor() << "-" << GetOS() << "-" << GetEnvironment() << "-gfx" << GetMajorVersion() << GetMinorVersion() << GetStepping(); - if (xnackEnabled_) - full_name << "+xnack"; + switch (sramecc_) { + case IsaFeature::Disabled: + full_name << ":sramecc-"; + break; + case IsaFeature::Enabled: + full_name << ":sramecc+"; + break; + default: + break; + } - if (sramEcc_) - full_name << "+sram-ecc"; + switch (xnack_) { + case IsaFeature::Disabled: + full_name << ":xnack-"; + break; + case IsaFeature::Enabled: + full_name << ":xnack+"; + break; + default: + break; + } return full_name.str(); } @@ -185,8 +222,8 @@ const Isa *IsaRegistry::GetIsa(const std::string &full_name) { return isareg_iter == supported_isas_.end() ? 
nullptr : &isareg_iter->second; } -const Isa *IsaRegistry::GetIsa(const Isa::Version &version, bool xnack, bool ecc) { - auto isareg_iter = supported_isas_.find(Isa(version, xnack, ecc).GetFullName()); +const Isa *IsaRegistry::GetIsa(const Isa::Version &version, IsaFeature sramecc, IsaFeature xnack) { + auto isareg_iter = supported_isas_.find(Isa(version, sramecc, xnack).GetFullName()); return isareg_iter == supported_isas_.end() ? nullptr : &isareg_iter->second; } @@ -194,52 +231,72 @@ const IsaRegistry::IsaMap IsaRegistry::supported_isas_ = IsaRegistry::GetSupportedIsas(); const IsaRegistry::IsaMap IsaRegistry::GetSupportedIsas() { -#define ISAREG_ENTRY_GEN(maj, min, stp, xnack, ecc) \ - Isa amd_amdgpu_##maj##min##stp##xnack##ecc; \ - amd_amdgpu_##maj##min##stp##xnack##ecc.version_ = Isa::Version(maj, min, stp); \ - amd_amdgpu_##maj##min##stp##xnack##ecc.xnackEnabled_ = xnack; \ - amd_amdgpu_##maj##min##stp##xnack##ecc.sramEcc_ = ecc; \ - supported_isas.insert(std::make_pair( \ - amd_amdgpu_##maj##min##stp##xnack##ecc.GetFullName(), \ - amd_amdgpu_##maj##min##stp##xnack##ecc)); \ +#define ISAREG_ENTRY_GEN(maj, min, stp, sramecc, xnack) \ + Isa amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack; \ + amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack.version_ = Isa::Version(maj, min, stp); \ + amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack.sramecc_ = sramecc; \ + amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack.xnack_ = xnack; \ + supported_isas.insert(std::make_pair( \ + amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack.GetFullName(), \ + amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack)); \ IsaMap supported_isas; + IsaFeature unsupported = IsaFeature::Unsupported; + IsaFeature any = IsaFeature::Any; + IsaFeature disabled = IsaFeature::Disabled; + IsaFeature enabled = IsaFeature::Enabled; - ISAREG_ENTRY_GEN(7, 0, 0, false, false) - ISAREG_ENTRY_GEN(7, 0, 1, false, false) - 
ISAREG_ENTRY_GEN(7, 0, 2, false, false) - ISAREG_ENTRY_GEN(8, 0, 1, false, false) - ISAREG_ENTRY_GEN(8, 0, 1, true, false) - ISAREG_ENTRY_GEN(8, 0, 2, false, false) - ISAREG_ENTRY_GEN(8, 0, 2, true, false) - ISAREG_ENTRY_GEN(8, 0, 3, false, false) - ISAREG_ENTRY_GEN(8, 0, 3, true, false) - ISAREG_ENTRY_GEN(8, 1, 0, false, false) - ISAREG_ENTRY_GEN(8, 1, 0, true, false) - ISAREG_ENTRY_GEN(9, 0, 0, false, false) - ISAREG_ENTRY_GEN(9, 0, 0, true, false) - ISAREG_ENTRY_GEN(9, 0, 2, false, false) - ISAREG_ENTRY_GEN(9, 0, 2, true, false) - ISAREG_ENTRY_GEN(9, 0, 4, false, false) - ISAREG_ENTRY_GEN(9, 0, 4, true, false) - ISAREG_ENTRY_GEN(9, 0, 6, false, false) - ISAREG_ENTRY_GEN(9, 0, 6, true, false) - ISAREG_ENTRY_GEN(9, 0, 6, false, true ) - ISAREG_ENTRY_GEN(9, 0, 6, true, true ) - ISAREG_ENTRY_GEN(9, 0, 8, false, false) - ISAREG_ENTRY_GEN(9, 0, 8, true, false) - ISAREG_ENTRY_GEN(9, 0, 8, false, true ) - ISAREG_ENTRY_GEN(9, 0, 8, true, true ) - ISAREG_ENTRY_GEN(10, 1, 0, false, false) - ISAREG_ENTRY_GEN(10, 1, 0, true, false) - ISAREG_ENTRY_GEN(10, 1, 1, false, false) - ISAREG_ENTRY_GEN(10, 1, 1, true, false) - ISAREG_ENTRY_GEN(10, 1, 2, false, false) - ISAREG_ENTRY_GEN(10, 1, 2, true, false) - ISAREG_ENTRY_GEN(10, 3, 0, false, false) - ISAREG_ENTRY_GEN(10, 3, 0, true, false) - ISAREG_ENTRY_GEN(10, 3, 1, false, false) - ISAREG_ENTRY_GEN(10, 3, 1, true, false) + // Version SRAMECC XNACK + ISAREG_ENTRY_GEN(7, 0, 0, unsupported, unsupported) + ISAREG_ENTRY_GEN(7, 0, 1, unsupported, unsupported) + ISAREG_ENTRY_GEN(7, 0, 2, unsupported, unsupported) + ISAREG_ENTRY_GEN(8, 0, 1, unsupported, any) + ISAREG_ENTRY_GEN(8, 0, 1, unsupported, disabled) + ISAREG_ENTRY_GEN(8, 0, 1, unsupported, enabled) + ISAREG_ENTRY_GEN(8, 0, 2, unsupported, unsupported) + ISAREG_ENTRY_GEN(8, 0, 3, unsupported, unsupported) + ISAREG_ENTRY_GEN(8, 1, 0, unsupported, any) + ISAREG_ENTRY_GEN(8, 1, 0, unsupported, disabled) + ISAREG_ENTRY_GEN(8, 1, 0, unsupported, enabled) + ISAREG_ENTRY_GEN(9, 0, 0, 
unsupported, any) + ISAREG_ENTRY_GEN(9, 0, 0, unsupported, disabled) + ISAREG_ENTRY_GEN(9, 0, 0, unsupported, enabled) + ISAREG_ENTRY_GEN(9, 0, 2, unsupported, any) + ISAREG_ENTRY_GEN(9, 0, 2, unsupported, disabled) + ISAREG_ENTRY_GEN(9, 0, 2, unsupported, enabled) + ISAREG_ENTRY_GEN(9, 0, 4, unsupported, any) + ISAREG_ENTRY_GEN(9, 0, 4, unsupported, disabled) + ISAREG_ENTRY_GEN(9, 0, 4, unsupported, enabled) + ISAREG_ENTRY_GEN(9, 0, 6, any, any) + ISAREG_ENTRY_GEN(9, 0, 6, any, disabled) + ISAREG_ENTRY_GEN(9, 0, 6, any, enabled) + ISAREG_ENTRY_GEN(9, 0, 6, disabled, any) + ISAREG_ENTRY_GEN(9, 0, 6, enabled, any) + ISAREG_ENTRY_GEN(9, 0, 6, disabled, disabled) + ISAREG_ENTRY_GEN(9, 0, 6, disabled, enabled) + ISAREG_ENTRY_GEN(9, 0, 6, enabled, disabled) + ISAREG_ENTRY_GEN(9, 0, 6, enabled, enabled) + ISAREG_ENTRY_GEN(9, 0, 8, any, any) + ISAREG_ENTRY_GEN(9, 0, 8, any, disabled) + ISAREG_ENTRY_GEN(9, 0, 8, any, enabled) + ISAREG_ENTRY_GEN(9, 0, 8, disabled, any) + ISAREG_ENTRY_GEN(9, 0, 8, enabled, any) + ISAREG_ENTRY_GEN(9, 0, 8, disabled, disabled) + ISAREG_ENTRY_GEN(9, 0, 8, disabled, enabled) + ISAREG_ENTRY_GEN(9, 0, 8, enabled, disabled) + ISAREG_ENTRY_GEN(9, 0, 8, enabled, enabled) + ISAREG_ENTRY_GEN(10, 1, 0, unsupported, any) + ISAREG_ENTRY_GEN(10, 1, 0, unsupported, disabled) + ISAREG_ENTRY_GEN(10, 1, 0, unsupported, enabled) + ISAREG_ENTRY_GEN(10, 1, 1, unsupported, any) + ISAREG_ENTRY_GEN(10, 1, 1, unsupported, disabled) + ISAREG_ENTRY_GEN(10, 1, 1, unsupported, enabled) + ISAREG_ENTRY_GEN(10, 1, 2, unsupported, any) + ISAREG_ENTRY_GEN(10, 1, 2, unsupported, disabled) + ISAREG_ENTRY_GEN(10, 1, 2, unsupported, enabled) + ISAREG_ENTRY_GEN(10, 3, 0, unsupported, unsupported) + ISAREG_ENTRY_GEN(10, 3, 1, unsupported, unsupported) +#undef ISAREG_ENTRY_GEN return supported_isas; } diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 63b2d1660c..9ea49a9e30 100644 --- 
a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -1157,13 +1157,13 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) { fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_IMPRECISE; } if (fault.Failure.ECC == 1 && fault.Failure.ErrorType == 0) { - fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAM_ECC; + fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAMECC; } if (fault.Failure.ErrorType == 1) { - fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_SRAM_ECC; + fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_SRAMECC; } if (fault.Failure.ErrorType == 2) { - fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAM_ECC; + fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAMECC; } if (fault.Failure.ErrorType == 3) { fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_HANG; diff --git a/runtime/hsa-runtime/image/blit_kernel.cpp b/runtime/hsa-runtime/image/blit_kernel.cpp index 7b08a32ff5..63cc1ed6cb 100644 --- a/runtime/hsa-runtime/image/blit_kernel.cpp +++ b/runtime/hsa-runtime/image/blit_kernel.cpp @@ -75,6 +75,7 @@ extern uint8_t ocl_blit_object_gfx702[]; extern uint8_t ocl_blit_object_gfx801[]; extern uint8_t ocl_blit_object_gfx802[]; extern uint8_t ocl_blit_object_gfx803[]; +extern uint8_t ocl_blit_object_gfx810[]; extern uint8_t ocl_blit_object_gfx900[]; extern uint8_t ocl_blit_object_gfx902[]; extern uint8_t ocl_blit_object_gfx904[]; @@ -983,6 +984,9 @@ hsa_status_t BlitKernel::GetPatchedBlitObject(const char* agent_name, case 803: *blit_code_object = ocl_blit_object_gfx803; break; + case 810: + *blit_code_object = ocl_blit_object_gfx810; + break; case 900: *blit_code_object = ocl_blit_object_gfx900; break; diff --git a/runtime/hsa-runtime/image/blit_src/CMakeLists.txt b/runtime/hsa-runtime/image/blit_src/CMakeLists.txt index 623a7f1c4c..ecd0515893 100644 --- a/runtime/hsa-runtime/image/blit_src/CMakeLists.txt +++ b/runtime/hsa-runtime/image/blit_src/CMakeLists.txt @@ 
-67,15 +67,9 @@ if (NOT ${FOUND}) set (QUIT 1) endif() -# Define the target devices with xnack enable -if (NOT DEFINED XNACK_DEVS) - set (XNACK_DEVS "gfx801;gfx902") -endif() -set( XNACK_DEVS ${XNACK_DEVS} CACHE STRING "XNACK targets" FORCE ) - # Determine the target devices if not specified if (NOT DEFINED TARGET_DEVICES) - set (TARGET_DEVICES "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031") + set (TARGET_DEVICES "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx810;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012;gfx1030;gfx1031") endif() set( TARGET_DEVICES ${TARGET_DEVICES} CACHE STRING "Build targets" FORCE ) @@ -90,8 +84,8 @@ if(${CMAKE_VERBOSE_MAKEFILE}) get_property(clang_path TARGET clang PROPERTY LOCATION) message("Using clang from: ${clang_path}") message("Build Setting:") - message(" Target Devices: ${TARGET_DEVICES}") - message(" XNACK Devices: ${XNACK_DEVS}") + message(" Target Devices*: ${TARGET_DEVICES}") + message(" (Specify \";\" separated list of target IDs.)") message(" Clang path: ${clang_path}") message(" Bitcode Dir: ${BITCODE_DIR}") endif() @@ -99,9 +93,15 @@ endif() ##========================================== ## Add custom command to generate a kernel code object file ##========================================== -function(gen_kernel_bc TARGET_DEV XNACK_OPT INPUT_FILE OUTPUT_FILE) +function(gen_kernel_bc TARGET_ID INPUT_FILE OUTPUT_FILE) - string (REPLACE "gfx" "" GFXIP "${TARGET_DEV}") + string (REGEX MATCH "^gfx([^:]+)" GFXIP "${TARGET_ID}") + set (GFXIP_NUMBER "${CMAKE_MATCH_1}") + # Report syntactically invalid target IDs and terminate. 
+ if (NOT GFXIP) + message(FATAL_ERROR "Invalid target (${TARGET_ID}) specified for generating BLIT kerenel") + return() + endif() # Determine if device-libs is following old or new layout if(EXISTS "${BITCODE_DIR}/opencl.amdgcn.bc") set(BITCODE_ARGS "-nogpulib @@ -109,7 +109,7 @@ function(gen_kernel_bc TARGET_DEV XNACK_OPT INPUT_FILE OUTPUT_FILE) -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ockl.amdgcn.bc -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ocml.amdgcn.bc -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/oclc_daz_opt_on.amdgcn.bc - -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/oclc_isa_version_${GFXIP}.amdgcn.bc + -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/oclc_isa_version_${GFXIP_NUMBER}.amdgcn.bc -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/oclc_unsafe_math_off.amdgcn.bc -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/oclc_finite_only_off.amdgcn.bc") else() @@ -118,7 +118,7 @@ function(gen_kernel_bc TARGET_DEV XNACK_OPT INPUT_FILE OUTPUT_FILE) separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-O2 -x cl -cl-denorms-are-zero -cl-std=CL2.0 -target amdgcn-amd-amdhsa - -Xclang -finclude-default-header -mcpu=${TARGET_DEV} -m${XNACK_OPT} + -Xclang -finclude-default-header -mcpu=${TARGET_ID} ${BITCODE_ARGS} -o ${OUTPUT_FILE} ${INPUT_FILE}") ## Add custom command to produce a code object file. @@ -140,19 +140,19 @@ endfunction(gen_kernel_bc) ##========================================== ## Find device code object name and forward to custom command ##========================================== -function(build_kernel BLIT_NAME TARG_DEV) +function(build_kernel BLIT_NAME TARGET_ID) - list (FIND XNACK_DEVS ${TARG_DEV} XNACK_IDX) - if (${XNACK_IDX} GREATER -1) - set (XNACK_OPT "xnack") - else() - set (XNACK_OPT "no-xnack") + string (REGEX MATCH "^gfx([^:]+)" GFXIP "${TARGET_ID}") + # Report syntactically invalid target IDs and terminate. 
+ if (NOT GFXIP) + message(FATAL_ERROR "Invalid target (${TARGET_ID}) specified for generating BLIT kerenel (${BLIT_NAME})") + return() endif() - ## generate kernel bitcodes - set (CODE_OBJECT_FILE "${BLIT_NAME}_${TARG_DEV}") + ## generate kernel bitcodes + set (CODE_OBJECT_FILE "${BLIT_NAME}_${GFXIP}") set (CL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/imageblit_kernels.cl) - gen_kernel_bc(${TARG_DEV} ${XNACK_OPT} ${CL_FILE} ${CODE_OBJECT_FILE}) + gen_kernel_bc(${TARGET_ID} ${CL_FILE} ${CODE_OBJECT_FILE}) ## Build a list of code object file names ## These will be target dependencies. diff --git a/runtime/hsa-runtime/image/blit_src/README.md b/runtime/hsa-runtime/image/blit_src/README.md index b6597873a4..ddcebef026 100644 --- a/runtime/hsa-runtime/image/blit_src/README.md +++ b/runtime/hsa-runtime/image/blit_src/README.md @@ -12,12 +12,17 @@ whenever a new device is introduced. To add a new supported device, the following steps are required: 1. Declare an extern variable of the device XXX, by adding the line of - "extern uint32_t ocl_blit_object_gfxNNN[];" in "blit_kernel.cpp" + "extern uint32_t ocl_blit_object_gfxNNN[];" in "blit_kernel.cpp". 2. Update the BlitKernel::GetPatchedBlitObject() function to support the - device by assigning "blit_code_object" to "ocl_blit_object_gfxNNN[]" - 3. Add the gfxNNN to the TARGET_DEVICES list in CMakeLists.txt - 4. If the new device requires XNACK, add it to the XNACK_DEVS list in CMakeLists.txt - 5. Rebuild the image library + device by assigning "blit_code_object" to "ocl_blit_object_gfxNNN[]". + 3. Add the target to the TARGET_DEVICES list in CMakeLists.txt. Specify using + the target ID syntax which is the target GFX IP name, optionally followed + by the settings for the target features such as XNACK and SRAMECC. If + omitted, a target feature defaults to producing code that will execute on + any setting. 
For example, "gfx908" for code that will run on any setting, + or "gfx908:sramecc+:xnack-" for code that will only run if SRAMECC is + enabled and XNACK is disabled. + 4. Rebuild the image library. ## REQUIREMENT diff --git a/runtime/hsa-runtime/inc/amd_hsa_elf.h b/runtime/hsa-runtime/inc/amd_hsa_elf.h index 377f771e32..8e65b63024 100644 --- a/runtime/hsa-runtime/inc/amd_hsa_elf.h +++ b/runtime/hsa-runtime/inc/amd_hsa_elf.h @@ -40,41 +40,126 @@ // //////////////////////////////////////////////////////////////////////////////// +// Undefine the macro in case it is defined in the system elf.h. +#undef EM_AMDGPU + #ifndef AMD_HSA_ELF_H #define AMD_HSA_ELF_H -#include "amd_hsa_common.h" +// AMD GPU Specific ELF Header Enumeration Values. +// +// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains +// code object V1 definitions which are not part of the LLVM header. Code object +// V1 was only supported by the Finalizer which is now deprecated and removed. +// +// TODO: Deprecate and remove V1 support and replace this header with using the +// LLVM header. +namespace ELF { -// ELF Header Enumeration Values. -#define EM_AMDGPU 224 -#define ELFOSABI_AMDGPU_HSA 64 -#define ELFABIVERSION_AMDGPU_HSA 0 -#define EF_AMDGPU_XNACK 0x00000001 -#define EF_AMDGPU_TRAP_HANDLER 0x00000002 +// Machine architectures +// See current registered ELF machine architectures at: +// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html +enum { + EM_AMDGPU = 224, // AMD GPU architecture +}; -// FIXME: We really need to start thinking about separating legacy code out, -// it is getting messy.
-#define EF_AMDGPU_MACH_LC 0x0ff -#define EF_AMDGPU_MACH_AMDGCN_GFX700_LC 0x022 -#define EF_AMDGPU_MACH_AMDGCN_GFX701_LC 0x023 -#define EF_AMDGPU_MACH_AMDGCN_GFX702_LC 0x024 -#define EF_AMDGPU_MACH_AMDGCN_GFX801_LC 0x028 -#define EF_AMDGPU_MACH_AMDGCN_GFX802_LC 0x029 -#define EF_AMDGPU_MACH_AMDGCN_GFX803_LC 0x02a -#define EF_AMDGPU_MACH_AMDGCN_GFX810_LC 0x02b -#define EF_AMDGPU_MACH_AMDGCN_GFX900_LC 0x02c -#define EF_AMDGPU_MACH_AMDGCN_GFX902_LC 0x02d -#define EF_AMDGPU_MACH_AMDGCN_GFX904_LC 0x02e -#define EF_AMDGPU_MACH_AMDGCN_GFX906_LC 0x02f -#define EF_AMDGPU_MACH_AMDGCN_GFX908_LC 0x030 -#define EF_AMDGPU_MACH_AMDGCN_GFX909_LC 0x031 -#define EF_AMDGPU_MACH_AMDGCN_GFX1010_LC 0x033 -#define EF_AMDGPU_MACH_AMDGCN_GFX1011_LC 0x034 -#define EF_AMDGPU_MACH_AMDGCN_GFX1012_LC 0x035 -#define EF_AMDGPU_MACH_AMDGCN_GFX1030_LC 0x036 -#define EF_AMDGPU_MACH_AMDGCN_GFX1031_LC 0x037 -#define EF_AMDGPU_XNACK_LC 0x100 -#define EF_AMDGPU_SRAM_ECC_LC 0x200 +// OS ABI identification. +enum { + ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime +}; + +// AMDGPU OS ABI Version identification. +enum { + // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification + // was never defined for V1. + ELFABIVERSION_AMDGPU_HSA_V2 = 0, + ELFABIVERSION_AMDGPU_HSA_V3 = 1, + ELFABIVERSION_AMDGPU_HSA_V4 = 2 +}; + +// AMDGPU specific e_flags. +enum : unsigned { + // Processor selection mask for EF_AMDGPU_MACH_* values. + EF_AMDGPU_MACH = 0x0ff, + + // Not specified processor. + EF_AMDGPU_MACH_NONE = 0x000, + + // AMDGCN GFX7. + EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022, + EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023, + EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024, + EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025, + EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026, + // AMDGCN GFX8. + EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028, + EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029, + EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a, + EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b, + // AMDGCN GFX9. 
+ EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c, + EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d, + EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e, + EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, + EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030, + EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031, + // AMDGCN GFX10. + EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033, + EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034, + EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035, + EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036, + EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037, + + // Indicates if the "xnack" target feature is enabled for all code contained + // in the object. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2. + EF_AMDGPU_FEATURE_XNACK_V2 = 0x01, + // Indicates if the trap handler is enabled for all code contained + // in the object. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2. + EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02, + + // Indicates if the "xnack" target feature is enabled for all code contained + // in the object. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3. + EF_AMDGPU_FEATURE_XNACK_V3 = 0x100, + // Indicates if the "sramecc" target feature is enabled for all code + // contained in the object. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3. + EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200, + + // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4. + EF_AMDGPU_FEATURE_XNACK_V4 = 0x300, + // XNACK is not supported. + EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000, + // XNACK is any/default/unspecified. + EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100, + // XNACK is off. + EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200, + // XNACK is on. + EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300, + + // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values. + // + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4. 
+ EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00, + // SRAMECC is not supported. + EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000, + // SRAMECC is any/default/unspecified. + EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400, + // SRAMECC is off. + EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800, + // SRAMECC is on. + EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00, +}; + +} // end namespace ELF // ELF Section Header Flag Enumeration Values. #define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS) @@ -143,14 +228,15 @@ typedef enum { #define R_AMDGPU_RELATIVE64 13 // AMD GPU Note Type Enumeration Values. -#define NT_AMDGPU_HSA_CODE_OBJECT_VERSION 1 -#define NT_AMDGPU_HSA_HSAIL 2 -#define NT_AMDGPU_HSA_ISA 3 -#define NT_AMDGPU_HSA_PRODUCER 4 -#define NT_AMDGPU_HSA_PRODUCER_OPTIONS 5 -#define NT_AMDGPU_HSA_EXTENSION 6 -#define NT_AMDGPU_HSA_HLDEBUG_DEBUG 101 -#define NT_AMDGPU_HSA_HLDEBUG_TARGET 102 +#define NT_AMD_HSA_CODE_OBJECT_VERSION 1 +#define NT_AMD_HSA_HSAIL 2 +#define NT_AMD_HSA_ISA_VERSION 3 +#define NT_AMD_HSA_PRODUCER 4 +#define NT_AMD_HSA_PRODUCER_OPTIONS 5 +#define NT_AMD_HSA_EXTENSION 6 +#define NT_AMD_HSA_ISA_NAME 11 +#define NT_AMD_HSA_HLDEBUG_DEBUG 101 +#define NT_AMD_HSA_HLDEBUG_TARGET 102 // AMD GPU Metadata Kind Enumeration Values. typedef uint16_t amdgpu_hsa_metadata_kind16_t; diff --git a/runtime/hsa-runtime/inc/hsa_ext_amd.h b/runtime/hsa-runtime/inc/hsa_ext_amd.h index 5cd01f75a5..04a6e4d712 100644 --- a/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -1815,11 +1815,11 @@ typedef enum { // GPU attempted access to a host only page. HSA_AMD_MEMORY_FAULT_HOST_ONLY = 1 << 3, // DRAM ECC failure. - HSA_AMD_MEMORY_FAULT_DRAM_ECC = 1 << 4, + HSA_AMD_MEMORY_FAULT_DRAMECC = 1 << 4, // Can't determine the exact fault address. HSA_AMD_MEMORY_FAULT_IMPRECISE = 1 << 5, // SRAM ECC failure (ie registers, no fault address). - HSA_AMD_MEMORY_FAULT_SRAM_ECC = 1 << 6, + HSA_AMD_MEMORY_FAULT_SRAMECC = 1 << 6, // GPU reset following unspecified hang. 
HSA_AMD_MEMORY_FAULT_HANG = 1 << 31 } hsa_amd_memory_fault_reason_t; diff --git a/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp b/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp index a6b9021eab..bff3f46dd9 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp @@ -706,6 +706,8 @@ namespace elf { uint16_t Type() override { return ehdr.e_type; } uint32_t EFlags() override { return ehdr.e_flags; } uint32_t ABIVersion() override { return (uint32_t)(ehdr.e_ident[EI_ABIVERSION]); } + uint32_t EClass() override { return (uint32_t)(ehdr.e_ident[EI_CLASS]); } + uint32_t OsAbi() override { return (uint32_t)(ehdr.e_ident[EI_OSABI]); } GElfStringTable* shstrtab() override; GElfStringTable* strtab() override; diff --git a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp index 3a911ee59a..6cfaa50fcf 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp @@ -408,8 +408,8 @@ namespace code { if (!img) { img.reset(amd::elf::NewElf64Image()); uint32_t flags = 0; - if (xnack) { flags |= EF_AMDGPU_XNACK; } - return img->initNew(EM_AMDGPU, ET_EXEC, ELFOSABI_AMDGPU_HSA, ELFABIVERSION_AMDGPU_HSA, flags) || + if (xnack) { flags |= ELF::EF_AMDGPU_FEATURE_XNACK_V2; } + return img->initNew(ELF::EM_AMDGPU, ET_EXEC, ELF::ELFOSABI_AMDGPU_HSA, ELF::ELFABIVERSION_AMDGPU_HSA_V2, flags) || ElfImageError(); // FIXME: elfutils libelf does not allow program headers in ET_REL file type, so change it later in finalizer. 
} return false; @@ -440,7 +440,7 @@ namespace code { bool AmdHsaCode::Validate() { if (!img->Validate()) { return ElfImageError(); } - if (img->Machine() != EM_AMDGPU) { + if (img->Machine() != ELF::EM_AMDGPU) { out << "ELF error: Invalid machine" << std::endl; return false; } @@ -457,29 +457,37 @@ namespace code { amdgpu_hsa_note_code_object_version_t desc; desc.major_version = major; desc.minor_version = minor; - AddAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc, sizeof(desc)); + AddAmdNote(NT_AMD_HSA_CODE_OBJECT_VERSION, &desc, sizeof(desc)); } bool AmdHsaCode::GetCodeObjectVersion(uint32_t* major, uint32_t* minor) { - amdgpu_hsa_note_code_object_version_t* desc; - if (!GetAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc)) { - if (img->ABIVersion() != 0 && img->ABIVersion() != 1) - return false; - + switch (img->ABIVersion()) { + case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + amdgpu_hsa_note_code_object_version_t* desc; + if (GetAmdNote(NT_AMD_HSA_CODE_OBJECT_VERSION, &desc)) { + *major = desc->major_version; + *minor = desc->minor_version; + return *major <= 2; + } + return false; + case ELF::ELFABIVERSION_AMDGPU_HSA_V3: *major = 3; *minor = 0; return true; + case ELF::ELFABIVERSION_AMDGPU_HSA_V4: + *major = 4; + *minor = 0; + return true; } - *major = desc->major_version; - *minor = desc->minor_version; - return true; + + return false; } bool AmdHsaCode::GetNoteCodeObjectVersion(std::string& version) { amdgpu_hsa_note_code_object_version_t* desc; - if (!GetAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc)) { return false; } + if (!GetAmdNote(NT_AMD_HSA_CODE_OBJECT_VERSION, &desc)) { return false; } version.clear(); version += std::to_string(desc->major_version); version += "."; @@ -496,13 +504,13 @@ namespace code { desc.profile = uint8_t(profile); desc.machine_model = uint8_t(machine_model); desc.default_float_round = uint8_t(rounding_mode); - AddAmdNote(NT_AMDGPU_HSA_HSAIL, &desc, sizeof(desc)); + AddAmdNote(NT_AMD_HSA_HSAIL, &desc, sizeof(desc)); } bool 
AmdHsaCode::GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round) { amdgpu_hsa_note_hsail_t *desc; - if (!GetAmdNote(NT_AMDGPU_HSA_HSAIL, &desc)) { return false; } + if (!GetAmdNote(NT_AMD_HSA_HSAIL, &desc)) { return false; } *hsail_major = desc->hsail_major_version; *hsail_minor = desc->hsail_minor_version; *profile = (hsa_profile_t) desc->profile; @@ -523,13 +531,13 @@ namespace code { desc->stepping = stepping; memcpy(desc->vendor_and_architecture_name, vendor_name.c_str(), vendor_name.length() + 1); memcpy(desc->vendor_and_architecture_name + desc->vendor_name_size, architecture_name.c_str(), architecture_name.length() + 1); - AddAmdNote(NT_AMDGPU_HSA_ISA, desc, size); + AddAmdNote(NT_AMD_HSA_ISA_VERSION, desc, size); } bool AmdHsaCode::GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping) { amdgpu_hsa_note_isa_t *desc; - if (!GetAmdNote(NT_AMDGPU_HSA_ISA, &desc)) { return false; } + if (!GetAmdNote(NT_AMD_HSA_ISA_VERSION, &desc)) { return false; } vendor_name = GetNoteString(desc->vendor_name_size, desc->vendor_and_architecture_name); architecture_name = GetNoteString(desc->architecture_name_size, desc->vendor_and_architecture_name + vendor_name.length() + 1); *major_version = desc->major; @@ -538,143 +546,204 @@ namespace code { return true; } - static std::string ConvertOldTargetNameToNew( - const std::string &OldName, bool IsFinalizer, uint32_t EFlags) { - std::string NewName = ""; - - // FIXME #1: Should 9:0:3 be completely (loader, sc, etc.) removed? - // FIXME #2: What does PAL do with respect to boltzmann/usual fiji/tonga? 
- if (OldName == "AMD:AMDGPU:7:0:0") - NewName = "amdgcn-amd-amdhsa--gfx700"; - else if (OldName == "AMD:AMDGPU:7:0:1") - NewName = "amdgcn-amd-amdhsa--gfx701"; - else if (OldName == "AMD:AMDGPU:7:0:2") - NewName = "amdgcn-amd-amdhsa--gfx702"; - else if (OldName == "AMD:AMDGPU:7:0:3") - NewName = "amdgcn-amd-amdhsa--gfx703"; - else if (OldName == "AMD:AMDGPU:7:0:4") - NewName = "amdgcn-amd-amdhsa--gfx704"; - else if (OldName == "AMD:AMDGPU:8:0:0") - NewName = "amdgcn-amd-amdhsa--gfx800"; - else if (OldName == "AMD:AMDGPU:8:0:1") - NewName = "amdgcn-amd-amdhsa--gfx801"; - else if (OldName == "AMD:AMDGPU:8:0:2") - NewName = "amdgcn-amd-amdhsa--gfx802"; - else if (OldName == "AMD:AMDGPU:8:0:3") - NewName = "amdgcn-amd-amdhsa--gfx803"; - else if (OldName == "AMD:AMDGPU:8:0:4") - NewName = "amdgcn-amd-amdhsa--gfx804"; - else if (OldName == "AMD:AMDGPU:8:1:0") - NewName = "amdgcn-amd-amdhsa--gfx810"; - else if (OldName == "AMD:AMDGPU:9:0:0") - NewName = "amdgcn-amd-amdhsa--gfx900"; - else if (OldName == "AMD:AMDGPU:9:0:1") - NewName = "amdgcn-amd-amdhsa--gfx900"; - else if (OldName == "AMD:AMDGPU:9:0:2") - NewName = "amdgcn-amd-amdhsa--gfx902"; - else if (OldName == "AMD:AMDGPU:9:0:3") - NewName = "amdgcn-amd-amdhsa--gfx902"; - else if (OldName == "AMD:AMDGPU:9:0:4") - NewName = "amdgcn-amd-amdhsa--gfx904"; - else if (OldName == "AMD:AMDGPU:9:0:6") - NewName = "amdgcn-amd-amdhsa--gfx906"; - else if (OldName == "AMD:AMDGPU:9:0:8") - NewName = "amdgcn-amd-amdhsa--gfx908"; - else if (OldName == "AMD:AMDGPU:10:1:0") - NewName = "amdgcn-amd-amdhsa--gfx1010"; - else if (OldName == "AMD:AMDGPU:10:1:1") - NewName = "amdgcn-amd-amdhsa--gfx1011"; - else if (OldName == "AMD:AMDGPU:10:1:2") - NewName = "amdgcn-amd-amdhsa--gfx1012"; - else if (OldName == "AMD:AMDGPU:10:3:0") - NewName = "amdgcn-amd-amdhsa--gfx1030"; - else if (OldName == "AMD:AMDGPU:10:3:1") - NewName = "amdgcn-amd-amdhsa--gfx1031"; - else - assert(false && "Unhandled target"); - - if (IsFinalizer && (EFlags & 
EF_AMDGPU_XNACK)) { - NewName = NewName + "+xnack"; - } else { - if (EFlags & EF_AMDGPU_XNACK_LC) { - NewName = NewName + "+xnack"; - } else { - if (OldName == "AMD:AMDGPU:8:0:1") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:8:1:0") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:9:0:1") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:9:0:2") - NewName = NewName + "+xnack"; - else if (OldName == "AMD:AMDGPU:9:0:3") - NewName = NewName + "+xnack"; - } + // TODO: Move isa registry into the loader. + static bool GetMachInfo(unsigned mach, std::string &name, bool &sramecc_supported, bool &xnack_supported) { + switch (mach) { + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: name = "gfx700"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: name = "gfx701"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: name = "gfx702"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: name = "gfx703"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: name = "gfx704"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: name = "gfx801"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: name = "gfx802"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: name = "gfx803"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: name = "gfx810"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: name = "gfx900"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: name = "gfx902"; xnack_supported = true; sramecc_supported = false; break; + case 
ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: name = "gfx904"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: name = "gfx906"; xnack_supported = true; sramecc_supported = true; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: name = "gfx908"; xnack_supported = true; sramecc_supported = true; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: name = "gfx1010"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: name = "gfx1011"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: name = "gfx1012"; xnack_supported = true; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: name = "gfx1030"; xnack_supported = false; sramecc_supported = false; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: name = "gfx1031"; xnack_supported = false; sramecc_supported = false; break; + default: return false; } - - if (EFlags & EF_AMDGPU_SRAM_ECC_LC) - NewName += "+sram-ecc"; - - return NewName; + return true; } - bool AmdHsaCode::GetIsa(std::string& isaName) + // This function is also copied to the Code Object Manager library. 
+ static std::string ConvertOldTargetNameToNew(const std::string &old_name, bool is_finalizer, uint32_t e_flags) { + assert(!old_name.empty() && "Expecting non-empty old name"); + + unsigned mach = 0; + if (old_name == "AMD:AMDGPU:7:0:0") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; + else if (old_name == "AMD:AMDGPU:7:0:1") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; + else if (old_name == "AMD:AMDGPU:7:0:2") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; + else if (old_name == "AMD:AMDGPU:7:0:3") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; + else if (old_name == "AMD:AMDGPU:7:0:4") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; + else if (old_name == "AMD:AMDGPU:8:0:1") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; + else if (old_name == "AMD:AMDGPU:8:0:2") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; + else if (old_name == "AMD:AMDGPU:8:0:3") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; + else if (old_name == "AMD:AMDGPU:8:1:0") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; + else if (old_name == "AMD:AMDGPU:9:0:0" || old_name == "AMD:AMDGPU:9:0:1") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; + else if (old_name == "AMD:AMDGPU:9:0:2" || old_name == "AMD:AMDGPU:9:0:3") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; + else if (old_name == "AMD:AMDGPU:9:0:4" || old_name == "AMD:AMDGPU:9:0:5") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; + else if (old_name == "AMD:AMDGPU:9:0:6" || old_name == "AMD:AMDGPU:9:0:7") + mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; + else { + // Code object v2 only supports asics up to gfx906. Do NOT add handling + // of new asics into this if-else-if* block. + return ""; + } + std::string name; + bool sramecc_supported = false; + bool xnack_supported = false; + if (!GetMachInfo(mach, name, sramecc_supported, xnack_supported)) + return ""; + + // Only "AMD:AMDGPU:9:0:6" and "AMD:AMDGPU:9:0:7" support SRAMECC for + // code object V2, and it must be OFF. 
+ if (sramecc_supported) + name += ":sramecc-"; + + if (is_finalizer) { + if (e_flags & ELF::EF_AMDGPU_FEATURE_XNACK_V2) + name += ":xnack+"; + else if (xnack_supported) + name += ":xnack-"; + } else { + if (old_name == "AMD:AMDGPU:8:0:1") + name += ":xnack+"; + else if (old_name == "AMD:AMDGPU:8:1:0") + name += ":xnack+"; + else if (old_name == "AMD:AMDGPU:9:0:1") + name += ":xnack+"; + else if (old_name == "AMD:AMDGPU:9:0:3") + name += ":xnack+"; + else if (old_name == "AMD:AMDGPU:9:0:5") + name += ":xnack+"; + else if (old_name == "AMD:AMDGPU:9:0:7") + name += ":xnack+"; + else if (xnack_supported) + name += ":xnack-"; + } + + return name; + } + + bool AmdHsaCode::GetIsa(std::string& isa_name) { - isaName.clear(); + isa_name.clear(); - uint32_t codeObjectMajorVersion = 0; - uint32_t codeObjectMinorVersion = 0; + uint32_t code_object_major_version = 0; + uint32_t code_object_minor_version = 0; - if (!GetCodeObjectVersion(&codeObjectMajorVersion, &codeObjectMinorVersion)) { return false; } - if (codeObjectMajorVersion >= 3) { - isaName += "amdgcn-amd-amdhsa--"; - unsigned MACH = img->EFlags() & EF_AMDGPU_MACH_LC; - switch (MACH) { - case EF_AMDGPU_MACH_AMDGCN_GFX700_LC: isaName += "gfx700"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX701_LC: isaName += "gfx701"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX702_LC: isaName += "gfx702"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX801_LC: isaName += "gfx801"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX802_LC: isaName += "gfx802"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX803_LC: isaName += "gfx803"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX810_LC: isaName += "gfx810"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX900_LC: isaName += "gfx900"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX902_LC: isaName += "gfx902"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX904_LC: isaName += "gfx904"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX906_LC: isaName += "gfx906"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX908_LC: isaName += "gfx908"; break; - case 
EF_AMDGPU_MACH_AMDGCN_GFX1010_LC: isaName += "gfx1010"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX1011_LC: isaName += "gfx1011"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX1012_LC: isaName += "gfx1012"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX1030_LC: isaName += "gfx1030"; break; - case EF_AMDGPU_MACH_AMDGCN_GFX1031_LC: isaName += "gfx1031"; break; - default: return false; + if (!GetCodeObjectVersion(&code_object_major_version, &code_object_minor_version)) { + return false; + } + if (code_object_major_version >= 3) { + + switch (img->EClass()) { + case ELFCLASS64: + // There is no e_machine and/or OS ABI for R600 so rely on checking + // the ELFCLASS to determine if AMDGCN versus R600. AMDHSA always uses + // ELFCLASS64 and R600 always uses ELFCLASS32. + isa_name += "amdgcn"; + break; + default: + return false; } - if (img->EFlags() & EF_AMDGPU_XNACK_LC) - isaName += "+xnack"; + if (img->Machine() != ELF::EM_AMDGPU) + return false; + isa_name += "-amd-"; - if (img->EFlags() & EF_AMDGPU_SRAM_ECC_LC) - isaName += "+sram-ecc"; + switch (img->OsAbi()) { + case ELF::ELFOSABI_AMDGPU_HSA: + isa_name += "amdhsa"; + break; + default: + // Only support AMDHSA in the ROCm runtime. 
+ return false; + } + + isa_name += "--"; + + unsigned mach = img->EFlags() & ELF::EF_AMDGPU_MACH; + std::string name = ""; + bool xnack_supported = false; + bool sramecc_supported = false; + + if (!GetMachInfo(mach, name, sramecc_supported, xnack_supported)) + return false; + + isa_name += name; + + if (code_object_major_version == 3) { + if (img->EFlags() & ELF::EF_AMDGPU_FEATURE_SRAMECC_V3) + isa_name += ":sramecc+"; + else if (sramecc_supported) + isa_name += ":sramecc-"; + + if (img->EFlags() & ELF::EF_AMDGPU_FEATURE_XNACK_V3) + isa_name += ":xnack+"; + else if (xnack_supported) + isa_name += ":xnack-"; + } else if (code_object_major_version == 4) { + switch (img->EFlags() & ELF::EF_AMDGPU_FEATURE_SRAMECC_V4) { + case ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: + isa_name += ":sramecc-"; + break; + case ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4: + isa_name += ":sramecc+"; + break; + } + + switch (img->EFlags() & ELF::EF_AMDGPU_FEATURE_XNACK_V4) { + case ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4: + isa_name += ":xnack-"; + break; + case ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4: + isa_name += ":xnack+"; + break; + } + } else { + return false; + } return true; } else { std::string vendor_name, architecture_name; uint32_t major_version, minor_version, stepping; if (!GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) { return false; } - isaName += vendor_name; - isaName += ":"; - isaName += architecture_name; - isaName += ":"; - isaName += std::to_string(major_version); - isaName += ":"; - isaName += std::to_string(minor_version); - isaName += ":"; - isaName += std::to_string(stepping); + isa_name += vendor_name; + isa_name += ":"; + isa_name += architecture_name; + isa_name += ":"; + isa_name += std::to_string(major_version); + isa_name += ":"; + isa_name += std::to_string(minor_version); + isa_name += ":"; + isa_name += std::to_string(stepping); amdgpu_hsa_note_hsail_t *hsailNote; - bool IsFinalizer = GetAmdNote(NT_AMDGPU_HSA_HSAIL, &hsailNote); - 
isaName = ConvertOldTargetNameToNew(isaName, IsFinalizer, img->EFlags()); - return true; + bool IsFinalizer = GetAmdNote(NT_AMD_HSA_HSAIL, &hsailNote); + isa_name = ConvertOldTargetNameToNew(isa_name, IsFinalizer, img->EFlags()); + return !isa_name.empty(); } } @@ -687,13 +756,13 @@ namespace code { desc->producer_major_version = major; desc->producer_minor_version = minor; memcpy(desc->producer_name, producer.c_str(), producer.length() + 1); - AddAmdNote(NT_AMDGPU_HSA_PRODUCER, desc, size); + AddAmdNote(NT_AMD_HSA_PRODUCER, desc, size); } bool AmdHsaCode::GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name) { amdgpu_hsa_note_producer_t* desc; - if (!GetAmdNote(NT_AMDGPU_HSA_PRODUCER, &desc)) { return false; } + if (!GetAmdNote(NT_AMD_HSA_PRODUCER, &desc)) { return false; } *major = desc->producer_major_version; *minor = desc->producer_minor_version; producer_name = GetNoteString(desc->producer_name_size, desc->producer_name); @@ -706,7 +775,7 @@ namespace code { amdgpu_hsa_note_producer_options_t *desc = (amdgpu_hsa_note_producer_options_t*) _alloca(size); desc->producer_options_size = options.length(); memcpy(desc->producer_options, options.c_str(), options.length() + 1); - AddAmdNote(NT_AMDGPU_HSA_PRODUCER_OPTIONS, desc, size); + AddAmdNote(NT_AMD_HSA_PRODUCER_OPTIONS, desc, size); } void AmdHsaCode::AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options) @@ -726,7 +795,7 @@ namespace code { bool AmdHsaCode::GetNoteProducerOptions(std::string& options) { amdgpu_hsa_note_producer_options_t* desc; - if (!GetAmdNote(NT_AMDGPU_HSA_PRODUCER_OPTIONS, &desc)) { return false; } + if (!GetAmdNote(NT_AMD_HSA_PRODUCER_OPTIONS, &desc)) { return false; } options = GetNoteString(desc->producer_options_size, desc->producer_options); return true; } diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp index 0d6ce72ebf..0d982e7a5a 
100644 --- a/runtime/hsa-runtime/loader/executable.cpp +++ b/runtime/hsa-runtime/loader/executable.cpp @@ -1216,7 +1216,7 @@ hsa_status_t ExecutableImpl::LoadCodeObject( return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - if (majorVersion != 1 && majorVersion != 2 && majorVersion != 3) { + if (majorVersion < 1 || majorVersion > 4) { logger_ << "LoaderError: unsupported code object version: " << majorVersion << "\n"; return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } @@ -1309,7 +1309,7 @@ hsa_status_t ExecutableImpl::LoadSegmentsV1(hsa_agent_t agent, hsa_status_t ExecutableImpl::LoadSegmentsV2(hsa_agent_t agent, const code::AmdHsaCode *c) { - assert(c->Machine() == EM_AMDGPU && "Program code objects are not supported"); + assert(c->Machine() == ELF::EM_AMDGPU && "Program code objects are not supported"); if (!c->DataSegmentCount()) return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;