diff --git a/runtime/hsa-runtime/README.md b/runtime/hsa-runtime/README.md index 2b6bea4681..1ee7e84259 100644 --- a/runtime/hsa-runtime/README.md +++ b/runtime/hsa-runtime/README.md @@ -60,18 +60,6 @@ For example, from the top level ROCR repository execute: The name of the core hsa runtime is libhsa-runtime64.so.1. -#### External requirements - -The core runtime requires the sp3.a library to be able to compiler -on x86_64 architechtures. The binaries for the sp3.a librariy can -be found on the amd-codexl-analyzer GitHub repository: - -https://github.com/GPUOpen-Tools/amd-codexl-analyzer - -The x86_64 library and associated header files have been added to -this code base for convenience, but are still subject to the -AMD copyright license. - #### Specs http://www.hsafoundation.com/standards/ diff --git a/runtime/hsa-runtime/core/CMakeLists.txt b/runtime/hsa-runtime/core/CMakeLists.txt index 9dd3915154..0eb9af6862 100644 --- a/runtime/hsa-runtime/core/CMakeLists.txt +++ b/runtime/hsa-runtime/core/CMakeLists.txt @@ -66,18 +66,6 @@ if ( NOT EXISTS ${HSATHK_BUILD_LIB_PATH}/libhsakmt.so.1 ) MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_LIB_PATH is not set to point to the location where KFD Thunk library libhsakmt.so.1 could be found." ) endif () -if ( EXISTS ${LIBSP3_BUILD_INC_PATH}/sp3.h ) - set ( LIBSP3_BUILD_INC_PATH ${LIBSP3_BUILD_INC_PATH} ) -else () - set ( LIBSP3_BUILD_INC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 ) -endif () - -if ( EXISTS ${LIBSP3_BUILD_LIB_PATH}/libsp3.a ) - set ( LIBSP3_BUILD_LIB_PATH ${LIBSP3_BUILD_LIB_PATH} ) -else () - set ( LIBSP3_BUILD_LIB_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 ) -endif () - MESSAGE ( ------IS64BIT: ${IS64BIT} ) MESSAGE ( ------Compiler: ${CMAKE_CXX_COMPILER} ) MESSAGE ( ------Version: ${CMAKE_CXX_COMPILER_VERSION} ) @@ -153,11 +141,9 @@ include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/.. ) include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ) include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/inc ) include_directories ( ${HSATHK_BUILD_INC_PATH} ) -include_directories ( ${LIBSP3_BUILD_INC_PATH} ) ## Library path(s). link_directories ( ${HSATHK_BUILD_LIB_PATH} ) -link_directories ( ${LIBSP3_BUILD_LIB_PATH} ) add_library ( ${CORE_RUNTIME_TARGET} SHARED ${CORE_SRCS} ) @@ -172,7 +158,6 @@ target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE amdhsaloader PRIVATE amdhsacode PRIVATE hsakmt - PRIVATE sp3 dl pthread rt ) diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h b/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h new file mode 100644 index 0000000000..2aa074981f --- /dev/null +++ b/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h @@ -0,0 +1,169 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_ +#define HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_ + +namespace amd { + +static const unsigned int kCodeCopyAligned7[] = { + 0xC0820100, 0xC0840104, 0xC0860108, 0xC088010C, 0xC08A0110, 0xC00C0114, + 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, + 0xD2506A03, 0x01A90103, 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, + 0x01A90105, 0xD1C2006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, 0xDC200000, + 0x01000002, 0xBF8C0F70, 0xD24A6A02, 0x00003102, 0xD2506A03, 0x01A90103, + 0xDC600000, 0x00000104, 0xD24A6A04, 0x00003104, 0xD2506A05, 0x01A90105, + 0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, 0xD24A6A02, + 0x00001101, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001501, + 0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000E, 0xDC380000, + 0x08000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70, + 0xDC780000, 0x00000804, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105, + 0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD24A6A02, 0x00001901, + 0xD2506A03, 0x01A90103, 0x7E0A020F, 0xD24A6A04, 0x00001D01, 0xD2506A05, + 0x01A90105, 0xD1C2006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, 0xDC300000, + 0x01000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70, + 0xDC700000, 0x00000104, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105, + 0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD24A6A02, 0x00002100, 0xD2506A03, + 0x01A90103, 0x7E0A0213, 0xD24A6A04, 0x00002500, 0xD2506A05, 0x01A90105, + 0xD1C2006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, 0x01000002, + 0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000, +}; + +static const unsigned int kCodeCopyMisaligned7[] = { + 0xC0820100, 0xC0840104, 0xC0860108, 0xC008010C, 0xBF8C007F, 0x8F028602, + 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, 0xD2506A03, 0x01A90103, + 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, 0x01A90105, 0xD1C2006A, + 0x00001102, 0xBF860032, 0xDC200000, 0x06000002, 0xD24A6A02, 0x00002102, + 0xD2506A03, 0x01A90103, 0xDC200000, 0x07000002, 0xD24A6A02, 0x00002102, + 0xD2506A03, 0x01A90103, 0xDC200000, 0x08000002, 0xD24A6A02, 0x00002102, + 0xD2506A03, 0x01A90103, 0xDC200000, 0x09000002, 0xD24A6A02, 0x00002102, + 0xD2506A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD24A6A04, + 0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000704, 0xD24A6A04, + 0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000804, 0xD24A6A04, + 0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000904, 0xD24A6A04, + 0x00002104, 0xD2506A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD24A6A02, + 0x00001100, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001500, + 0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000F, 0x87FE6A7E, + 0xDC200000, 0x01000002, 0xD24A6A02, 0x00002102, 0xD2506A03, 0x01A90103, + 0xBF8C0F70, 0xDC600000, 0x00000104, 0xD24A6A04, 0x00002104, 0xD2506A05, + 0x01A90105, 0xBF82FFEE, 0xBF810000, +}; + +static const unsigned int kCodeFill7[] = { + 0xC0820100, 0xC0840104, 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E08020A, + 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8F0C840B, 0x34020084, 0x7E060205, + 0xD24A6A02, 0x00000901, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00000D02, + 0xBF860007, 0xDC780000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03, + 0x01A90103, 0xBF82FFF6, 0x8F0C820B, 0x34020082, 0x7E060207, 0xD24A6A02, + 0x00000D01, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00001102, 0xBF860008, + 0x87FE6A7E, 0xDC700000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03, + 0x01A90103, 0xBF82FFF5, 0xBF810000, +}; + +static const unsigned int kCodeTrapHandler8[] = { + 0xC0061C80, 0x000000C0, 0xBF8C007F, 0xBEFE0181, 0x80728872, 0x82738073, + 0x7E000272, 0x7E020273, 0x7E0402FF, 0x80000000, 0x7E060280, 0xDD800000, + 0x00000200, 0xBF8C0F70, 0x7DD40500, 0xBF870011, 0xC0061D39, 0x00000008, + 0xBF8C007F, 0x86F47474, 0xBF84000C, 0x80729072, 0x82738073, 0xC0021CB9, + 0x00000000, 0xBF8C007F, 0x7E000274, 0x7E020275, 0x7E040272, 0xDC700000, + 0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70, +}; + +static const unsigned int kCodeCopyAligned8[] = { + 0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020, + 0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050, + 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, 0xD1196A02, 0x00000900, + 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, 0x00000D00, 0xD11C6A05, + 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF86000F, 0x86FE6A7E, 0xDC400000, + 0x01000002, 0xBF8C0F70, 0xD1196A02, 0x00003102, 0xD11C6A03, 0x01A90103, + 0xDC600000, 0x00000104, 0xD1196A04, 0x00003104, 0xD11C6A05, 0x01A90105, + 0xBF82FFEE, 0xBEFE01C1, 0x8E198418, 0x24020084, 0x7E060209, 0xD1196A02, + 0x00001101, 0xD11C6A03, 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001501, + 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001902, 0xBF86000E, 0xDC5C0000, + 0x08000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, + 0xDC7C0000, 0x00000804, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105, + 0xBF82FFEF, 0x8E198218, 0x24020082, 0x7E06020D, 0xD1196A02, 0x00001901, + 0xD11C6A03, 0x01A90103, 0x7E0A020F, 0xD1196A04, 0x00001D01, 0xD11C6A05, + 0x01A90105, 0xD0E9006A, 0x00002102, 0xBF86000F, 0x86FE6A7E, 0xDC500000, + 0x01000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, + 0xDC700000, 0x00000104, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105, + 0xBF82FFEE, 0xBEFE01C1, 0x7E060211, 0xD1196A02, 0x00002100, 0xD11C6A03, + 0x01A90103, 0x7E0A0213, 0xD1196A04, 0x00002500, 0xD11C6A05, 0x01A90105, + 0xD0E9006A, 0x00002902, 0xBF860006, 0x86FE6A7E, 0xDC400000, 0x01000002, + 0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000, +}; + +static const unsigned int kCodeCopyMisaligned8[] = { + 0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020, + 0xC0020400, 0x00000030, 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, + 0xD1196A02, 0x00000900, 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, + 0x00000D00, 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF860032, + 0xDC400000, 0x06000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, + 0xDC400000, 0x07000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, + 0xDC400000, 0x08000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, + 0xDC400000, 0x09000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, + 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD1196A04, 0x00002104, 0xD11C6A05, + 0x01A90105, 0xDC600000, 0x00000704, 0xD1196A04, 0x00002104, 0xD11C6A05, + 0x01A90105, 0xDC600000, 0x00000804, 0xD1196A04, 0x00002104, 0xD11C6A05, + 0x01A90105, 0xDC600000, 0x00000904, 0xD1196A04, 0x00002104, 0xD11C6A05, + 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD1196A02, 0x00001100, 0xD11C6A03, + 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001500, 0xD11C6A05, 0x01A90105, + 0xD0E9006A, 0x00001902, 0xBF86000F, 0x86FE6A7E, 0xDC400000, 0x01000002, + 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, + 0x00000104, 0xD1196A04, 0x00002104, 0xD11C6A05, 0x01A90105, 0xBF82FFEE, + 0xBF810000, +}; + +static const unsigned int kCodeFill8[] = { + 0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xBF8C007F, 0x8E028602, + 0x32000002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8E0C840B, + 0x24020084, 0x7E060205, 0xD1196A02, 0x00000901, 0xD11C6A03, 0x01A90103, + 0xD0E9006A, 0x00000D02, 0xBF860007, 0xDC7C0000, 0x00000402, 0xD1196A02, + 0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF6, 0x8E0C820B, 0x24020082, + 0x7E060207, 0xD1196A02, 0x00000D01, 0xD11C6A03, 0x01A90103, 0xD0E9006A, + 0x00001102, 0xBF860008, 0x86FE6A7E, 0xDC700000, 0x00000402, 0xD1196A02, + 0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000, +}; + +} // namespace amd + +#endif // header guard diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index d96fd414d9..d56f7fb518 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -46,19 +46,19 @@ #include #include #include +#include #include #include #include "core/inc/amd_aql_queue.h" #include "core/inc/amd_blit_kernel.h" #include "core/inc/amd_blit_sdma.h" +#include "core/inc/amd_gpu_shaders.h" #include "core/inc/amd_memory_region.h" #include "core/inc/interrupt_signal.h" #include "core/inc/isa.h" #include "core/inc/runtime.h" -#include "utils/sp3/sp3.h" - #include "hsa_ext_image.h" // Size of scratch (private) segment pre-allocated per thread, in bytes. @@ -182,49 +182,58 @@ GpuAgent::~GpuAgent() { void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, AssembleTarget assemble_target, void*& code_buf, size_t& code_buf_size) const { -#ifdef __linux__ // No VS builds of libsp3 available right now - std::string src_sp3_unified(src_sp3); + // Select precompiled shader implementation from name/target. + struct ASICShader { + const void* code; + size_t size; + int num_sgprs; + int num_vgprs; + }; - if (isa_->GetMajorVersion() == 7) { - // On Gfx7 replace v_add_u32 with legacy equivalent v_add_i32. - std::string add_inst_gfx8("v_add_u32"), add_inst_gfx7("v_add_i32"); + struct CompiledShader { + ASICShader compute_7; + ASICShader compute_8; + }; - for (size_t instIdx = 0; (instIdx = src_sp3_unified.find( - add_inst_gfx8, instIdx)) != std::string::npos; - instIdx += add_inst_gfx8.size()) { - src_sp3_unified.replace(instIdx, add_inst_gfx7.size(), add_inst_gfx7); - } - } + std::map compiled_shaders = { + {"TrapHandler", + {{NULL, 0, 0, 0}, {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}}}, + {"CopyAligned", + {{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12}, + {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}}}, + {"CopyMisaligned", + {{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10}, + {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}}}, + {"Fill", + {{kCodeFill7, sizeof(kCodeFill7), 19, 8}, + {kCodeFill8, sizeof(kCodeFill8), 19, 8}}}}; - // Assemble source string with libsp3. - sp3_context* sp3 = sp3_new(); + auto compiled_shader_it = compiled_shaders.find(func_name); + assert(compiled_shader_it != compiled_shaders.end() && + "Precompiled shader unavailable"); + + ASICShader* asic_shader = NULL; switch (isa_->GetMajorVersion()) { case 7: - sp3_setasic(sp3, "CI"); - sp3_set_param_int(sp3, "kGFXIPVersion", 7); + asic_shader = &compiled_shader_it->second.compute_7; break; case 8: - sp3_setasic(sp3, "VI"); - sp3_set_param_int(sp3, "kGFXIPVersion", 8); + asic_shader = &compiled_shader_it->second.compute_8; break; default: - assert(false && "SP3 assembly not supported on this agent"); + assert(false && "Precompiled shader unavailable for target"); } - sp3_parse_string(sp3, src_sp3_unified.c_str()); - sp3_shader* code_sp3_meta = sp3_compile(sp3, func_name); - // Allocate a GPU-visible buffer for the shader. HsaMemFlags code_buf_flags = {0}; code_buf_flags.ui32.HostAccess = 1; code_buf_flags.ui32.ExecuteAccess = 1; code_buf_flags.ui32.NoSubstitute = 1; - size_t code_size = code_sp3_meta->size * sizeof(uint32_t); size_t header_size = (assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0); - code_buf_size = AlignUp(header_size + code_size, 0x1000); + code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000); HSAKMT_STATUS err = hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf); @@ -239,8 +248,8 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, if (assemble_target == AssembleTarget::AQL) { amd_kernel_code_t* header = reinterpret_cast(code_buf); - int gran_sgprs = std::max(0, (int(code_sp3_meta->nsgprs) - 1) / 8); - int gran_vgprs = std::max(0, (int(code_sp3_meta->nvgprs) - 1) / 4); + int gran_sgprs = std::max(0, (int(asic_shader->num_sgprs) - 1) / 8); + int gran_vgprs = std::max(0, (int(asic_shader->num_vgprs) - 1) / 4); header->kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t); AMD_HSA_BITS_SET(header->kernel_code_properties, @@ -262,14 +271,9 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 1); } - // Copy trap handler code into the GPU-visible buffer. - memcpy((void*)(uintptr_t(code_buf) + header_size), code_sp3_meta->data, - code_size); - - // Release SP3 resources. - sp3_free_shader(code_sp3_meta); - sp3_close(sp3); -#endif + // Copy shader code into the GPU-visible buffer. + memcpy((void*)(uintptr_t(code_buf) + header_size), asic_shader->code, + asic_shader->size); } void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) const { @@ -988,7 +992,6 @@ void GpuAgent::SyncClocks() { } void GpuAgent::BindTrapHandler() { -#ifdef __linux__ // No raw string literal support in VS builds right now const char* src_sp3 = R"( var s_trap_info_lo = ttmp0 var s_trap_info_hi = ttmp1 @@ -1068,7 +1071,6 @@ void GpuAgent::BindTrapHandler() { HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_, trap_code_buf_size_, NULL, 0); assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtSetTrapHandler() failed"); -#endif } } // namespace diff --git a/runtime/hsa-runtime/utils/sp3/LICENSE.txt b/runtime/hsa-runtime/utils/sp3/LICENSE.txt deleted file mode 100644 index 548cb055df..0000000000 --- a/runtime/hsa-runtime/utils/sp3/LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/runtime/hsa-runtime/utils/sp3/sp3-asic.h b/runtime/hsa-runtime/utils/sp3/sp3-asic.h deleted file mode 100644 index 5696ba53c4..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-asic.h +++ /dev/null @@ -1,181 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_ASIC_H -#define SP3_ASIC_H - - -#include "sp3-int.h" -#include "sp3-vm.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -// ASIC types - - -enum asic_backend { - ASIC_BACKEND_SI, - ASIC_BACKEND_CI, - ASIC_BACKEND_GFX8, - ASIC_BACKEND_GFX81, - ASIC_MAX_BACKEND, // Must be the last entry -}; - - -enum asic_cap_id { - ASIC_THREAD_SIZE = 1, - ASIC_FED_INSTRUCTIONS = 2, - ASIC_LEGACY_LOG = 3, - ASIC_LARGE_DS_READ = 4, - ASIC_32BANK_LDS = 5, -}; - - -struct asic_info { - const char *name; - enum asic_backend backend; // which backend to use - int asic_thread_size; // number of threads in a wave - int asic_fed_instructions; // FED instructions are available - int asic_legacy_log; // Legacy EXP and LOG opcodes are available - int asic_large_ds_read; // Large DS read opcodes (96b and 128b) are available - int asic_32bank_lds; // Full 32 bank lds P1LL_F16 INTERP instruction available -}; - - -struct sp3_asic_state { - struct sp3_asic_aluop { - int pos; // original position in code - int op, na, nc; // na = number of args, nc = number of consts in args - int lds, offset; // lds = is an LDS_IDX_OP subop, offset = LDS offset - unsigned dst; - unsigned arg[3]; - unsigned lit[3]; // float literals are no longer float at this point - unsigned flags; - int scalar; - } bundle [5]; - unsigned lds_lit[2], lds_mask[2]; - int nbundle; - int reorder; - int last_reorder, last_po[5]; - int nscalar; // number of nominally-scalar opcodes in bundle - int barrier_after; // require barrier after this clause - - // sp3-r6xx - int asic; - struct da_reloc { - unsigned addr, ref; - struct da_reloc *next; - } *da_relocs; - struct cf_reloc **instrels; - struct cf_reloc *labels; - int sinstrels; - int slabels; - char unk_name[16]; -}; -#define A S->ap - - -extern struct asic_info asics[]; -#define ASICNAME asics[A->asic].name -#define ASIC asics[A->asic] -void set_asic(Sp, int asic); -int find_asic(const char *name); - - -// opcode tables - -void sp3_unbuild_tables(void); -void sp3_si_unbuild_tables(void); -void sp3_ci_unbuild_tables(void); -void sp3_gfx8_unbuild_tables(void); - -void sp3_build_tables(void); -void sp3_si_build_tables(void); -void sp3_ci_build_tables(void); -void sp3_gfx8_build_tables(void); - - - - -// helper functions - - -#define FMT_FMT 0x00000000 -#define FMT_COMP 0x00010000 -#define FMT_ENDIAN 0x00020000 -#define FMT_NUM 0x00030000 -#define FMT_SRF 0x00040000 -#define FMT_MASK 0xFFFF0000 -#define FMT_IMASK 0x0000FFFF - -void mark_sgpr(Sp, unsigned); -void mark_vgpr(Sp, unsigned); -void mark_global(Sp, unsigned); -void mark_ctemp(Sp, unsigned); -int is_mod_bool(Sp, pnode *, const char *); -int get_mod_bool(Sp, pnode *, const char *); -int get_mod_int(Sp, pnode *, int, int); -int get_mod_int32(Sp, pnode *); -int par_cmask(Sp, pnode *); -unsigned reg_csel(Sp, unsigned , int); -unsigned reg_msel(Sp, unsigned *, int); - -const char *spec_sel_to_name(Sp, int sel); -const char *sp3_fmt_to_name(Sp, int cls, int val); -const char *sp3_si_fmt_to_name(Sp, int cls, int val); -const char *sp3_ci_fmt_to_name(Sp, int cls, int val); -const char *sp3_gfx8_fmt_to_name(Sp, int cls, int val); - -void add_reloc_label(Sp, int li, int blame); -void add_reloc_inst(Sp, int ii, int blame); -void add_reloc_cf(Sp, int offs); - -int grouping_for_group_size(Sp, int group_size); - -//JENNICA - this block of name_tree will go away, replace -//with backend specific. - -enum nametree_enum { - NAMETREE_OPCODES, - NAMETREE_OPCODES_0ARG, - NAMETREE_OPCODES_CALL, - NAMETREE_VTX_FMTS, - NAMETREE_SPEC_SELS, - NAMETREE_SPEC_VEC_SELS, - NAMETREE_SGPR_NAME_SELS, - NAMETREE_CONSTS, - NAMETREE_DEPRECATED, -}; - -struct name_tree **get_name_tree(struct sp3_state *S, enum nametree_enum whichtree); - -extern struct name_tree *opcodes_0arg; -extern struct name_tree *opcodes_call; -extern struct name_tree *vtx_fmts; -extern struct name_tree *spec_sels; -extern struct name_tree *spec_vec_sels; -extern struct name_tree *sgpr_name_sels; -extern struct name_tree *consts; -extern struct name_tree *deprecated; - -extern struct name_tree *asic_names; -struct asic_caps{const char *name; int id;}; -extern struct asic_caps asiccaps[]; -extern struct name_tree *asic_caps; //JENNICA - this may need to go away. - -void update_sgpr_names(Sp); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3-int.h b/runtime/hsa-runtime/utils/sp3/sp3-int.h deleted file mode 100644 index a66550b1b7..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-int.h +++ /dev/null @@ -1,553 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_INT_H -#define SP3_INT_H - -#include "sp3.h" - - -#ifdef _MSC_VER -#ifndef strdup -#define strdup _strdup -#endif -#ifndef stricmp -#define stricmp _stricmp -#endif -#ifndef strcasecmp -#define strcasecmp _stricmp -#endif -#pragma warning(disable:4090 4204 4245 4296 4389 4701 4702) -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -struct sp3_state; -#define Sp struct sp3_state *S - -// clause types - -#define CT_NONE 0 -#define CT_SHADER 1 - -// parse tree - -#define P_NUM 0 // integer -#define P_FLT 1 // float -#define P_STR 2 // string -#define P_REG 3 // register component(s) -#define P_RANGE 4 // closed range -#define P_RANGEL 5 // right-open range -#define P_SLICE 6 // array concatenation (used for slices) -#define P_RCAST 7 // integer -> register cast -#define P_LIST 8 // list (internal to the parser only) -#define P_VAR 9 // variable (with name) -#define P_VARE 10 // variable-element (result of lvalue slice) -#define P_CL 11 // clause -#define P_CLI 12 // clause instructions -#define P_WHILE 13 // while loop -#define P_REPEAT 14 // repeat-until loop -#define P_IF 15 // if or if-else -#define P_CFOR 16 // C-style for loop -#define P_FOR 17 // vector for loop -#define P_RET 18 // return from function -#define P_CSLICE 19 // componentwise slice -#define P_UREF 20 // unresolved reference -#define P_FREF 21 // resolved reference -#define P_CALL 22 // function call -#define P_PRINT 23 // print to stdout -#define P_PAR 24 // function parameters -#define P_NF 25 // native function -#define P_OMOD 27 // opcode modifier -#define P_OMODS 28 // opcode modifiers -#define P_OPARS 29 // opcode parameters -#define P_OP 30 // opcode -#define P_SWIZ0 31 // register swizzles with N components wrapped -#define P_SWIZ1 32 // -"- -#define P_SWIZ2 33 // -"- -#define P_SWIZ3 34 // -"- -#define P_SWIZ4 35 // -"- -#define P_VTXFMT 36 // vertex formats -#define P_LABEL 37 // unique identifier of a label -#define P_LINIT 38 // generate label identifiers -#define P_MARK 39 // mark a label -#define P_OPCALL 40 // opcode that does a clause instantiation on par0 -#define P_ASIC 41 // ASIC model -#define P_ASICCAP 42 // ASIC capability -#define P_NCLOS 43 // create closure -#define P_CLOS 44 // closure -#define P_SH 45 // compiled shader - -#define P_NOT 0x100 -#define P_BNOT 0x101 -#define P_NEG 0x102 -#define P_MUL 0x103 -#define P_DIV 0x104 -#define P_MOD 0x105 -#define P_ADD 0x106 -#define P_SUB 0x107 -#define P_SHL 0x108 -#define P_SHR 0x109 -#define P_SAR 0x10A -#define P_LT 0x10B -#define P_GT 0x10C -#define P_LEQ 0x10D -#define P_GEQ 0x10E -#define P_EQ 0x10F -#define P_NEQ 0x110 -#define P_BAND 0x111 -#define P_BOR 0x112 -#define P_BXOR 0x113 -#define P_AND 0x114 -#define P_OR 0x115 -#define P_XOR 0x116 -#define P_SEL 0x117 -#define P_XDEC 0x118 -#define P_XINC 0x119 -#define P_DECX 0x11A -#define P_INCX 0x11B -#define P_ASGN 0x11C -#define P_IND 0x11D -#define P_NOP 0x11E -#define P_VSUM 0x11F -#define P_VPROD 0x120 -#define P_VBOR 0x121 -#define P_VBAND 0x122 -#define P_VBXOR 0x123 -#define P_VOR 0x124 -#define P_VAND 0x125 -#define P_VXOR 0x126 -#define P_VMIN 0x127 -#define P_VMAX 0x128 -#define P_CADD 0x129 -#define P_CSUB 0x12A -#define P_CMUL 0x12B -#define P_CDIV 0x12C -#define P_CSHL 0x12D -#define P_CSHR 0x12E -#define P_CSAR 0x12F -#define P_CBAND 0x130 -#define P_CBOR 0x131 -#define P_CBXOR 0x132 -#define P_CAND 0x133 -#define P_COR 0x134 -#define P_CXOR 0x135 -#define P_CMIN 0x136 -#define P_CMAX 0x137 -#define P_MIN 0x138 -#define P_MAX 0x139 -#define P_PROBE 0x13A -#define P_BITS 0x13B - -// register types -#define R_VGPR 0x00000 -#define R_OFF 0x04000 -#define R_SNAME 0x06000 -#define R_INTERP 0x08000 -#define R_SPEC 0x0A000 -#define R_SGPR 0x0C000 -#define R_EXPBUF 0x0E000 -#define R_TMASK 0x1E000 - -// magic values for R_SPEC -#define R_P_CL 3 // used internally only (inline literal) -#define R_P_CI_L 0xDB // used internally only -#define R_P_LDX_L 0xDB // any LDS inline -#define R_P_LDS_L 0xDF // direct LDS inline -#define R_P_LDS_H 0xE0 -#define R_P_LDX_H 0xE0 -#define R_P_CI_S 0xF3 // end of new R8xx constants -#define R_P_CI_H 0xFC -#define R_P_NOTLAST 0xFF// notlast operand for export - -// magic values for R_SNAME -#define R_S_SCRATCH 1 -#define R_S_PSVS_STATE 2 -#define R_S_SO_WRITE_INDEX 3 -#define R_S_SO_BASE_OFFSET0 4 -#define R_S_SO_BASE_OFFSET1 5 -#define R_S_SO_BASE_OFFSET2 6 -#define R_S_SO_BASE_OFFSET3 7 -#define R_S_OFFCHIP_LDS 8 -#define R_S_IS_OFFCHIP 9 -#define R_S_RING_OFFSET 10 -#define R_S_GS_WAVE_ID 11 -#define R_S_TG_SIZE 12 -#define R_S_TF_BASE 13 -#define R_S_TGID_X 14 -#define R_S_TGID_Y 15 -#define R_S_TGID_Z 16 -#define R_S_WAVE_CNT 17 -#define R_S_GLOBAL_WAVE_ID 18 - -// register components -#define R_CMASK 0x1C00 -#define R_CSHIFT 10 -#define R_CX 0x0000 -#define R_CY 0x0400 -#define R_CZ 0x0800 -#define R_CW 0x0C00 -#define R_CS 0x1000 // used to identify scalar elements -#define R_CN 0x1800 - -#define R_IMASK 0x03FF - -// source transforms -#define R_NEG 0x80000 -#define R_ABS 0x100000 -#define R_SEXT 0x200000 - -// subencodings for export targets - -#define R_E_TMASK 0x0380 -#define R_E_MRT 0x0000 -#define R_E_Z 0x0080 -#define R_E_POS 0x0100 -#define R_E_PARAM 0x0180 -#define R_E_ATTR 0x0280 -#define R_E_NULL 0x0300 - -#define R_E_IMASK 0x007F - -// subencodings for interp - -#define R_I_TMASK 0x0380 -#define R_I_P10 0x0000 -#define R_I_P20 0x0080 -#define R_I_P0 0x0100 - -// function parameters -#define F_CANY 0x00000000 -#define F_CNUM 0x01000000 -#define F_CREG 0x02000000 -#define F_CTMP 0x03000000 -#define F_CFPTR 0x04000000 -#define F_CINT 0x05000000 -#define F_CMASK 0x07000000 -#define F_OPT 0x40000000 -#define F_VEC 0x80000000 - -typedef struct pnode { - struct pnode *gc_next; - int gc_mark; - int type; - int et; // error reporting tag - int ni; // number of items - union pnode_item { - int num; // integer - float flt; // float - char *str; // string - struct pnode *ptr; // tree item - struct { - struct pnode *v; - int e; - } ve; // variable-element pair - struct { - int p; - char *n; - } var; // variable (stack offset, name) - struct sp3_shader *sh; - unsigned int reg; // register components - struct pnode *(* nf)(Sp, struct pnode **); // native function - } i[1]; -} pnode; - -pnode *p_str(Sp, char *s); // wrap a string -pnode *p_float(Sp, float f); // wrap a float -pnode *p_num(Sp, int i); // wrap an integer -pnode *p_vec(Sp, int type, int len); // create a vector -pnode *p_list(Sp, pnode *list, pnode *item); // append item to P_LIST -pnode *p_list_rev(Sp, pnode *list); // reverse the order of the list -pnode *p_tree(Sp, int type, int nitems, ...); // create a tree node -pnode *p_l2t(Sp, int type, pnode *list); // list to tree -pnode *p_l2v(Sp, int type, pnode *list); // list to vector -pnode *p_x2x(Sp, int type, pnode *p); // cast to type -pnode *p_clause(Sp, int vstk, int lstk, pnode *parlist, pnode *instlist, int type); -pnode *p_reg(Sp, int type, int idx); // wrap a register -pnode *p_swizzle(Sp, char *str); // parse a swizzle string -pnode *p_lv2rv(Sp, pnode *lval); // lvalue to rvalue -pnode *p_newlabel(Sp, pnode *t, int tag); // define new label -pnode *p_label(Sp, int cnt); // fill with label IDs -pnode *p_clone(Sp, pnode *src); - -void print_node(pnode *); // print to stdout - -void mark_gc_storage(Sp); // mark all internal storage of sp3 for gc - -// functions provided by machine driver -int is_opcode(struct sp3_state *S, const char *name); // is an opcode (any) -int is_opcode_0arg(struct sp3_state *S, const char *name); // is an opcode (0-argument) -int is_opcode_call(struct sp3_state *S, const char *name); // is a call op (1st argument is a closure) -void sp3_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -void sp3_si_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -void sp3_ci_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -void sp3_gfx8_gen_opcode(Sp, const char *op, pnode *par, pnode *mod); -pnode *machine_const(Sp, char *name); // if a machine const, parse it (else NULL) -void mark_label(Sp, int li); // "label:" -pnode *asic_getcap(Sp, int id); // get ASIC capability #id -void mach_cleanup(Sp); // initialize generator state - -// name trees - -#define NT_SEARCH 0 -#define NT_ADD 1 -#define NT_ADD_ONLY 2 -#define NT_ADD_STRDUP 4 -struct name_tree { - const char *name; - int tag; - int add; - struct name_tree *l, *r; -}; - -struct name_tree *name_tree_operation(struct name_tree **t, const char *name, int tag, int add); -void name_tree_delete(struct name_tree **t); - -// symbol table - -void f_decl(Sp, char *, pnode *); -pnode *f_ref(Sp, char *); -void f_check(Sp); -pnode *f_call(Sp, const char *); - -void f_decl_native(Sp, int, char *, pnode *(*)(Sp, pnode **), int, ...); - -// parse-time variable stack - -void vs_decl(Sp, const char *, int tag); -int vs_lookup(Sp, const char *, pnode **, int); -char *vs_getname(pnode *); - -void vs_enter_func(Sp); -int vs_leave_func(Sp, int *); // returns number of stack allocations & - // (through param) number of lstack allocs -void vs_enter_block(Sp); -void vs_leave_block(Sp); - -int vs_get_topmax(Sp); // returns number of stack allocation for top level - -// runtime variable stack - -void rv_set(Sp, pnode *, pnode *); -pnode *rv_get(Sp, pnode *); -void rv_alloc(Sp, int); -void rv_setpar(Sp, int, pnode *); -int rv_enter(Sp, int); -void rv_leave(Sp, int); - -int rl_enter(Sp, int); -void rl_leave(Sp, int); - -void rv_leave_native(Sp); -pnode **rv_getpar_native(Sp); - -// all-in-one variable setter - -void rv_set_by_name(Sp, const char *, pnode *); - -// growable binary buffer - -typedef struct grow_buf { - int n, size; - unsigned i[1]; -} grow_buf; - -grow_buf *gb_alloc(int); -grow_buf *gb_append(grow_buf *, int, unsigned *); -grow_buf *gb_add(grow_buf *, unsigned); -grow_buf *gb_reg(grow_buf *, unsigned, unsigned); - -// clause contents - -struct clause_info { - unsigned base; - grow_buf *data; - int type; -}; - -void start_clause(Sp, int); -void cb_emit(Sp, unsigned *, int); -int cb_ptr(Sp); -void cb_patch(Sp, int, int, unsigned); - -int remap_clauses(Sp); - -struct sp3_shader *gen_output(Sp); -void convert_relocs(Sp); -void perform_relocs(Sp); - -pnode *shader_clos(Sp, pnode *); // call this to get a binary shader from closure -pnode *shader_name(Sp, const char *); // call this to get a binary shader from name - -void set_const(Sp, int idx, unsigned val); -int find_const(Sp, unsigned val); - -void set_kbuf(Sp, int kbuf, int idx, unsigned val); - -const char *asic_name(Sp); -int asic_id(Sp); -int asic_capbyname(int, const char *); -int asic_capbyid(int, int); - -// register stream packer -int sp3_guess_shader_type(struct sp3_state *S, struct sp3_shader *sh); -int sp3_si_guess_shader_type(struct sp3_shader *sh); -int sp3_ci_guess_shader_type(struct sp3_shader *sh); -int sp3_gfx8_guess_shader_type(struct sp3_shader *sh); -void sp3_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void sp3_si_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void sp3_ci_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void sp3_gfx8_pack_reg_stream(Sp, int type, struct sp3_shader *sh); -void unpack_reg_stream(Sp, struct sp3_shader *sh); - -// instances - -int new_instance(Sp, pnode *, int); -void eval_instances(Sp); -int get_instance_clause(Sp, int); -int get_instance_type(Sp, int); - -// error reporting - -void et_parse_mode(Sp, int); -int et_get_id(Sp); -#ifdef _MSC_VER -__declspec(noreturn) -#endif -void et_error(Sp, char *, char *, ...) -#ifdef __GNUC__ -__attribute__ ((__noreturn__)) -__attribute__ ((format(printf, 3, 4))) -#endif -; -void et_warning(Sp, char *, char *, ...) -#ifdef __GNUC__ -__attribute__ ((format(printf, 3, 4))) -#endif -; -void et_blame(Sp, pnode *); -void et_blame_et(Sp, int); -void et_print(Sp, pnode *); -int et_get_blame(Sp); - -// text buffer for disasm -void bprintf(Sp, char *, ...) -#ifdef __GNUC__ -__attribute__ ((format(printf, 2, 3))) -#endif -; -void bcmt(Sp, const char *cmt, const char *start, const char *line, const char *end); -void btab(Sp, int); -char *bget(Sp); - -// state structure -struct sp3_state { - // flex - void *scanner; - void *yystate; - - char *yyfile; - int yyline; - - // sp3-gc - struct sp3_gc_state *gc; - - // asic private - struct sp3_asic_state *ap; - - // sp3-eval - int retflag; - pnode *retval; - - // sp3-int - struct sp3_shader config; - - int clause_id; // counts up during evaluation - int clause_type; - struct clause_info *clauses; - int nclauses, sclauses; - - int memsize, ctsizes[4]; - int in_shader; - - char *disasm_text; - int disasm_column; - int disasm_len, disasm_maxlen; - - sp3_vma *comment_map; - void *comment_ctx; - sp3_comment_cb comment_top, comment_right; - - unsigned const_buf[1024]; - int const_vld[1024], const_vld_range; - - unsigned *kval[16]; - int knum[16]; - - struct et_record { - const char *file; - int line; - } *et_names; - int et_node; - int et_parsing; - int net_names, set_names; - - char *fname_last; - struct name_tree *fnames; - struct fsym { - char *name; - pnode *func; - struct fref *refs; - struct fsym *l, *r; - } *fsymbols; - int func_id; // counts up during parsing - - struct instance { - int type; - int clause_id; - pnode *call; - } *instances; - int ninstances, sinstances; - - struct vstack { - char *name; - int tag; - int vs_sp, vs_level; - struct vstack *next; - } *var_stack, *lbl_stack; - int vs_max, vs_sp, vs_top, vs_topmax; - int ls_max, ls_sp; - - pnode **rl_stack; - int rl_sp, rl_ss, rl_base, rl_id, rl_size; - - pnode **rv_stack; - int rv_sp, rv_ss, rv_base, rv_size; - - int werror, wcount; - const char *err_hdr; - - unsigned entry_point_table_size; - unsigned entry_point_table_alloc_size; - sp3_vmaddr *entry_point_table; -}; -struct sp3_state *sp3_new_state(void); -void sp3_asic_attach_state(Sp); -void sp3_new_parser(Sp); -void sp3_free_parser(Sp); -void sp3_free_state(Sp); - -void reg_natives(Sp); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3-type.h b/runtime/hsa-runtime/utils/sp3/sp3-type.h deleted file mode 100644 index 160dc945ed..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-type.h +++ /dev/null @@ -1,137 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_TYPE_H -#define SP3_TYPE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/// @file sp3-type.h -/// @brief sp3 types - -enum sp3_shtype { - SP3_SHTYPE_NONE = -1, - SP3_SHTYPE_PS = 0, - SP3_SHTYPE_VS = 1, - SP3_SHTYPE_GS = 2, - SP3_SHTYPE_ES = 3, - SP3_SHTYPE_HS = 4, - SP3_SHTYPE_LS = 5, - SP3_SHTYPE_CS = 6, -}; - -enum sp3_count { - SP3_NUM_MRT = 8, - SP3_NUM_STRM = 4, -}; - -enum sp3_flag { - SP3DIS_NO_STATE = 0x01, - SP3DIS_NO_BINARY = 0x02, - SP3DIS_COMMENTS = 0x04, - SP3DIS_NO_GPR_COUNT = 0x08, - SP3DIS_FORCEVALID = 0x10, - SP3DIS_NO_ASIC = 0x20, -}; - -/// @brief Shader context. Contains no user-visible fields. -struct sp3_context; - -/// @brief Storage entry for register streams. -struct sp3_reg { - unsigned index; ///< One of the mm* values from chip_enum.h. - unsigned value; -}; - -/// @brief Wrapped shader metadata. -/// -/// After generation, shaders are encapsulated in sp3_shader structures. -/// -/// Those structures contain the shader binary, its register stream, -/// constants and constant buffers and metadata needed for SC compatibility. -struct sp3_shader { - int type; ///< One of the SHTYPE_* constants. - int asic_int; ///< Internal ASIC index. Do not use. - const char *asic; ///< ASIC name as a string ("RV870" etc). - unsigned size; ///< Size of the compiled shader, in 32-bit words. - unsigned nsgprs; ///< Number of scalar GPRs used. - unsigned nvgprs; ///< Number of vector GPRs used. - unsigned trap_present; - unsigned user_sgpr_count; - unsigned scratch_en; - unsigned dispatch_draw_en; - unsigned so_en; - unsigned so_base0_en; - unsigned so_base1_en; - unsigned so_base2_en; - unsigned so_base3_en; - unsigned oc_lds_en; - unsigned tg_size_en; - unsigned tidig_comp_cnt; ///< Number of components(-1) enabled for thread id in group - unsigned tgid_x_en; - unsigned tgid_y_en; - unsigned tgid_z_en; - unsigned wave_cnt_en; - unsigned sgpr_scratch; - unsigned sgpr_psvs_state; - unsigned sgpr_so_write_index; - unsigned sgpr_so_base_offset0; - unsigned sgpr_so_base_offset1; - unsigned sgpr_so_base_offset2; - unsigned sgpr_so_base_offset3; - unsigned sgpr_offchip_lds; - unsigned sgpr_is_offchip; - unsigned sgpr_ring_offset; - unsigned sgpr_gs_wave_id; - unsigned sgpr_global_wave_id; - unsigned sgpr_tg_size; - unsigned sgpr_tgid_x; - unsigned sgpr_tgid_y; - unsigned sgpr_tgid_z; - unsigned sgpr_tf_base; - unsigned sgpr_wave_cnt; - unsigned pc_exports; ///< Range of parameters exported (if VS). - unsigned pos_export; ///< Shader executes a position export (if VS). - unsigned cb_exports; ///< Range of MRTs exported (if PS). - unsigned mrtz_export_format; ///< Export format of the mrtz export. - unsigned z_export; ///< Shader executes a Z export (if PS). - unsigned pops_en; ///< Shader is POPS (PS) - unsigned load_collision_waveid; ///< Shader sets load collision waveid (if PS). - unsigned stencil_test_export; ///< Shader exports stencil (if PS). - unsigned stencil_op_export; ///< Shader exports stencil (if PS). - unsigned kill_used; ///< Shader executes ALU KILL operations. - unsigned cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS). - unsigned emit_used; ///< EMIT opcodes used (if GS). - unsigned covmask_export; ///< Shader exports coverage mask (if PS). - unsigned mask_export; ///< Shader exports mask (if PS). - unsigned strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map). - unsigned scratch_used; ///< Scratch SMX exports used. - unsigned scratch_itemsize; ///< Scratch ring item size. - unsigned reduction_used; ///< Reduction SMX exports used. - unsigned ring_used; ///< ESGS/GSVS ring SMX exports used. - unsigned ring_itemsize; ///< ESGS/GSVS ring item size (for ES/GS respectively). - unsigned vertex_size[4]; ///< GSVS ring vertex size (for GS). - unsigned mem_used; ///< Raw memory SMX exports used. - unsigned rats_used; ///< Mask of RATs (UAVs) used - unsigned group_size[3]; ///< Wavefront group size (for ELF files). - unsigned alloc_lds; ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS) - unsigned *data; ///< Shader binary data. - unsigned nregs; ///< Number of register writes in the stream. - struct sp3_reg *regs; ///< Register writes (index-value pairs). -}; - -/// @brief Comment callback. -typedef const char *(*sp3_comment_cb)(void *, int); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3-vm.h b/runtime/hsa-runtime/utils/sp3/sp3-vm.h deleted file mode 100644 index 15c1baeb3c..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3-vm.h +++ /dev/null @@ -1,119 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_VM_H -#define SP3_VM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined (WIN_OS) && !defined(SP3_STATIC_LIB) - #if defined(DLL_EXPORT_SP3) - #define SP3_EXPORT __declspec(dllexport) - #else - #define SP3_EXPORT __declspec(dllimport) - #endif -#else - #define SP3_EXPORT -#endif - -#ifdef _MSC_VER -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; - -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -#else -#include -#endif - -struct sp3_vma; - -/// @file sp3-vm.h -/// @brief sp3 VM API -/// -/// The VM API is used to manage virtual memory maps. Those maps are -/// used for binary storage for disassembly, as they can naturally -/// mirror the GPU's memory map (so no register translation is needed). - -#define SP3_VM_PAGESIZE 64 - -/// @brief VM addresses are 64-bit and the address unit is 32 bits -/// -typedef uint64_t sp3_vmaddr; - -/// @brief Callback function that will fill a VMA on demand -/// -/// The VMA to be filled will be specified through the request address. -/// The callback should fill the VMA using sp3_vm_write calls. -typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx); - -/// @brief VM area -/// -/// VMAs are kept in a sorted list -typedef struct sp3_vma { - sp3_vmaddr base, len; - sp3_vmfill fill; - void *fill_ctx; - uint32_t *data; - struct sp3_vma *prev, *next; -} sp3_vma; - -/// @brief Create a new VM that is empty. -/// -SP3_EXPORT -sp3_vma *sp3_vm_new(void); - -/// @brief Create a new VM that has a sp3_vmfill callback. -/// -SP3_EXPORT -sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx); - -/// @brief Create a new VM from an array of words. -/// @param base VM address to load array at. -/// @param len Number of 32-bit words in the array. -/// @param data Pointer to the array. -/// -SP3_EXPORT -sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data); - -/// @brief Find a VMA, optionally adding it. -/// @param vm VM to search in. -/// @param addr Address to search for. -/// @param add Flag indicating whether a failure should result in adding a new VMA. -/// -SP3_EXPORT -sp3_vma *sp3_vm_find(sp3_vma *vm, sp3_vmaddr addr, int add); - -/// @brief Write a word to a VM. -/// -SP3_EXPORT -void sp3_vm_write(sp3_vma *vm, sp3_vmaddr addr, uint32_t val); - -/// @brief Read a word from a VM. -/// -SP3_EXPORT -uint32_t sp3_vm_read(sp3_vma *vm, sp3_vmaddr addr); - -/// @brief Probe VM for presence. -/// @return 1 if the specified address is backed in the VM, 0 otherwise. -/// -SP3_EXPORT -int sp3_vm_present(sp3_vma *vm, sp3_vmaddr addr); - -/// @brief Free a VM and all its storage. -/// -SP3_EXPORT -void sp3_vm_free(sp3_vma *vm); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/runtime/hsa-runtime/utils/sp3/sp3.h b/runtime/hsa-runtime/utils/sp3/sp3.h deleted file mode 100644 index 7ecc8e67a4..0000000000 --- a/runtime/hsa-runtime/utils/sp3/sp3.h +++ /dev/null @@ -1,198 +0,0 @@ -//===================================================================== -// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. -// -/// \author AMD Developer Tools Team -/// \file -/// -//===================================================================== - -#ifndef SP3_H -#define SP3_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "sp3-vm.h" -#include "sp3-type.h" - -/// @file sp3.h -/// @brief sp3 API - -/// @brief Get version of the sp3 library. -/// -/// @return String containing the version number. -/// -SP3_EXPORT const char *sp3_version(void); - -/// @brief Create a new sp3 context. -/// -SP3_EXPORT struct sp3_context *sp3_new(void); - -/// @brief Set option for sp3. -/// -/// @param state sp3 context. -/// @param option Option name. Unknown options will raise an error. -/// @param value Option value. NULL is used to represent value-less options. -/// -SP3_EXPORT void sp3_set_option(struct sp3_context *state, const char *option, const char *value); - -/// @brief Parse a file into a context. -/// -/// If 'file' is NULL, parse stdin. -/// -SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file); - -/// @brief Parse a string into a context. -/// -SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string); - -/// @brief Parse a file from the standard library into a context. -/// -SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name); - -/// @brief Call a sp3 function. -/// -SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func); - -/// @brief Call a sp3 CF clause. -/// -/// @param state sp3 context. -/// @param cffunc Name of clause to call. By convention, this is "main". -/// -/// @return A compiled and linked shader. Free memory with sp3_free(). -/// -SP3_EXPORT struct sp3_shader *sp3_compile(struct sp3_context *state, const char *cffunc); - -/// @brief Free a sp3_shader. -/// -SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh); - -/// @brief Get current ASIC name set for a context. -/// -SP3_EXPORT const char *sp3_getasic(struct sp3_context *state); - -/// @brief Set current ASIC name for a context. -/// -SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip); - -/// @brief Set global variable in context to an integer. -/// -SP3_EXPORT void sp3_set_param_int(struct sp3_context *state, const char *name, int value); - -/// @brief Set global variable in context to an integer vector. -/// -SP3_EXPORT void sp3_set_param_intvec(struct sp3_context *state, const char *name, int size, const int *value); - -/// @brief Set global variable in context to a float. -/// -SP3_EXPORT void sp3_set_param_float(struct sp3_context *state, const char *name, float value); - -/// @brief Set global variable in context to a float vector. -/// -SP3_EXPORT void sp3_set_param_floatvec(struct sp3_context *state, const char *name, int size, const float *value); - -/// @brief Set error message header. -/// -SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str); - -/// @brief Get ASIC metrics for the ASIC in current state. -/// -/// Used by ELF tools to fill in some CAL fields. -/// -SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name); - -/// @brief Free a context allocated by sp3_new/open/parse. -/// -SP3_EXPORT void sp3_close(struct sp3_context *state); - -/// @brief Disassemble a shader. -/// -/// This call is likely to change to something that will take a filled sp3_shader structure later on. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param bin Memory map with the opcodes (see sp3-vm.h). -/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words). -/// @param name Same to give the disassembled shader. -/// @param shader_type One of the SHTYPE_* constants. -/// @param include Literal text to include in the CF clause (NULL includes nothing). -/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set. -/// @param flags A mask of SP3DIS_* flags. -/// -/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free(). -/// -SP3_EXPORT char *sp3_disasm(struct sp3_context *state, sp3_vma *bin, sp3_vmaddr base, const char *name, int shader_type, const char *include, unsigned max_len, unsigned flags); - -/// @brief Disassemble a single shader instruction. -/// -/// This call is likely to change to something that will take a filled sp3_shader structure later on. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction). -/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words). -/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words). -/// @param shader_type One of the SHTYPE_* constants. -/// @param flags A mask of SP3DIS_* flags. -/// -/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free(). -/// -SP3_EXPORT char *sp3_disasm_inst(struct sp3_context *state, const unsigned inst[2], sp3_vmaddr base, sp3_vmaddr addr, int shader_type, unsigned flags); - -/// @brief Parse a register stream. -/// -/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants. -/// -/// This call is likely to merge with sp3_disasm later on. -/// -/// @param state sp3 context to fill with state. -/// @param nregs Number of register entries. -/// @param regs Register stream to parse. -/// @param shader_type One of the SHTYPE_* constants. -/// -SP3_EXPORT void sp3_setregs(struct sp3_context *state, unsigned nregs, const struct sp3_reg *regs, int shader_type); - - -/// @brief Set shader comments -/// -/// @param state sp3 context. -/// @param map Map of comments (0 for no comment, other values will be passed to the callback). -/// @param f_top Callback returning comment to place above the opcode. -/// @param f_right Callback returning comment to place to the right of the opcode. -/// @param ctx Void pointer to pass to comment callbacks. -/// -SP3_EXPORT void sp3_setcomments(struct sp3_context *state, sp3_vma *map, sp3_comment_cb f_top, sp3_comment_cb f_right, void *ctx); - -/// @brief Set alternate shader entry points -/// -/// Used for disassembly; this marks an additional location in memory -/// (besides the start address) where shader code may be found. Generally -/// required for jump tables and any case where the shader may perform -/// indirect jumps to ensure that disassembly locates all shader -/// instructions. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words). -/// -SP3_EXPORT void sp3_setentrypoint(struct sp3_context *state, sp3_vmaddr addr); - -/// @brief Clear alternate shader entry points -/// -/// Clear all entry points previously set with sp3_setentrypoint. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// -SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state); - -/// @brief Free memory allocated by sp3. -/// -/// Windows DLLs that allocate memory have to free it. This function -/// should be used to free the result of sp3_disasm, sp3_compile etc. -/// -SP3_EXPORT void sp3_free(void *ptr); - -#ifdef __cplusplus -} -#endif - - -#endif