Replace SP3 dynamic assembly with pre-assembled binaries

This is the first part of transitioning to the LLVM-based assembler.
SP3 is deprecated and all references to the library are removed.
Pending LLVM support, relevant shaders have been precompiled.

Change-Id: I7d44cef5ded1836c4a74b77881af5bea8803d2c1
This commit is contained in:
Jay Cornwall
2016-07-16 16:38:32 -05:00
parent aba3046bb6
commit 712ea75377
10 changed files with 208 additions and 1271 deletions
-12
View File
@@ -60,18 +60,6 @@ For example, from the top level ROCR repository execute:
The name of the core hsa runtime is libhsa-runtime64.so.1.
#### External requirements
The core runtime requires the sp3.a library to be able to compile
on x86_64 architectures. The binaries for the sp3.a library can
be found on the amd-codexl-analyzer GitHub repository:
https://github.com/GPUOpen-Tools/amd-codexl-analyzer
The x86_64 library and associated header files have been added to
this code base for convenience, but are still subject to the
AMD copyright license.
#### Specs
http://www.hsafoundation.com/standards/
-15
View File
@@ -66,18 +66,6 @@ if ( NOT EXISTS ${HSATHK_BUILD_LIB_PATH}/libhsakmt.so.1 )
MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_LIB_PATH is not set to point to the location where KFD Thunk library libhsakmt.so.1 could be found." )
endif ()
# Resolve the directory that provides sp3.h. A caller-supplied
# LIBSP3_BUILD_INC_PATH is kept when it actually contains sp3.h; otherwise the
# copy under ../utils/sp3 is used. The expansion is quoted so that a value
# containing ';' still reaches if(EXISTS) as one argument, and the former
# "set the variable to itself" branch is dropped because it had no effect.
if ( NOT EXISTS "${LIBSP3_BUILD_INC_PATH}/sp3.h" )
  set ( LIBSP3_BUILD_INC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3" )
endif ()
# Resolve the directory that provides libsp3.a. A caller-supplied
# LIBSP3_BUILD_LIB_PATH is kept when it actually contains libsp3.a; otherwise
# the copy under ../utils/sp3 is used. The expansion is quoted so that a value
# containing ';' still reaches if(EXISTS) as one argument, and the former
# "set the variable to itself" branch is dropped because it had no effect.
if ( NOT EXISTS "${LIBSP3_BUILD_LIB_PATH}/libsp3.a" )
  set ( LIBSP3_BUILD_LIB_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3" )
endif ()
# Print the detected build configuration. Each message is passed as a single
# quoted string: message() concatenates multiple arguments with no separator,
# so the unquoted form printed the label and its value run together (and
# dropped the ';' of any list value).
message ( "------IS64BIT: ${IS64BIT}" )
message ( "------Compiler: ${CMAKE_CXX_COMPILER}" )
message ( "------Version: ${CMAKE_CXX_COMPILER_VERSION}" )
@@ -153,11 +141,9 @@ include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/../inc )
include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/inc )
include_directories ( ${HSATHK_BUILD_INC_PATH} )
include_directories ( ${LIBSP3_BUILD_INC_PATH} )
## Library path(s).
link_directories ( ${HSATHK_BUILD_LIB_PATH} )
link_directories ( ${LIBSP3_BUILD_LIB_PATH} )
add_library ( ${CORE_RUNTIME_TARGET} SHARED ${CORE_SRCS} )
@@ -172,7 +158,6 @@ target_link_libraries ( ${CORE_RUNTIME_TARGET}
PRIVATE amdhsaloader
PRIVATE amdhsacode
PRIVATE hsakmt
PRIVATE sp3
dl pthread rt
)
@@ -0,0 +1,169 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
#define HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
namespace amd {
// Pre-assembled machine code for the "CopyAligned" shader, ISA major version 7.
// GpuAgent::AssembleShader selects this array by shader name and ISA major
// version and copies it unchanged into a GPU-visible code buffer.
// NOTE(review): presumably produced offline from the former SP3 shader source;
// the generating command is not recorded here.
static const unsigned int kCodeCopyAligned7[] = {
0xC0820100, 0xC0840104, 0xC0860108, 0xC088010C, 0xC08A0110, 0xC00C0114,
0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900,
0xD2506A03, 0x01A90103, 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05,
0x01A90105, 0xD1C2006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
0x01000002, 0xBF8C0F70, 0xD24A6A02, 0x00003102, 0xD2506A03, 0x01A90103,
0xDC600000, 0x00000104, 0xD24A6A04, 0x00003104, 0xD2506A05, 0x01A90105,
0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, 0xD24A6A02,
0x00001101, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001501,
0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000E, 0xDC380000,
0x08000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
0xDC780000, 0x00000804, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD24A6A02, 0x00001901,
0xD2506A03, 0x01A90103, 0x7E0A020F, 0xD24A6A04, 0x00001D01, 0xD2506A05,
0x01A90105, 0xD1C2006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, 0xDC300000,
0x01000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
0xDC700000, 0x00000104, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD24A6A02, 0x00002100, 0xD2506A03,
0x01A90103, 0x7E0A0213, 0xD24A6A04, 0x00002500, 0xD2506A05, 0x01A90105,
0xD1C2006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, 0x01000002,
0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
};
// Pre-assembled machine code for the "CopyMisaligned" shader, ISA major
// version 7. GpuAgent::AssembleShader selects this array by shader name and
// ISA major version and copies it unchanged into a GPU-visible code buffer.
// NOTE(review): presumably produced offline from the former SP3 shader source;
// the generating command is not recorded here.
static const unsigned int kCodeCopyMisaligned7[] = {
0xC0820100, 0xC0840104, 0xC0860108, 0xC008010C, 0xBF8C007F, 0x8F028602,
0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, 0xD2506A03, 0x01A90103,
0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, 0x01A90105, 0xD1C2006A,
0x00001102, 0xBF860032, 0xDC200000, 0x06000002, 0xD24A6A02, 0x00002102,
0xD2506A03, 0x01A90103, 0xDC200000, 0x07000002, 0xD24A6A02, 0x00002102,
0xD2506A03, 0x01A90103, 0xDC200000, 0x08000002, 0xD24A6A02, 0x00002102,
0xD2506A03, 0x01A90103, 0xDC200000, 0x09000002, 0xD24A6A02, 0x00002102,
0xD2506A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD24A6A04,
0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000704, 0xD24A6A04,
0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000804, 0xD24A6A04,
0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000904, 0xD24A6A04,
0x00002104, 0xD2506A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD24A6A02,
0x00001100, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001500,
0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000F, 0x87FE6A7E,
0xDC200000, 0x01000002, 0xD24A6A02, 0x00002102, 0xD2506A03, 0x01A90103,
0xBF8C0F70, 0xDC600000, 0x00000104, 0xD24A6A04, 0x00002104, 0xD2506A05,
0x01A90105, 0xBF82FFEE, 0xBF810000,
};
// Pre-assembled machine code for the "Fill" shader, ISA major version 7.
// GpuAgent::AssembleShader selects this array by shader name and ISA major
// version and copies it unchanged into a GPU-visible code buffer.
// NOTE(review): presumably produced offline from the former SP3 shader source;
// the generating command is not recorded here.
static const unsigned int kCodeFill7[] = {
0xC0820100, 0xC0840104, 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E08020A,
0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8F0C840B, 0x34020084, 0x7E060205,
0xD24A6A02, 0x00000901, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00000D02,
0xBF860007, 0xDC780000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
0x01A90103, 0xBF82FFF6, 0x8F0C820B, 0x34020082, 0x7E060207, 0xD24A6A02,
0x00000D01, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00001102, 0xBF860008,
0x87FE6A7E, 0xDC700000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
0x01A90103, 0xBF82FFF5, 0xBF810000,
};
// Pre-assembled machine code for the "TrapHandler" shader, ISA major version 8.
// There is no version 7 counterpart in this header: the shader table in
// GpuAgent::AssembleShader leaves the version 7 "TrapHandler" slot as
// {NULL, 0, 0, 0}.
// NOTE(review): presumably produced offline from the SP3 trap handler source
// in GpuAgent::BindTrapHandler; the generating command is not recorded here.
static const unsigned int kCodeTrapHandler8[] = {
0xC0061C80, 0x000000C0, 0xBF8C007F, 0xBEFE0181, 0x80728872, 0x82738073,
0x7E000272, 0x7E020273, 0x7E0402FF, 0x80000000, 0x7E060280, 0xDD800000,
0x00000200, 0xBF8C0F70, 0x7DD40500, 0xBF870011, 0xC0061D39, 0x00000008,
0xBF8C007F, 0x86F47474, 0xBF84000C, 0x80729072, 0x82738073, 0xC0021CB9,
0x00000000, 0xBF8C007F, 0x7E000274, 0x7E020275, 0x7E040272, 0xDC700000,
0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70,
};
// Pre-assembled machine code for the "CopyAligned" shader, ISA major version 8.
// GpuAgent::AssembleShader selects this array by shader name and ISA major
// version and copies it unchanged into a GPU-visible code buffer.
// NOTE(review): presumably produced offline from the former SP3 shader source;
// the generating command is not recorded here.
static const unsigned int kCodeCopyAligned8[] = {
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, 0xD1196A02, 0x00000900,
0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, 0x00000D00, 0xD11C6A05,
0x01A90105, 0xD0E9006A, 0x00001102, 0xBF86000F, 0x86FE6A7E, 0xDC400000,
0x01000002, 0xBF8C0F70, 0xD1196A02, 0x00003102, 0xD11C6A03, 0x01A90103,
0xDC600000, 0x00000104, 0xD1196A04, 0x00003104, 0xD11C6A05, 0x01A90105,
0xBF82FFEE, 0xBEFE01C1, 0x8E198418, 0x24020084, 0x7E060209, 0xD1196A02,
0x00001101, 0xD11C6A03, 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001501,
0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001902, 0xBF86000E, 0xDC5C0000,
0x08000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
0xDC7C0000, 0x00000804, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
0xBF82FFEF, 0x8E198218, 0x24020082, 0x7E06020D, 0xD1196A02, 0x00001901,
0xD11C6A03, 0x01A90103, 0x7E0A020F, 0xD1196A04, 0x00001D01, 0xD11C6A05,
0x01A90105, 0xD0E9006A, 0x00002102, 0xBF86000F, 0x86FE6A7E, 0xDC500000,
0x01000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
0xDC700000, 0x00000104, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
0xBF82FFEE, 0xBEFE01C1, 0x7E060211, 0xD1196A02, 0x00002100, 0xD11C6A03,
0x01A90103, 0x7E0A0213, 0xD1196A04, 0x00002500, 0xD11C6A05, 0x01A90105,
0xD0E9006A, 0x00002902, 0xBF860006, 0x86FE6A7E, 0xDC400000, 0x01000002,
0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
};
// Pre-assembled machine code for the "CopyMisaligned" shader, ISA major
// version 8. GpuAgent::AssembleShader selects this array by shader name and
// ISA major version and copies it unchanged into a GPU-visible code buffer.
// NOTE(review): presumably produced offline from the former SP3 shader source;
// the generating command is not recorded here.
static const unsigned int kCodeCopyMisaligned8[] = {
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
0xC0020400, 0x00000030, 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205,
0xD1196A02, 0x00000900, 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04,
0x00000D00, 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF860032,
0xDC400000, 0x06000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
0xDC400000, 0x07000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
0xDC400000, 0x08000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
0xDC400000, 0x09000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
0xBF8C0F70, 0xDC600000, 0x00000604, 0xD1196A04, 0x00002104, 0xD11C6A05,
0x01A90105, 0xDC600000, 0x00000704, 0xD1196A04, 0x00002104, 0xD11C6A05,
0x01A90105, 0xDC600000, 0x00000804, 0xD1196A04, 0x00002104, 0xD11C6A05,
0x01A90105, 0xDC600000, 0x00000904, 0xD1196A04, 0x00002104, 0xD11C6A05,
0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD1196A02, 0x00001100, 0xD11C6A03,
0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001500, 0xD11C6A05, 0x01A90105,
0xD0E9006A, 0x00001902, 0xBF86000F, 0x86FE6A7E, 0xDC400000, 0x01000002,
0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, 0xDC600000,
0x00000104, 0xD1196A04, 0x00002104, 0xD11C6A05, 0x01A90105, 0xBF82FFEE,
0xBF810000,
};
// Pre-assembled machine code for the "Fill" shader, ISA major version 8.
// GpuAgent::AssembleShader selects this array by shader name and ISA major
// version and copies it unchanged into a GPU-visible code buffer.
// NOTE(review): presumably produced offline from the former SP3 shader source;
// the generating command is not recorded here.
static const unsigned int kCodeFill8[] = {
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xBF8C007F, 0x8E028602,
0x32000002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8E0C840B,
0x24020084, 0x7E060205, 0xD1196A02, 0x00000901, 0xD11C6A03, 0x01A90103,
0xD0E9006A, 0x00000D02, 0xBF860007, 0xDC7C0000, 0x00000402, 0xD1196A02,
0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF6, 0x8E0C820B, 0x24020082,
0x7E060207, 0xD1196A02, 0x00000D01, 0xD11C6A03, 0x01A90103, 0xD0E9006A,
0x00001102, 0xBF860008, 0x86FE6A7E, 0xDC700000, 0x00000402, 0xD1196A02,
0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
};
} // namespace amd
#endif // header guard
@@ -46,19 +46,19 @@
#include <atomic>
#include <cstring>
#include <climits>
#include <map>
#include <string>
#include <vector>
#include "core/inc/amd_aql_queue.h"
#include "core/inc/amd_blit_kernel.h"
#include "core/inc/amd_blit_sdma.h"
#include "core/inc/amd_gpu_shaders.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/isa.h"
#include "core/inc/runtime.h"
#include "utils/sp3/sp3.h"
#include "hsa_ext_image.h"
// Size of scratch (private) segment pre-allocated per thread, in bytes.
@@ -182,49 +182,58 @@ GpuAgent::~GpuAgent() {
void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
AssembleTarget assemble_target, void*& code_buf,
size_t& code_buf_size) const {
#ifdef __linux__ // No VS builds of libsp3 available right now
std::string src_sp3_unified(src_sp3);
// Select precompiled shader implementation from name/target.
struct ASICShader {
const void* code;
size_t size;
int num_sgprs;
int num_vgprs;
};
if (isa_->GetMajorVersion() == 7) {
// On Gfx7 replace v_add_u32 with legacy equivalent v_add_i32.
std::string add_inst_gfx8("v_add_u32"), add_inst_gfx7("v_add_i32");
struct CompiledShader {
ASICShader compute_7;
ASICShader compute_8;
};
for (size_t instIdx = 0; (instIdx = src_sp3_unified.find(
add_inst_gfx8, instIdx)) != std::string::npos;
instIdx += add_inst_gfx8.size()) {
src_sp3_unified.replace(instIdx, add_inst_gfx7.size(), add_inst_gfx7);
}
}
std::map<std::string, CompiledShader> compiled_shaders = {
{"TrapHandler",
{{NULL, 0, 0, 0}, {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}}},
{"CopyAligned",
{{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}}},
{"CopyMisaligned",
{{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}}},
{"Fill",
{{kCodeFill7, sizeof(kCodeFill7), 19, 8},
{kCodeFill8, sizeof(kCodeFill8), 19, 8}}}};
// Assemble source string with libsp3.
sp3_context* sp3 = sp3_new();
auto compiled_shader_it = compiled_shaders.find(func_name);
assert(compiled_shader_it != compiled_shaders.end() &&
"Precompiled shader unavailable");
ASICShader* asic_shader = NULL;
switch (isa_->GetMajorVersion()) {
case 7:
sp3_setasic(sp3, "CI");
sp3_set_param_int(sp3, "kGFXIPVersion", 7);
asic_shader = &compiled_shader_it->second.compute_7;
break;
case 8:
sp3_setasic(sp3, "VI");
sp3_set_param_int(sp3, "kGFXIPVersion", 8);
asic_shader = &compiled_shader_it->second.compute_8;
break;
default:
assert(false && "SP3 assembly not supported on this agent");
assert(false && "Precompiled shader unavailable for target");
}
sp3_parse_string(sp3, src_sp3_unified.c_str());
sp3_shader* code_sp3_meta = sp3_compile(sp3, func_name);
// Allocate a GPU-visible buffer for the shader.
HsaMemFlags code_buf_flags = {0};
code_buf_flags.ui32.HostAccess = 1;
code_buf_flags.ui32.ExecuteAccess = 1;
code_buf_flags.ui32.NoSubstitute = 1;
size_t code_size = code_sp3_meta->size * sizeof(uint32_t);
size_t header_size =
(assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0);
code_buf_size = AlignUp(header_size + code_size, 0x1000);
code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000);
HSAKMT_STATUS err =
hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf);
@@ -239,8 +248,8 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
if (assemble_target == AssembleTarget::AQL) {
amd_kernel_code_t* header = reinterpret_cast<amd_kernel_code_t*>(code_buf);
int gran_sgprs = std::max(0, (int(code_sp3_meta->nsgprs) - 1) / 8);
int gran_vgprs = std::max(0, (int(code_sp3_meta->nvgprs) - 1) / 4);
int gran_sgprs = std::max(0, (int(asic_shader->num_sgprs) - 1) / 8);
int gran_vgprs = std::max(0, (int(asic_shader->num_vgprs) - 1) / 4);
header->kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
AMD_HSA_BITS_SET(header->kernel_code_properties,
@@ -262,14 +271,9 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 1);
}
// Copy trap handler code into the GPU-visible buffer.
memcpy((void*)(uintptr_t(code_buf) + header_size), code_sp3_meta->data,
code_size);
// Release SP3 resources.
sp3_free_shader(code_sp3_meta);
sp3_close(sp3);
#endif
// Copy shader code into the GPU-visible buffer.
memcpy((void*)(uintptr_t(code_buf) + header_size), asic_shader->code,
asic_shader->size);
}
void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) const {
@@ -988,7 +992,6 @@ void GpuAgent::SyncClocks() {
}
void GpuAgent::BindTrapHandler() {
#ifdef __linux__ // No raw string literal support in VS builds right now
const char* src_sp3 = R"(
var s_trap_info_lo = ttmp0
var s_trap_info_hi = ttmp1
@@ -1068,7 +1071,6 @@ void GpuAgent::BindTrapHandler() {
HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_,
trap_code_buf_size_, NULL, 0);
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtSetTrapHandler() failed");
#endif
}
} // namespace
-19
View File
@@ -1,19 +0,0 @@
Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-181
View File
@@ -1,181 +0,0 @@
//=====================================================================
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
//
/// \author AMD Developer Tools Team
/// \file
///
//=====================================================================
#ifndef SP3_ASIC_H
#define SP3_ASIC_H
#include "sp3-int.h"
#include "sp3-vm.h"
#ifdef __cplusplus
extern "C" {
#endif
// ASIC types
// Backend selector: the value stored in asic_info::backend to say which
// backend an ASIC uses. ASIC_MAX_BACKEND is not a backend itself; it marks the
// end of the list.
enum asic_backend {
ASIC_BACKEND_SI,
ASIC_BACKEND_CI,
ASIC_BACKEND_GFX8,
ASIC_BACKEND_GFX81,
ASIC_MAX_BACKEND, // Must be the last entry
};
// Numeric ids for ASIC capabilities, numbered from 1. Each id has a
// same-named field in struct asic_info (ASIC_THREAD_SIZE / asic_thread_size,
// and so on).
// NOTE(review): presumably the `id` argument of asic_getcap()/asic_capbyid()
// — confirm against the implementation.
enum asic_cap_id {
ASIC_THREAD_SIZE = 1,
ASIC_FED_INSTRUCTIONS = 2,
ASIC_LEGACY_LOG = 3,
ASIC_LARGE_DS_READ = 4,
ASIC_32BANK_LDS = 5,
};
// Description of one ASIC: its name, the backend it uses and its capability
// values. The table of these is declared in this header as `asics[]` and is
// indexed through the ASIC / ASICNAME macros.
struct asic_info {
const char *name;
enum asic_backend backend; // which backend to use
int asic_thread_size; // number of threads in a wave
int asic_fed_instructions; // FED instructions are available
int asic_legacy_log; // Legacy EXP and LOG opcodes are available
int asic_large_ds_read; // Large DS read opcodes (96b and 128b) are available
int asic_32bank_lds; // Full 32 bank lds P1LL_F16 INTERP instruction available
};
// ASIC-specific assembler state: a small bundle of pending ALU operations
// plus relocation bookkeeping and the index of the selected ASIC.
// NOTE(review): presumably the object reached through the `A` macro (S->ap),
// since ASIC / ASICNAME index asics[] with A->asic — confirm.
struct sp3_asic_state {
struct sp3_asic_aluop {
int pos; // original position in code
int op, na, nc; // na = number of args, nc = number of consts in args
int lds, offset; // lds = is an LDS_IDX_OP subop, offset = LDS offset
unsigned dst;
unsigned arg[3];
unsigned lit[3]; // float literals are no longer float at this point
unsigned flags;
int scalar;
} bundle [5];
unsigned lds_lit[2], lds_mask[2];
int nbundle; // NOTE(review): presumably the number of used entries in bundle[] — confirm
int reorder;
int last_reorder, last_po[5];
int nscalar; // number of nominally-scalar opcodes in bundle
int barrier_after; // require barrier after this clause
// sp3-r6xx
int asic;
struct da_reloc {
unsigned addr, ref;
struct da_reloc *next; // singly linked list of relocations
} *da_relocs;
struct cf_reloc **instrels;
struct cf_reloc *labels;
int sinstrels;
int slabels;
char unk_name[16];
};
#define A S->ap
extern struct asic_info asics[];
#define ASICNAME asics[A->asic].name
#define ASIC asics[A->asic]
void set_asic(Sp, int asic);
int find_asic(const char *name);
// opcode tables
void sp3_unbuild_tables(void);
void sp3_si_unbuild_tables(void);
void sp3_ci_unbuild_tables(void);
void sp3_gfx8_unbuild_tables(void);
void sp3_build_tables(void);
void sp3_si_build_tables(void);
void sp3_ci_build_tables(void);
void sp3_gfx8_build_tables(void);
// helper functions
#define FMT_FMT 0x00000000
#define FMT_COMP 0x00010000
#define FMT_ENDIAN 0x00020000
#define FMT_NUM 0x00030000
#define FMT_SRF 0x00040000
#define FMT_MASK 0xFFFF0000
#define FMT_IMASK 0x0000FFFF
void mark_sgpr(Sp, unsigned);
void mark_vgpr(Sp, unsigned);
void mark_global(Sp, unsigned);
void mark_ctemp(Sp, unsigned);
int is_mod_bool(Sp, pnode *, const char *);
int get_mod_bool(Sp, pnode *, const char *);
int get_mod_int(Sp, pnode *, int, int);
int get_mod_int32(Sp, pnode *);
int par_cmask(Sp, pnode *);
unsigned reg_csel(Sp, unsigned , int);
unsigned reg_msel(Sp, unsigned *, int);
const char *spec_sel_to_name(Sp, int sel);
const char *sp3_fmt_to_name(Sp, int cls, int val);
const char *sp3_si_fmt_to_name(Sp, int cls, int val);
const char *sp3_ci_fmt_to_name(Sp, int cls, int val);
const char *sp3_gfx8_fmt_to_name(Sp, int cls, int val);
void add_reloc_label(Sp, int li, int blame);
void add_reloc_inst(Sp, int ii, int blame);
void add_reloc_cf(Sp, int offs);
int grouping_for_group_size(Sp, int group_size);
//JENNICA - this block of name_tree will go away, replace
//with backend specific.
// Selector passed to get_name_tree() to choose which name tree is returned.
enum nametree_enum {
NAMETREE_OPCODES,
NAMETREE_OPCODES_0ARG,
NAMETREE_OPCODES_CALL,
NAMETREE_VTX_FMTS,
NAMETREE_SPEC_SELS,
NAMETREE_SPEC_VEC_SELS,
NAMETREE_SGPR_NAME_SELS,
NAMETREE_CONSTS,
NAMETREE_DEPRECATED,
};
struct name_tree **get_name_tree(struct sp3_state *S, enum nametree_enum whichtree);
extern struct name_tree *opcodes_0arg;
extern struct name_tree *opcodes_call;
extern struct name_tree *vtx_fmts;
extern struct name_tree *spec_sels;
extern struct name_tree *spec_vec_sels;
extern struct name_tree *sgpr_name_sels;
extern struct name_tree *consts;
extern struct name_tree *deprecated;
extern struct name_tree *asic_names;
// Name/id pair for one ASIC capability; the table of these is declared below
// as `asiccaps[]`.
struct asic_caps{const char *name; int id;};
extern struct asic_caps asiccaps[];
extern struct name_tree *asic_caps; //JENNICA - this may need to go away.
void update_sgpr_names(Sp);
#ifdef __cplusplus
}
#endif
#endif
-553
View File
@@ -1,553 +0,0 @@
//=====================================================================
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
//
/// \author AMD Developer Tools Team
/// \file
///
//=====================================================================
#ifndef SP3_INT_H
#define SP3_INT_H
#include "sp3.h"
#ifdef _MSC_VER
#ifndef strdup
#define strdup _strdup
#endif
#ifndef stricmp
#define stricmp _stricmp
#endif
#ifndef strcasecmp
#define strcasecmp _stricmp
#endif
#pragma warning(disable:4090 4204 4245 4296 4389 4701 4702)
#endif
#ifdef __cplusplus
extern "C" {
#endif
struct sp3_state;
#define Sp struct sp3_state *S
// clause types
#define CT_NONE 0
#define CT_SHADER 1
// parse tree
#define P_NUM 0 // integer
#define P_FLT 1 // float
#define P_STR 2 // string
#define P_REG 3 // register component(s)
#define P_RANGE 4 // closed range
#define P_RANGEL 5 // right-open range
#define P_SLICE 6 // array concatenation (used for slices)
#define P_RCAST 7 // integer -> register cast
#define P_LIST 8 // list (internal to the parser only)
#define P_VAR 9 // variable (with name)
#define P_VARE 10 // variable-element (result of lvalue slice)
#define P_CL 11 // clause
#define P_CLI 12 // clause instructions
#define P_WHILE 13 // while loop
#define P_REPEAT 14 // repeat-until loop
#define P_IF 15 // if or if-else
#define P_CFOR 16 // C-style for loop
#define P_FOR 17 // vector for loop
#define P_RET 18 // return from function
#define P_CSLICE 19 // componentwise slice
#define P_UREF 20 // unresolved reference
#define P_FREF 21 // resolved reference
#define P_CALL 22 // function call
#define P_PRINT 23 // print to stdout
#define P_PAR 24 // function parameters
#define P_NF 25 // native function
#define P_OMOD 27 // opcode modifier
#define P_OMODS 28 // opcode modifiers
#define P_OPARS 29 // opcode parameters
#define P_OP 30 // opcode
#define P_SWIZ0 31 // register swizzles with N components wrapped
#define P_SWIZ1 32 // -"-
#define P_SWIZ2 33 // -"-
#define P_SWIZ3 34 // -"-
#define P_SWIZ4 35 // -"-
#define P_VTXFMT 36 // vertex formats
#define P_LABEL 37 // unique identifier of a label
#define P_LINIT 38 // generate label identifiers
#define P_MARK 39 // mark a label
#define P_OPCALL 40 // opcode that does a clause instantiation on par0
#define P_ASIC 41 // ASIC model
#define P_ASICCAP 42 // ASIC capability
#define P_NCLOS 43 // create closure
#define P_CLOS 44 // closure
#define P_SH 45 // compiled shader
#define P_NOT 0x100
#define P_BNOT 0x101
#define P_NEG 0x102
#define P_MUL 0x103
#define P_DIV 0x104
#define P_MOD 0x105
#define P_ADD 0x106
#define P_SUB 0x107
#define P_SHL 0x108
#define P_SHR 0x109
#define P_SAR 0x10A
#define P_LT 0x10B
#define P_GT 0x10C
#define P_LEQ 0x10D
#define P_GEQ 0x10E
#define P_EQ 0x10F
#define P_NEQ 0x110
#define P_BAND 0x111
#define P_BOR 0x112
#define P_BXOR 0x113
#define P_AND 0x114
#define P_OR 0x115
#define P_XOR 0x116
#define P_SEL 0x117
#define P_XDEC 0x118
#define P_XINC 0x119
#define P_DECX 0x11A
#define P_INCX 0x11B
#define P_ASGN 0x11C
#define P_IND 0x11D
#define P_NOP 0x11E
#define P_VSUM 0x11F
#define P_VPROD 0x120
#define P_VBOR 0x121
#define P_VBAND 0x122
#define P_VBXOR 0x123
#define P_VOR 0x124
#define P_VAND 0x125
#define P_VXOR 0x126
#define P_VMIN 0x127
#define P_VMAX 0x128
#define P_CADD 0x129
#define P_CSUB 0x12A
#define P_CMUL 0x12B
#define P_CDIV 0x12C
#define P_CSHL 0x12D
#define P_CSHR 0x12E
#define P_CSAR 0x12F
#define P_CBAND 0x130
#define P_CBOR 0x131
#define P_CBXOR 0x132
#define P_CAND 0x133
#define P_COR 0x134
#define P_CXOR 0x135
#define P_CMIN 0x136
#define P_CMAX 0x137
#define P_MIN 0x138
#define P_MAX 0x139
#define P_PROBE 0x13A
#define P_BITS 0x13B
// register types
#define R_VGPR 0x00000
#define R_OFF 0x04000
#define R_SNAME 0x06000
#define R_INTERP 0x08000
#define R_SPEC 0x0A000
#define R_SGPR 0x0C000
#define R_EXPBUF 0x0E000
#define R_TMASK 0x1E000
// magic values for R_SPEC
#define R_P_CL 3 // used internally only (inline literal)
#define R_P_CI_L 0xDB // used internally only
#define R_P_LDX_L 0xDB // any LDS inline
#define R_P_LDS_L 0xDF // direct LDS inline
#define R_P_LDS_H 0xE0
#define R_P_LDX_H 0xE0
#define R_P_CI_S 0xF3 // end of new R8xx constants
#define R_P_CI_H 0xFC
#define R_P_NOTLAST 0xFF// notlast operand for export
// magic values for R_SNAME
#define R_S_SCRATCH 1
#define R_S_PSVS_STATE 2
#define R_S_SO_WRITE_INDEX 3
#define R_S_SO_BASE_OFFSET0 4
#define R_S_SO_BASE_OFFSET1 5
#define R_S_SO_BASE_OFFSET2 6
#define R_S_SO_BASE_OFFSET3 7
#define R_S_OFFCHIP_LDS 8
#define R_S_IS_OFFCHIP 9
#define R_S_RING_OFFSET 10
#define R_S_GS_WAVE_ID 11
#define R_S_TG_SIZE 12
#define R_S_TF_BASE 13
#define R_S_TGID_X 14
#define R_S_TGID_Y 15
#define R_S_TGID_Z 16
#define R_S_WAVE_CNT 17
#define R_S_GLOBAL_WAVE_ID 18
// register components
#define R_CMASK 0x1C00
#define R_CSHIFT 10
#define R_CX 0x0000
#define R_CY 0x0400
#define R_CZ 0x0800
#define R_CW 0x0C00
#define R_CS 0x1000 // used to identify scalar elements
#define R_CN 0x1800
#define R_IMASK 0x03FF
// source transforms
#define R_NEG 0x80000
#define R_ABS 0x100000
#define R_SEXT 0x200000
// subencodings for export targets
#define R_E_TMASK 0x0380
#define R_E_MRT 0x0000
#define R_E_Z 0x0080
#define R_E_POS 0x0100
#define R_E_PARAM 0x0180
#define R_E_ATTR 0x0280
#define R_E_NULL 0x0300
#define R_E_IMASK 0x007F
// subencodings for interp
#define R_I_TMASK 0x0380
#define R_I_P10 0x0000
#define R_I_P20 0x0080
#define R_I_P0 0x0100
// function parameters
#define F_CANY 0x00000000
#define F_CNUM 0x01000000
#define F_CREG 0x02000000
#define F_CTMP 0x03000000
#define F_CFPTR 0x04000000
#define F_CINT 0x05000000
#define F_CMASK 0x07000000
#define F_OPT 0x40000000
#define F_VEC 0x80000000
// Node of the sp3 parse tree. The payload is the union array `i`, whose
// members cover integers, floats, strings, child nodes, variables, compiled
// shaders, registers and native functions.
// NOTE(review): `type` presumably holds one of the P_* codes defined above,
// gc_next/gc_mark look like garbage-collection bookkeeping (see
// mark_gc_storage), and `i` is declared with one element but presumably
// allocated to hold `ni` items — confirm against the implementation.
typedef struct pnode {
struct pnode *gc_next;
int gc_mark;
int type;
int et; // error reporting tag
int ni; // number of items
union pnode_item {
int num; // integer
float flt; // float
char *str; // string
struct pnode *ptr; // tree item
struct {
struct pnode *v;
int e;
} ve; // variable-element pair
struct {
int p;
char *n;
} var; // variable (stack offset, name)
struct sp3_shader *sh;
unsigned int reg; // register components
struct pnode *(* nf)(Sp, struct pnode **); // native function
} i[1];
} pnode;
pnode *p_str(Sp, char *s); // wrap a string
pnode *p_float(Sp, float f); // wrap a float
pnode *p_num(Sp, int i); // wrap an integer
pnode *p_vec(Sp, int type, int len); // create a vector
pnode *p_list(Sp, pnode *list, pnode *item); // append item to P_LIST
pnode *p_list_rev(Sp, pnode *list); // reverse the order of the list
pnode *p_tree(Sp, int type, int nitems, ...); // create a tree node
pnode *p_l2t(Sp, int type, pnode *list); // list to tree
pnode *p_l2v(Sp, int type, pnode *list); // list to vector
pnode *p_x2x(Sp, int type, pnode *p); // cast to type
pnode *p_clause(Sp, int vstk, int lstk, pnode *parlist, pnode *instlist, int type);
pnode *p_reg(Sp, int type, int idx); // wrap a register
pnode *p_swizzle(Sp, char *str); // parse a swizzle string
pnode *p_lv2rv(Sp, pnode *lval); // lvalue to rvalue
pnode *p_newlabel(Sp, pnode *t, int tag); // define new label
pnode *p_label(Sp, int cnt); // fill with label IDs
pnode *p_clone(Sp, pnode *src);
void print_node(pnode *); // print to stdout
void mark_gc_storage(Sp); // mark all internal storage of sp3 for gc
// functions provided by machine driver
int is_opcode(struct sp3_state *S, const char *name); // is an opcode (any)
int is_opcode_0arg(struct sp3_state *S, const char *name); // is an opcode (0-argument)
int is_opcode_call(struct sp3_state *S, const char *name); // is a call op (1st argument is a closure)
void sp3_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
void sp3_si_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
void sp3_ci_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
void sp3_gfx8_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
pnode *machine_const(Sp, char *name); // if a machine const, parse it (else NULL)
void mark_label(Sp, int li); // "label:"
pnode *asic_getcap(Sp, int id); // get ASIC capability #id
void mach_cleanup(Sp); // initialize generator state
// name trees
#define NT_SEARCH 0
#define NT_ADD 1
#define NT_ADD_ONLY 2
#define NT_ADD_STRDUP 4
// One node of a name tree, searched and modified through
// name_tree_operation() and freed through name_tree_delete().
// NOTE(review): `l`/`r` are presumably the left/right children of a binary
// tree keyed by `name`, and `add` presumably records the NT_ADD* flags the
// entry was created with — confirm against the implementation.
struct name_tree {
const char *name;
int tag;
int add;
struct name_tree *l, *r;
};
struct name_tree *name_tree_operation(struct name_tree **t, const char *name, int tag, int add);
void name_tree_delete(struct name_tree **t);
// symbol table
void f_decl(Sp, char *, pnode *);
pnode *f_ref(Sp, char *);
void f_check(Sp);
pnode *f_call(Sp, const char *);
void f_decl_native(Sp, int, char *, pnode *(*)(Sp, pnode **), int, ...);
// parse-time variable stack
void vs_decl(Sp, const char *, int tag);
int vs_lookup(Sp, const char *, pnode **, int);
char *vs_getname(pnode *);
void vs_enter_func(Sp);
int vs_leave_func(Sp, int *); // returns number of stack allocations &
// (through param) number of lstack allocs
void vs_enter_block(Sp);
void vs_leave_block(Sp);
int vs_get_topmax(Sp); // returns number of stack allocation for top level
// runtime variable stack
void rv_set(Sp, pnode *, pnode *);
pnode *rv_get(Sp, pnode *);
void rv_alloc(Sp, int);
void rv_setpar(Sp, int, pnode *);
int rv_enter(Sp, int);
void rv_leave(Sp, int);
int rl_enter(Sp, int);
void rl_leave(Sp, int);
void rv_leave_native(Sp);
pnode **rv_getpar_native(Sp);
// all-in-one variable setter
void rv_set_by_name(Sp, const char *, pnode *);
// growable binary buffer
// Buffer of unsigned words handled through gb_alloc / gb_append / gb_add /
// gb_reg, each of which returns a grow_buf pointer.
// NOTE(review): `n` is presumably the number of words in use and `size` the
// allocated capacity, with `i` over-allocated beyond its declared single
// element — confirm against the implementation.
typedef struct grow_buf {
int n, size;
unsigned i[1];
} grow_buf;
grow_buf *gb_alloc(int);
grow_buf *gb_append(grow_buf *, int, unsigned *);
grow_buf *gb_add(grow_buf *, unsigned);
grow_buf *gb_reg(grow_buf *, unsigned, unsigned);
// clause contents
//
// Per-clause record: a base value, a growable word buffer and a type code.
struct clause_info {
    unsigned base;
    grow_buf *data;
    int type;
};
// Clause emission and patching.
// NOTE(review): the unnamed int/unsigned parameters are not documented in this
// header; check the definitions before relying on their order.
void start_clause(Sp, int);
void cb_emit(Sp, unsigned *, int);
int cb_ptr(Sp);
void cb_patch(Sp, int, int, unsigned);
int remap_clauses(Sp);
// Output generation and relocation handling.
struct sp3_shader *gen_output(Sp);
void convert_relocs(Sp);
void perform_relocs(Sp);
pnode *shader_clos(Sp, pnode *); // call this to get a binary shader from closure
pnode *shader_name(Sp, const char *); // call this to get a binary shader from name
// Constants: set by index, look up by value, or set within constant buffer `kbuf`.
void set_const(Sp, int idx, unsigned val);
int find_const(Sp, unsigned val);
void set_kbuf(Sp, int kbuf, int idx, unsigned val);
// ASIC queries. The two asic_capby* functions take no state argument.
// NOTE(review): presumably their first int is an ASIC id as returned by asic_id() -- confirm.
const char *asic_name(Sp);
int asic_id(Sp);
int asic_capbyname(int, const char *);
int asic_capbyid(int, int);
// register stream packer
// Each operation has a generic entry point plus si, ci and gfx8 variants.
// NOTE(review): presumably the generic function dispatches to the variant for the
// current ASIC -- confirm. Note also that sp3_guess_shader_type spells out
// `struct sp3_state *S` where the other prototypes use the `Sp` macro.
int sp3_guess_shader_type(struct sp3_state *S, struct sp3_shader *sh);
int sp3_si_guess_shader_type(struct sp3_shader *sh);
int sp3_ci_guess_shader_type(struct sp3_shader *sh);
int sp3_gfx8_guess_shader_type(struct sp3_shader *sh);
void sp3_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
void sp3_si_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
void sp3_ci_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
void sp3_gfx8_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
// Only a generic unpack entry point is declared here (no per-ASIC variants).
void unpack_reg_stream(Sp, struct sp3_shader *sh);
// instances
// NOTE(review): presumably the int returned by new_instance is the index that
// get_instance_clause / get_instance_type accept, matching the clause_id and
// type members of struct instance inside struct sp3_state -- confirm.
int new_instance(Sp, pnode *, int);
void eval_instances(Sp);
int get_instance_clause(Sp, int);
int get_instance_type(Sp, int);
// error reporting
void et_parse_mode(Sp, int);
int et_get_id(Sp);
// et_error is declared noreturn for both MSVC (__declspec) and GCC
// (__attribute__), so control never comes back to the caller. Under GCC the
// third parameter is checked as a printf-style format string against the
// variadic arguments that start at position four.
// NOTE(review): the format parameter is a non-const `char *`, so passing a string
// literal from C++ draws a conversion diagnostic; fixing it requires changing the
// definition in step with this prototype.
#ifdef _MSC_VER
__declspec(noreturn)
#endif
void et_error(Sp, char *, char *, ...)
#ifdef __GNUC__
__attribute__ ((__noreturn__))
__attribute__ ((format(printf, 3, 4)))
#endif
;
// Same parameter layout and printf-style checking as et_error, but without the
// noreturn marking: the call returns to its caller.
void et_warning(Sp, char *, char *, ...)
#ifdef __GNUC__
__attribute__ ((format(printf, 3, 4)))
#endif
;
// NOTE(review): presumably the "blame" functions select the node or location that
// later diagnostics are attributed to -- confirm in the implementation.
void et_blame(Sp, pnode *);
void et_blame_et(Sp, int);
void et_print(Sp, pnode *);
int et_get_blame(Sp);
// text buffer for disasm
// printf-style append; under GCC the second parameter is checked as the format
// string against the variadic arguments.
void bprintf(Sp, char *, ...)
#ifdef __GNUC__
__attribute__ ((format(printf, 2, 3)))
#endif
;
// NOTE(review): presumably emits comment text `cmt`, wrapping it with the `start`,
// `line` and `end` strings -- confirm in the implementation.
void bcmt(Sp, const char *cmt, const char *start, const char *line, const char *end);
void btab(Sp, int);
// NOTE(review): ownership of the returned string is not visible here -- confirm
// whether the caller must free it.
char *bget(Sp);
// state structure
//
// Internal state, grouped by the component that owns each run of members.
// Member order is kept exactly as declared; the nested struct definitions stay
// in place so that their scope is the same for C and C++ consumers.
struct sp3_state {
    // flex
    void *scanner;
    void *yystate;
    char *yyfile;
    int yyline;

    // sp3-gc
    struct sp3_gc_state *gc;

    // asic private
    struct sp3_asic_state *ap;

    // sp3-eval
    int retflag;
    pnode *retval;

    // sp3-int
    struct sp3_shader config;
    int clause_id; // counts up during evaluation
    int clause_type;
    struct clause_info *clauses;
    int nclauses;
    int sclauses;
    int memsize;
    int ctsizes[4];
    int in_shader;

    // disassembly text and comment callbacks
    char *disasm_text;
    int disasm_column;
    int disasm_len;
    int disasm_maxlen;
    sp3_vma *comment_map;
    void *comment_ctx;
    sp3_comment_cb comment_top;
    sp3_comment_cb comment_right;

    // constants and constant buffers
    unsigned const_buf[1024];
    int const_vld[1024];
    int const_vld_range;
    unsigned *kval[16];
    int knum[16];

    // error tracking: (file, line) records
    struct et_record {
        const char *file;
        int line;
    } *et_names;
    int et_node;
    int et_parsing;
    int net_names;
    int set_names;

    // file names and function symbols
    char *fname_last;
    struct name_tree *fnames;
    struct fsym {
        char *name;
        pnode *func;
        struct fref *refs;
        struct fsym *l;
        struct fsym *r;
    } *fsymbols;
    int func_id; // counts up during parsing

    // instances
    struct instance {
        int type;
        int clause_id;
        pnode *call;
    } *instances;
    int ninstances;
    int sinstances;

    // parse-time variable and label stacks
    struct vstack {
        char *name;
        int tag;
        int vs_sp;
        int vs_level;
        struct vstack *next;
    } *var_stack, *lbl_stack;
    int vs_max;
    int vs_sp;
    int vs_top;
    int vs_topmax;
    int ls_max;
    int ls_sp;

    // runtime stacks
    pnode **rl_stack;
    int rl_sp;
    int rl_ss;
    int rl_base;
    int rl_id;
    int rl_size;
    pnode **rv_stack;
    int rv_sp;
    int rv_ss;
    int rv_base;
    int rv_size;

    // warnings and error header
    int werror;
    int wcount;
    const char *err_hdr;

    // alternate shader entry points
    unsigned entry_point_table_size;
    unsigned entry_point_table_alloc_size;
    sp3_vmaddr *entry_point_table;
};
// State and parser lifecycle.
// NOTE(review): presumably sp3_new_state pairs with sp3_free_state and
// sp3_new_parser with sp3_free_parser -- confirm ownership and required call
// order in the implementation.
struct sp3_state *sp3_new_state(void);
void sp3_asic_attach_state(Sp);
void sp3_new_parser(Sp);
void sp3_free_parser(Sp);
void sp3_free_state(Sp);
void reg_natives(Sp);
#ifdef __cplusplus
}
#endif
#endif
-137
View File
@@ -1,137 +0,0 @@
//=====================================================================
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
//
/// \author AMD Developer Tools Team
/// \file
///
//=====================================================================
#ifndef SP3_TYPE_H
#define SP3_TYPE_H
#ifdef __cplusplus
extern "C" {
#endif
/// @file sp3-type.h
/// @brief sp3 types
// Shader type identifiers. These are the values stored in sp3_shader::type and
// passed as the shader_type argument of the disassembly API.
// The last enumerator carries no trailing comma so the header also compiles
// cleanly as C89 and C++98, which reject a comma before the closing brace.
enum sp3_shtype {
    SP3_SHTYPE_NONE = -1,
    SP3_SHTYPE_PS = 0,
    SP3_SHTYPE_VS = 1,
    SP3_SHTYPE_GS = 2,
    SP3_SHTYPE_ES = 3,
    SP3_SHTYPE_HS = 4,
    SP3_SHTYPE_LS = 5,
    SP3_SHTYPE_CS = 6
};
// Array dimensions used by struct sp3_shader: SP3_NUM_MRT sizes cb_masks and
// SP3_NUM_STRM sizes strm_used.
// No trailing comma after the last enumerator, for C89 / C++98 compatibility.
enum sp3_count {
    SP3_NUM_MRT = 8,
    SP3_NUM_STRM = 4
};
// SP3DIS_* disassembly flags. Each value is a distinct bit, so flags can be
// OR-ed together into the `flags` mask taken by the disassembly API.
// No trailing comma after the last enumerator, for C89 / C++98 compatibility.
enum sp3_flag {
    SP3DIS_NO_STATE = 0x01,
    SP3DIS_NO_BINARY = 0x02,
    SP3DIS_COMMENTS = 0x04,
    SP3DIS_NO_GPR_COUNT = 0x08,
    SP3DIS_FORCEVALID = 0x10,
    SP3DIS_NO_ASIC = 0x20
};
/// @brief Shader context. Contains no user-visible fields.
struct sp3_context;
/// @brief One entry of a register stream: an (index, value) pair.
struct sp3_reg {
    unsigned index; ///< One of the mm* values from chip_enum.h.
    unsigned value; ///< Value written to that register.
};
/// @brief Wrapped shader metadata.
///
/// Once generated, a shader is handed back wrapped in an sp3_shader structure.
///
/// The structure carries the shader binary, its register stream, constants and
/// constant buffers, and the metadata needed for SC compatibility.
struct sp3_shader {
    int type;                           ///< One of the SP3_SHTYPE_* constants.
    int asic_int;                       ///< Internal ASIC index. Do not use.
    const char *asic;                   ///< ASIC name as a string ("RV870" etc).
    unsigned size;                      ///< Size of the compiled shader, in 32-bit words.
    unsigned nsgprs;                    ///< Number of scalar GPRs used.
    unsigned nvgprs;                    ///< Number of vector GPRs used.

    unsigned trap_present;
    unsigned user_sgpr_count;
    unsigned scratch_en;
    unsigned dispatch_draw_en;
    unsigned so_en;
    unsigned so_base0_en;
    unsigned so_base1_en;
    unsigned so_base2_en;
    unsigned so_base3_en;
    unsigned oc_lds_en;
    unsigned tg_size_en;
    unsigned tidig_comp_cnt;            ///< Number of components(-1) enabled for thread id in group
    unsigned tgid_x_en;
    unsigned tgid_y_en;
    unsigned tgid_z_en;
    unsigned wave_cnt_en;

    unsigned sgpr_scratch;
    unsigned sgpr_psvs_state;
    unsigned sgpr_so_write_index;
    unsigned sgpr_so_base_offset0;
    unsigned sgpr_so_base_offset1;
    unsigned sgpr_so_base_offset2;
    unsigned sgpr_so_base_offset3;
    unsigned sgpr_offchip_lds;
    unsigned sgpr_is_offchip;
    unsigned sgpr_ring_offset;
    unsigned sgpr_gs_wave_id;
    unsigned sgpr_global_wave_id;
    unsigned sgpr_tg_size;
    unsigned sgpr_tgid_x;
    unsigned sgpr_tgid_y;
    unsigned sgpr_tgid_z;
    unsigned sgpr_tf_base;
    unsigned sgpr_wave_cnt;

    unsigned pc_exports;                ///< Range of parameters exported (if VS).
    unsigned pos_export;                ///< Shader executes a position export (if VS).
    unsigned cb_exports;                ///< Range of MRTs exported (if PS).
    unsigned mrtz_export_format;        ///< Export format of the mrtz export.
    unsigned z_export;                  ///< Shader executes a Z export (if PS).
    unsigned pops_en;                   ///< Shader is POPS (PS)
    unsigned load_collision_waveid;     ///< Shader sets load collision waveid (if PS).
    unsigned stencil_test_export;       ///< Shader exports stencil (if PS).
    unsigned stencil_op_export;         ///< Shader exports stencil (if PS).
    unsigned kill_used;                 ///< Shader executes ALU KILL operations.
    unsigned cb_masks[SP3_NUM_MRT];     ///< Component masks for each MRT exported (if PS).
    unsigned emit_used;                 ///< EMIT opcodes used (if GS).
    unsigned covmask_export;            ///< Shader exports coverage mask (if PS).
    unsigned mask_export;               ///< Shader exports mask (if PS).
    unsigned strm_used[SP3_NUM_STRM];   ///< Streamout operations used (map).
    unsigned scratch_used;              ///< Scratch SMX exports used.
    unsigned scratch_itemsize;          ///< Scratch ring item size.
    unsigned reduction_used;            ///< Reduction SMX exports used.
    unsigned ring_used;                 ///< ESGS/GSVS ring SMX exports used.
    unsigned ring_itemsize;             ///< ESGS/GSVS ring item size (for ES/GS respectively).
    unsigned vertex_size[4];            ///< GSVS ring vertex size (for GS).
    unsigned mem_used;                  ///< Raw memory SMX exports used.
    unsigned rats_used;                 ///< Mask of RATs (UAVs) used
    unsigned group_size[3];             ///< Wavefront group size (for ELF files).
    unsigned alloc_lds;                 ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS)

    unsigned *data;                     ///< Shader binary data.
    unsigned nregs;                     ///< Number of register writes in the stream.
    struct sp3_reg *regs;               ///< Register writes (index-value pairs).
};
/// @brief Comment callback.
///
/// Installed through sp3_setcomments(), which documents that a caller-supplied
/// context pointer and the non-zero comment-map values are passed to the callbacks.
/// NOTE(review): presumably the void* argument is that context and the int is the
/// map value -- confirm against the disassembler.
///
/// @return Comment text for the disassembly.
typedef const char *(*sp3_comment_cb)(void *, int);
#ifdef __cplusplus
}
#endif
#endif
-119
View File
@@ -1,119 +0,0 @@
//=====================================================================
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
//
/// \author AMD Developer Tools Team
/// \file
///
//=====================================================================
#ifndef SP3_VM_H
#define SP3_VM_H
#ifdef __cplusplus
extern "C" {
#endif
// SP3_EXPORT decorates every public API declaration.
//  - not a Windows build, or static library: expands to nothing
//  - Windows DLL being built (DLL_EXPORT_SP3 defined): dllexport
//  - Windows DLL being consumed: dllimport
#if !defined(WIN_OS) || defined(SP3_STATIC_LIB)
#  define SP3_EXPORT
#elif defined(DLL_EXPORT_SP3)
#  define SP3_EXPORT __declspec(dllexport)
#else
#  define SP3_EXPORT __declspec(dllimport)
#endif
// Fixed-width integer types used by the VM API (uint32_t words, uint64_t addresses).
#if defined(_MSC_VER) && (_MSC_VER < 1600)
// Visual C++ older than 2010 ships no <stdint.h>; spell the types with the
// compiler's built-in sized integers instead.
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#elif defined(_MSC_VER)
// Visual C++ 2010 and later provide <stdint.h>; using it avoids redefining the
// typedefs the toolchain already supplies.
#include <stdint.h>
#else
#include <inttypes.h>
#endif
struct sp3_vma;

/// @file sp3-vm.h
/// @brief sp3 VM API
///
/// The VM API manages virtual memory maps. The maps serve as binary storage
/// for disassembly, since they can naturally mirror the GPU's memory map
/// (so no register translation is needed).

/// @brief Page size constant of the VM.
/// NOTE(review): the unit is not stated here; presumably 32-bit words -- confirm.
#define SP3_VM_PAGESIZE 64

/// @brief VM address: 64 bits wide, with an address unit of 32 bits.
typedef uint64_t sp3_vmaddr;

/// @brief Callback that fills a VMA on demand.
///
/// The request address identifies the VMA to fill. The callback is expected to
/// populate it through sp3_vm_write calls.
typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx);

/// @brief VM area
///
/// VMAs are kept in a sorted list, linked through `prev` and `next`.
typedef struct sp3_vma {
    sp3_vmaddr base;
    sp3_vmaddr len;
    sp3_vmfill fill;
    void *fill_ctx;
    uint32_t *data;
    struct sp3_vma *prev;
    struct sp3_vma *next;
} sp3_vma;
/// @brief Create a new VM that is empty.
///
/// @return The new VM. Release it with sp3_vm_free().
///
SP3_EXPORT
sp3_vma *sp3_vm_new(void);
/// @brief Create a new VM that has a sp3_vmfill callback.
/// @param fill Callback used to fill VMAs on demand (see sp3_vmfill).
/// @param ctx Context pointer; presumably handed back to @p fill as its ctx argument (confirm).
///
SP3_EXPORT
sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx);
/// @brief Create a new VM from an array of words.
/// @param base VM address to load array at.
/// @param len Number of 32-bit words in the array.
/// @param data Pointer to the array.
///
/// NOTE(review): whether the words are copied or the VM keeps referring to
/// @p data is not visible from this header -- confirm before freeing the array.
///
SP3_EXPORT
sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data);
/// @brief Find a VMA, optionally adding it.
/// @param vm VM to search in.
/// @param addr Address to search for.
/// @param add Flag indicating whether a failure should result in adding a new VMA.
/// @return The matching VMA. NOTE(review): presumably NULL when nothing matches and
///         @p add is 0 -- confirm.
///
SP3_EXPORT
sp3_vma *sp3_vm_find(sp3_vma *vm, sp3_vmaddr addr, int add);
/// @brief Write a word to a VM.
/// @param vm VM to write to.
/// @param addr Address of the word (VM addresses count 32-bit units).
/// @param val Word to store.
///
SP3_EXPORT
void sp3_vm_write(sp3_vma *vm, sp3_vmaddr addr, uint32_t val);
/// @brief Read a word from a VM.
/// @param vm VM to read from.
/// @param addr Address of the word (VM addresses count 32-bit units).
/// @return The word at @p addr. NOTE(review): the result for an address that is not
///         backed is not documented here; sp3_vm_present() can be used to probe first.
///
SP3_EXPORT
uint32_t sp3_vm_read(sp3_vma *vm, sp3_vmaddr addr);
/// @brief Probe VM for presence.
/// @param vm VM to probe.
/// @param addr Address to test.
/// @return 1 if the specified address is backed in the VM, 0 otherwise.
///
SP3_EXPORT
int sp3_vm_present(sp3_vma *vm, sp3_vmaddr addr);
/// @brief Free a VM and all its storage.
/// @param vm VM to free.
///
SP3_EXPORT
void sp3_vm_free(sp3_vma *vm);
#ifdef __cplusplus
}
#endif
#endif
-198
View File
@@ -1,198 +0,0 @@
//=====================================================================
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
//
/// \author AMD Developer Tools Team
/// \file
///
//=====================================================================
#ifndef SP3_H
#define SP3_H
#ifdef __cplusplus
extern "C" {
#endif
#include "sp3-vm.h"
#include "sp3-type.h"
/// @file sp3.h
/// @brief sp3 API
/// @brief Get version of the sp3 library.
///
/// @return String containing the version number.
///
SP3_EXPORT const char *sp3_version(void);
/// @brief Create a new sp3 context.
///
/// @return The new context. Release it with sp3_close().
///
SP3_EXPORT struct sp3_context *sp3_new(void);
/// @brief Set option for sp3.
///
/// @param state sp3 context.
/// @param option Option name. Unknown options will raise an error.
/// @param value Option value. NULL is used to represent value-less options.
///
SP3_EXPORT void sp3_set_option(struct sp3_context *state, const char *option, const char *value);
/// @brief Parse a file into a context.
///
/// If 'file' is NULL, parse stdin.
///
/// @param state sp3 context.
/// @param file Path of the file to parse, or NULL for stdin.
///
SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file);
/// @brief Parse a string into a context.
///
/// @param state sp3 context.
/// @param string Source text to parse.
///
SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string);
/// @brief Parse a file from the standard library into a context.
///
/// @param state sp3 context.
/// @param name Name of the standard-library file.
///
SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name);
/// @brief Call a sp3 function.
///
/// @param state sp3 context.
/// @param func Name of the function to call.
///
SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func);
/// @brief Call a sp3 CF clause.
///
/// @param state sp3 context.
/// @param cffunc Name of clause to call. By convention, this is "main".
///
/// @return A compiled and linked shader. Free memory with sp3_free().
///
/// NOTE(review): sp3_free_shader() is declared immediately below and also frees
/// an sp3_shader; confirm which of the two is the intended release function for
/// this result.
///
SP3_EXPORT struct sp3_shader *sp3_compile(struct sp3_context *state, const char *cffunc);
/// @brief Free a sp3_shader.
///
/// @param sh Shader to free.
///
SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh);
/// @brief Get current ASIC name set for a context.
///
/// @param state sp3 context.
/// @return The ASIC name currently set.
///
SP3_EXPORT const char *sp3_getasic(struct sp3_context *state);
/// @brief Set current ASIC name for a context.
///
/// @param state sp3 context.
/// @param chip ASIC name to select.
///
SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip);
/// @brief Set global variable in context to an integer.
///
/// @param state sp3 context.
/// @param name Name of the global variable.
/// @param value Integer to assign.
///
SP3_EXPORT void sp3_set_param_int(struct sp3_context *state, const char *name, int value);
/// @brief Set global variable in context to an integer vector.
///
/// @param state sp3 context.
/// @param name Name of the global variable.
/// @param size Number of elements in @p value (presumably; confirm).
/// @param value Pointer to the integers to assign.
///
SP3_EXPORT void sp3_set_param_intvec(struct sp3_context *state, const char *name, int size, const int *value);
/// @brief Set global variable in context to a float.
///
/// @param state sp3 context.
/// @param name Name of the global variable.
/// @param value Float to assign.
///
SP3_EXPORT void sp3_set_param_float(struct sp3_context *state, const char *name, float value);
/// @brief Set global variable in context to a float vector.
///
/// @param state sp3 context.
/// @param name Name of the global variable.
/// @param size Number of elements in @p value (presumably; confirm).
/// @param value Pointer to the floats to assign.
///
SP3_EXPORT void sp3_set_param_floatvec(struct sp3_context *state, const char *name, int size, const float *value);
/// @brief Set error message header.
///
/// @param state sp3 context.
/// @param str Header string.
///
SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str);
/// @brief Get ASIC metrics for the ASIC in current state.
///
/// Used by ELF tools to fill in some CAL fields.
///
/// @param state sp3 context.
/// @param name Name of the metric to query.
/// @return Value of the metric.
///
SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name);
/// @brief Free a context allocated by sp3_new/open/parse.
///
/// @param state Context to free.
///
SP3_EXPORT void sp3_close(struct sp3_context *state);
/// @brief Disassemble a shader.
///
/// This call is likely to change to something that will take a filled sp3_shader structure later on.
///
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
/// @param bin Memory map with the opcodes (see sp3-vm.h).
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
/// @param name Name to give the disassembled shader.
/// @param shader_type One of the SP3_SHTYPE_* constants.
/// @param include Literal text to include in the CF clause (NULL includes nothing).
/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set.
/// @param flags A mask of SP3DIS_* flags.
///
/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free().
///
SP3_EXPORT char *sp3_disasm(struct sp3_context *state, sp3_vma *bin, sp3_vmaddr base, const char *name, int shader_type, const char *include, unsigned max_len, unsigned flags);
/// @brief Disassemble a single shader instruction.
///
/// This call is likely to change to something that will take a filled sp3_shader structure later on.
///
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction).
///             The [2] in the prototype is not enforced: an array parameter is passed as a plain pointer.
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words).
/// @param shader_type One of the SP3_SHTYPE_* constants.
/// @param flags A mask of SP3DIS_* flags.
///
/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free().
///
SP3_EXPORT char *sp3_disasm_inst(struct sp3_context *state, const unsigned inst[2], sp3_vmaddr base, sp3_vmaddr addr, int shader_type, unsigned flags);
/// @brief Parse a register stream.
///
/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants.
///
/// This call is likely to merge with sp3_disasm later on.
///
/// @param state sp3 context to fill with state.
/// @param nregs Number of register entries.
/// @param regs Register stream to parse.
/// @param shader_type One of the SP3_SHTYPE_* constants.
///
SP3_EXPORT void sp3_setregs(struct sp3_context *state, unsigned nregs, const struct sp3_reg *regs, int shader_type);
/// @brief Set shader comments
///
/// @param state sp3 context.
/// @param map Map of comments (0 for no comment, other values will be passed to the callback).
/// @param f_top Callback returning comment to place above the opcode.
/// @param f_right Callback returning comment to place to the right of the opcode.
/// @param ctx Void pointer to pass to comment callbacks.
///
SP3_EXPORT void sp3_setcomments(struct sp3_context *state, sp3_vma *map, sp3_comment_cb f_top, sp3_comment_cb f_right, void *ctx);
/// @brief Set alternate shader entry points
///
/// Used for disassembly; this marks an additional location in memory
/// (besides the start address) where shader code may be found. Generally
/// required for jump tables and any case where the shader may perform
/// indirect jumps to ensure that disassembly locates all shader
/// instructions.
///
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
/// @param addr Address of the additional entry point (in VM entries, i.e. 32-bit words).
///
SP3_EXPORT void sp3_setentrypoint(struct sp3_context *state, sp3_vmaddr addr);
/// @brief Clear alternate shader entry points
///
/// Clear all entry points previously set with sp3_setentrypoint.
///
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
///
SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state);
/// @brief Free memory allocated by sp3.
///
/// Windows DLLs that allocate memory have to free it. This function
/// should be used to free the result of sp3_disasm, sp3_compile etc.
///
/// @param ptr Pointer previously returned by an sp3 call.
///
SP3_EXPORT void sp3_free(void *ptr);
#ifdef __cplusplus
}
#endif
#endif