Replace SP3 dynamic assembly with pre-assembled binaries
This is the first part of transitioning to the LLVM-based assembler. SP3 is deprecated and all references to the library are removed. Pending LLVM support, relevant shaders have been precompiled. Change-Id: I7d44cef5ded1836c4a74b77881af5bea8803d2c1
This commit is contained in:
@@ -60,18 +60,6 @@ For example, from the top level ROCR repository execute:
|
||||
|
||||
The name of the core hsa runtime is libhsa-runtime64.so.1.
|
||||
|
||||
#### External requirements
|
||||
|
||||
The core runtime requires the sp3.a library to be able to compile
|
||||
on x86_64 architectures. The binaries for the sp3.a library can
|
||||
be found on the amd-codexl-analyzer GitHub repository:
|
||||
|
||||
https://github.com/GPUOpen-Tools/amd-codexl-analyzer
|
||||
|
||||
The x86_64 library and associated header files have been added to
|
||||
this code base for convenience, but are still subject to the
|
||||
AMD copyright license.
|
||||
|
||||
#### Specs
|
||||
|
||||
http://www.hsafoundation.com/standards/
|
||||
|
||||
@@ -66,18 +66,6 @@ if ( NOT EXISTS ${HSATHK_BUILD_LIB_PATH}/libhsakmt.so.1 )
|
||||
MESSAGE ( FATAL_ERROR "Environment variable HSATHK_BUILD_LIB_PATH is not set to point to the location where KFD Thunk library libhsakmt.so.1 could be found." )
|
||||
endif ()
|
||||
|
||||
if ( EXISTS ${LIBSP3_BUILD_INC_PATH}/sp3.h )
|
||||
set ( LIBSP3_BUILD_INC_PATH ${LIBSP3_BUILD_INC_PATH} )
|
||||
else ()
|
||||
set ( LIBSP3_BUILD_INC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 )
|
||||
endif ()
|
||||
|
||||
if ( EXISTS ${LIBSP3_BUILD_LIB_PATH}/libsp3.a )
|
||||
set ( LIBSP3_BUILD_LIB_PATH ${LIBSP3_BUILD_LIB_PATH} )
|
||||
else ()
|
||||
set ( LIBSP3_BUILD_LIB_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../utils/sp3 )
|
||||
endif ()
|
||||
|
||||
MESSAGE ( ------IS64BIT: ${IS64BIT} )
|
||||
MESSAGE ( ------Compiler: ${CMAKE_CXX_COMPILER} )
|
||||
MESSAGE ( ------Version: ${CMAKE_CXX_COMPILER_VERSION} )
|
||||
@@ -153,11 +141,9 @@ include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
|
||||
include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/../inc )
|
||||
include_directories ( ${CMAKE_CURRENT_SOURCE_DIR}/inc )
|
||||
include_directories ( ${HSATHK_BUILD_INC_PATH} )
|
||||
include_directories ( ${LIBSP3_BUILD_INC_PATH} )
|
||||
|
||||
## Library path(s).
|
||||
link_directories ( ${HSATHK_BUILD_LIB_PATH} )
|
||||
link_directories ( ${LIBSP3_BUILD_LIB_PATH} )
|
||||
|
||||
add_library ( ${CORE_RUNTIME_TARGET} SHARED ${CORE_SRCS} )
|
||||
|
||||
@@ -172,7 +158,6 @@ target_link_libraries ( ${CORE_RUNTIME_TARGET}
|
||||
PRIVATE amdhsaloader
|
||||
PRIVATE amdhsacode
|
||||
PRIVATE hsakmt
|
||||
PRIVATE sp3
|
||||
dl pthread rt
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_GPU_SHADERS_H_
|
||||
|
||||
namespace amd {
|
||||
|
||||
// Pre-assembled machine code for the "CopyAligned" shader, stored as a flat
// array of unsigned int words. GpuAgent::AssembleShader looks this array up
// by name and uses it when the ISA major version is 7, copying
// sizeof(kCodeCopyAligned7) bytes into the GPU-visible code buffer. The lookup
// table there pairs it with 32 SGPRs and 12 VGPRs; those counts must be kept
// in step with the code below.
static const unsigned int kCodeCopyAligned7[] = {
|
||||
0xC0820100, 0xC0840104, 0xC0860108, 0xC088010C, 0xC08A0110, 0xC00C0114,
|
||||
0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900,
|
||||
0xD2506A03, 0x01A90103, 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05,
|
||||
0x01A90105, 0xD1C2006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
|
||||
0x01000002, 0xBF8C0F70, 0xD24A6A02, 0x00003102, 0xD2506A03, 0x01A90103,
|
||||
0xDC600000, 0x00000104, 0xD24A6A04, 0x00003104, 0xD2506A05, 0x01A90105,
|
||||
0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, 0xD24A6A02,
|
||||
0x00001101, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001501,
|
||||
0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000E, 0xDC380000,
|
||||
0x08000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
|
||||
0xDC780000, 0x00000804, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
|
||||
0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD24A6A02, 0x00001901,
|
||||
0xD2506A03, 0x01A90103, 0x7E0A020F, 0xD24A6A04, 0x00001D01, 0xD2506A05,
|
||||
0x01A90105, 0xD1C2006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, 0xDC300000,
|
||||
0x01000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
|
||||
0xDC700000, 0x00000104, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
|
||||
0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD24A6A02, 0x00002100, 0xD2506A03,
|
||||
0x01A90103, 0x7E0A0213, 0xD24A6A04, 0x00002500, 0xD2506A05, 0x01A90105,
|
||||
0xD1C2006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, 0x01000002,
|
||||
0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
|
||||
};
|
||||
|
||||
// Pre-assembled machine code for the "CopyMisaligned" shader, stored as a
// flat array of unsigned int words. GpuAgent::AssembleShader uses this array
// when the ISA major version is 7 and copies sizeof(kCodeCopyMisaligned7)
// bytes into the GPU-visible code buffer. The lookup table there pairs it
// with 23 SGPRs and 10 VGPRs.
static const unsigned int kCodeCopyMisaligned7[] = {
|
||||
0xC0820100, 0xC0840104, 0xC0860108, 0xC008010C, 0xBF8C007F, 0x8F028602,
|
||||
0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, 0xD2506A03, 0x01A90103,
|
||||
0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, 0x01A90105, 0xD1C2006A,
|
||||
0x00001102, 0xBF860032, 0xDC200000, 0x06000002, 0xD24A6A02, 0x00002102,
|
||||
0xD2506A03, 0x01A90103, 0xDC200000, 0x07000002, 0xD24A6A02, 0x00002102,
|
||||
0xD2506A03, 0x01A90103, 0xDC200000, 0x08000002, 0xD24A6A02, 0x00002102,
|
||||
0xD2506A03, 0x01A90103, 0xDC200000, 0x09000002, 0xD24A6A02, 0x00002102,
|
||||
0xD2506A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD24A6A04,
|
||||
0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000704, 0xD24A6A04,
|
||||
0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000804, 0xD24A6A04,
|
||||
0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000904, 0xD24A6A04,
|
||||
0x00002104, 0xD2506A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD24A6A02,
|
||||
0x00001100, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001500,
|
||||
0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000F, 0x87FE6A7E,
|
||||
0xDC200000, 0x01000002, 0xD24A6A02, 0x00002102, 0xD2506A03, 0x01A90103,
|
||||
0xBF8C0F70, 0xDC600000, 0x00000104, 0xD24A6A04, 0x00002104, 0xD2506A05,
|
||||
0x01A90105, 0xBF82FFEE, 0xBF810000,
|
||||
};
|
||||
|
||||
// Pre-assembled machine code for the "Fill" shader, stored as a flat array of
// unsigned int words. GpuAgent::AssembleShader uses this array when the ISA
// major version is 7 and copies sizeof(kCodeFill7) bytes into the GPU-visible
// code buffer. The lookup table there pairs it with 19 SGPRs and 8 VGPRs.
static const unsigned int kCodeFill7[] = {
|
||||
0xC0820100, 0xC0840104, 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E08020A,
|
||||
0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8F0C840B, 0x34020084, 0x7E060205,
|
||||
0xD24A6A02, 0x00000901, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00000D02,
|
||||
0xBF860007, 0xDC780000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
|
||||
0x01A90103, 0xBF82FFF6, 0x8F0C820B, 0x34020082, 0x7E060207, 0xD24A6A02,
|
||||
0x00000D01, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00001102, 0xBF860008,
|
||||
0x87FE6A7E, 0xDC700000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
|
||||
0x01A90103, 0xBF82FFF5, 0xBF810000,
|
||||
};
|
||||
|
||||
// Pre-assembled machine code for the "TrapHandler" shader, stored as a flat
// array of unsigned int words. GpuAgent::AssembleShader uses this array when
// the ISA major version is 8; the lookup table there pairs it with 2 SGPRs
// and 4 VGPRs. There is no version-7 counterpart: the table's version-7 slot
// for "TrapHandler" is {NULL, 0, 0, 0}.
static const unsigned int kCodeTrapHandler8[] = {
|
||||
0xC0061C80, 0x000000C0, 0xBF8C007F, 0xBEFE0181, 0x80728872, 0x82738073,
|
||||
0x7E000272, 0x7E020273, 0x7E0402FF, 0x80000000, 0x7E060280, 0xDD800000,
|
||||
0x00000200, 0xBF8C0F70, 0x7DD40500, 0xBF870011, 0xC0061D39, 0x00000008,
|
||||
0xBF8C007F, 0x86F47474, 0xBF84000C, 0x80729072, 0x82738073, 0xC0021CB9,
|
||||
0x00000000, 0xBF8C007F, 0x7E000274, 0x7E020275, 0x7E040272, 0xDC700000,
|
||||
0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70,
|
||||
};
|
||||
|
||||
// Pre-assembled machine code for the "CopyAligned" shader, stored as a flat
// array of unsigned int words. GpuAgent::AssembleShader uses this array when
// the ISA major version is 8 and copies sizeof(kCodeCopyAligned8) bytes into
// the GPU-visible code buffer. The lookup table there pairs it with 32 SGPRs
// and 12 VGPRs.
static const unsigned int kCodeCopyAligned8[] = {
|
||||
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
|
||||
0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
|
||||
0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, 0xD1196A02, 0x00000900,
|
||||
0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, 0x00000D00, 0xD11C6A05,
|
||||
0x01A90105, 0xD0E9006A, 0x00001102, 0xBF86000F, 0x86FE6A7E, 0xDC400000,
|
||||
0x01000002, 0xBF8C0F70, 0xD1196A02, 0x00003102, 0xD11C6A03, 0x01A90103,
|
||||
0xDC600000, 0x00000104, 0xD1196A04, 0x00003104, 0xD11C6A05, 0x01A90105,
|
||||
0xBF82FFEE, 0xBEFE01C1, 0x8E198418, 0x24020084, 0x7E060209, 0xD1196A02,
|
||||
0x00001101, 0xD11C6A03, 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001501,
|
||||
0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001902, 0xBF86000E, 0xDC5C0000,
|
||||
0x08000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
|
||||
0xDC7C0000, 0x00000804, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
|
||||
0xBF82FFEF, 0x8E198218, 0x24020082, 0x7E06020D, 0xD1196A02, 0x00001901,
|
||||
0xD11C6A03, 0x01A90103, 0x7E0A020F, 0xD1196A04, 0x00001D01, 0xD11C6A05,
|
||||
0x01A90105, 0xD0E9006A, 0x00002102, 0xBF86000F, 0x86FE6A7E, 0xDC500000,
|
||||
0x01000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
|
||||
0xDC700000, 0x00000104, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
|
||||
0xBF82FFEE, 0xBEFE01C1, 0x7E060211, 0xD1196A02, 0x00002100, 0xD11C6A03,
|
||||
0x01A90103, 0x7E0A0213, 0xD1196A04, 0x00002500, 0xD11C6A05, 0x01A90105,
|
||||
0xD0E9006A, 0x00002902, 0xBF860006, 0x86FE6A7E, 0xDC400000, 0x01000002,
|
||||
0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
|
||||
};
|
||||
|
||||
// Pre-assembled machine code for the "CopyMisaligned" shader, stored as a
// flat array of unsigned int words. GpuAgent::AssembleShader uses this array
// when the ISA major version is 8 and copies sizeof(kCodeCopyMisaligned8)
// bytes into the GPU-visible code buffer. The lookup table there pairs it
// with 23 SGPRs and 10 VGPRs.
static const unsigned int kCodeCopyMisaligned8[] = {
|
||||
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
|
||||
0xC0020400, 0x00000030, 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205,
|
||||
0xD1196A02, 0x00000900, 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04,
|
||||
0x00000D00, 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF860032,
|
||||
0xDC400000, 0x06000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
|
||||
0xDC400000, 0x07000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
|
||||
0xDC400000, 0x08000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
|
||||
0xDC400000, 0x09000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
|
||||
0xBF8C0F70, 0xDC600000, 0x00000604, 0xD1196A04, 0x00002104, 0xD11C6A05,
|
||||
0x01A90105, 0xDC600000, 0x00000704, 0xD1196A04, 0x00002104, 0xD11C6A05,
|
||||
0x01A90105, 0xDC600000, 0x00000804, 0xD1196A04, 0x00002104, 0xD11C6A05,
|
||||
0x01A90105, 0xDC600000, 0x00000904, 0xD1196A04, 0x00002104, 0xD11C6A05,
|
||||
0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD1196A02, 0x00001100, 0xD11C6A03,
|
||||
0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001500, 0xD11C6A05, 0x01A90105,
|
||||
0xD0E9006A, 0x00001902, 0xBF86000F, 0x86FE6A7E, 0xDC400000, 0x01000002,
|
||||
0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, 0xDC600000,
|
||||
0x00000104, 0xD1196A04, 0x00002104, 0xD11C6A05, 0x01A90105, 0xBF82FFEE,
|
||||
0xBF810000,
|
||||
};
|
||||
|
||||
// Pre-assembled machine code for the "Fill" shader, stored as a flat array of
// unsigned int words. GpuAgent::AssembleShader uses this array when the ISA
// major version is 8 and copies sizeof(kCodeFill8) bytes into the GPU-visible
// code buffer. The lookup table there pairs it with 19 SGPRs and 8 VGPRs.
static const unsigned int kCodeFill8[] = {
|
||||
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xBF8C007F, 0x8E028602,
|
||||
0x32000002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8E0C840B,
|
||||
0x24020084, 0x7E060205, 0xD1196A02, 0x00000901, 0xD11C6A03, 0x01A90103,
|
||||
0xD0E9006A, 0x00000D02, 0xBF860007, 0xDC7C0000, 0x00000402, 0xD1196A02,
|
||||
0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF6, 0x8E0C820B, 0x24020082,
|
||||
0x7E060207, 0xD1196A02, 0x00000D01, 0xD11C6A03, 0x01A90103, 0xD0E9006A,
|
||||
0x00001102, 0xBF860008, 0x86FE6A7E, 0xDC700000, 0x00000402, 0xD1196A02,
|
||||
0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // header guard
|
||||
@@ -46,19 +46,19 @@
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "core/inc/amd_aql_queue.h"
|
||||
#include "core/inc/amd_blit_kernel.h"
|
||||
#include "core/inc/amd_blit_sdma.h"
|
||||
#include "core/inc/amd_gpu_shaders.h"
|
||||
#include "core/inc/amd_memory_region.h"
|
||||
#include "core/inc/interrupt_signal.h"
|
||||
#include "core/inc/isa.h"
|
||||
#include "core/inc/runtime.h"
|
||||
|
||||
#include "utils/sp3/sp3.h"
|
||||
|
||||
#include "hsa_ext_image.h"
|
||||
|
||||
// Size of scratch (private) segment pre-allocated per thread, in bytes.
|
||||
@@ -182,49 +182,58 @@ GpuAgent::~GpuAgent() {
|
||||
void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
AssembleTarget assemble_target, void*& code_buf,
|
||||
size_t& code_buf_size) const {
|
||||
#ifdef __linux__ // No VS builds of libsp3 available right now
|
||||
std::string src_sp3_unified(src_sp3);
|
||||
// Select precompiled shader implementation from name/target.
|
||||
struct ASICShader {
|
||||
const void* code;
|
||||
size_t size;
|
||||
int num_sgprs;
|
||||
int num_vgprs;
|
||||
};
|
||||
|
||||
if (isa_->GetMajorVersion() == 7) {
|
||||
// On Gfx7 replace v_add_u32 with legacy equivalent v_add_i32.
|
||||
std::string add_inst_gfx8("v_add_u32"), add_inst_gfx7("v_add_i32");
|
||||
struct CompiledShader {
|
||||
ASICShader compute_7;
|
||||
ASICShader compute_8;
|
||||
};
|
||||
|
||||
for (size_t instIdx = 0; (instIdx = src_sp3_unified.find(
|
||||
add_inst_gfx8, instIdx)) != std::string::npos;
|
||||
instIdx += add_inst_gfx8.size()) {
|
||||
src_sp3_unified.replace(instIdx, add_inst_gfx7.size(), add_inst_gfx7);
|
||||
}
|
||||
}
|
||||
std::map<std::string, CompiledShader> compiled_shaders = {
|
||||
{"TrapHandler",
|
||||
{{NULL, 0, 0, 0}, {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}}},
|
||||
{"CopyAligned",
|
||||
{{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},
|
||||
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}}},
|
||||
{"CopyMisaligned",
|
||||
{{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},
|
||||
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}}},
|
||||
{"Fill",
|
||||
{{kCodeFill7, sizeof(kCodeFill7), 19, 8},
|
||||
{kCodeFill8, sizeof(kCodeFill8), 19, 8}}}};
|
||||
|
||||
// Assemble source string with libsp3.
|
||||
sp3_context* sp3 = sp3_new();
|
||||
auto compiled_shader_it = compiled_shaders.find(func_name);
|
||||
assert(compiled_shader_it != compiled_shaders.end() &&
|
||||
"Precompiled shader unavailable");
|
||||
|
||||
ASICShader* asic_shader = NULL;
|
||||
|
||||
switch (isa_->GetMajorVersion()) {
|
||||
case 7:
|
||||
sp3_setasic(sp3, "CI");
|
||||
sp3_set_param_int(sp3, "kGFXIPVersion", 7);
|
||||
asic_shader = &compiled_shader_it->second.compute_7;
|
||||
break;
|
||||
case 8:
|
||||
sp3_setasic(sp3, "VI");
|
||||
sp3_set_param_int(sp3, "kGFXIPVersion", 8);
|
||||
asic_shader = &compiled_shader_it->second.compute_8;
|
||||
break;
|
||||
default:
|
||||
assert(false && "SP3 assembly not supported on this agent");
|
||||
assert(false && "Precompiled shader unavailable for target");
|
||||
}
|
||||
|
||||
sp3_parse_string(sp3, src_sp3_unified.c_str());
|
||||
sp3_shader* code_sp3_meta = sp3_compile(sp3, func_name);
|
||||
|
||||
// Allocate a GPU-visible buffer for the shader.
|
||||
HsaMemFlags code_buf_flags = {0};
|
||||
code_buf_flags.ui32.HostAccess = 1;
|
||||
code_buf_flags.ui32.ExecuteAccess = 1;
|
||||
code_buf_flags.ui32.NoSubstitute = 1;
|
||||
|
||||
size_t code_size = code_sp3_meta->size * sizeof(uint32_t);
|
||||
size_t header_size =
|
||||
(assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0);
|
||||
code_buf_size = AlignUp(header_size + code_size, 0x1000);
|
||||
code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000);
|
||||
|
||||
HSAKMT_STATUS err =
|
||||
hsaKmtAllocMemory(node_id(), code_buf_size, code_buf_flags, &code_buf);
|
||||
@@ -239,8 +248,8 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
if (assemble_target == AssembleTarget::AQL) {
|
||||
amd_kernel_code_t* header = reinterpret_cast<amd_kernel_code_t*>(code_buf);
|
||||
|
||||
int gran_sgprs = std::max(0, (int(code_sp3_meta->nsgprs) - 1) / 8);
|
||||
int gran_vgprs = std::max(0, (int(code_sp3_meta->nvgprs) - 1) / 4);
|
||||
int gran_sgprs = std::max(0, (int(asic_shader->num_sgprs) - 1) / 8);
|
||||
int gran_vgprs = std::max(0, (int(asic_shader->num_vgprs) - 1) / 4);
|
||||
|
||||
header->kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
|
||||
AMD_HSA_BITS_SET(header->kernel_code_properties,
|
||||
@@ -262,14 +271,9 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 1);
|
||||
}
|
||||
|
||||
// Copy trap handler code into the GPU-visible buffer.
|
||||
memcpy((void*)(uintptr_t(code_buf) + header_size), code_sp3_meta->data,
|
||||
code_size);
|
||||
|
||||
// Release SP3 resources.
|
||||
sp3_free_shader(code_sp3_meta);
|
||||
sp3_close(sp3);
|
||||
#endif
|
||||
// Copy shader code into the GPU-visible buffer.
|
||||
memcpy((void*)(uintptr_t(code_buf) + header_size), asic_shader->code,
|
||||
asic_shader->size);
|
||||
}
|
||||
|
||||
void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) const {
|
||||
@@ -988,7 +992,6 @@ void GpuAgent::SyncClocks() {
|
||||
}
|
||||
|
||||
void GpuAgent::BindTrapHandler() {
|
||||
#ifdef __linux__ // No raw string literal support in VS builds right now
|
||||
const char* src_sp3 = R"(
|
||||
var s_trap_info_lo = ttmp0
|
||||
var s_trap_info_hi = ttmp1
|
||||
@@ -1068,7 +1071,6 @@ void GpuAgent::BindTrapHandler() {
|
||||
HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_,
|
||||
trap_code_buf_size_, NULL, 0);
|
||||
assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtSetTrapHandler() failed");
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,181 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_ASIC_H
|
||||
#define SP3_ASIC_H
|
||||
|
||||
|
||||
#include "sp3-int.h"
|
||||
#include "sp3-vm.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
// ASIC types
|
||||
|
||||
|
||||
// Identifies the assembler backend an ASIC uses; stored in
// asic_info::backend. Values are implicit and sequential, starting at 0.
enum asic_backend {
|
||||
ASIC_BACKEND_SI,
|
||||
ASIC_BACKEND_CI,
|
||||
ASIC_BACKEND_GFX8,
|
||||
ASIC_BACKEND_GFX81,
|
||||
// NOTE(review): presumably doubles as the number of backends — confirm.
ASIC_MAX_BACKEND, // Must be the last entry
|
||||
};
|
||||
|
||||
|
||||
// Numeric identifiers for ASIC capabilities, numbered explicitly from 1.
// The names mirror the asic_* capability fields of struct asic_info
// (e.g. ASIC_THREAD_SIZE <-> asic_thread_size).
enum asic_cap_id {
|
||||
ASIC_THREAD_SIZE = 1,
|
||||
ASIC_FED_INSTRUCTIONS = 2,
|
||||
ASIC_LEGACY_LOG = 3,
|
||||
ASIC_LARGE_DS_READ = 4,
|
||||
ASIC_32BANK_LDS = 5,
|
||||
};
|
||||
|
||||
|
||||
// Description of one ASIC model: its name, the backend that handles it, and
// its capability values. Instances live in the extern table asics[] declared
// in this header.
struct asic_info {
|
||||
const char *name;
|
||||
enum asic_backend backend; // which backend to use
|
||||
int asic_thread_size; // number of threads in a wave
|
||||
int asic_fed_instructions; // FED instructions are available
|
||||
int asic_legacy_log; // Legacy EXP and LOG opcodes are available
|
||||
int asic_large_ds_read; // Large DS read opcodes (96b and 128b) are available
|
||||
int asic_32bank_lds; // Full 32 bank lds P1LL_F16 INTERP instruction available
|
||||
};
|
||||
|
||||
|
||||
// ASIC-specific assembler state.
// NOTE(review): this looks like the object reached through the `A` macro
// (S->ap), since the ASIC / ASICNAME macros index asics[] with A->asic and
// this struct has an `asic` field — confirm against struct sp3_state.
struct sp3_asic_state {
|
||||
// One ALU operation slot; the bundle[] array below holds up to 5 of these.
struct sp3_asic_aluop {
|
||||
int pos; // original position in code
|
||||
int op, na, nc; // na = number of args, nc = number of consts in args
|
||||
int lds, offset; // lds = is an LDS_IDX_OP subop, offset = LDS offset
|
||||
unsigned dst;
|
||||
unsigned arg[3];
|
||||
unsigned lit[3]; // float literals are no longer float at this point
|
||||
unsigned flags;
|
||||
int scalar;
|
||||
} bundle [5];
|
||||
unsigned lds_lit[2], lds_mask[2];
|
||||
// NOTE(review): presumably the number of bundle[] slots in use — confirm.
int nbundle;
|
||||
int reorder;
|
||||
int last_reorder, last_po[5];
|
||||
int nscalar; // number of nominally-scalar opcodes in bundle
|
||||
int barrier_after; // require barrier after this clause
|
||||
|
||||
// sp3-r6xx
|
||||
// NOTE(review): presumably the index into asics[] (see ASIC / ASICNAME).
int asic;
|
||||
// Singly linked list node (chained through `next`); da_relocs is the
// pointer to the first node.
struct da_reloc {
|
||||
unsigned addr, ref;
|
||||
struct da_reloc *next;
|
||||
} *da_relocs;
|
||||
struct cf_reloc **instrels;
|
||||
struct cf_reloc *labels;
|
||||
// NOTE(review): sinstrels / slabels look like sizes for instrels / labels
// — confirm.
int sinstrels;
|
||||
int slabels;
|
||||
char unk_name[16];
|
||||
};
|
||||
#define A S->ap
|
||||
|
||||
|
||||
extern struct asic_info asics[];
|
||||
#define ASICNAME asics[A->asic].name
|
||||
#define ASIC asics[A->asic]
|
||||
void set_asic(Sp, int asic);
|
||||
int find_asic(const char *name);
|
||||
|
||||
|
||||
// opcode tables
|
||||
|
||||
void sp3_unbuild_tables(void);
|
||||
void sp3_si_unbuild_tables(void);
|
||||
void sp3_ci_unbuild_tables(void);
|
||||
void sp3_gfx8_unbuild_tables(void);
|
||||
|
||||
void sp3_build_tables(void);
|
||||
void sp3_si_build_tables(void);
|
||||
void sp3_ci_build_tables(void);
|
||||
void sp3_gfx8_build_tables(void);
|
||||
|
||||
|
||||
|
||||
|
||||
// helper functions
|
||||
|
||||
|
||||
#define FMT_FMT 0x00000000
|
||||
#define FMT_COMP 0x00010000
|
||||
#define FMT_ENDIAN 0x00020000
|
||||
#define FMT_NUM 0x00030000
|
||||
#define FMT_SRF 0x00040000
|
||||
#define FMT_MASK 0xFFFF0000
|
||||
#define FMT_IMASK 0x0000FFFF
|
||||
|
||||
void mark_sgpr(Sp, unsigned);
|
||||
void mark_vgpr(Sp, unsigned);
|
||||
void mark_global(Sp, unsigned);
|
||||
void mark_ctemp(Sp, unsigned);
|
||||
int is_mod_bool(Sp, pnode *, const char *);
|
||||
int get_mod_bool(Sp, pnode *, const char *);
|
||||
int get_mod_int(Sp, pnode *, int, int);
|
||||
int get_mod_int32(Sp, pnode *);
|
||||
int par_cmask(Sp, pnode *);
|
||||
unsigned reg_csel(Sp, unsigned , int);
|
||||
unsigned reg_msel(Sp, unsigned *, int);
|
||||
|
||||
const char *spec_sel_to_name(Sp, int sel);
|
||||
const char *sp3_fmt_to_name(Sp, int cls, int val);
|
||||
const char *sp3_si_fmt_to_name(Sp, int cls, int val);
|
||||
const char *sp3_ci_fmt_to_name(Sp, int cls, int val);
|
||||
const char *sp3_gfx8_fmt_to_name(Sp, int cls, int val);
|
||||
|
||||
void add_reloc_label(Sp, int li, int blame);
|
||||
void add_reloc_inst(Sp, int ii, int blame);
|
||||
void add_reloc_cf(Sp, int offs);
|
||||
|
||||
int grouping_for_group_size(Sp, int group_size);
|
||||
|
||||
//JENNICA - this block of name_tree will go away, replace
|
||||
//with backend specific.
|
||||
|
||||
// Selector passed as the `whichtree` argument of get_name_tree() to choose
// which name tree is returned. Values are implicit and sequential, starting
// at 0.
enum nametree_enum {
|
||||
NAMETREE_OPCODES,
|
||||
NAMETREE_OPCODES_0ARG,
|
||||
NAMETREE_OPCODES_CALL,
|
||||
NAMETREE_VTX_FMTS,
|
||||
NAMETREE_SPEC_SELS,
|
||||
NAMETREE_SPEC_VEC_SELS,
|
||||
NAMETREE_SGPR_NAME_SELS,
|
||||
NAMETREE_CONSTS,
|
||||
NAMETREE_DEPRECATED,
|
||||
};
|
||||
|
||||
struct name_tree **get_name_tree(struct sp3_state *S, enum nametree_enum whichtree);
|
||||
|
||||
extern struct name_tree *opcodes_0arg;
|
||||
extern struct name_tree *opcodes_call;
|
||||
extern struct name_tree *vtx_fmts;
|
||||
extern struct name_tree *spec_sels;
|
||||
extern struct name_tree *spec_vec_sels;
|
||||
extern struct name_tree *sgpr_name_sels;
|
||||
extern struct name_tree *consts;
|
||||
extern struct name_tree *deprecated;
|
||||
|
||||
extern struct name_tree *asic_names;
|
||||
// Name/id pair for one ASIC capability; entries live in the extern table
// asiccaps[]. NOTE(review): `id` presumably holds an enum asic_cap_id value
// — confirm. The tag `asic_caps` is also used as a variable name in this
// header (a struct name_tree pointer), which is legal in C because tags have
// their own namespace.
struct asic_caps{const char *name; int id;};
|
||||
extern struct asic_caps asiccaps[];
|
||||
extern struct name_tree *asic_caps; //JENNICA - this may need to go away.
|
||||
|
||||
void update_sgpr_names(Sp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,553 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_INT_H
|
||||
#define SP3_INT_H
|
||||
|
||||
#include "sp3.h"
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifndef strdup
|
||||
#define strdup _strdup
|
||||
#endif
|
||||
#ifndef stricmp
|
||||
#define stricmp _stricmp
|
||||
#endif
|
||||
#ifndef strcasecmp
|
||||
#define strcasecmp _stricmp
|
||||
#endif
|
||||
#pragma warning(disable:4090 4204 4245 4296 4389 4701 4702)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct sp3_state;
|
||||
#define Sp struct sp3_state *S
|
||||
|
||||
// clause types
|
||||
|
||||
#define CT_NONE 0
|
||||
#define CT_SHADER 1
|
||||
|
||||
// parse tree
|
||||
|
||||
#define P_NUM 0 // integer
|
||||
#define P_FLT 1 // float
|
||||
#define P_STR 2 // string
|
||||
#define P_REG 3 // register component(s)
|
||||
#define P_RANGE 4 // closed range
|
||||
#define P_RANGEL 5 // right-open range
|
||||
#define P_SLICE 6 // array concatenation (used for slices)
|
||||
#define P_RCAST 7 // integer -> register cast
|
||||
#define P_LIST 8 // list (internal to the parser only)
|
||||
#define P_VAR 9 // variable (with name)
|
||||
#define P_VARE 10 // variable-element (result of lvalue slice)
|
||||
#define P_CL 11 // clause
|
||||
#define P_CLI 12 // clause instructions
|
||||
#define P_WHILE 13 // while loop
|
||||
#define P_REPEAT 14 // repeat-until loop
|
||||
#define P_IF 15 // if or if-else
|
||||
#define P_CFOR 16 // C-style for loop
|
||||
#define P_FOR 17 // vector for loop
|
||||
#define P_RET 18 // return from function
|
||||
#define P_CSLICE 19 // componentwise slice
|
||||
#define P_UREF 20 // unresolved reference
|
||||
#define P_FREF 21 // resolved reference
|
||||
#define P_CALL 22 // function call
|
||||
#define P_PRINT 23 // print to stdout
|
||||
#define P_PAR 24 // function parameters
|
||||
#define P_NF 25 // native function
|
||||
#define P_OMOD 27 // opcode modifier
|
||||
#define P_OMODS 28 // opcode modifiers
|
||||
#define P_OPARS 29 // opcode parameters
|
||||
#define P_OP 30 // opcode
|
||||
#define P_SWIZ0 31 // register swizzles with N components wrapped
|
||||
#define P_SWIZ1 32 // -"-
|
||||
#define P_SWIZ2 33 // -"-
|
||||
#define P_SWIZ3 34 // -"-
|
||||
#define P_SWIZ4 35 // -"-
|
||||
#define P_VTXFMT 36 // vertex formats
|
||||
#define P_LABEL 37 // unique identifier of a label
|
||||
#define P_LINIT 38 // generate label identifiers
|
||||
#define P_MARK 39 // mark a label
|
||||
#define P_OPCALL 40 // opcode that does a clause instantiation on par0
|
||||
#define P_ASIC 41 // ASIC model
|
||||
#define P_ASICCAP 42 // ASIC capability
|
||||
#define P_NCLOS 43 // create closure
|
||||
#define P_CLOS 44 // closure
|
||||
#define P_SH 45 // compiled shader
|
||||
|
||||
#define P_NOT 0x100
|
||||
#define P_BNOT 0x101
|
||||
#define P_NEG 0x102
|
||||
#define P_MUL 0x103
|
||||
#define P_DIV 0x104
|
||||
#define P_MOD 0x105
|
||||
#define P_ADD 0x106
|
||||
#define P_SUB 0x107
|
||||
#define P_SHL 0x108
|
||||
#define P_SHR 0x109
|
||||
#define P_SAR 0x10A
|
||||
#define P_LT 0x10B
|
||||
#define P_GT 0x10C
|
||||
#define P_LEQ 0x10D
|
||||
#define P_GEQ 0x10E
|
||||
#define P_EQ 0x10F
|
||||
#define P_NEQ 0x110
|
||||
#define P_BAND 0x111
|
||||
#define P_BOR 0x112
|
||||
#define P_BXOR 0x113
|
||||
#define P_AND 0x114
|
||||
#define P_OR 0x115
|
||||
#define P_XOR 0x116
|
||||
#define P_SEL 0x117
|
||||
#define P_XDEC 0x118
|
||||
#define P_XINC 0x119
|
||||
#define P_DECX 0x11A
|
||||
#define P_INCX 0x11B
|
||||
#define P_ASGN 0x11C
|
||||
#define P_IND 0x11D
|
||||
#define P_NOP 0x11E
|
||||
#define P_VSUM 0x11F
|
||||
#define P_VPROD 0x120
|
||||
#define P_VBOR 0x121
|
||||
#define P_VBAND 0x122
|
||||
#define P_VBXOR 0x123
|
||||
#define P_VOR 0x124
|
||||
#define P_VAND 0x125
|
||||
#define P_VXOR 0x126
|
||||
#define P_VMIN 0x127
|
||||
#define P_VMAX 0x128
|
||||
#define P_CADD 0x129
|
||||
#define P_CSUB 0x12A
|
||||
#define P_CMUL 0x12B
|
||||
#define P_CDIV 0x12C
|
||||
#define P_CSHL 0x12D
|
||||
#define P_CSHR 0x12E
|
||||
#define P_CSAR 0x12F
|
||||
#define P_CBAND 0x130
|
||||
#define P_CBOR 0x131
|
||||
#define P_CBXOR 0x132
|
||||
#define P_CAND 0x133
|
||||
#define P_COR 0x134
|
||||
#define P_CXOR 0x135
|
||||
#define P_CMIN 0x136
|
||||
#define P_CMAX 0x137
|
||||
#define P_MIN 0x138
|
||||
#define P_MAX 0x139
|
||||
#define P_PROBE 0x13A
|
||||
#define P_BITS 0x13B
|
||||
|
||||
// register types
|
||||
#define R_VGPR 0x00000
|
||||
#define R_OFF 0x04000
|
||||
#define R_SNAME 0x06000
|
||||
#define R_INTERP 0x08000
|
||||
#define R_SPEC 0x0A000
|
||||
#define R_SGPR 0x0C000
|
||||
#define R_EXPBUF 0x0E000
|
||||
#define R_TMASK 0x1E000
|
||||
|
||||
// magic values for R_SPEC
|
||||
#define R_P_CL 3 // used internally only (inline literal)
|
||||
#define R_P_CI_L 0xDB // used internally only
|
||||
#define R_P_LDX_L 0xDB // any LDS inline
|
||||
#define R_P_LDS_L 0xDF // direct LDS inline
|
||||
#define R_P_LDS_H 0xE0
|
||||
#define R_P_LDX_H 0xE0
|
||||
#define R_P_CI_S 0xF3 // end of new R8xx constants
|
||||
#define R_P_CI_H 0xFC
|
||||
#define R_P_NOTLAST 0xFF// notlast operand for export
|
||||
|
||||
// magic values for R_SNAME
|
||||
#define R_S_SCRATCH 1
|
||||
#define R_S_PSVS_STATE 2
|
||||
#define R_S_SO_WRITE_INDEX 3
|
||||
#define R_S_SO_BASE_OFFSET0 4
|
||||
#define R_S_SO_BASE_OFFSET1 5
|
||||
#define R_S_SO_BASE_OFFSET2 6
|
||||
#define R_S_SO_BASE_OFFSET3 7
|
||||
#define R_S_OFFCHIP_LDS 8
|
||||
#define R_S_IS_OFFCHIP 9
|
||||
#define R_S_RING_OFFSET 10
|
||||
#define R_S_GS_WAVE_ID 11
|
||||
#define R_S_TG_SIZE 12
|
||||
#define R_S_TF_BASE 13
|
||||
#define R_S_TGID_X 14
|
||||
#define R_S_TGID_Y 15
|
||||
#define R_S_TGID_Z 16
|
||||
#define R_S_WAVE_CNT 17
|
||||
#define R_S_GLOBAL_WAVE_ID 18
|
||||
|
||||
// register components
|
||||
#define R_CMASK 0x1C00
|
||||
#define R_CSHIFT 10
|
||||
#define R_CX 0x0000
|
||||
#define R_CY 0x0400
|
||||
#define R_CZ 0x0800
|
||||
#define R_CW 0x0C00
|
||||
#define R_CS 0x1000 // used to identify scalar elements
|
||||
#define R_CN 0x1800
|
||||
|
||||
#define R_IMASK 0x03FF
|
||||
|
||||
// source transforms
|
||||
#define R_NEG 0x80000
|
||||
#define R_ABS 0x100000
|
||||
#define R_SEXT 0x200000
|
||||
|
||||
// subencodings for export targets
|
||||
|
||||
#define R_E_TMASK 0x0380
|
||||
#define R_E_MRT 0x0000
|
||||
#define R_E_Z 0x0080
|
||||
#define R_E_POS 0x0100
|
||||
#define R_E_PARAM 0x0180
|
||||
#define R_E_ATTR 0x0280
|
||||
#define R_E_NULL 0x0300
|
||||
|
||||
#define R_E_IMASK 0x007F
|
||||
|
||||
// subencodings for interp
|
||||
|
||||
#define R_I_TMASK 0x0380
|
||||
#define R_I_P10 0x0000
|
||||
#define R_I_P20 0x0080
|
||||
#define R_I_P0 0x0100
|
||||
|
||||
// function parameters
|
||||
#define F_CANY 0x00000000
|
||||
#define F_CNUM 0x01000000
|
||||
#define F_CREG 0x02000000
|
||||
#define F_CTMP 0x03000000
|
||||
#define F_CFPTR 0x04000000
|
||||
#define F_CINT 0x05000000
|
||||
#define F_CMASK 0x07000000
|
||||
#define F_OPT 0x40000000
|
||||
#define F_VEC 0x80000000
|
||||
|
||||
typedef struct pnode {
|
||||
struct pnode *gc_next;
|
||||
int gc_mark;
|
||||
int type;
|
||||
int et; // error reporting tag
|
||||
int ni; // number of items
|
||||
union pnode_item {
|
||||
int num; // integer
|
||||
float flt; // float
|
||||
char *str; // string
|
||||
struct pnode *ptr; // tree item
|
||||
struct {
|
||||
struct pnode *v;
|
||||
int e;
|
||||
} ve; // variable-element pair
|
||||
struct {
|
||||
int p;
|
||||
char *n;
|
||||
} var; // variable (stack offset, name)
|
||||
struct sp3_shader *sh;
|
||||
unsigned int reg; // register components
|
||||
struct pnode *(* nf)(Sp, struct pnode **); // native function
|
||||
} i[1];
|
||||
} pnode;
|
||||
|
||||
pnode *p_str(Sp, char *s); // wrap a string
|
||||
pnode *p_float(Sp, float f); // wrap a float
|
||||
pnode *p_num(Sp, int i); // wrap an integer
|
||||
pnode *p_vec(Sp, int type, int len); // create a vector
|
||||
pnode *p_list(Sp, pnode *list, pnode *item); // append item to P_LIST
|
||||
pnode *p_list_rev(Sp, pnode *list); // reverse the order of the list
|
||||
pnode *p_tree(Sp, int type, int nitems, ...); // create a tree node
|
||||
pnode *p_l2t(Sp, int type, pnode *list); // list to tree
|
||||
pnode *p_l2v(Sp, int type, pnode *list); // list to vector
|
||||
pnode *p_x2x(Sp, int type, pnode *p); // cast to type
|
||||
pnode *p_clause(Sp, int vstk, int lstk, pnode *parlist, pnode *instlist, int type);
|
||||
pnode *p_reg(Sp, int type, int idx); // wrap a register
|
||||
pnode *p_swizzle(Sp, char *str); // parse a swizzle string
|
||||
pnode *p_lv2rv(Sp, pnode *lval); // lvalue to rvalue
|
||||
pnode *p_newlabel(Sp, pnode *t, int tag); // define new label
|
||||
pnode *p_label(Sp, int cnt); // fill with label IDs
|
||||
pnode *p_clone(Sp, pnode *src);
|
||||
|
||||
void print_node(pnode *); // print to stdout
|
||||
|
||||
void mark_gc_storage(Sp); // mark all internal storage of sp3 for gc
|
||||
|
||||
// functions provided by machine driver
|
||||
int is_opcode(struct sp3_state *S, const char *name); // is an opcode (any)
|
||||
int is_opcode_0arg(struct sp3_state *S, const char *name); // is an opcode (0-argument)
|
||||
int is_opcode_call(struct sp3_state *S, const char *name); // is a call op (1st argument is a closure)
|
||||
void sp3_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
void sp3_si_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
void sp3_ci_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
void sp3_gfx8_gen_opcode(Sp, const char *op, pnode *par, pnode *mod);
|
||||
pnode *machine_const(Sp, char *name); // if a machine const, parse it (else NULL)
|
||||
void mark_label(Sp, int li); // "label:"
|
||||
pnode *asic_getcap(Sp, int id); // get ASIC capability #id
|
||||
void mach_cleanup(Sp); // initialize generator state
|
||||
|
||||
// name trees
|
||||
|
||||
// Mode values for name trees.
// NOTE(review): these presumably feed the `add` argument of
// name_tree_operation(), with NT_ADD_STRDUP acting as an extra bit flag --
// confirm against the implementation.
#define NT_SEARCH 0
#define NT_ADD 1
#define NT_ADD_ONLY 2
#define NT_ADD_STRDUP 4

// One node of a name tree: a name with an integer tag and two child links.
struct name_tree {
    const char *name;
    int tag;
    int add;                    // presumably the NT_* mode the node was created with -- confirm
    struct name_tree *l, *r;    // child links (presumably left/right of a binary tree)
};

// Operate on the tree rooted at *t using `name`, `tag` and an `add` mode.
struct name_tree *name_tree_operation(struct name_tree **t, const char *name, int tag, int add);
// Delete the tree rooted at *t.
void name_tree_delete(struct name_tree **t);
|
||||
|
||||
// symbol table
|
||||
|
||||
void f_decl(Sp, char *, pnode *);
|
||||
pnode *f_ref(Sp, char *);
|
||||
void f_check(Sp);
|
||||
pnode *f_call(Sp, const char *);
|
||||
|
||||
void f_decl_native(Sp, int, char *, pnode *(*)(Sp, pnode **), int, ...);
|
||||
|
||||
// parse-time variable stack
|
||||
|
||||
void vs_decl(Sp, const char *, int tag);
|
||||
int vs_lookup(Sp, const char *, pnode **, int);
|
||||
char *vs_getname(pnode *);
|
||||
|
||||
void vs_enter_func(Sp);
|
||||
int vs_leave_func(Sp, int *); // returns number of stack allocations &
|
||||
// (through param) number of lstack allocs
|
||||
void vs_enter_block(Sp);
|
||||
void vs_leave_block(Sp);
|
||||
|
||||
int vs_get_topmax(Sp); // returns number of stack allocation for top level
|
||||
|
||||
// runtime variable stack
|
||||
|
||||
void rv_set(Sp, pnode *, pnode *);
|
||||
pnode *rv_get(Sp, pnode *);
|
||||
void rv_alloc(Sp, int);
|
||||
void rv_setpar(Sp, int, pnode *);
|
||||
int rv_enter(Sp, int);
|
||||
void rv_leave(Sp, int);
|
||||
|
||||
int rl_enter(Sp, int);
|
||||
void rl_leave(Sp, int);
|
||||
|
||||
void rv_leave_native(Sp);
|
||||
pnode **rv_getpar_native(Sp);
|
||||
|
||||
// all-in-one variable setter
|
||||
|
||||
void rv_set_by_name(Sp, const char *, pnode *);
|
||||
|
||||
// growable binary buffer
|
||||
|
||||
// Growable buffer of unsigned words.
// NOTE(review): `n` / `size` are presumably the used and allocated word
// counts, and `i` is presumably over-allocated past its single declared
// element -- confirm against gb_alloc().
typedef struct grow_buf {
    int n, size;
    unsigned i[1];
} grow_buf;

// Buffer helpers.  The ones taking a grow_buf * also return one; presumably
// the buffer may be reallocated, so callers should use the returned pointer
// -- confirm against the implementation.
grow_buf *gb_alloc(int);
grow_buf *gb_append(grow_buf *, int, unsigned *);
grow_buf *gb_add(grow_buf *, unsigned);
grow_buf *gb_reg(grow_buf *, unsigned, unsigned);
|
||||
|
||||
// clause contents
|
||||
|
||||
struct clause_info {
|
||||
unsigned base;
|
||||
grow_buf *data;
|
||||
int type;
|
||||
};
|
||||
|
||||
void start_clause(Sp, int);
|
||||
void cb_emit(Sp, unsigned *, int);
|
||||
int cb_ptr(Sp);
|
||||
void cb_patch(Sp, int, int, unsigned);
|
||||
|
||||
int remap_clauses(Sp);
|
||||
|
||||
struct sp3_shader *gen_output(Sp);
|
||||
void convert_relocs(Sp);
|
||||
void perform_relocs(Sp);
|
||||
|
||||
pnode *shader_clos(Sp, pnode *); // call this to get a binary shader from closure
|
||||
pnode *shader_name(Sp, const char *); // call this to get a binary shader from name
|
||||
|
||||
void set_const(Sp, int idx, unsigned val);
|
||||
int find_const(Sp, unsigned val);
|
||||
|
||||
void set_kbuf(Sp, int kbuf, int idx, unsigned val);
|
||||
|
||||
const char *asic_name(Sp);
|
||||
int asic_id(Sp);
|
||||
int asic_capbyname(int, const char *);
|
||||
int asic_capbyid(int, int);
|
||||
|
||||
// register stream packer
|
||||
int sp3_guess_shader_type(struct sp3_state *S, struct sp3_shader *sh);
|
||||
int sp3_si_guess_shader_type(struct sp3_shader *sh);
|
||||
int sp3_ci_guess_shader_type(struct sp3_shader *sh);
|
||||
int sp3_gfx8_guess_shader_type(struct sp3_shader *sh);
|
||||
void sp3_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void sp3_si_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void sp3_ci_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void sp3_gfx8_pack_reg_stream(Sp, int type, struct sp3_shader *sh);
|
||||
void unpack_reg_stream(Sp, struct sp3_shader *sh);
|
||||
|
||||
// instances
|
||||
|
||||
int new_instance(Sp, pnode *, int);
|
||||
void eval_instances(Sp);
|
||||
int get_instance_clause(Sp, int);
|
||||
int get_instance_type(Sp, int);
|
||||
|
||||
// error reporting
|
||||
|
||||
void et_parse_mode(Sp, int);
|
||||
int et_get_id(Sp);
|
||||
#ifdef _MSC_VER
|
||||
__declspec(noreturn)
|
||||
#endif
|
||||
void et_error(Sp, char *, char *, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((__noreturn__))
|
||||
__attribute__ ((format(printf, 3, 4)))
|
||||
#endif
|
||||
;
|
||||
void et_warning(Sp, char *, char *, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((format(printf, 3, 4)))
|
||||
#endif
|
||||
;
|
||||
void et_blame(Sp, pnode *);
|
||||
void et_blame_et(Sp, int);
|
||||
void et_print(Sp, pnode *);
|
||||
int et_get_blame(Sp);
|
||||
|
||||
// text buffer for disasm
|
||||
void bprintf(Sp, char *, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((format(printf, 2, 3)))
|
||||
#endif
|
||||
;
|
||||
void bcmt(Sp, const char *cmt, const char *start, const char *line, const char *end);
|
||||
void btab(Sp, int);
|
||||
char *bget(Sp);
|
||||
|
||||
// state structure
|
||||
struct sp3_state {
|
||||
// flex
|
||||
void *scanner;
|
||||
void *yystate;
|
||||
|
||||
char *yyfile;
|
||||
int yyline;
|
||||
|
||||
// sp3-gc
|
||||
struct sp3_gc_state *gc;
|
||||
|
||||
// asic private
|
||||
struct sp3_asic_state *ap;
|
||||
|
||||
// sp3-eval
|
||||
int retflag;
|
||||
pnode *retval;
|
||||
|
||||
// sp3-int
|
||||
struct sp3_shader config;
|
||||
|
||||
int clause_id; // counts up during evaluation
|
||||
int clause_type;
|
||||
struct clause_info *clauses;
|
||||
int nclauses, sclauses;
|
||||
|
||||
int memsize, ctsizes[4];
|
||||
int in_shader;
|
||||
|
||||
char *disasm_text;
|
||||
int disasm_column;
|
||||
int disasm_len, disasm_maxlen;
|
||||
|
||||
sp3_vma *comment_map;
|
||||
void *comment_ctx;
|
||||
sp3_comment_cb comment_top, comment_right;
|
||||
|
||||
unsigned const_buf[1024];
|
||||
int const_vld[1024], const_vld_range;
|
||||
|
||||
unsigned *kval[16];
|
||||
int knum[16];
|
||||
|
||||
struct et_record {
|
||||
const char *file;
|
||||
int line;
|
||||
} *et_names;
|
||||
int et_node;
|
||||
int et_parsing;
|
||||
int net_names, set_names;
|
||||
|
||||
char *fname_last;
|
||||
struct name_tree *fnames;
|
||||
struct fsym {
|
||||
char *name;
|
||||
pnode *func;
|
||||
struct fref *refs;
|
||||
struct fsym *l, *r;
|
||||
} *fsymbols;
|
||||
int func_id; // counts up during parsing
|
||||
|
||||
struct instance {
|
||||
int type;
|
||||
int clause_id;
|
||||
pnode *call;
|
||||
} *instances;
|
||||
int ninstances, sinstances;
|
||||
|
||||
struct vstack {
|
||||
char *name;
|
||||
int tag;
|
||||
int vs_sp, vs_level;
|
||||
struct vstack *next;
|
||||
} *var_stack, *lbl_stack;
|
||||
int vs_max, vs_sp, vs_top, vs_topmax;
|
||||
int ls_max, ls_sp;
|
||||
|
||||
pnode **rl_stack;
|
||||
int rl_sp, rl_ss, rl_base, rl_id, rl_size;
|
||||
|
||||
pnode **rv_stack;
|
||||
int rv_sp, rv_ss, rv_base, rv_size;
|
||||
|
||||
int werror, wcount;
|
||||
const char *err_hdr;
|
||||
|
||||
unsigned entry_point_table_size;
|
||||
unsigned entry_point_table_alloc_size;
|
||||
sp3_vmaddr *entry_point_table;
|
||||
};
|
||||
struct sp3_state *sp3_new_state(void);
|
||||
void sp3_asic_attach_state(Sp);
|
||||
void sp3_new_parser(Sp);
|
||||
void sp3_free_parser(Sp);
|
||||
void sp3_free_state(Sp);
|
||||
|
||||
void reg_natives(Sp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,137 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_TYPE_H
|
||||
#define SP3_TYPE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// @file sp3-type.h
|
||||
/// @brief sp3 types
|
||||
|
||||
/// @brief Shader type identifiers (SP3_SHTYPE_*).
///
/// SP3_SHTYPE_NONE is -1; the remaining types are numbered from 0.
enum sp3_shtype {
    SP3_SHTYPE_NONE = -1,
    SP3_SHTYPE_PS   = 0,
    SP3_SHTYPE_VS   = 1,
    SP3_SHTYPE_GS   = 2,
    SP3_SHTYPE_ES   = 3,
    SP3_SHTYPE_HS   = 4,
    SP3_SHTYPE_LS   = 5,
    SP3_SHTYPE_CS   = 6,
};
|
||||
|
||||
/// @brief Array-size constants used by struct sp3_shader.
enum sp3_count {
    SP3_NUM_MRT  = 8,   ///< Length of sp3_shader::cb_masks.
    SP3_NUM_STRM = 4,   ///< Length of sp3_shader::strm_used.
};
|
||||
|
||||
/// @brief SP3DIS_* flags; each value is a distinct bit so they can be
/// combined into a mask.
enum sp3_flag {
    SP3DIS_NO_STATE     = 0x01,
    SP3DIS_NO_BINARY    = 0x02,
    SP3DIS_COMMENTS     = 0x04,
    SP3DIS_NO_GPR_COUNT = 0x08,
    SP3DIS_FORCEVALID   = 0x10,
    SP3DIS_NO_ASIC      = 0x20,
};
|
||||
|
||||
/// @brief Shader context. Contains no user-visible fields.
|
||||
struct sp3_context;
|
||||
|
||||
/// @brief Storage entry for register streams.
///
/// One index/value pair; struct sp3_shader holds an array of these in
/// its `regs` member.
struct sp3_reg {
    unsigned index; ///< One of the mm* values from chip_enum.h.
    unsigned value; ///< Value paired with that index.
};
|
||||
|
||||
/// @brief Wrapped shader metadata.
|
||||
///
|
||||
/// After generation, shaders are encapsulated in sp3_shader structures.
|
||||
///
|
||||
/// Those structures contain the shader binary, its register stream,
|
||||
/// constants and constant buffers and metadata needed for SC compatibility.
|
||||
struct sp3_shader {
|
||||
int type; ///< One of the SHTYPE_* constants.
|
||||
int asic_int; ///< Internal ASIC index. Do not use.
|
||||
const char *asic; ///< ASIC name as a string ("RV870" etc).
|
||||
unsigned size; ///< Size of the compiled shader, in 32-bit words.
|
||||
unsigned nsgprs; ///< Number of scalar GPRs used.
|
||||
unsigned nvgprs; ///< Number of vector GPRs used.
|
||||
unsigned trap_present;
|
||||
unsigned user_sgpr_count;
|
||||
unsigned scratch_en;
|
||||
unsigned dispatch_draw_en;
|
||||
unsigned so_en;
|
||||
unsigned so_base0_en;
|
||||
unsigned so_base1_en;
|
||||
unsigned so_base2_en;
|
||||
unsigned so_base3_en;
|
||||
unsigned oc_lds_en;
|
||||
unsigned tg_size_en;
|
||||
unsigned tidig_comp_cnt; ///< Number of components(-1) enabled for thread id in group
|
||||
unsigned tgid_x_en;
|
||||
unsigned tgid_y_en;
|
||||
unsigned tgid_z_en;
|
||||
unsigned wave_cnt_en;
|
||||
unsigned sgpr_scratch;
|
||||
unsigned sgpr_psvs_state;
|
||||
unsigned sgpr_so_write_index;
|
||||
unsigned sgpr_so_base_offset0;
|
||||
unsigned sgpr_so_base_offset1;
|
||||
unsigned sgpr_so_base_offset2;
|
||||
unsigned sgpr_so_base_offset3;
|
||||
unsigned sgpr_offchip_lds;
|
||||
unsigned sgpr_is_offchip;
|
||||
unsigned sgpr_ring_offset;
|
||||
unsigned sgpr_gs_wave_id;
|
||||
unsigned sgpr_global_wave_id;
|
||||
unsigned sgpr_tg_size;
|
||||
unsigned sgpr_tgid_x;
|
||||
unsigned sgpr_tgid_y;
|
||||
unsigned sgpr_tgid_z;
|
||||
unsigned sgpr_tf_base;
|
||||
unsigned sgpr_wave_cnt;
|
||||
unsigned pc_exports; ///< Range of parameters exported (if VS).
|
||||
unsigned pos_export; ///< Shader executes a position export (if VS).
|
||||
unsigned cb_exports; ///< Range of MRTs exported (if PS).
|
||||
unsigned mrtz_export_format; ///< Export format of the mrtz export.
|
||||
unsigned z_export; ///< Shader executes a Z export (if PS).
|
||||
unsigned pops_en; ///< Shader is POPS (PS)
|
||||
unsigned load_collision_waveid; ///< Shader sets load collision waveid (if PS).
|
||||
unsigned stencil_test_export; ///< Shader exports stencil (if PS).
|
||||
unsigned stencil_op_export; ///< Shader exports stencil (if PS).
|
||||
unsigned kill_used; ///< Shader executes ALU KILL operations.
|
||||
unsigned cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS).
|
||||
unsigned emit_used; ///< EMIT opcodes used (if GS).
|
||||
unsigned covmask_export; ///< Shader exports coverage mask (if PS).
|
||||
unsigned mask_export; ///< Shader exports mask (if PS).
|
||||
unsigned strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map).
|
||||
unsigned scratch_used; ///< Scratch SMX exports used.
|
||||
unsigned scratch_itemsize; ///< Scratch ring item size.
|
||||
unsigned reduction_used; ///< Reduction SMX exports used.
|
||||
unsigned ring_used; ///< ESGS/GSVS ring SMX exports used.
|
||||
unsigned ring_itemsize; ///< ESGS/GSVS ring item size (for ES/GS respectively).
|
||||
unsigned vertex_size[4]; ///< GSVS ring vertex size (for GS).
|
||||
unsigned mem_used; ///< Raw memory SMX exports used.
|
||||
unsigned rats_used; ///< Mask of RATs (UAVs) used
|
||||
unsigned group_size[3]; ///< Wavefront group size (for ELF files).
|
||||
unsigned alloc_lds; ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS)
|
||||
unsigned *data; ///< Shader binary data.
|
||||
unsigned nregs; ///< Number of register writes in the stream.
|
||||
struct sp3_reg *regs; ///< Register writes (index-value pairs).
|
||||
};
|
||||
|
||||
/// @brief Comment callback.
///
/// Takes a void pointer and an int and returns a string.
/// NOTE(review): presumably the pointer is the user context and the int the
/// comment-map value handed to sp3_setcomments() -- confirm.
typedef const char *(*sp3_comment_cb)(void *, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,119 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_VM_H
|
||||
#define SP3_VM_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// SP3_EXPORT: linkage decoration placed on the public sp3 entry points.
//  - WIN_OS defined and SP3_STATIC_LIB not defined: __declspec(dllexport)
//    when DLL_EXPORT_SP3 is defined, __declspec(dllimport) otherwise.
//  - any other configuration: expands to nothing.
#if defined (WIN_OS) && !defined(SP3_STATIC_LIB)
#if defined(DLL_EXPORT_SP3)
#define SP3_EXPORT __declspec(dllexport)
#else
#define SP3_EXPORT __declspec(dllimport)
#endif
#else
#define SP3_EXPORT
#endif
|
||||
|
||||
// Fixed-width integer types used by the VM API.  MSVC builds define them
// from the compiler's sized __intN types; every other compiler gets them
// from <inttypes.h>.
#ifdef _MSC_VER
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;

typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <inttypes.h>
#endif
|
||||
|
||||
struct sp3_vma;
|
||||
|
||||
/// @file sp3-vm.h
|
||||
/// @brief sp3 VM API
|
||||
///
|
||||
/// The VM API is used to manage virtual memory maps. Those maps are
|
||||
/// used for binary storage for disassembly, as they can naturally
|
||||
/// mirror the GPU's memory map (so no register translation is needed).
|
||||
|
||||
#define SP3_VM_PAGESIZE 64
|
||||
|
||||
/// @brief VM addresses are 64-bit and the address unit is 32 bits
|
||||
///
|
||||
typedef uint64_t sp3_vmaddr;
|
||||
|
||||
/// @brief Callback function that will fill a VMA on demand
|
||||
///
|
||||
/// The VMA to be filled will be specified through the request address.
|
||||
/// The callback should fill the VMA using sp3_vm_write calls.
|
||||
typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx);
|
||||
|
||||
/// @brief VM area
|
||||
///
|
||||
/// VMAs are kept in a sorted list
|
||||
typedef struct sp3_vma {
|
||||
sp3_vmaddr base, len;
|
||||
sp3_vmfill fill;
|
||||
void *fill_ctx;
|
||||
uint32_t *data;
|
||||
struct sp3_vma *prev, *next;
|
||||
} sp3_vma;
|
||||
|
||||
/// @brief Create a new VM that is empty.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_new(void);
|
||||
|
||||
/// @brief Create a new VM that has a sp3_vmfill callback.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx);
|
||||
|
||||
/// @brief Create a new VM from an array of words.
|
||||
/// @param base VM address to load array at.
|
||||
/// @param len Number of 32-bit words in the array.
|
||||
/// @param data Pointer to the array.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data);
|
||||
|
||||
/// @brief Find a VMA, optionally adding it.
|
||||
/// @param vm VM to search in.
|
||||
/// @param addr Address to search for.
|
||||
/// @param add Flag indicating whether a failure should result in adding a new VMA.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vma *sp3_vm_find(sp3_vma *vm, sp3_vmaddr addr, int add);
|
||||
|
||||
/// @brief Write a word to a VM.
|
||||
///
|
||||
SP3_EXPORT
|
||||
void sp3_vm_write(sp3_vma *vm, sp3_vmaddr addr, uint32_t val);
|
||||
|
||||
/// @brief Read a word from a VM.
|
||||
///
|
||||
SP3_EXPORT
|
||||
uint32_t sp3_vm_read(sp3_vma *vm, sp3_vmaddr addr);
|
||||
|
||||
/// @brief Probe VM for presence.
|
||||
/// @return 1 if the specified address is backed in the VM, 0 otherwise.
|
||||
///
|
||||
SP3_EXPORT
|
||||
int sp3_vm_present(sp3_vma *vm, sp3_vmaddr addr);
|
||||
|
||||
/// @brief Free a VM and all its storage.
|
||||
///
|
||||
SP3_EXPORT
|
||||
void sp3_vm_free(sp3_vma *vm);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,198 +0,0 @@
|
||||
//=====================================================================
|
||||
// Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
/// \author AMD Developer Tools Team
|
||||
/// \file
|
||||
///
|
||||
//=====================================================================
|
||||
|
||||
#ifndef SP3_H
|
||||
#define SP3_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "sp3-vm.h"
|
||||
#include "sp3-type.h"
|
||||
|
||||
/// @file sp3.h
|
||||
/// @brief sp3 API
|
||||
|
||||
/// @brief Get version of the sp3 library.
|
||||
///
|
||||
/// @return String containing the version number.
|
||||
///
|
||||
SP3_EXPORT const char *sp3_version(void);
|
||||
|
||||
/// @brief Create a new sp3 context.
|
||||
///
|
||||
SP3_EXPORT struct sp3_context *sp3_new(void);
|
||||
|
||||
/// @brief Set option for sp3.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param option Option name. Unknown options will raise an error.
|
||||
/// @param value Option value. NULL is used to represent value-less options.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_option(struct sp3_context *state, const char *option, const char *value);
|
||||
|
||||
/// @brief Parse a file into a context.
|
||||
///
|
||||
/// If 'file' is NULL, parse stdin.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file);
|
||||
|
||||
/// @brief Parse a string into a context.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string);
|
||||
|
||||
/// @brief Parse a file from the standard library into a context.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name);
|
||||
|
||||
/// @brief Call a sp3 function.
|
||||
///
|
||||
SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func);
|
||||
|
||||
/// @brief Call a sp3 CF clause.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param cffunc Name of clause to call. By convention, this is "main".
|
||||
///
|
||||
/// @return A compiled and linked shader. Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT struct sp3_shader *sp3_compile(struct sp3_context *state, const char *cffunc);
|
||||
|
||||
/// @brief Free a sp3_shader.
|
||||
///
|
||||
SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh);
|
||||
|
||||
/// @brief Get current ASIC name set for a context.
|
||||
///
|
||||
SP3_EXPORT const char *sp3_getasic(struct sp3_context *state);
|
||||
|
||||
/// @brief Set current ASIC name for a context.
|
||||
///
|
||||
SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip);
|
||||
|
||||
/// @brief Set global variable in context to an integer.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_int(struct sp3_context *state, const char *name, int value);
|
||||
|
||||
/// @brief Set global variable in context to an integer vector.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_intvec(struct sp3_context *state, const char *name, int size, const int *value);
|
||||
|
||||
/// @brief Set global variable in context to a float.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_float(struct sp3_context *state, const char *name, float value);
|
||||
|
||||
/// @brief Set global variable in context to a float vector.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_floatvec(struct sp3_context *state, const char *name, int size, const float *value);
|
||||
|
||||
/// @brief Set error message header.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str);
|
||||
|
||||
/// @brief Get ASIC metrics for the ASIC in current state.
|
||||
///
|
||||
/// Used by ELF tools to fill in some CAL fields.
|
||||
///
|
||||
SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name);
|
||||
|
||||
/// @brief Free a context allocated by sp3_new/open/parse.
|
||||
///
|
||||
SP3_EXPORT void sp3_close(struct sp3_context *state);
|
||||
|
||||
/// @brief Disassemble a shader.
|
||||
///
|
||||
/// This call is likely to change to something that will take a filled sp3_shader structure later on.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param bin Memory map with the opcodes (see sp3-vm.h).
|
||||
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
|
||||
/// @param name Name to give the disassembled shader.
|
||||
/// @param shader_type One of the SP3_SHTYPE_* constants.
|
||||
/// @param include Literal text to include in the CF clause (NULL includes nothing).
|
||||
/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set.
|
||||
/// @param flags A mask of SP3DIS_* flags.
|
||||
///
|
||||
/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT char *sp3_disasm(struct sp3_context *state, sp3_vma *bin, sp3_vmaddr base, const char *name, int shader_type, const char *include, unsigned max_len, unsigned flags);
|
||||
|
||||
/// @brief Disassemble a single shader instruction.
|
||||
///
|
||||
/// This call is likely to change to something that will take a filled sp3_shader structure later on.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction).
|
||||
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
|
||||
/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words).
|
||||
/// @param shader_type One of the SP3_SHTYPE_* constants.
|
||||
/// @param flags A mask of SP3DIS_* flags.
|
||||
///
|
||||
/// @return Shader disassembly as a string (allocated with malloc()). Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT char *sp3_disasm_inst(struct sp3_context *state, const unsigned inst[2], sp3_vmaddr base, sp3_vmaddr addr, int shader_type, unsigned flags);
|
||||
|
||||
/// @brief Parse a register stream.
|
||||
///
|
||||
/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants.
|
||||
///
|
||||
/// This call is likely to merge with sp3_disasm later on.
|
||||
///
|
||||
/// @param state sp3 context to fill with state.
|
||||
/// @param nregs Number of register entries.
|
||||
/// @param regs Register stream to parse.
|
||||
/// @param shader_type One of the SP3_SHTYPE_* constants.
|
||||
///
|
||||
SP3_EXPORT void sp3_setregs(struct sp3_context *state, unsigned nregs, const struct sp3_reg *regs, int shader_type);
|
||||
|
||||
|
||||
/// @brief Set shader comments
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param map Map of comments (0 for no comment, other values will be passed to the callback).
|
||||
/// @param f_top Callback returning comment to place above the opcode.
|
||||
/// @param f_right Callback returning comment to place to the right of the opcode.
|
||||
/// @param ctx Void pointer to pass to comment callbacks.
|
||||
///
|
||||
SP3_EXPORT void sp3_setcomments(struct sp3_context *state, sp3_vma *map, sp3_comment_cb f_top, sp3_comment_cb f_right, void *ctx);
|
||||
|
||||
/// @brief Set alternate shader entry points
|
||||
///
|
||||
/// Used for disassembly; this marks an additional location in memory
|
||||
/// (besides the start address) where shader code may be found. Generally
|
||||
/// required for jump tables and any case where the shader may perform
|
||||
/// indirect jumps to ensure that disassembly locates all shader
|
||||
/// instructions.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words).
|
||||
///
|
||||
SP3_EXPORT void sp3_setentrypoint(struct sp3_context *state, sp3_vmaddr addr);
|
||||
|
||||
/// @brief Clear alternate shader entry points
|
||||
///
|
||||
/// Clear all entry points previously set with sp3_setentrypoint.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
///
|
||||
SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state);
|
||||
|
||||
/// @brief Free memory allocated by sp3.
|
||||
///
|
||||
/// Windows DLLs that allocate memory have to free it. This function
|
||||
/// should be used to free the result of sp3_disasm, sp3_compile etc.
|
||||
///
|
||||
SP3_EXPORT void sp3_free(void *ptr);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user