Files
2025-09-05 10:32:44 -04:00

275 строки
13 KiB
C

////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_KERNEL_CODE_H
#define AMD_HSA_KERNEL_CODE_H
#include "amd_hsa_common.h"
#include "hsa.h"
// AMD Kernel Code Version Enumeration Values.
typedef uint32_t amd_kernel_code_version32_t;
enum amd_kernel_code_version_t {
AMD_KERNEL_CODE_VERSION_MAJOR = 1,
AMD_KERNEL_CODE_VERSION_MINOR = 1
};
// AMD Machine Kind Enumeration Values.
typedef uint16_t amd_machine_kind16_t;
enum amd_machine_kind_t {
AMD_MACHINE_KIND_UNDEFINED = 0,
AMD_MACHINE_KIND_AMDGPU = 1
};
// AMD Machine Version.
typedef uint16_t amd_machine_version16_t;
// AMD Float Round Mode Enumeration Values.
enum amd_float_round_mode_t {
AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
AMD_FLOAT_ROUND_MODE_ZERO = 3
};
// AMD Float Denorm Mode Enumeration Values.
enum amd_float_denorm_mode_t {
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
};
// AMD Compute Program Resource Register One.
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
enum amd_compute_pgm_rsrc_one_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FP16_OVFL, 26, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED0, 27, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_WGP_MODE, 29, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_MEM_ORDERED, 30, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FWD_PROGRESS, 31, 1),
};
// AMD System VGPR Workitem ID Enumeration Values.
enum amd_system_vgpr_workitem_id_t {
AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
};
// AMD Compute Program Resource Register Two.
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
enum amd_compute_pgm_rsrc_two_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
};
// AMD Element Byte Size Enumeration Values.
enum amd_element_byte_size_t {
AMD_ELEMENT_BYTE_SIZE_2 = 0,
AMD_ELEMENT_BYTE_SIZE_4 = 1,
AMD_ELEMENT_BYTE_SIZE_8 = 2,
AMD_ELEMENT_BYTE_SIZE_16 = 3
};
// AMD Kernel Code Properties.
typedef uint32_t amd_kernel_code_properties32_t;
enum amd_kernel_code_properties_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
};
// AMD Power Of Two Enumeration Values.
typedef uint8_t amd_powertwo8_t;
enum amd_powertwo_t {
AMD_POWERTWO_1 = 0,
AMD_POWERTWO_2 = 1,
AMD_POWERTWO_4 = 2,
AMD_POWERTWO_8 = 3,
AMD_POWERTWO_16 = 4,
AMD_POWERTWO_32 = 5,
AMD_POWERTWO_64 = 6,
AMD_POWERTWO_128 = 7,
AMD_POWERTWO_256 = 8
};
// AMD Enabled Control Directive Enumeration Values.
typedef uint64_t amd_enabled_control_directive64_t;
enum amd_enabled_control_directive_t {
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
};
// AMD Exception Kind Enumeration Values.
typedef uint16_t amd_exception_kind16_t;
enum amd_exception_kind_t {
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
AMD_EXCEPTION_KIND_OVERFLOW = 4,
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
AMD_EXCEPTION_KIND_INEXACT = 16
};
// AMD Control Directives.
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
amd_enabled_control_directive64_t enabled_control_directives;
uint16_t enable_break_exceptions;
uint16_t enable_detect_exceptions;
uint32_t max_dynamic_group_size;
uint64_t max_flat_grid_size;
uint32_t max_flat_workgroup_size;
uint8_t required_dim;
uint8_t reserved1[3];
uint64_t required_grid_size[3];
uint32_t required_workgroup_size[3];
uint8_t reserved2[60];
} amd_control_directives_t;
// AMD Kernel Code.
#define AMD_ISA_ALIGN_BYTES 256
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
amd_kernel_code_version32_t amd_kernel_code_version_major;
amd_kernel_code_version32_t amd_kernel_code_version_minor;
amd_machine_kind16_t amd_machine_kind;
amd_machine_version16_t amd_machine_version_major;
amd_machine_version16_t amd_machine_version_minor;
amd_machine_version16_t amd_machine_version_stepping;
int64_t kernel_code_entry_byte_offset;
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
uint64_t max_scratch_backing_memory_byte_size;
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
amd_kernel_code_properties32_t kernel_code_properties;
uint32_t workitem_private_segment_byte_size;
uint32_t workgroup_group_segment_byte_size;
uint32_t gds_segment_byte_size;
uint64_t kernarg_segment_byte_size;
uint32_t workgroup_fbarrier_count;
uint16_t wavefront_sgpr_count;
uint16_t workitem_vgpr_count;
uint16_t reserved_vgpr_first;
uint16_t reserved_vgpr_count;
uint16_t reserved_sgpr_first;
uint16_t reserved_sgpr_count;
uint16_t debug_wavefront_private_segment_offset_sgpr;
uint16_t debug_private_segment_buffer_sgpr;
amd_powertwo8_t kernarg_segment_alignment;
amd_powertwo8_t group_segment_alignment;
amd_powertwo8_t private_segment_alignment;
amd_powertwo8_t wavefront_size;
int32_t call_convention;
uint8_t reserved1[12];
uint64_t runtime_loader_kernel_symbol;
amd_control_directives_t control_directives;
} amd_kernel_code_t;
// TODO: this struct should be completely gone once debugger designs/implements
// Debugger APIs.
typedef struct amd_runtime_loader_debug_info_s {
const void* elf_raw;
size_t elf_size;
const char *kernel_name;
const void *owning_segment;
} amd_runtime_loader_debug_info_t;
#endif // AMD_HSA_KERNEL_CODE_H