2
0

Add 'projects/rocr-runtime/libhsakmt/include/impl/' from commit 'c34ec1e52fcb52da248c00207ebe646197ea9d3e'

git-subtree-dir: projects/rocr-runtime/libhsakmt/include/impl
git-subtree-mainline: 55f7d39fa5
git-subtree-split: c34ec1e52f
Este cometimento está contido em:
German Andryeyev
2026-01-15 15:54:37 -05:00
ascendente 55f7d39fa5 c34ec1e52f
cometimento 5319163521
28 ficheiros modificados com 19269 adições e 0 eliminações
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande Carregar diff
@@ -0,0 +1,91 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// The following set of header files provides definitions for AMD GPU
// Architecture:
// - amd_hsa_common.h
// - amd_hsa_elf.h
// - amd_hsa_kernel_code.h
// - amd_hsa_queue.h
// - amd_hsa_signal.h
//
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
// information.
#ifndef AMD_HSA_COMMON_H
#define AMD_HSA_COMMON_H
#include <stddef.h>
#include <stdint.h>
// Descriptive version of the HSA Application Binary Interface.
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
// Alignment attribute that specifies a minimum alignment (in bytes) for
// variables of the specified type.
#if defined(__GNUC__)
# define __ALIGNED__(x) __attribute__((aligned(x)))
#elif defined(_MSC_VER)
# define __ALIGNED__(x) __declspec(align(x))
#elif defined(RC_INVOKED)
# define __ALIGNED__(x)
#else
# error
#endif
// Creates enumeration entries for packed types. Enumeration entries include
// bit shift amount, bit width, and bit mask.
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
name##_SHIFT = (shift), \
name##_WIDTH = (width), \
name = (((1 << (width)) - 1) << (shift)) \
// Gets bits for specified mask from specified src packed instance.
#define AMD_HSA_BITS_GET(src, mask) \
((src & mask) >> mask ## _SHIFT) \
// Sets val bits for specified mask in specified dst packed instance.
#define AMD_HSA_BITS_SET(dst, mask, val) \
dst &= (~(1 << mask##_SHIFT) & ~mask); \
dst |= (((val) << mask##_SHIFT) & mask) \
#endif // AMD_HSA_COMMON_H
@@ -0,0 +1,467 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// Undefine the macro in case it is defined in the system elf.h.
#undef EM_AMDGPU
#ifndef AMD_HSA_ELF_H
#define AMD_HSA_ELF_H
// AMD GPU Specific ELF Header Enumeration Values.
//
// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
// code object V1 defintions which are not part of the LLVM header. Code object
// V1 was only supported by the Finalizer which is now deprecated and removed.
//
// TODO: Deprecate and remove V1 support and replace this header with using the
// LLVM header.
namespace ELF {
// Machine architectures
// See current registered ELF machine architectures at:
// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
enum {
EM_AMDGPU = 224, // AMD GPU architecture
};
// OS ABI identification.
enum {
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
};
// AMDGPU OS ABI Version identification.
enum {
// ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
// AMDGPU specific e_flags.
enum : unsigned {
// Processor selection mask for EF_AMDGPU_MACH_* values.
EF_AMDGPU_MACH = 0x0ff,
// Not specified processor.
EF_AMDGPU_MACH_NONE = 0x000,
// AMDGCN-based processors.
// clang-format off
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
EF_AMDGPU_MACH_AMDGCN_GFX1153 = 0x058,
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f,
// clang-format on
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
// Indicates if the trap handler is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
// Indicates if the "sramecc" target feature is enabled for all code
// contained in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
// XNACK is not supported.
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
// XNACK is any/default/unspecified.
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
// XNACK is off.
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
// XNACK is on.
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
// SRAMECC is not supported.
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
// SRAMECC is any/default/unspecified.
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
// SRAMECC is off.
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
// Generic target versioning. This is contained in the list byte of EFLAGS.
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
};
// ELF Relocation types for AMDGPU.
enum : unsigned {
R_AMDGPU_ABS32_LO = 1,
R_AMDGPU_ABS32_HI = 2,
R_AMDGPU_ABS64 = 3,
R_AMDGPU_ABS32 = 6,
R_AMDGPU_RELATIVE64 = 13,
};
} // end namespace ELF
// ELF Section Header Flag Enumeration Values.
#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
//
typedef enum {
AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
AMDGPU_HSA_SEGMENT_LAST,
} amdgpu_hsa_elf_segment_t;
// ELF Program Header Type Enumeration Values.
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
// ELF Symbol Type Enumeration Values.
#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
// ELF Symbol Binding Enumeration Values.
#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
// ELF Symbol Other Information Creation/Retrieval.
#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
typedef enum {
AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
} amdgpu_hsa_symbol_allocation_t;
// ELF Symbol Allocation Enumeration Values.
#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
typedef enum {
AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
AMDGPU_HSA_SYMBOL_FLAG_LAST,
} amdgpu_hsa_symbol_flag_t;
// ELF Symbol Flag Enumeration Values.
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
// Legacy/V1 AMD GPU Relocation Type Enumeration Values.
#define R_AMDGPU_V1_NONE 0
#define R_AMDGPU_V1_32_LOW 1
#define R_AMDGPU_V1_32_HIGH 2
#define R_AMDGPU_V1_64 3
#define R_AMDGPU_V1_INIT_SAMPLER 4
#define R_AMDGPU_V1_INIT_IMAGE 5
#define R_AMDGPU_V1_RELATIVE64 13
// AMD GPU Note Type Enumeration Values.
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
#define NT_AMD_HSA_HSAIL 2
#define NT_AMD_HSA_ISA_VERSION 3
#define NT_AMD_HSA_PRODUCER 4
#define NT_AMD_HSA_PRODUCER_OPTIONS 5
#define NT_AMD_HSA_EXTENSION 6
#define NT_AMD_HSA_ISA_NAME 11
/* AMDGPU snapshots of runtime, agent and queues state for use in core dump */
#define NT_AMDGPU_CORE_STATE 33
#define NT_AMD_HSA_HLDEBUG_DEBUG 101
#define NT_AMD_HSA_HLDEBUG_TARGET 102
// AMD GPU Metadata Kind Enumeration Values.
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
typedef enum {
AMDGPU_HSA_METADATA_KIND_NONE = 0,
AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
} amdgpu_hsa_metadata_kind_t;
// AMD GPU Sampler Coordinate Normalization Enumeration Values.
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
typedef enum {
AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
} amdgpu_hsa_sampler_coord_t;
// AMD GPU Sampler Filter Enumeration Values.
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
typedef enum {
AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
} amdgpu_hsa_sampler_filter_t;
// AMD GPU Sampler Addressing Enumeration Values.
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
typedef enum {
AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
} amdgpu_hsa_sampler_addressing_t;
// AMD GPU Sampler Descriptor.
typedef struct amdgpu_hsa_sampler_descriptor_s {
uint16_t size;
amdgpu_hsa_metadata_kind16_t kind;
amdgpu_hsa_sampler_coord8_t coord;
amdgpu_hsa_sampler_filter8_t filter;
amdgpu_hsa_sampler_addressing8_t addressing;
uint8_t reserved1;
} amdgpu_hsa_sampler_descriptor_t;
// AMD GPU Image Geometry Enumeration Values.
typedef uint8_t amdgpu_hsa_image_geometry8_t;
typedef enum {
AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
} amdgpu_hsa_image_geometry_t;
// AMD GPU Image Channel Order Enumeration Values.
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
typedef enum {
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
} amdgpu_hsa_image_channel_order_t;
// AMD GPU Image Channel Type Enumeration Values.
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
typedef enum {
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
} amdgpu_hsa_image_channel_type_t;
// AMD GPU Image Descriptor.
typedef struct amdgpu_hsa_image_descriptor_s {
uint16_t size;
amdgpu_hsa_metadata_kind16_t kind;
amdgpu_hsa_image_geometry8_t geometry;
amdgpu_hsa_image_channel_order8_t channel_order;
amdgpu_hsa_image_channel_type8_t channel_type;
uint8_t reserved1;
uint64_t width;
uint64_t height;
uint64_t depth;
uint64_t array;
} amdgpu_hsa_image_descriptor_t;
typedef struct amdgpu_hsa_note_code_object_version_s {
uint32_t major_version;
uint32_t minor_version;
} amdgpu_hsa_note_code_object_version_t;
typedef struct amdgpu_hsa_note_hsail_s {
uint32_t hsail_major_version;
uint32_t hsail_minor_version;
uint8_t profile;
uint8_t machine_model;
uint8_t default_float_round;
} amdgpu_hsa_note_hsail_t;
typedef struct amdgpu_hsa_note_isa_s {
uint16_t vendor_name_size;
uint16_t architecture_name_size;
uint32_t major;
uint32_t minor;
uint32_t stepping;
char vendor_and_architecture_name[1];
} amdgpu_hsa_note_isa_t;
typedef struct amdgpu_hsa_note_producer_s {
uint16_t producer_name_size;
uint16_t reserved;
uint32_t producer_major_version;
uint32_t producer_minor_version;
char producer_name[1];
} amdgpu_hsa_note_producer_t;
typedef struct amdgpu_hsa_note_producer_options_s {
uint16_t producer_options_size;
char producer_options[1];
} amdgpu_hsa_note_producer_options_t;
typedef enum {
AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
AMDGPU_HSA_RODATA_GLOBAL_AGENT,
AMDGPU_HSA_RODATA_READONLY_AGENT,
AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
AMDGPU_HSA_DATA_GLOBAL_AGENT,
AMDGPU_HSA_DATA_READONLY_AGENT,
AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
AMDGPU_HSA_BSS_GLOBAL_AGENT,
AMDGPU_HSA_BSS_READONLY_AGENT,
AMDGPU_HSA_SECTION_LAST,
} amdgpu_hsa_elf_section_t;
#endif // AMD_HSA_ELF_H
@@ -0,0 +1,270 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_KERNEL_CODE_H
#define AMD_HSA_KERNEL_CODE_H
#include "amd_hsa_common.h"
#include "hsa.h"
// AMD Kernel Code Version Enumeration Values.
typedef uint32_t amd_kernel_code_version32_t;
enum amd_kernel_code_version_t {
AMD_KERNEL_CODE_VERSION_MAJOR = 1,
AMD_KERNEL_CODE_VERSION_MINOR = 1
};
// AMD Machine Kind Enumeration Values.
typedef uint16_t amd_machine_kind16_t;
enum amd_machine_kind_t {
AMD_MACHINE_KIND_UNDEFINED = 0,
AMD_MACHINE_KIND_AMDGPU = 1
};
// AMD Machine Version.
typedef uint16_t amd_machine_version16_t;
// AMD Float Round Mode Enumeration Values.
enum amd_float_round_mode_t {
AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
AMD_FLOAT_ROUND_MODE_ZERO = 3
};
// AMD Float Denorm Mode Enumeration Values.
enum amd_float_denorm_mode_t {
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
};
// AMD Compute Program Resource Register One.
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
enum amd_compute_pgm_rsrc_one_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
};
// AMD System VGPR Workitem ID Enumeration Values.
enum amd_system_vgpr_workitem_id_t {
AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
};
// AMD Compute Program Resource Register Two.
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
enum amd_compute_pgm_rsrc_two_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
};
// AMD Element Byte Size Enumeration Values.
enum amd_element_byte_size_t {
AMD_ELEMENT_BYTE_SIZE_2 = 0,
AMD_ELEMENT_BYTE_SIZE_4 = 1,
AMD_ELEMENT_BYTE_SIZE_8 = 2,
AMD_ELEMENT_BYTE_SIZE_16 = 3
};
// AMD Kernel Code Properties.
typedef uint32_t amd_kernel_code_properties32_t;
enum amd_kernel_code_properties_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
};
// AMD Power Of Two Enumeration Values.
typedef uint8_t amd_powertwo8_t;
enum amd_powertwo_t {
AMD_POWERTWO_1 = 0,
AMD_POWERTWO_2 = 1,
AMD_POWERTWO_4 = 2,
AMD_POWERTWO_8 = 3,
AMD_POWERTWO_16 = 4,
AMD_POWERTWO_32 = 5,
AMD_POWERTWO_64 = 6,
AMD_POWERTWO_128 = 7,
AMD_POWERTWO_256 = 8
};
// AMD Enabled Control Directive Enumeration Values.
typedef uint64_t amd_enabled_control_directive64_t;
enum amd_enabled_control_directive_t {
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
};
// AMD Exception Kind Enumeration Values.
typedef uint16_t amd_exception_kind16_t;
enum amd_exception_kind_t {
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
AMD_EXCEPTION_KIND_OVERFLOW = 4,
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
AMD_EXCEPTION_KIND_INEXACT = 16
};
// AMD Control Directives.
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
amd_enabled_control_directive64_t enabled_control_directives;
uint16_t enable_break_exceptions;
uint16_t enable_detect_exceptions;
uint32_t max_dynamic_group_size;
uint64_t max_flat_grid_size;
uint32_t max_flat_workgroup_size;
uint8_t required_dim;
uint8_t reserved1[3];
uint64_t required_grid_size[3];
uint32_t required_workgroup_size[3];
uint8_t reserved2[60];
} amd_control_directives_t;
// AMD Kernel Code.
#define AMD_ISA_ALIGN_BYTES 256
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
amd_kernel_code_version32_t amd_kernel_code_version_major;
amd_kernel_code_version32_t amd_kernel_code_version_minor;
amd_machine_kind16_t amd_machine_kind;
amd_machine_version16_t amd_machine_version_major;
amd_machine_version16_t amd_machine_version_minor;
amd_machine_version16_t amd_machine_version_stepping;
int64_t kernel_code_entry_byte_offset;
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
uint64_t max_scratch_backing_memory_byte_size;
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
amd_kernel_code_properties32_t kernel_code_properties;
uint32_t workitem_private_segment_byte_size;
uint32_t workgroup_group_segment_byte_size;
uint32_t gds_segment_byte_size;
uint64_t kernarg_segment_byte_size;
uint32_t workgroup_fbarrier_count;
uint16_t wavefront_sgpr_count;
uint16_t workitem_vgpr_count;
uint16_t reserved_vgpr_first;
uint16_t reserved_vgpr_count;
uint16_t reserved_sgpr_first;
uint16_t reserved_sgpr_count;
uint16_t debug_wavefront_private_segment_offset_sgpr;
uint16_t debug_private_segment_buffer_sgpr;
amd_powertwo8_t kernarg_segment_alignment;
amd_powertwo8_t group_segment_alignment;
amd_powertwo8_t private_segment_alignment;
amd_powertwo8_t wavefront_size;
int32_t call_convention;
uint8_t reserved1[12];
uint64_t runtime_loader_kernel_symbol;
amd_control_directives_t control_directives;
} amd_kernel_code_t;
// TODO: this struct should be completely gone once debugger designs/implements
// Debugger APIs.
typedef struct amd_runtime_loader_debug_info_s {
const void* elf_raw;
size_t elf_size;
const char *kernel_name;
const void *owning_segment;
} amd_runtime_loader_debug_info_t;
#endif // AMD_HSA_KERNEL_CODE_H
@@ -0,0 +1,154 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_QUEUE_H
#define AMD_HSA_QUEUE_H
#include "amd_hsa_common.h"
#include "hsa.h"
// AMD Queue Properties.
typedef uint32_t amd_queue_properties32_t;
enum amd_queue_properties_t {
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
};
// AMD Queue.
#define AMD_QUEUE_ALIGN_BYTES 64
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
// AMD Queue Capabilities.
typedef uint32_t amd_queue_capabilities32_t;
enum amd_queue_capabilities_t {
/* This version of CP FW supports dual-scratch and async-reclaim */
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM, 0, 1),
/*
* This version of ROCr supports async-reclaim and CP FW may access the
* V2 fields.
*/
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM, 1, 1),
};
/* This is the original amd_queue_t structure. The definition is only kept
* for reference purposes. This structure should not be used. */
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
hsa_queue_t hsa_queue;
uint32_t caps;
uint32_t reserved1[3];
volatile uint64_t write_dispatch_id;
uint32_t group_segment_aperture_base_hi;
uint32_t private_segment_aperture_base_hi;
uint32_t max_cu_id;
uint32_t max_wave_id;
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
volatile uint32_t legacy_doorbell_lock;
uint32_t reserved2[9];
volatile uint64_t read_dispatch_id;
uint32_t read_dispatch_id_field_base_byte_offset;
uint32_t compute_tmpring_size;
uint32_t scratch_resource_descriptor[4];
uint64_t scratch_backing_memory_location;
uint32_t reserved3[2];
uint32_t scratch_wave64_lane_byte_size;
amd_queue_properties32_t queue_properties;
uint32_t reserved4[2];
hsa_signal_t queue_inactive_signal;
uint32_t reserved5[14];
} amd_queue_t;
/*
* AMD_QUEUE Version 2
* amd_queue_v2_t is backwards compatible with amd_queue_t structure and can
* be used with previous versions of CP FW. The added fields tagged as V2 are
* ignored when running previous versions of CP FW.
* CP FW will not try to access elements beyond the original 64-bytes
* (sizeof(amd_queue_t)) unless the AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM bit is set.
*/
#define MAX_NUM_XCC 128
typedef struct scratch_last_used_index_xcc_s {
volatile uint64_t main;
volatile uint64_t alt;
} scratch_last_used_index_xcc_t;
typedef struct AMD_QUEUE_ALIGN amd_queue_v2_s {
hsa_queue_t hsa_queue;
uint32_t caps;
uint32_t reserved1[3];
volatile uint64_t write_dispatch_id;
uint32_t group_segment_aperture_base_hi;
uint32_t private_segment_aperture_base_hi;
uint32_t max_cu_id;
uint32_t max_wave_id;
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
volatile uint32_t legacy_doorbell_lock;
uint32_t reserved2[9];
volatile uint64_t read_dispatch_id;
uint32_t read_dispatch_id_field_base_byte_offset;
uint32_t compute_tmpring_size;
uint32_t scratch_resource_descriptor[4];
uint64_t scratch_backing_memory_location;
uint64_t scratch_backing_memory_byte_size;
uint32_t scratch_wave64_lane_byte_size;
amd_queue_properties32_t queue_properties;
volatile uint64_t scratch_max_use_index; /* V2 */
hsa_signal_t queue_inactive_signal;
volatile uint64_t alt_scratch_max_use_index; /* V2 */
uint32_t alt_scratch_resource_descriptor[4]; /* V2 */
uint64_t alt_scratch_backing_memory_location; /* V2 */
uint32_t alt_scratch_dispatch_limit_x; /* V2 */
uint32_t alt_scratch_dispatch_limit_y; /* V2 */
uint32_t alt_scratch_dispatch_limit_z; /* V2 */
uint32_t alt_scratch_wave64_lane_byte_size; /* V2 */
uint32_t alt_compute_tmpring_size; /* V2 */
uint32_t reserved5;
scratch_last_used_index_xcc_t scratch_last_used_index[MAX_NUM_XCC];
} amd_queue_v2_t;
#endif // AMD_HSA_QUEUE_H
@@ -0,0 +1,79 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef AMD_HSA_SIGNAL_H
#define AMD_HSA_SIGNAL_H
#include "amd_hsa_common.h"
#include "amd_hsa_queue.h"
// AMD Signal Kind Enumeration Values.
typedef int64_t amd_signal_kind64_t;
enum amd_signal_kind_t {
AMD_SIGNAL_KIND_INVALID = 0,
AMD_SIGNAL_KIND_USER = 1,
AMD_SIGNAL_KIND_DOORBELL = -1,
AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
};
// AMD Signal.
#define AMD_SIGNAL_ALIGN_BYTES 64
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
amd_signal_kind64_t kind;
union {
volatile int64_t value;
volatile uint64_t* hardware_doorbell_ptr;
};
uint64_t event_mailbox_ptr;
uint32_t event_id;
uint32_t reserved1;
uint64_t start_ts;
uint64_t end_ts;
union {
amd_queue_v2_t* queue_ptr;
uint64_t reserved2;
};
uint32_t reserved3[2];
} amd_signal_t;
#endif // AMD_HSA_SIGNAL_H
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande Carregar diff
@@ -0,0 +1,97 @@
/*
* Copyright © Advanced Micro Devices, Inc., or its affiliates.
*
* SPDX-License-Identifier: MIT
*/
#ifndef HSA_RUNTIME_AMD_TOOL_EVENTS_H_
#define HSA_RUNTIME_AMD_TOOL_EVENTS_H_
// Insert license header
#include <stddef.h>
#include <stdint.h>
#include "hsa.h"
typedef enum {
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE = 0,
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE =
(1 << 0), // This scratch allocation is only valid for 1 dispatch.
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT =
(1 << 1), // Used alternate scratch instead of main scratch
} hsa_amd_event_scratch_alloc_flag_t;
typedef enum {
HSA_AMD_TOOL_EVENT_MIN = 0,
// Scratch memory tracking
HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START,
HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END,
HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START,
HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END,
HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START,
HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END,
// Add new events above ^
HSA_AMD_TOOL_EVENT_MAX
} hsa_amd_tool_event_kind_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
} hsa_amd_tool_event_none_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
const hsa_queue_t* queue;
hsa_amd_event_scratch_alloc_flag_t flags;
uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
} hsa_amd_event_scratch_alloc_start_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
const hsa_queue_t* queue;
hsa_amd_event_scratch_alloc_flag_t flags;
uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
size_t size; // Amount of scratch allocated - in bytes
size_t num_slots; // limit of number of waves
} hsa_amd_event_scratch_alloc_end_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
const hsa_queue_t* queue;
hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_free_start_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
const hsa_queue_t* queue;
hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_free_end_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
const hsa_queue_t* queue;
hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_async_reclaim_start_t;
typedef struct {
hsa_amd_tool_event_kind_t kind;
const hsa_queue_t* queue;
hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_async_reclaim_end_t;
typedef union {
const hsa_amd_tool_event_none_t* none;
const hsa_amd_event_scratch_alloc_start_t* scratch_alloc_start;
const hsa_amd_event_scratch_alloc_end_t* scratch_alloc_end;
const hsa_amd_event_scratch_free_start_t* scratch_free_start;
const hsa_amd_event_scratch_free_end_t* scratch_free_end;
const hsa_amd_event_scratch_async_reclaim_start_t* scratch_async_reclaim_start;
const hsa_amd_event_scratch_async_reclaim_end_t* scratch_async_reclaim_end;
} hsa_amd_tool_event_t;
typedef hsa_status_t (*hsa_amd_tool_event)(hsa_amd_tool_event_t);
#endif
@@ -0,0 +1,587 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
#define HSA_RUNTIME_INC_HSA_API_TRACE_H
#include "hsa.h"
#include "hsa_api_trace_version.h"
#ifdef AMD_INTERNAL_BUILD
#include "hsa_ext_image.h"
#include "hsa_ext_amd.h"
#include "hsa_ext_finalize.h"
#include "hsa_amd_tool.h"
#include "hsa_ven_amd_pc_sampling.h"
#else
#include "inc/hsa_ext_image.h"
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_finalize.h"
#include "inc/hsa_amd_tool.h"
#include "inc/hsa_ven_amd_pc_sampling.h"
#endif
#include <string.h>
#include <assert.h>
#include <stddef.h>
// Table MAJOR_VERSION and STEP_VERSION defines have moved to hsa_api_trace_version.h
// Min function used to copy Api Tables
static inline uint32_t Min(const uint32_t a, const uint32_t b) {
return (a > b) ? b : a;
}
// Declarations of APIs intended for use only by tools.
// An AQL packet that can be put in an intercept queue to cause a callback to
// be invoked when the packet is about to be submitted to the underlying
// hardware queue. These packets are not copied to the underlying hardware
// queue. These packets should come immediately before the regular AQL packet
// they relate to. This implies that packet rewriters should always keep these
// packets adjacent to the regular AQL packet that follows them.
const uint32_t AMD_AQL_FORMAT_INTERCEPT_MARKER = 0xFE;
struct amd_aql_intercept_marker_s;
// When an intercept queue is processing rewritten packets to put them on the
// underlying hardware queue, if it encounters a
// AMD_AQL_FORMAT_INTERCEPT_MARKER vendor AQL packet it will call the following
// handler. packet points to the packet, queue is the underlying hardware
// queue, and packet_id is the packet id of the next packet to be put on the
// underlying hardware queue. The intercept queue does not put these packets
// onto the underlying hardware queue.
typedef void (*amd_intercept_marker_handler)(const struct amd_aql_intercept_marker_s* packet,
hsa_queue_t* queue, uint64_t packet_id);
// An AQL vendor packet used by the intercept queue to mark the following
// packet. The callback will be invoked to allow a tool to know where in the
// underlying hardware queue the following packet will be placed. user_data can
// be used to hold any data useful to the tool.
typedef struct amd_aql_intercept_marker_s {
uint16_t header; // Must have a packet type of HSA_PACKET_TYPE_VENDOR_SPECIFIC.
uint8_t format; // Must be AMD_AQL_FORMAT_INTERCEPT_MARKER.
uint8_t reserved[5]; // Must be 0.
#ifdef HSA_LARGE_MODEL
amd_intercept_marker_handler callback;
#elif defined HSA_LITTLE_ENDIAN
amd_intercept_marker_handler callback;
uint32_t reserved1; // Must be 0.
#else
uint32_t reserved1; // Must be 0.
amd_intercept_marker_handler callback;
#endif
uint64_t user_data[6];
} amd_aql_intercept_marker_t;
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);
typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
uint64_t user_pkt_index, void* data,
hsa_amd_queue_intercept_packet_writer writer);
hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
hsa_amd_queue_intercept_handler callback,
void* user_data);
hsa_status_t hsa_amd_queue_intercept_create(
hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
void* data);
hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
void* user_data);
// Structure of Version used to identify an instance of Api table
// Must be the first member (offsetof == 0) of all API tables.
// This is the root of the table passing ABI.
struct ApiTableVersion {
uint32_t major_id;
uint32_t minor_id;
uint32_t step_id;
uint32_t reserved;
};
struct ToolsApiTable {
ApiTableVersion version;
hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_start_fn;
hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_end_fn;
hsa_amd_tool_event hsa_amd_tool_scratch_event_free_start_fn;
hsa_amd_tool_event hsa_amd_tool_scratch_event_free_end_fn;
hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_start_fn;
hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_end_fn;
};
// Table to export HSA Finalizer Extension Apis
struct FinalizerExtTable {
ApiTableVersion version;
decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
};
// Table to export HSA Image Extension Apis
struct ImageExtTable {
ApiTableVersion version;
decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
decltype(hsa_ext_sampler_create_v2)* hsa_ext_sampler_create_v2_fn;
};
// Table to export HSA PC Sampling Extension Apis
struct PcSamplingExtTable {
ApiTableVersion version;
decltype(hsa_ven_amd_pcs_iterate_configuration)* hsa_ven_amd_pcs_iterate_configuration_fn;
decltype(hsa_ven_amd_pcs_create)* hsa_ven_amd_pcs_create_fn;
decltype(hsa_ven_amd_pcs_create_from_id)* hsa_ven_amd_pcs_create_from_id_fn;
decltype(hsa_ven_amd_pcs_destroy)* hsa_ven_amd_pcs_destroy_fn;
decltype(hsa_ven_amd_pcs_start)* hsa_ven_amd_pcs_start_fn;
decltype(hsa_ven_amd_pcs_stop)* hsa_ven_amd_pcs_stop_fn;
decltype(hsa_ven_amd_pcs_flush)* hsa_ven_amd_pcs_flush_fn;
};
// Table to export AMD Extension Apis
struct AmdExtTable {
ApiTableVersion version;
decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
decltype(hsa_amd_memory_async_copy_on_engine)* hsa_amd_memory_async_copy_on_engine_fn;
decltype(hsa_amd_memory_copy_engine_status)* hsa_amd_memory_copy_engine_status_fn;
decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn;
decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn;
decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn;
decltype(hsa_amd_vmem_address_reserve)* hsa_amd_vmem_address_reserve_fn;
decltype(hsa_amd_vmem_address_free)* hsa_amd_vmem_address_free_fn;
decltype(hsa_amd_vmem_handle_create)* hsa_amd_vmem_handle_create_fn;
decltype(hsa_amd_vmem_handle_release)* hsa_amd_vmem_handle_release_fn;
decltype(hsa_amd_vmem_map)* hsa_amd_vmem_map_fn;
decltype(hsa_amd_vmem_unmap)* hsa_amd_vmem_unmap_fn;
decltype(hsa_amd_vmem_set_access)* hsa_amd_vmem_set_access_fn;
decltype(hsa_amd_vmem_get_access)* hsa_amd_vmem_get_access_fn;
decltype(hsa_amd_vmem_export_shareable_handle)* hsa_amd_vmem_export_shareable_handle_fn;
decltype(hsa_amd_vmem_import_shareable_handle)* hsa_amd_vmem_import_shareable_handle_fn;
decltype(hsa_amd_vmem_retain_alloc_handle)* hsa_amd_vmem_retain_alloc_handle_fn;
decltype(hsa_amd_vmem_get_alloc_properties_from_handle)*
hsa_amd_vmem_get_alloc_properties_from_handle_fn;
decltype(hsa_amd_agent_set_async_scratch_limit)* hsa_amd_agent_set_async_scratch_limit_fn;
decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn;
decltype(hsa_amd_ais_file_write)* hsa_amd_ais_file_write_fn;
decltype(hsa_amd_ais_file_read)* hsa_amd_ais_file_read_fn;
};
// Table to export HSA Core Runtime Apis
struct CoreApiTable {
ApiTableVersion version;
decltype(hsa_init)* hsa_init_fn;
decltype(hsa_shut_down)* hsa_shut_down_fn;
decltype(hsa_system_get_info)* hsa_system_get_info_fn;
decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
decltype(hsa_queue_create)* hsa_queue_create_fn;
decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;
decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
decltype(hsa_region_get_info)* hsa_region_get_info_fn;
decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
decltype(hsa_memory_register)* hsa_memory_register_fn;
decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
decltype(hsa_memory_free)* hsa_memory_free_fn;
decltype(hsa_memory_copy)* hsa_memory_copy_fn;
decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
decltype(hsa_signal_create)* hsa_signal_create_fn;
decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;
//===--- Instruction Set Architecture -----------------------------------===//
decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
// Deprecated since v1.1.
decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
// Deprecated since v1.1.
decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
//===--- Code Objects (deprecated) --------------------------------------===//
// Deprecated since v1.1.
decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
// Deprecated since v1.1.
decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
// Deprecated since v1.1.
decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
// Deprecated since v1.1.
decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
// Deprecated since v1.1.
decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
// Deprecated since v1.1.
decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
// Deprecated since v1.1.
decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
//===--- Executable -----------------------------------------------------===//
// Deprecated since v1.1.
decltype(hsa_executable_create)* hsa_executable_create_fn;
decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
// Deprecated since v1.1.
decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
decltype(hsa_executable_global_variable_define)*
hsa_executable_global_variable_define_fn;
decltype(hsa_executable_agent_global_variable_define)*
hsa_executable_agent_global_variable_define_fn;
decltype(hsa_executable_readonly_variable_define)*
hsa_executable_readonly_variable_define_fn;
decltype(hsa_executable_validate)* hsa_executable_validate_fn;
// Deprecated since v1.1.
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
// Deprecated since v1.1.
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
//===--- Runtime Notifications ------------------------------------------===//
decltype(hsa_status_string)* hsa_status_string_fn;
// Start HSA v1.1 additions
decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;
//===--- Instruction Set Architecture - HSA v1.1 additions --------------===//
decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;
//===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//
// Deprecated since v1.1.
decltype(hsa_code_object_get_symbol_from_name)*
hsa_code_object_get_symbol_from_name_fn;
//===--- Executable - HSA v1.1 additions --------------------------------===//
decltype(hsa_code_object_reader_create_from_file)*
hsa_code_object_reader_create_from_file_fn;
decltype(hsa_code_object_reader_create_from_memory)*
hsa_code_object_reader_create_from_memory_fn;
decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
decltype(hsa_executable_load_program_code_object)*
hsa_executable_load_program_code_object_fn;
decltype(hsa_executable_load_agent_code_object)*
hsa_executable_load_agent_code_object_fn;
decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
decltype(hsa_executable_get_symbol_by_name)*
hsa_executable_get_symbol_by_name_fn;
decltype(hsa_executable_iterate_agent_symbols)*
hsa_executable_iterate_agent_symbols_fn;
decltype(hsa_executable_iterate_program_symbols)*
hsa_executable_iterate_program_symbols_fn;
};
// Table to export HSA Apis from Core Runtime, Amd Extensions
// Finalizer and Images
struct HsaApiTable {
// Version of Hsa Api Table
ApiTableVersion version;
// Table of function pointers to HSA Core Runtime
CoreApiTable* core_;
// Table of function pointers to AMD extensions
AmdExtTable* amd_ext_;
// Table of function pointers to HSA Finalizer Extension
FinalizerExtTable* finalizer_ext_;
// Table of function pointers to HSA Image Extension
ImageExtTable* image_ext_;
// Table of function pointers for tools to use
ToolsApiTable* tools_;
// Table of function pointers to AMD PC Sampling Extension
PcSamplingExtTable* pc_sampling_ext_;
};
// Structure containing instances of different api tables
struct HsaApiTableContainer {
HsaApiTable root;
CoreApiTable core;
AmdExtTable amd_ext;
FinalizerExtTable finalizer_ext;
ImageExtTable image_ext;
ToolsApiTable tools;
PcSamplingExtTable pc_sampling_ext;
// Default initialization of a container instance
HsaApiTableContainer() {
root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
root.version.minor_id = sizeof(HsaApiTable);
root.version.step_id = HSA_API_TABLE_STEP_VERSION;
core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
core.version.minor_id = sizeof(CoreApiTable);
core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
root.core_ = &core;
amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
amd_ext.version.minor_id = sizeof(AmdExtTable);
amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
root.amd_ext_ = &amd_ext;
finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
root.finalizer_ext_ = &finalizer_ext;
image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
image_ext.version.minor_id = sizeof(ImageExtTable);
image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
root.image_ext_ = &image_ext;
tools.version.major_id = HSA_TOOLS_API_TABLE_MAJOR_VERSION;
tools.version.minor_id = sizeof(ToolsApiTable);
tools.version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;
root.tools_ = &tools;
pc_sampling_ext.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
pc_sampling_ext.version.minor_id = sizeof(PcSamplingExtTable);
pc_sampling_ext.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;
root.pc_sampling_ext_ = &pc_sampling_ext;
}
};
// Api to copy function pointers of a table
static
void inline copyApi(void* src, void* dest, size_t size) {
assert(size >= sizeof(ApiTableVersion));
memcpy((char*)src + sizeof(ApiTableVersion),
(char*)dest + sizeof(ApiTableVersion),
(size - sizeof(ApiTableVersion)));
}
// Copy Api child tables if valid.
static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
if (src->major_id && (dest->major_id == src->major_id)) {
dest->step_id = src->step_id;
dest->minor_id = Min(dest->minor_id, src->minor_id);
copyApi(dest, src, dest->minor_id);
} else {
dest->major_id = 0;
dest->minor_id = 0;
dest->step_id = 0;
}
}
// Copy constructor for all Api tables. The function assumes the
// user has initialized an instance of tables container correctly
// for the Major, Minor and Stepping Ids of Root and Child Api tables.
// The function will overwrite the value of Minor Id by taking the
// minimum of source and destination parameters. It will also overwrite
// the stepping Id with value from source parameter.
static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
// Verify Major Id of source and destination tables match
if (dest->version.major_id != src->version.major_id) {
dest->version.major_id = 0;
dest->version.minor_id = 0;
dest->version.step_id = 0;
return;
}
// Initialize the stepping id and minor id of root table. For the
// minor id which encodes struct size, take the minimum of source
// and destination parameters
dest->version.step_id = src->version.step_id;
dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);
// Copy child tables if present
if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
copyElement(&dest->core_->version, &src->core_->version);
if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
copyElement(&dest->image_ext_->version, &src->image_ext_->version);
if ((offsetof(HsaApiTable, tools_) < dest->version.minor_id))
copyElement(&dest->tools_->version, &src->tools_->version);
if ((offsetof(HsaApiTable, pc_sampling_ext_) < dest->version.minor_id))
copyElement(&dest->pc_sampling_ext_->version, &src->pc_sampling_ext_->version);
}
#endif
@@ -0,0 +1,70 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
#define HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
// CODE IN THIS FILE **MUST** BE C-COMPATIBLE
// Major Ids of the Api tables exported by Hsa Core Runtime
#define HSA_API_TABLE_MAJOR_VERSION 0x03
#define HSA_CORE_API_TABLE_MAJOR_VERSION 0x02
#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x02
#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x02
#define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x02
#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01
#define HSA_TOOLS_API_TABLE_MAJOR_VERSION 0x01
#define HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION 0x01
// Step Ids of the Api tables exported by Hsa Core Runtime
#define HSA_API_TABLE_STEP_VERSION 0x01
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x08
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
#define HSA_IMAGE_EXT_API_TABLE_STEP_VERSION HSA_IMAGE_API_TABLE_STEP_VERSION
#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00
#define HSA_TOOLS_API_TABLE_STEP_VERSION 0x00
#define HSA_PC_SAMPLING_API_TABLE_STEP_VERSION 0x00
#endif // HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande Carregar diff
@@ -0,0 +1,531 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
#include "hsa.h"
#undef HSA_API
#ifdef HSA_EXPORT_FINALIZER
#define HSA_API HSA_API_EXPORT
#else
#define HSA_API HSA_API_IMPORT
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
struct BrigModuleHeader;
typedef struct BrigModuleHeader* BrigModule_t;
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
* @{
*/
/**
* @brief Enumeration constants added to ::hsa_status_t by this extension.
*/
enum {
/**
* The HSAIL program is invalid.
*/
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
/**
* The HSAIL module is invalid.
*/
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
/**
* Machine model or profile of the HSAIL module do not match the machine model
* or profile of the HSAIL program.
*/
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
/**
* The HSAIL module is already a part of the HSAIL program.
*/
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
/**
* Compatibility mismatch between symbol declaration and symbol definition.
*/
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
/**
* The finalization encountered an error while finalizing a kernel or
* indirect function.
*/
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
/**
* Mismatch between a directive in the control directive structure and in
* the HSAIL kernel.
*/
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
};
/** @} */
/** \defgroup ext-alt-finalizer-program Finalization Program
* @{
*/
/**
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
* the definition of the BrigModule_t type.
*/
typedef BrigModule_t hsa_ext_module_t;
/**
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
* modules that collectively define functions and variables used by kernels and
* indirect functions.
*/
typedef struct hsa_ext_program_s {
/**
* Opaque handle.
*/
uint64_t handle;
} hsa_ext_program_t;
/**
* @brief Create an empty HSAIL program.
*
* @param[in] machine_model Machine model used in the HSAIL program.
*
* @param[in] profile Profile used in the HSAIL program.
*
* @param[in] default_float_rounding_mode Default float rounding mode used in
* the HSAIL program.
*
* @param[in] options Vendor-specific options. May be NULL.
*
* @param[out] program Memory location where the HSA runtime stores the newly
* created HSAIL program handle.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
* resources required for the operation.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
* @p program is NULL.
*/
hsa_status_t HSA_API hsa_ext_program_create(
hsa_machine_model_t machine_model,
hsa_profile_t profile,
hsa_default_float_rounding_mode_t default_float_rounding_mode,
const char *options,
hsa_ext_program_t *program);
/**
* @brief Destroy a HSAIL program.
*
* @details The HSAIL program handle becomes invalid after it has been
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
* still valid after the HSAIL program has been destroyed, and can be used as
* intended. Resources allocated outside and associated with the HSAIL program
* (such as HSAIL modules that are added to the HSAIL program) can be released
* after the finalization program has been destroyed.
*
* @param[in] program HSAIL program.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
* invalid.
*/
hsa_status_t HSA_API hsa_ext_program_destroy(
hsa_ext_program_t program);
/**
* @brief Add a HSAIL module to an existing HSAIL program.
*
* @details The HSA runtime does not perform a deep copy of the HSAIL module
* upon addition. Instead, it stores a pointer to the HSAIL module. The
* ownership of the HSAIL module belongs to the application, which must ensure
* that @p module is not released before destroying the HSAIL program.
*
* The HSAIL module is successfully added to the HSAIL program if @p module is
* valid, if all the declarations and definitions for the same symbol are
* compatible, and if @p module specify machine model and profile that matches
* the HSAIL program.
*
* @param[in] program HSAIL program.
*
* @param[in] module HSAIL module. The application can add the same HSAIL module
* to @p program at most once. The HSAIL module must specify the same machine
* model and profile as @p program. If the floating-mode rounding mode of @p
* module is not default, then it should match that of @p program.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
* resources required for the operation.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
* module does not match machine model of @p program, or the profile of @p
* module does not match profile of @p program.
*
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
* already a part of the HSAIL program.
*
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
* definition compatibility mismatch. See the symbol compatibility rules in the
* HSA Programming Reference Manual.
*/
hsa_status_t HSA_API hsa_ext_program_add_module(
hsa_ext_program_t program,
hsa_ext_module_t module);
/**
* @brief Iterate over the HSAIL modules in a program, and invoke an
* application-defined callback on every iteration.
*
* @param[in] program HSAIL program.
*
* @param[in] callback Callback to be invoked once per HSAIL module in the
* program. The HSA runtime passes three arguments to the callback: the program,
* a HSAIL module, and the application data. If @p callback returns a status
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
* stops and ::hsa_ext_program_iterate_modules returns that status value.
*
* @param[in] data Application data that is passed to @p callback on every
* iteration. May be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
*/
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
hsa_ext_program_t program,
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
void* data),
void* data);
/**
* @brief HSAIL program attributes.
*/
typedef enum {
/**
* Machine model specified when the HSAIL program was created. The type
* of this attribute is ::hsa_machine_model_t.
*/
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
/**
* Profile specified when the HSAIL program was created. The type of
* this attribute is ::hsa_profile_t.
*/
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
/**
* Default float rounding mode specified when the HSAIL program was
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
*/
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
} hsa_ext_program_info_t;
/**
* @brief Get the current value of an attribute for a given HSAIL program.
*
* @param[in] program HSAIL program.
*
* @param[in] attribute Attribute to query.
*
* @param[out] value Pointer to an application-allocated buffer where to store
* the value of the attribute. If the buffer passed by the application is not
* large enough to hold the value of @p attribute, the behaviour is undefined.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
* HSAIL program attribute, or @p value is NULL.
*/
hsa_status_t HSA_API hsa_ext_program_get_info(
hsa_ext_program_t program,
hsa_ext_program_info_t attribute,
void *value);
/**
* @brief Finalizer-determined call convention.
*/
typedef enum {
/**
* Finalizer-determined call convention.
*/
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
} hsa_ext_finalizer_call_convention_t;
/**
* @brief Control directives specify low-level information about the
* finalization process.
*/
typedef struct hsa_ext_control_directives_s {
/**
* Bitset indicating which control directives are enabled. The bit assigned to
* a control directive is determined by the corresponding value in
* BrigControlDirective.
*
* If a control directive is disabled, its corresponding field value (if any)
* must be 0. Control directives that are only present or absent (such as
* partial workgroups) have no corresponding field as the presence of the bit
* in this mask is sufficient.
*/
uint64_t control_directives_mask;
/**
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
* assigned to an HSAIL exception is determined by the corresponding value
* in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
* control directive, the finalizer uses the union of the two masks.
*/
uint16_t break_exceptions_mask;
/**
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
* bit assigned to an HSAIL exception is determined by the corresponding value
* in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
* control directive, the finalizer uses the union of the two masks.
*/
uint16_t detect_exceptions_mask;
/**
* Maximum size (in bytes) of dynamic group memory that will be allocated by
* the application for any dispatch of the kernel. If the kernel contains a
* maxdynamicsize control directive, the two values should match.
*/
uint32_t max_dynamic_group_size;
/**
* Maximum number of grid work-items that will be used by the application to
* launch the kernel. If the kernel contains a maxflatgridsize control
* directive, the value of @a max_flat_grid_size must not be greater than the
* value of the directive, and takes precedence.
*
* The value specified for maximum absolute grid size must be greater than or
* equal to the product of the values specified by @a required_grid_size.
*
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
* control_directives_mask, this field must be greater than 0.
*/
uint64_t max_flat_grid_size;
/**
* Maximum number of work-group work-items that will be used by the
* application to launch the kernel. If the kernel contains a
* maxflatworkgroupsize control directive, the value of @a
* max_flat_workgroup_size must not be greater than the value of the
* directive, and takes precedence.
*
* The value specified for maximum absolute grid size must be greater than or
* equal to the product of the values specified by @a required_workgroup_size.
*
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
* control_directives_mask, this field must be greater than 0.
*/
uint32_t max_flat_workgroup_size;
/**
* Reserved. Must be 0.
*/
uint32_t reserved1;
/**
* Grid size that will be used by the application in any dispatch of the
* kernel. If the kernel contains a requiredgridsize control directive, the
* dimensions should match.
*
* The specified grid size must be consistent with @a required_workgroup_size
* and @a required_dim. Also, the product of the three dimensions must not
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
* only if all the corresponding control directives are enabled.
*
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
* control_directives_mask, the three dimension values must be greater than 0.
*/
uint64_t required_grid_size[3];
/**
* Work-group size that will be used by the application in any dispatch of the
* kernel. If the kernel contains a requiredworkgroupsize control directive,
* the dimensions should match.
*
* The specified work-group size must be consistent with @a required_grid_size
* and @a required_dim. Also, the product of the three dimensions must not
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
* hold only if all the corresponding control directives are enabled.
*
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
* control_directives_mask, the three dimension values must be greater than 0.
*/
hsa_dim3_t required_workgroup_size;
/**
* Number of dimensions that will be used by the application to launch the
* kernel. If the kernel contains a requireddim control directive, the two
* values should match.
*
* The specified dimensions must be consistent with @a required_grid_size and
* @a required_workgroup_size. This invariant must hold only if all the
* corresponding control directives are enabled.
*
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
* control_directives_mask, this field must be 1, 2, or 3.
*/
uint8_t required_dim;
/**
* Reserved. Must be 0.
*/
uint8_t reserved2[75];
} hsa_ext_control_directives_t;
/**
* @brief Finalize an HSAIL program for a given instruction set architecture.
*
* @details Finalize all of the kernels and indirect functions that belong to
* the same HSAIL program for a specific instruction set architecture (ISA). The
* transitive closure of all functions specified by call or scall must be
* defined. Kernels and indirect functions that are being finalized must be
* defined. Kernels and indirect functions that are referenced in kernels and
* indirect functions being finalized may or may not be defined, but must be
* declared. All the global/readonly segment variables that are referenced in
* kernels and indirect functions being finalized may or may not be defined, but
* must be declared.
*
* @param[in] program HSAIL program.
*
* @param[in] isa Instruction set architecture to finalize for.
*
* @param[in] call_convention A call convention used in a finalization. Must
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
* isa (not inclusive).
*
* @param[in] control_directives Low-level control directives that influence
* the finalization process.
*
* @param[in] options Vendor-specific options. May be NULL.
*
* @param[in] code_object_type Type of code object to produce.
*
* @param[out] code_object Code object generated by the Finalizer, which
* contains the machine code for the kernels and indirect functions in the HSAIL
* program. The code object is independent of the HSAIL module that was used to
* generate it.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
* resources required for the operation.
*
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
* invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
* the control directive structure and in the HSAIL kernel mismatch, or if the
* same directive is used with a different value in one of the functions used by
* this kernel.
*
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
* encountered an error while compiling a kernel or an indirect function.
*/
hsa_status_t HSA_API hsa_ext_program_finalize(
hsa_ext_program_t program,
hsa_isa_t isa,
int32_t call_convention,
hsa_ext_control_directives_t control_directives,
const char *options,
hsa_code_object_type_t code_object_type,
hsa_code_object_t *code_object);
/** @} */
#define hsa_ext_finalizer_1_00
typedef struct hsa_ext_finalizer_1_00_pfn_s {
hsa_status_t (*hsa_ext_program_create)(
hsa_machine_model_t machine_model, hsa_profile_t profile,
hsa_default_float_rounding_mode_t default_float_rounding_mode,
const char *options, hsa_ext_program_t *program);
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
hsa_ext_module_t module);
hsa_status_t (*hsa_ext_program_iterate_modules)(
hsa_ext_program_t program,
hsa_status_t (*callback)(hsa_ext_program_t program,
hsa_ext_module_t module, void *data),
void *data);
hsa_status_t (*hsa_ext_program_get_info)(
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
void *value);
hsa_status_t (*hsa_ext_program_finalize)(
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
hsa_ext_control_directives_t control_directives, const char *options,
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
} hsa_ext_finalizer_1_00_pfn_t;
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande Carregar diff
@@ -0,0 +1,488 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
#include <stdint.h>
#include "hsa.h"
#define HSA_AQLPROFILE_VERSION_MAJOR 2
#define HSA_AQLPROFILE_VERSION_MINOR 0
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Library version
uint32_t hsa_ven_amd_aqlprofile_version_major();
uint32_t hsa_ven_amd_aqlprofile_version_minor();
///////////////////////////////////////////////////////////////////////
// Library API:
// The library provides helper methods for instantiation of
// the profile context object and for populating of the start
// and stop AQL packets. The profile object contains a profiling
// events list and needed for profiling buffers descriptors,
// a command buffer and an output data buffer. To check if there
// was an error the library methods return a status code. Also
// the library provides methods for querying required buffers
// attributes, to validate the event attributes and to get profiling
// output data.
//
// Returned status:
// hsa_status_t HSA status codes are used from hsa.h header
//
// Supported profiling features:
//
// Supported profiling events
typedef enum {
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
} hsa_ven_amd_aqlprofile_event_type_t;
// Supported performance counters (PMC) blocks
// The block ID is the same for a block instances set, for example
// each block instance from the TCC block set, TCC0, TCC1, …, TCCN
// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCKS_TCC.
typedef enum {
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
// Memory related blocks
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
// System blocks
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
// GFX10 added blocks
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A = 26,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C = 27,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A = 28,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C = 29,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR = 30,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS = 31,
// UMC & MMEA System Blocks
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC = 32,
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA = 33,
HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
} hsa_ven_amd_aqlprofile_block_name_t;
// PMC event object structure
// counter_id value is specified in GFXIPs perfcounter user guides
// which is the counters select value, “Performance Counters Selection”
// chapter.
typedef struct {
hsa_ven_amd_aqlprofile_block_name_t block_name;
uint32_t block_index;
uint32_t counter_id;
} hsa_ven_amd_aqlprofile_event_t;
// Check if event is valid for the specific GPU
hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
hsa_agent_t agent, // HSA handle for the profiling GPU
const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer on validated event
bool* result); // [out] True if the event valid, False otherwise
// Profiling parameters
// All parameters are generic and if not applicable for a specific
// profile configuration then error status will be returned.
typedef enum {
/**
* Select the target compute unit (wgp) for profiling.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
/**
* VMID Mask
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
/**
* Legacy. Deprecated.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
/**
* Legacy. Deprecated.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
/**
* Legacy. Deprecated.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
/**
* Shader engine mask for selection.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
/**
* Legacy. Deprecated.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
/**
* Legacy. Deprecated.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
/**
* Set SIMD Mask (GFX9) or SIMD ID for collection (Navi)
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION = 8,
/**
* Set true for occupancy collection only.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE = 9,
/**
* ATT collection max data size, in MB. Shared among shader engines.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE = 10,
/**
* Mask of which compute units to generate perfcounters. GFX9 only.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK = 240,
/**
* Select collection period for perfcounters. GFX9 only.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL = 241,
/**
* Select perfcounter ID (SQ block) for collection. GFX9 only.
*/
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME = 242,
} hsa_ven_amd_aqlprofile_parameter_name_t;
// Profile parameter object
typedef struct {
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
uint32_t value;
} hsa_ven_amd_aqlprofile_parameter_t;
typedef enum {
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0 = 0,
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1,
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2,
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
} hsa_ven_amd_aqlprofile_att_marker_channel_t;
//
// Profile context object:
// The library provides a profile object structure which contains
// the events array, a buffer for the profiling start/stop commands
// and a buffer for the output data.
// The buffers are specified by the buffer descriptors and allocated
// by the application. The buffers allocation attributes, the command
// buffer size, the PMC output buffer size as well as profiling output
// data can be get using the generic get profile info helper _get_info.
//
// Buffer descriptor
typedef struct {
void* ptr;
uint32_t size;
} hsa_ven_amd_aqlprofile_descriptor_t;
// Profile context object structure, contains profiling events list and
// needed for profiling buffers descriptors, a command buffer and
// an output data buffer
typedef struct {
hsa_agent_t agent; // GFXIP handle
hsa_ven_amd_aqlprofile_event_type_t type; // Events type
const hsa_ven_amd_aqlprofile_event_t* events; // Events array
uint32_t event_count; // Events count
const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array
uint32_t parameter_count; // Parameters count
hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer
hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands
} hsa_ven_amd_aqlprofile_profile_t;
//
// AQL packets populating methods:
// The helper methods to populate provided by the application START and
// STOP AQL packets which the application is required to submit before and
// after profiled GPU task packets respectively.
//
// AQL Vendor Specific packet which carries a PM4 command
typedef struct {
uint16_t header;
uint16_t pm4_command[27];
hsa_signal_t completion_signal;
} hsa_ext_amd_aql_pm4_packet_t;
// Method to populate the provided AQL packet with profiling start commands
// Only 'pm4_command' fields of the packet are set and the application
// is responsible to set Vendor Specific header type a completion signal
hsa_status_t hsa_ven_amd_aqlprofile_start(
hsa_ven_amd_aqlprofile_profile_t* profile, // [in,out] profile context object
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet
// Method to populate the provided AQL packet with profiling stop commands
// Only 'pm4_command' fields of the packet are set and the application
// is responsible to set Vendor Specific header type and a completion signal
hsa_status_t hsa_ven_amd_aqlprofile_stop(
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet
// Method to populate the provided AQL packet with profiling read commands
// Only 'pm4_command' fields of the packet are set and the application
// is responsible to set Vendor Specific header type and a completion signal
hsa_status_t hsa_ven_amd_aqlprofile_read(
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet); // [out] profile stop AQL packet
// Legacy devices, PM4 profiling packet size
const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
// Legacy devices, converting the profiling AQL packet to PM4 packet blob
hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet
void* data); // [out] PM4 packet blob
// Method to add a marker (correlation ID) into the ATT buffer.
hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
hsa_ven_amd_aqlprofile_profile_t* profile, // [in,out] profile context object
hsa_ext_amd_aql_pm4_packet_t* aql_marker_packet, // [out] profile marker AQL packet
uint32_t data, // [in] Data to be inserted
hsa_ven_amd_aqlprofile_att_marker_channel_t channel); // [in] Comm channel
//
// Get profile info:
// Generic method for getting various profile info including profile buffers
// attributes like the command buffer size and the profiling PMC results.
// Its implied that all counters are 64bit values.
//
// Profile generic output data:
typedef struct {
uint32_t sample_id; // PMC sample or trace buffer index
union {
struct {
hsa_ven_amd_aqlprofile_event_t event; // PMC event
uint64_t result; // PMC result
} pmc_data;
hsa_ven_amd_aqlprofile_descriptor_t trace_data; // Trace output data descriptor
};
} hsa_ven_amd_aqlprofile_info_data_t;
// ID query type
typedef struct {
const char* name;
uint32_t id;
uint32_t instance_count;
} hsa_ven_amd_aqlprofile_id_query_t;
// Profile attributes
typedef enum {
HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
// in info_data object
HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3, // get_info returns trace buffer ptr/size
// in info_data object
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
// by name string using _id_query_t
HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
// counters enable command buffer
HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
// counters disable command buffer
} hsa_ven_amd_aqlprofile_info_type_t;
// Definition of output data iterator callback
typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
hsa_ven_amd_aqlprofile_info_type_t info_type, // [in] data type, PMC or trace data
hsa_ven_amd_aqlprofile_info_data_t* info_data, // [in] info_data object
void* callback_data); // [in,out] data passed to the callback
// Method for getting the profile info
hsa_status_t hsa_ven_amd_aqlprofile_get_info(
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute
void* value); // [in,out] returned value
// Method for iterating the events output data
hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data
void* data); // [in,out] data passed to the callback
// Return error string
hsa_status_t hsa_ven_amd_aqlprofile_error_string(
const char** str); // [out] pointer on the error string
/**
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
*/
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_eventname_callback_t)(int id, const char* name);
/**
* @brief Iterate over all possible event coordinate IDs and their names.
*/
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t);
/**
* @brief Iterate over all event coordinates for a given agent_t and event_t.
* @param position A counting sequence indicating callback number.
* @param id Coordinate ID as in _iterate_event_ids.
* @param extent Coordinate extent indicating maximum allowed instances.
* @param coordinate The coordinate, in the range [0,extent-1].
* @param name Coordinate name as in _iterate_event_ids.
* @param userdata Userdata returned from _iterate_event_coord function.
*/
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_coordinate_callback_t)(
int position,
int id,
int extent,
int coordinate,
const char* name,
void* userdata
);
/**
* @brief Iterate over all event coordinates for a given agent_t and event_t.
* @param[in] agent HSA agent.
* @param[in] event The event ID and block ID to iterate for.
* @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
* @param[in] callback Callback function to return the coordinates.
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
*/
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id,
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
void* userdata
);
/**
* @brief Extension version.
*/
#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
#ifdef HSA_LARGE_MODEL
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
#else
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
#endif
/**
* @brief Extension function table.
*/
typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
const char** str);
hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
hsa_agent_t agent,
const hsa_ven_amd_aqlprofile_event_t* event,
bool* result);
hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
const hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
const hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
void* data);
hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
const hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ven_amd_aqlprofile_info_type_t attribute,
void* value);
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
const hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ven_amd_aqlprofile_data_callback_t callback,
void* data);
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_ids)(
hsa_ven_amd_aqlprofile_eventname_callback_t
);
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_coord)(
hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id,
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
void* userdata
);
hsa_status_t (*hsa_ven_amd_aqlprofile_att_marker)(
hsa_ven_amd_aqlprofile_profile_t* profile,
hsa_ext_amd_aql_pm4_packet_t* aql_packet,
uint32_t data,
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
);
} hsa_ven_amd_aqlprofile_1_00_pfn_t;
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
@@ -0,0 +1,667 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// HSA AMD extension for additional loader functionality.
#ifndef HSA_VEN_AMD_LOADER_H
#define HSA_VEN_AMD_LOADER_H
#include "hsa.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/**
* @brief Queries equivalent host address for given @p device_address, and
* records it in @p host_address.
*
*
* @details Contents of memory pointed to by @p host_address would be identical
* to contents of memory pointed to by @p device_address. Only difference
* between the two is host accessibility: @p host_address is always accessible
* from host, @p device_address might not be accessible from host.
*
* If @p device_address already points to host accessible memory, then the value
* of @p device_address is simply copied into @p host_address.
*
* The lifetime of @p host_address is the same as the lifetime of @p
* device_address, and both lifetimes are limited by the lifetime of the
* executable that is managing these addresses.
*
*
* @param[in] device_address Device address to query equivalent host address
* for.
*
* @param[out] host_address Pointer to application-allocated buffer to record
* queried equivalent host address in.
*
*
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
*
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
*
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
* null, or @p host_address is null.
*/
hsa_status_t hsa_ven_amd_loader_query_host_address(
const void *device_address,
const void **host_address);
/**
* @brief The storage type of the code object that is backing loaded memory
* segment.
*/
typedef enum {
/**
* Loaded memory segment is not backed by any code object (anonymous), as the
* case would be with BSS (uninitialized data).
*/
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
/**
* Loaded memory segment is backed by the code object that is stored in the
* file.
*/
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
/**
* Loaded memory segment is backed by the code object that is stored in the
* memory.
*/
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
} hsa_ven_amd_loader_code_object_storage_type_t;
/**
* @brief Loaded memory segment descriptor.
*
*
* @details Loaded memory segment descriptor describes underlying loaded memory
* segment. Loaded memory segment is created/allocated by the executable during
* the loading of the code object that is backing underlying memory segment.
*
* The lifetime of underlying memory segment is limited by the lifetime of the
* executable that is managing underlying memory segment.
*/
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
/**
* Agent underlying memory segment is allocated on. If the code object that is
* backing underlying memory segment is program code object, then 0.
*/
hsa_agent_t agent;
/**
* Executable that is managing this underlying memory segment.
*/
hsa_executable_t executable;
/**
* Storage type of the code object that is backing underlying memory segment.
*/
hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
/**
* If the storage type of the code object that is backing underlying memory
* segment is:
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
* filepath to the code object;
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
* accessible pointer to the first byte of the code object.
*/
const void *code_object_storage_base;
/**
* If the storage type of the code object that is backing underlying memory
* segment is:
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
* the filepath to the code object (including null-terminating character);
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
* bytes, of the memory occupied by the code object.
*/
size_t code_object_storage_size;
/**
* If the storage type of the code object that is backing underlying memory
* segment is:
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
* - other, then offset, in bytes, from the beginning of the code object to
* the first byte in the code object data is copied from.
*/
size_t code_object_storage_offset;
/**
* Starting address of the underlying memory segment.
*/
const void *segment_base;
/**
* Size, in bytes, of the underlying memory segment.
*/
size_t segment_size;
} hsa_ven_amd_loader_segment_descriptor_t;
/**
* @brief Either queries loaded memory segment descriptors, or total number of
* loaded memory segment descriptors.
*
*
* @details If @p segment_descriptors is not null and @p num_segment_descriptors
* points to number that exactly matches total number of loaded memory segment
* descriptors, then queries loaded memory segment descriptors, and records them
* in @p segment_descriptors. If @p segment_descriptors is null and @p
* num_segment_descriptors points to zero, then queries total number of loaded
* memory segment descriptors, and records it in @p num_segment_descriptors. In
* all other cases returns appropriate error code (see below).
*
* The caller of this function is responsible for the allocation/deallocation
* and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
*
* The lifetime of loaded memory segments that are described by queried loaded
* memory segment descriptors is limited by the lifetime of the executable that
* is managing loaded memory segments.
*
* Queried loaded memory segment descriptors are always self-consistent: they
* describe a complete set of loaded memory segments that are being backed by
* fully loaded code objects that are present at the time (i.e. this function
* is blocked until all executable manipulations are fully complete).
*
*
* @param[out] segment_descriptors Pointer to application-allocated buffer to
* record queried loaded memory segment descriptors in. Can be null if @p
* num_segment_descriptors points to zero.
*
* @param[in,out] num_segment_descriptors Pointer to application-allocated
* buffer that contains either total number of loaded memory segment descriptors
* or zero.
*
*
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
*
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
*
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
* while @p num_segment_descriptors points to non-zero number, @p
* segment_descriptors is not null while @p num_segment_descriptors points to
* zero, or @p num_segment_descriptors is null.
*
* @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
* does not point to number that exactly matches total number of loaded memory
* segment descriptors.
*/
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
/**
* @brief Obtains the handle of executable to which the device address belongs.
*
* @details This method should not be used to obtain executable handle by using
* a host address. The executable returned is expected to be alive until its
* destroyed by the user.
*
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
*
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
*
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
* is no exectuable found for this kernel code object.
*/
hsa_status_t hsa_ven_amd_loader_query_executable(
const void *device_address,
hsa_executable_t *executable);
//===----------------------------------------------------------------------===//
/**
* @brief Iterate over the loaded code objects in an executable, and invoke
* an application-defined callback on every iteration.
*
* @param[in] executable Executable.
*
* @param[in] callback Callback to be invoked once per loaded code object. The
* HSA runtime passes three arguments to the callback: the executable, a
* loaded code object, and the application data. If @p callback returns a
* status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
* traversal stops and
* ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
* status value.
*
* @param[in] data Application data that is passed to @p callback on every
* iteration. May be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
*/
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
/**
* @brief Loaded code object kind.
*/
typedef enum {
/**
* Program code object.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
/**
* Agent code object.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
} hsa_ven_amd_loader_loaded_code_object_kind_t;
/**
* @brief Loaded code object attributes.
*/
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
/**
* The executable in which this loaded code object is loaded. The
* type of this attribute is ::hsa_executable_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
/**
* The kind of this loaded code object. The type of this attribute is
* ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
/**
* The agent on which this loaded code object is loaded. The
* value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
* attribute is ::hsa_agent_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
/**
* The storage type of the code object reader used to load the loaded code object.
* The type of this attribute is ::uint32_t interpreted as a
* ::hsa_ven_amd_loader_code_object_storage_type_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
/**
* The memory address of the first byte of the code object that was loaaded.
* The value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
* attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
/**
* The memory size in bytes of the code object that was loaaded.
* The value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
* attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
/**
* The file descriptor of the code object that was loaaded.
* The value of this attribute is only defined if
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
* attribute is ::int.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
/**
* The signed byte address difference of the memory address at which the code
* object is loaded minus the virtual address specified in the code object
* that is loaded. The value of this attribute is only defined if the
* executable in which the code object is loaded is froozen. The type of this
* attribute is ::int64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
/**
* The base memory address at which the code object is loaded. This is the
* base address of the allocation for the lowest addressed segment of the code
* object that is loaded. Note that any non-loaded segments before the first
* loaded segment are ignored. The value of this attribute is only defined if
* the executable in which the code object is loaded is froozen. The type of
* this attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
/**
* The byte size of the loaded code objects contiguous memory allocation. The
* value of this attribute is only defined if the executable in which the code
* object is loaded is froozen. The type of this attribute is ::uint64_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
/**
* The length of the URI in bytes, not including the NUL terminator. The type
* of this attribute is uint32_t.
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
/**
* The URI name from which the code object was loaded. The type of this
* attribute is a NUL terminated \p char* with the length equal to the value
* of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
* The URI name syntax is defined by the following BNF syntax:
*
* code_object_uri ::== file_uri | memory_uri
* file_uri ::== "file://" file_path [ range_specifier ]
* memory_uri ::== "memory://" process_id range_specifier
* range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
* file_path ::== URI_ENCODED_OS_FILE_PATH
* process_id ::== DECIMAL_NUMBER
* number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
*
* ``number`` is a C integral literal where hexadecimal values are prefixed by
* "0x" or "0X", and octal values by "0".
*
* ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
* In URI encoding, every character that is not in the regular expression
* ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
* proceeded by "%". Directories in the path are separated by "/".
*
* ``offset`` is a 0-based byte offset to the start of the code object. For a
* file URI, it is from the start of the file specified by the ``file_path``,
* and if omitted defaults to 0. For a memory URI, it is the memory address
* and is required.
*
* ``size`` is the number of bytes in the code object. For a file URI, if
* omitted it defaults to the size of the file. It is required for a memory
* URI.
*
* ``process_id`` is the identity of the process owning the memory. For Linux
* it is the C unsigned integral decimal literal for the process ID (PID).
*
* For example:
*
* file:///dir1/dir2/file1
* file:///dir3/dir4/file2#offset=0x2000&size=3000
* memory://1234#offset=0x20000&size=3000
*/
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
} hsa_ven_amd_loader_loaded_code_object_info_t;
/**
* @brief Get the current value of an attribute for a given loaded code
* object.
*
* @param[in] loaded_code_object Loaded code object.
*
* @param[in] attribute Attribute to query.
*
* @param[out] value Pointer to an application-allocated buffer where to store
* the value of the attribute. If the buffer passed by the application is not
* large enough to hold the value of @p attribute, the behavior is undefined.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
* invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
* loaded code object attribute, or @p value is NULL.
*/
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
//===----------------------------------------------------------------------===//
/**
* @brief Create a code object reader to operate on a file with size and offset.
*
* @param[in] file File descriptor. The file must have been opened by
* application with at least read permissions prior calling this function. The
* file must contain a vendor-specific code object.
*
* The file is owned and managed by the application; the lifetime of the file
* descriptor must exceed that of any associated code object reader.
*
* @param[in] size Size of the code object embedded in @p file.
*
* @param[in] offset 0-based offset relative to the beginning of the @p file
* that denotes the beginning of the code object embedded within the @p file.
*
* @param[out] code_object_reader Memory location to store the newly created
* code object reader handle. Must not be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
* read permissions. This condition may also be reported as
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
* ::hsa_executable_load_agent_code_object function.
*
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
* do not form a valid code object. If file size is 0. Or offset > file size.
* This condition may also be reported as
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
* ::hsa_executable_load_agent_code_object function.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
* allocate the required resources.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
*/
hsa_status_t
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
hsa_file_t file,
size_t offset,
size_t size,
hsa_code_object_reader_t *code_object_reader);
//===----------------------------------------------------------------------===//
/**
* @brief Iterate over the available executables, and invoke an
* application-defined callback on every iteration. While
* ::hsa_ven_amd_loader_iterate_executables is executing any calls to
* ::hsa_executable_create, ::hsa_executable_create_alt, or
* ::hsa_executable_destroy will be blocked.
*
* @param[in] callback Callback to be invoked once per executable. The HSA
* runtime passes two arguments to the callback: the executable and the
* application data. If @p callback returns a status other than
* ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
* ::hsa_ven_amd_loader_iterate_executables returns that status value. If
* @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
* ::hsa_executable_destroy then the behavior is undefined.
*
* @param[in] data Application data that is passed to @p callback on every
* iteration. May be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
*/
hsa_status_t
hsa_ven_amd_loader_iterate_executables(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data);
//===----------------------------------------------------------------------===//
/**
* @brief Extension version.
*/
#define hsa_ven_amd_loader 001003
/**
* @brief Extension function table version 1.00.
*/
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
} hsa_ven_amd_loader_1_00_pfn_t;
/**
* @brief Extension function table version 1.01.
*/
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
} hsa_ven_amd_loader_1_01_pfn_t;
/**
* @brief Extension function table version 1.02.
*/
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
hsa_status_t
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
hsa_file_t file,
size_t offset,
size_t size,
hsa_code_object_reader_t *code_object_reader);
} hsa_ven_amd_loader_1_02_pfn_t;
/**
* @brief Extension function table version 1.03.
*/
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
const void *device_address,
const void **host_address);
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
size_t *num_segment_descriptors);
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
const void *device_address,
hsa_executable_t *executable);
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
hsa_executable_t executable,
hsa_status_t (*callback)(
hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object,
void *data),
void *data);
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
hsa_loaded_code_object_t loaded_code_object,
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
void *value);
hsa_status_t
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
hsa_file_t file,
size_t offset,
size_t size,
hsa_code_object_reader_t *code_object_reader);
hsa_status_t
(*hsa_ven_amd_loader_iterate_executables)(
hsa_status_t (*callback)(
hsa_executable_t executable,
void *data),
void *data);
} hsa_ven_amd_loader_1_03_pfn_t;
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* HSA_VEN_AMD_LOADER_H */
@@ -0,0 +1,416 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_VEN_AMD_PC_SAMPLING_H
#define HSA_VEN_AMD_PC_SAMPLING_H
#include "hsa.h"
#ifdef __cplusplus
extern "C" {
#endif /*__cplusplus*/
/**
* @brief HSA AMD Vendor PC Sampling APIs
* EXPERIMENTAL: All PC Sampling APIs are currently in an experimental phase and the APIs may be
* modified extensively in the future
*/
/**
* @brief PC Sampling sample data for hosttrap sampling method
*/
typedef struct {
uint64_t pc;
uint64_t exec_mask;
uint32_t workgroup_id_x;
uint32_t workgroup_id_y;
uint32_t workgroup_id_z;
uint32_t wave_in_wg : 6;
uint32_t chiplet : 3; // Currently not used
uint32_t reserved : 23;
uint32_t hw_id;
uint32_t reserved0;
uint64_t reserved1;
uint64_t timestamp;
uint64_t correlation_id;
} perf_sample_hosttrap_v1_t;
/**
* @brief PC Sampling sample data for stochastic sampling method
*/
typedef struct {
uint64_t pc;
uint64_t exec_mask;
uint32_t workgroup_id_x;
uint32_t workgroup_id_y;
uint32_t workgroup_id_z;
uint32_t wave_in_wg : 6;
uint32_t chiplet : 3; // Currently not used
uint32_t reserved : 23;
uint32_t hw_id;
uint32_t perf_snapshot_data;
uint32_t perf_snapshot_data1;
uint32_t perf_snapshot_data2;
uint64_t timestamp;
uint64_t correlation_id;
} perf_sample_snapshot_v1_t;
/**
* @brief PC Sampling method kinds
*/
typedef enum {
HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1,
HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
} hsa_ven_amd_pcs_method_kind_t;
/**
* @brief PC Sampling interval unit type
*/
typedef enum {
HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS,
HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES,
HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS
} hsa_ven_amd_pcs_units_t;
/**
* @brief HSA callback function to perform the copy onto a destination buffer
*
* If data_size is 0, HSA will stop current copy operation and keep remaining data in internal
* buffers. Remaining contents of HSA internal buffers will be included in next
* hsa_ven_amd_pcs_data_ready_callback_t. HSA internal buffers can also be drained by calling
* hsa_ven_amd_pcs_flush.
*
* @param[in] hsa_callback_data private data to pass back to HSA. Provided in
* hsa_ven_amd_pcs_data_ready_callback_t
*
* @param[in] data_size size of destination buffer in bytes.
* @param[in] destination destination buffer
* @retval TBD: but could be used to indicate that there is no more data to be read.
* Or indicate an error and abort of current copy operations
*/
typedef hsa_status_t (*hsa_ven_amd_pcs_data_copy_callback_t)(void* hsa_callback_data,
size_t data_size, void* destination);
/**
* @brief HSA callback function to to indicate that there is data ready to be copied
*
* When the client receives this callback, the client should call back @p data_copy_callback for HSA
* to perform the copy operation into an available buffer. @p data_copy_callback can be called back
* multiple times with smaller @p data_size to split the copy operation.
*
* This callback must not call ::hsa_ven_amd_pcs_flush.
*
* @param[in] client_callback_data client private data passed in via
* hsa_ven_amd_pcs_create/hsa_ven_amd_pcs_create_from_id
* @param[in] data_size size of data available to be copied
* @param[in] lost_sample_count number of lost samples since last call to
* hsa_ven_amd_pcs_data_ready_callback_t.
* @param[in] data_copy_callback callback function for HSA to perform the actual copy
* @param[in] hsa_callback_data private data to pass back to HSA
*/
typedef void (*hsa_ven_amd_pcs_data_ready_callback_t)(
void* client_callback_data, size_t data_size, size_t lost_sample_count,
hsa_ven_amd_pcs_data_copy_callback_t data_copy_callback, void* hsa_callback_data);
/**
* @brief Opaque handle representing a sampling session.
* Two sessions having same handle value represent the same session
*/
typedef struct {
uint64_t handle;
} hsa_ven_amd_pcs_t;
/**
* @brief PC Sampling configuration flag options
*/
typedef enum {
/* The interval for this sampling method have to be a power of 2 */
HSA_VEN_AMD_PCS_CONFIGURATION_FLAGS_INTERVAL_POWER_OF_2 = (1 << 0)
} hsa_ven_amd_pcs_configuration_flags_t;
/**
* @brief PC Sampling method information
* Used to provide client with list of supported PC Sampling methods
*/
typedef struct {
hsa_ven_amd_pcs_method_kind_t method;
hsa_ven_amd_pcs_units_t units;
size_t min_interval;
size_t max_interval;
uint64_t flags;
} hsa_ven_amd_pcs_configuration_t;
/**
* @brief Callback function to iterate through list of supported PC Sampling configurations
*
* @param[in] configuration one entry for supported PC Sampling method and configuration options
* @param[in] callback_data client private callback data that was passed in when calling
* hsa_ven_amd_pcs_iterate_configuration
*/
typedef hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration_callback_t)(
const hsa_ven_amd_pcs_configuration_t* configuration, void* callback_data);
/**
* @brief Iterate through list of current supported PC Sampling configurations for this @p agent
*
* HSA will callback @p configuration_callback for each currently available PC Sampling
* configuration. The list of currently available configurations may not be the complete list of
* configurations supported on the @p agent. The list of currently available configurations may be
* reduced if the @p agent is currently handling other PC sampling sessions.
*
* @param[in] agent target agent
* @param[in] configuration_callback callback function to iterate through list of configurations
* @param[in] callback_data client private callback data
**/
hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
void* callback_data);
/**
* @brief Create a PC Sampling session on @p agent
*
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
* parameters must be a legal configuration value, as described by the
* hsa_ven_amd_pcs_configuration_t configurations passed to the callbacks of
* hsa_ven_amd_pcs_iterate_configuration for this @p agent.
* A successfull call may restrict the list of possible PC sampling methods available to subsequent
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
* on what types of PC sampling they can perform concurrently.
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
* The session will be in a stopped/inactive state after this call
*
* @param[in] agent target agent
* @param[in] method method to use
* @param[in] units sampling units
* @param[in] interval sampling interval in @p units
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
* buffers are full. The value of latency can be 0.
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
* @param[in] data_ready_callback client callback function that will be called when:
* 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
* within @p latency period.
* OR
* 2. When hsa_ven_amd_pcs_flush is called.
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
* is called.
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
*
* @retval ::HSA_STATUS_SUCCESS session created successfully
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
* cannot handle the type requested.
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
* @retval ::HSA_STATUS_ERROR Unexpected error
**/
hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
size_t buffer_size,
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);
/**
* @brief Creates a PC Sampling session on @p agent. Assumes that the caller provides the
* @p pcs_id generated by the previous call to the underlying driver that reserved PC sampling
* on the @p agent.
*
* Similar to the @ref hsa_ven_amd_pcs_create with the difference that it inherits an existing
* PC sampling session that was previously created in the underlying driver.
*
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
* parameters must be a legal configuration value, and match the parameters that we used to create
* the underlying PC Sampling session in the underlying driver.
* A successfull call may restrict the list of possible PC sampling methods available to subsequent
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
* on what types of PC sampling they can perform concurrently.
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
* The session will be in a stopped/inactive state after this call
*
* @param[in] pcs_id ID that uniquely identifies the PC sampling session within underlying driver
* @param[in] agent target agent
* @param[in] method method to use
* @param[in] units sampling units
* @param[in] interval sampling interval in @p units
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
* buffers are full. The value of latency can be 0.
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
* @param[in] data_ready_callback client callback function that will be called when:
* 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
* within @p latency period.
* OR
* 2. When hsa_ven_amd_pcs_flush is called.
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
* is called.
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
*
* @retval ::HSA_STATUS_SUCCESS session created successfully
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
* cannot handle the type requested.
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
* @retval ::HSA_STATUS_ERROR Unexpected error
**/
hsa_status_t hsa_ven_amd_pcs_create_from_id(
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
hsa_ven_amd_pcs_t* pc_sampling);
/**
* @brief Free a PC Sampling session on @p agent
*
* Free all the resources allocated for a PC Sampling session on @p agent
* Internal buffers for this session will be lost.
* If the session was active, the session will be stopped before it is destroyed.
*
* @param[in] pc_sampling PC sampling session handle
*
* @retval ::HSA_STATUS_SUCCESS Session destroyed successfully
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
* @retval ::HSA_STATUS_ERROR unexpected error
*/
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
/**
* @brief Start a PC Sampling session
*
* Activate a PC Sampling session that was previous created.
* The session with be in a active state after this call
* If the session was already active, this will result in a no-op and will return HSA_STATUS_SUCCESS
*
* @param[in] pc_sampling PC sampling session handle
*
* @retval ::HSA_STATUS_SUCCESS Session started successfully
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
* @retval ::HSA_STATUS_ERROR unexpected error
*/
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
/**
* @brief Stop a PC Sampling session
*
* Stop a session that is currently active
* After a session is stopped HSA may still have some PC Sampling data in its internal buffers.
* The internal buffers can be drained using hsa_ven_amd_pcs_flush. If the internal
* buffers are not drained and the session is started again, the internal buffers will be available
* on the next data_ready_callback.
* If the session was already inactive, this will result in a no-op and will return
* HSA_STATUS_SUCCESS
*
* @param[in] pc_sampling PC sampling session handle
*
* @retval ::HSA_STATUS_SUCCESS Session stopped successfully
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
*/
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
/**
* @brief Flush internal buffers for a PC Sampling session
*
* Drain internal buffers for a PC Sampling session. If internal buffers have available data,
* this trigger a data_ready_callback.
*
* The function blocks until all PC samples associated with the @p pc_sampling session
* generated prior to the function call have been communicated by invocations of
* @p data_ready_callback having completed execution.
*
* @param[in] pc_sampling PC sampling session handle
*
* @retval ::HSA_STATUS_SUCCESS Session flushed successfully
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
*/
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
#define hsa_ven_amd_pc_sampling_1_00
/**
* @brief The function pointer table for the PC Sampling v1.00 extension. Can be returned by
* ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
*/
typedef struct hsa_ven_amd_pc_sampling_1_00_pfn_t {
hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration)(
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
void* callback_data);
hsa_status_t (*hsa_ven_amd_pcs_create)(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
hsa_ven_amd_pcs_units_t units, size_t interval,
size_t latency, size_t buffer_size,
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
void* client_callback_data,
hsa_ven_amd_pcs_t* pc_sampling);
hsa_status_t (*hsa_ven_amd_pcs_create_from_id)(
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
hsa_ven_amd_pcs_t* pc_sampling);
hsa_status_t (*hsa_ven_amd_pcs_destroy)(hsa_ven_amd_pcs_t pc_sampling);
hsa_status_t (*hsa_ven_amd_pcs_start)(hsa_ven_amd_pcs_t pc_sampling);
hsa_status_t (*hsa_ven_amd_pcs_stop)(hsa_ven_amd_pcs_t pc_sampling);
hsa_status_t (*hsa_ven_amd_pcs_flush)(hsa_ven_amd_pcs_t pc_sampling);
} hsa_ven_amd_pc_sampling_1_00_pfn_t;
#ifdef __cplusplus
} // end extern "C" block
#endif /*__cplusplus*/
#endif /* HSA_VEN_AMD_PC_SAMPLING_H */
A apresentação das diferenças no ficheiro foi suprimida por ser demasiado grande Carregar diff
+363
Ver ficheiro
@@ -0,0 +1,363 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// This file is used only for open source cmake builds, if we hardcode the
// register values in amd_aql_queue.cpp then this file won't be required. For
// now we are using this file where register details are spelled out in the
// structs/unions below.
#ifndef _WSL_INC_REGISTERS_H_
#define _WSL_INC_REGISTERS_H_
typedef enum SQ_RSRC_BUF_TYPE {
SQ_RSRC_BUF = 0x00000000,
SQ_RSRC_BUF_RSVD_1 = 0x00000001,
SQ_RSRC_BUF_RSVD_2 = 0x00000002,
SQ_RSRC_BUF_RSVD_3 = 0x00000003,
} SQ_RSRC_BUF_TYPE;
typedef enum BUF_DATA_FORMAT {
BUF_DATA_FORMAT_INVALID = 0x00000000,
BUF_DATA_FORMAT_8 = 0x00000001,
BUF_DATA_FORMAT_16 = 0x00000002,
BUF_DATA_FORMAT_8_8 = 0x00000003,
BUF_DATA_FORMAT_32 = 0x00000004,
BUF_DATA_FORMAT_16_16 = 0x00000005,
BUF_DATA_FORMAT_10_11_11 = 0x00000006,
BUF_DATA_FORMAT_11_11_10 = 0x00000007,
BUF_DATA_FORMAT_10_10_10_2 = 0x00000008,
BUF_DATA_FORMAT_2_10_10_10 = 0x00000009,
BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a,
BUF_DATA_FORMAT_32_32 = 0x0000000b,
BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c,
BUF_DATA_FORMAT_32_32_32 = 0x0000000d,
BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e,
BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f,
} BUF_DATA_FORMAT;
typedef enum BUF_NUM_FORMAT {
BUF_NUM_FORMAT_UNORM = 0x00000000,
BUF_NUM_FORMAT_SNORM = 0x00000001,
BUF_NUM_FORMAT_USCALED = 0x00000002,
BUF_NUM_FORMAT_SSCALED = 0x00000003,
BUF_NUM_FORMAT_UINT = 0x00000004,
BUF_NUM_FORMAT_SINT = 0x00000005,
BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x00000006,
BUF_NUM_FORMAT_RESERVED_6__VI = 0x00000006,
BUF_NUM_FORMAT_FLOAT = 0x00000007,
} BUF_NUM_FORMAT;
typedef enum BUF_FORMAT {
BUF_FORMAT_32_UINT = 0x00000014,
} BUF_FORMAT;
typedef enum SQ_SEL_XYZW01 {
SQ_SEL_0 = 0x00000000,
SQ_SEL_1 = 0x00000001,
SQ_SEL_RESERVED_0 = 0x00000002,
SQ_SEL_RESERVED_1 = 0x00000003,
SQ_SEL_X = 0x00000004,
SQ_SEL_Y = 0x00000005,
SQ_SEL_Z = 0x00000006,
SQ_SEL_W = 0x00000007,
} SQ_SEL_XYZW01;
union COMPUTE_TMPRING_SIZE {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int WAVES : 12;
unsigned int WAVESIZE : 13;
unsigned int : 7;
#elif defined(BIGENDIAN_CPU)
unsigned int : 7;
unsigned int WAVESIZE : 13;
unsigned int WAVES : 12;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union COMPUTE_TMPRING_SIZE_GFX11 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int WAVES : 12;
unsigned int WAVESIZE : 15;
unsigned int : 5;
#elif defined(BIGENDIAN_CPU)
unsigned int : 5;
unsigned int WAVESIZE : 15;
unsigned int WAVES : 12;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union COMPUTE_TMPRING_SIZE_GFX12 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int WAVES : 12;
unsigned int WAVESIZE : 18;
unsigned int : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int : 2;
unsigned int WAVESIZE : 18;
unsigned int WAVES : 12;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union SQ_BUF_RSRC_WORD0 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int BASE_ADDRESS : 32;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union SQ_BUF_RSRC_WORD1 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS_HI : 16;
unsigned int STRIDE : 14;
unsigned int CACHE_SWIZZLE : 1;
unsigned int SWIZZLE_ENABLE : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int SWIZZLE_ENABLE : 1;
unsigned int CACHE_SWIZZLE : 1;
unsigned int STRIDE : 14;
unsigned int BASE_ADDRESS_HI : 16;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union SQ_BUF_RSRC_WORD1_GFX11 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS_HI : 16;
unsigned int STRIDE : 14;
unsigned int SWIZZLE_ENABLE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int SWIZZLE_ENABLE : 2;
unsigned int STRIDE : 14;
unsigned int BASE_ADDRESS_HI : 16;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union SQ_BUF_RSRC_WORD2 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int NUM_RECORDS : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int NUM_RECORDS : 32;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union SQ_BUF_RSRC_WORD3 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int NUM_FORMAT : 3;
unsigned int DATA_FORMAT : 4;
unsigned int ELEMENT_SIZE : 2;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int ATC__CI__VI : 1;
unsigned int HASH_ENABLE : 1;
unsigned int HEAP : 1;
unsigned int MTYPE__CI__VI : 3;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int MTYPE__CI__VI : 3;
unsigned int HEAP : 1;
unsigned int HASH_ENABLE : 1;
unsigned int ATC__CI__VI : 1;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int ELEMENT_SIZE : 2;
unsigned int DATA_FORMAT : 4;
unsigned int NUM_FORMAT : 3;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
union SQ_BUF_RSRC_WORD3_GFX10 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int FORMAT : 7;
unsigned int RESERVED1 : 2;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int RESOURCE_LEVEL : 1;
unsigned int RESERVED2 : 3;
unsigned int OOB_SELECT : 2;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int OOB_SELECT : 2;
unsigned int RESERVED2 : 3;
unsigned int RESOURCE_LEVEL : 1;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int RESERVED1 : 2;
unsigned int FORMAT : 7;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
// From V# Table
union SQ_BUF_RSRC_WORD3_GFX11 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int FORMAT : 6;
unsigned int RESERVED1 : 3;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int RESERVED2 : 4;
unsigned int OOB_SELECT : 2;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int OOB_SELECT : 2;
unsigned int RESERVED2 : 4;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int RESERVED1 : 3;
unsigned int FORMAT : 6;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
// From V# Table
union SQ_BUF_RSRC_WORD3_GFX12 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int FORMAT : 6;
unsigned int RESERVED1 : 3;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int WRITE_COMPRESS_ENABLE : 1;
unsigned int COMPRESSION_EN : 1;
unsigned int COMPRESSION_ACCESS_MODE : 2;
unsigned int OOB_SELECT : 2;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int OOB_SELECT : 2;
unsigned int COMPRESSION_ACCESS_MODE : 2;
unsigned int COMPRESSION_EN : 1;
unsigned int WRITE_COMPRESS_ENABLE : 1;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int RESERVED1 : 3;
unsigned int FORMAT : 6;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
unsigned int u32All;
signed int i32All;
float f32All;
};
#endif // header guard
@@ -0,0 +1,122 @@
#ifndef _WSL_INC_THUNK_PROXY_H_
#define _WSL_INC_THUNK_PROXY_H_
#include <vector>
namespace thunk_proxy {
enum AllocDomain {
kSystem,
kLocal,
kUserMemory,
kUserQueue,
kDomainCount,
};
enum MemFlag {
kFineGrain = (1ULL << 0),
kKernarg = (1ULL << 1),
};
enum EngineFlag {
KCOMPUTE0 = (1ULL << 0),
KDRMDMA = (1ULL << 1),
KDRMDMA1 = (1ULL << 2),
};
enum SchedLevel {
kLow = 0,
kNormal = 1,
kHigh = 2,
};
struct HwsInfo {
union {
struct {
uint32_t gfxHwsEnabled : 1;
uint32_t computeHwsEnabled : 1;
uint32_t dmaHwsEnabled : 1;
uint32_t dma1HwsEnabled : 1;
uint32_t reserved : 28;
} hwsMask;
uint32_t osHwsEnableFlags;
};
uint64_t engineOrdinalMask; // Indicates which engines (by ordinal) support MES HWS
};
typedef struct {
int major;
int minor;
int stepping;
bool is_dgpu;
char product_name[MAX_PATH];
uint64_t uuid;
uint32_t family;
uint32_t device_id;
uint32_t wavefront_size;
uint32_t compute_unit_count;
uint32_t max_engine_clock_mhz;
uint32_t watch_points_num;
uint32_t pci_bus_addr;
uint32_t memory_bus_width;
uint32_t max_memory_clock_mhz;
uint64_t gpu_counter_frequency;
uint32_t wave_per_cu;
uint32_t simd_per_cu;
uint32_t max_scratch_slots_per_cu;
uint32_t num_shader_engine;
uint32_t shader_array_per_shader_engine;
uint32_t domain;
uint32_t num_gws;
uint32_t asic_revision;
uint64_t local_visible_heap_size;
uint64_t local_invisible_heap_size;
uint64_t non_local_heap_size;
uint64_t private_aperture_base;
uint64_t private_aperture_size;
uint64_t shared_aperture_base;
uint64_t shared_aperture_size;
uint32_t user_queue_size;
uint32_t lds_size;
uint32_t big_page_alignment_size;
uint32_t hw_big_page_min_alignment_size;
uint32_t hw_big_page_alignment_size;
bool enable_big_page_alignment;
uint32_t mec_fw_version;
uint32_t sdma_fw_version;
uint32_t l1_cache_size;
uint32_t l2_cache_size;
uint32_t l3_cache_size;
uint32_t gl2_cacheline_size;
uint32_t num_cp_queues;
HwsInfo hwsInfo;
std::vector<int> sdma_schedid;
uint32_t compute_schedid;
bool state_shadowing_by_cpfw;
bool platform_atomic_support;
void *adapter_info;
uint32_t kmd_version;
} DeviceInfo;
int EngineOrdinal(int engine, DeviceInfo *device_info);
bool GetHwsEnabled(int engine, DeviceInfo *device_info);
bool ShouldDisableGpuTimeout(int engine, DeviceInfo *device_info);
bool ParseAdapterInfo(D3DKMT_HANDLE adapter, DeviceInfo *device_info);
bool QueryAdapterSupported(unsigned int device_id);
uint32_t QueueEngine2EngineFlag(uint32_t queue_engine);
void SetAllocationInfo(void *data, uint64_t size, AllocDomain domain,
uint64_t addr, uint32_t mem_flags, uint32_t engine_flag, const DeviceInfo &device_info);
void GetAllocPrivDataSize(int *priv_drv_data_size, int *priv_alloc_data_size);
void FillinAllocPrivDrvData(void *drv_priv, int priv_alloc_data_size);
int GetSubmitPrivDataSize();
void FillinSubmitPrivData(void *priv_data, D3DKMT_HANDLE queue, uint64_t command_addr,
uint64_t command_size, bool is_hw_queue);
int GetHwQueuePrivDataSize();
void FillinHwQueuePrivData(void *priv_data, bool FwManagedGfxState, SchedLevel level = kNormal);
int GetContextPrivDataSize();
void FillinContextPrivData(void *priv_data, bool FwManagedGfxState);
int GetPowerOptPrivDataSize();
void FillinPowerOptPrivData(void *priv_data, bool restore);
}
#endif
@@ -0,0 +1,169 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
#define _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
#include <stdint.h>
#include <no_sal2.h>
typedef uint32_t UINT, *UINT_PTR;
typedef int32_t INT32;
typedef int32_t LONG;
typedef uint32_t ULONG, *ULONG_PTR;
typedef int64_t LONGLONG;
typedef int64_t LONG64;
typedef uint64_t ULONGLONG;
typedef uint64_t ULONG64, *ULONG64_PTR;
typedef uint8_t BYTE;
typedef uint16_t WORD;
typedef uint32_t DWORD;
typedef int32_t BOOL;
typedef int32_t NTSTATUS;
typedef uint16_t USHORT;
typedef uint16_t UINT16;
typedef uint32_t UINT32;
typedef uint64_t UINT64;
typedef int32_t INT;
typedef uint64_t SIZE_T;
typedef void VOID;
typedef float FLOAT;
typedef char CHAR;
typedef unsigned char UCHAR;
typedef UCHAR BOOLEAN;
typedef int16_t WCHAR;
typedef void *HANDLE;
typedef void *PVOID;
typedef void *LPVOID;
typedef const int16_t *PCWSTR;
#define ULONG ULONG
#define ULONG_PTR ULONG_PTR
#define USHORT USHORT
#define DECLARE_HANDLE(name) struct name##__{int unused;}; typedef struct name##__ *name
#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1]
DECLARE_HANDLE(HWND);
DECLARE_HANDLE(HDC);
DECLARE_HANDLE(PALETTEENTRY);
typedef struct tagPOINT {
LONG x;
LONG y;
} POINT;
typedef struct tagRECT {
LONG left;
LONG top;
LONG right;
LONG bottom;
} RECT;
typedef struct tagRECTL {
LONG left;
LONG top;
LONG right;
LONG bottom;
} RECTL;
typedef union _LARGE_INTEGER {
struct {
DWORD LowPart;
DWORD HighPart;
} u;
LONGLONG QuadPart;
} LARGE_INTEGER;
typedef LARGE_INTEGER *PLARGE_INTEGER;
typedef union _ULARGE_INTEGER {
struct {
ULONG LowPart;
ULONG HighPart;
} DUMMYSTRUCTNAME;
struct {
ULONG LowPart;
ULONG HighPart;
} u;
ULONGLONG QuadPart;
} ULARGE_INTEGER;
typedef ULARGE_INTEGER *PULARGE_INTEGER;
typedef struct _LUID {
ULONG LowPart;
LONG HighPart;
} LUID, *PLUID;
typedef enum _DEVICE_POWER_STATE {
PowerDeviceUnspecified = 0,
PowerDeviceD0,
PowerDeviceD1,
PowerDeviceD2,
PowerDeviceD3,
PowerDeviceMaximum
} DEVICE_POWER_STATE, *PDEVICE_POWER_STATE;
#define _Check_return_
#define APIENTRY
#define CONST const
#define IN
#define OUT
#define FAR
#define MAX_PATH 260
#define __stdcall
#ifndef GUID_DEFINED
#define GUID_DEFINED
typedef struct _GUID {
uint32_t Data1;
uint16_t Data2;
uint16_t Data3;
uint8_t Data4[ 8 ];
} GUID;
#endif
#include <guiddef.h>
#endif
@@ -0,0 +1,82 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */
#ifndef _WSL_INC_WDDM_CMD_UTIL_H_
#define _WSL_INC_WDDM_CMD_UTIL_H_
#include <string.h>
#include "impl/hsa/hsa.h"
#include "impl/hsa/amd_hsa_queue.h"
#include "impl/hsa/amd_hsa_kernel_code.h"
#include "impl/pm4_cmds.h"
#include "util/utils.h"
namespace wsl {
namespace thunk {
struct DispatchInfo {
uint8_t major;
hsa_kernel_dispatch_packet_t *pPacket;
void *pEntry;
const amd_kernel_code_t *pKernelObject;
uint32_t ldsBlks;
amd_queue_v2_t *pAmdQueue;
bool wave32;
uint32_t srd;
void *pScratchBase;
uint32_t scratchSizePerWave;
uint32_t scratchBaseOffset[2];
uint32_t offsetCnt;
};
class CmdUtil {
public:
CmdUtil() {};
~CmdUtil() {};
static size_t BuildCopyData(
uint64_t *pDstAddr,
void *pBuffer,
uint32_t dstSel = dst_sel__mec_copy_data__tc_l2,
uint32_t dstCachePolicy = dst_cache_policy__mec_copy_data__stream,
uint32_t srcSel = src_sel__mec_copy_data__gpu_clock_count,
uint32_t srcCachePolicy = src_cache_policy__mec_copy_data__lru,
uint32_t countSel = count_sel__mec_copy_data__64_bits_of_data,
uint32_t wrConfirm = wr_confirm__mec_copy_data__wait_for_confirmation);
static size_t BuildBarrier(
void *pBuffer,
uint32_t eventIndex = event_index__mec_event_write__cs_partial_flush,
uint32_t eventType = CS_PARTIAL_FLUSH);
static size_t BuildWriteData64Command(
void *pBuffer,
uint64_t* write_addr,
uint64_t write_value);
static size_t BuildAcquireMem(
uint8_t major,
void *pBuffer);
static size_t BuildScratch(
void *pScratchBase,
void *pBuffer);
static size_t BuildComputeShaderParams(
void *pBuffer);
static size_t BuildDispatch(
struct DispatchInfo *pInfo,
void *pBuffer);
static size_t BuildAtomicMem(
uint64_t *pAddr,
uint32_t atomic,
void *pBuffer,
uint32_t cachePolicy = cache_policy__mec_atomic_mem__stream,
uint64_t srcData = 1);
};
} // namespace thunk
} // namespace wsl
#endif
+246
Ver ficheiro
@@ -0,0 +1,246 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_DEVICE_H_
#define _WSL_INC_WDDM_DEVICE_H_
#include <cassert>
#include <ntstatus.h>
#include <atomic>
#include <memory>
#include <vector>
#include "impl/wddm/types.h"
#include "impl/thunk_proxy/thunk_proxy.h"
#include "impl/wddm/va_mgr.h"
#include "impl/wddm/status.h"
#include "impl/wddm/types.h"
#include "impl/wddm/gpu_memory.h"
#include "impl/wddm/cmd_util.h"
namespace wsl {
namespace thunk {
//class Queue;
class WDDMQueue;
// WSL2 hyperv GPADL protocol limitation
#define MAX_USERPTR_BLOCK_SIZE 0xf0000000
#define START_NON_CANONICAL_ADDR (1ULL << 47)
#define END_NON_CANONICAL_ADDR (~0UL - (1UL << 47))
#define IS_OVERLAPPING(start1, size1, start2, size2) \
((start1 < (start2 + size2)) && (start2 < (start1 + size1)))
struct SegmentInfo {
uint32_t segment_id;
uint32_t segment_type; // 0=aperture, 1=gpu memory, 2=system memory
bool aperture;
bool system_memory;
uint64_t commit_limit;
SegmentInfo()
: segment_id(0), segment_type(0), aperture(false),
system_memory(false), commit_limit(0) {}
};
class WDDMDevice {
public:
static constexpr size_t GpuMemoryChunkSize = 2 * (1ULL << 30); // 2 GB
WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid, uint32_t node_id);
~WDDMDevice();
int NodeId() const { return node_id_; }
int Major() { return device_info_.major; }
int Minor() { return device_info_.minor; }
int Stepping() { return device_info_.stepping; }
bool IsDgpu() { return device_info_.is_dgpu; }
const char *ProductName() { return device_info_.product_name; }
uint64_t Uuid() { return device_info_.uuid; }
uint32_t GfxFamily() { return device_info_.family; }
uint32_t DeviceId() { return device_info_.device_id; }
uint32_t WavefrontSize() { return device_info_.wavefront_size; }
uint32_t ComputeUnitCount() { return device_info_.compute_unit_count; }
uint32_t MaxEngineClockMhz() { return device_info_.max_engine_clock_mhz; }
uint32_t WatchPointsNum() { return device_info_.watch_points_num; }
uint32_t PciBusAddr() { return device_info_.pci_bus_addr; }
uint32_t MemoryBusWidth() { return device_info_.memory_bus_width; }
uint32_t MaxMemoryClockMhz() { return device_info_.max_memory_clock_mhz; }
uint32_t WavePerCu() { return device_info_.wave_per_cu; }
uint32_t SimdPerCu() { return device_info_.simd_per_cu; }
uint32_t MaxScratchSlotsPerCu() { return device_info_.max_scratch_slots_per_cu; }
uint32_t NumShaderEngine() { return device_info_.num_shader_engine; }
uint32_t ShaderArrayPerShaderEngine() { return device_info_.shader_array_per_shader_engine; }
uint32_t NumSdmaEngine() { return device_info_.sdma_schedid.size(); }
uint32_t Domain() { return device_info_.domain; }
uint32_t NumGws() { return device_info_.num_gws; }
uint32_t AsicRevision() { return device_info_.asic_revision; }
uint64_t LocalHeapSize() { return device_info_.local_visible_heap_size + device_info_.local_invisible_heap_size; }
uint64_t LocalVisibleHeapSize() { return device_info_.local_visible_heap_size; }
uint64_t LocalInvisibleHeapSize() { return device_info_.local_invisible_heap_size; }
uint64_t NonLocalHeapSize() { return device_info_.non_local_heap_size; }
uint64_t PrivateApertureBase() { return device_info_.private_aperture_base; }
uint64_t PrivateApertureSize() { return device_info_.private_aperture_size; }
uint64_t SharedApertureBase() { return device_info_.shared_aperture_base; }
uint64_t SharedApertureSize() { return device_info_.shared_aperture_size; }
uint32_t LdsSize() { return device_info_.lds_size; }
uint64_t GPUCounterFrequency() { return device_info_.gpu_counter_frequency; }
uint32_t GetSwsQueueSize(void) const { return device_info_.user_queue_size; }
uint32_t GetMecFwVersion() { return device_info_.mec_fw_version; }
uint32_t GetSdmaFwVersion() { return device_info_.sdma_fw_version; }
uint32_t GetL1CacheSize() { return device_info_.l1_cache_size; }
uint32_t GetL2CacheSize() { return device_info_.l2_cache_size; }
uint32_t GetL3CacheSize() { return device_info_.l3_cache_size; }
uint32_t Gl2CacheLineSize() { return device_info_.gl2_cacheline_size; }
bool SupportStateShadowingByCpFw(void) const { return device_info_.state_shadowing_by_cpfw; }
bool SupportPlatformAtomic(void) const { return device_info_.platform_atomic_support; }
uint32_t GetSdmaEngine(uint32_t idx) {
assert(idx < NumSdmaEngine());
return device_info_.sdma_schedid[idx];
}
uint32_t GetComputeEngine() { return device_info_.compute_schedid; }
uint64_t VramAvail();
void GetClockCounters(uint64_t *gpu, uint64_t *cpu);
uint32_t GetNumCpQueues() { return device_info_.num_cp_queues; }
bool CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr);
void DestroySyncobj(D3DKMT_HANDLE handle);
bool CreateQueue(WDDMQueue *queue);
void DestroyQueue(WDDMQueue *queue);
bool CreateHwQueue(WDDMQueue *queue);
bool DestroyHwQueue(WDDMQueue *queue);
bool SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr,
uint64_t command_size, uint64_t fence_value);
bool SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr,
uint64_t command_size, uint64_t fence_value);
bool WaitPagingFence(WDDMQueue *queue) {
uint64_t value = page_fence_value_;
if (*page_fence_addr_ < value &&
!GpuWait(queue, &page_syncobj_, &value, 1))
return false;
return true;
}
bool GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs,
uint64_t *values, int count);
bool GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs,
uint64_t *value, int count);
bool CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value,
int count, bool wait_any);
bool WaitOnPagingFenceFromCpu();
uint32_t LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt);
uint32_t GetCmdbufSize(void) const { return cmdbuf_size_; }
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size_; }
static uint32_t GetAqlFrameNum(void) { return cmdbuf_aql_frame_num_; }
// Both legacy HWS and stage 1 HWS use KMD to alloc use queue memory,
// return false by default
bool AllocUserQueueMemFromUMD(void) const { return false; }
bool IsHwsEnabled(int engine) {
return thunk_proxy::GetHwsEnabled(engine, &device_info_);
}
void UpdatePageFence(uint64_t fence_value);
D3DKMT_HANDLE PagingQueue() const { return page_queue_; }
D3DKMT_HANDLE PagingFence() const { return page_syncobj_; }
D3DKMT_HANDLE DeviceHandle() const { return device_; }
LUID GetLuid() const { return adapter_luid_; }
D3DKMT_HANDLE GetAdapter() const { return adapter_; }
const thunk_proxy::DeviceInfo& DeviceInfo() const { return device_info_; }
ErrorCode CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem, gpusize *gpu_va = nullptr);
private:
bool ParseDeviceInfo(void);
void DestroyDeviceInfo(void);
bool CreateDevice(void);
bool DestroyDevice(void);
bool CreatePagingQueue(void);
bool DestroyPagingQueue(void);
void *Lock(D3DKMT_HANDLE handle);
bool Unlock(D3DKMT_HANDLE handle);
bool CreateContext(int engine, D3DKMT_HANDLE *handle);
bool DestroyContext(D3DKMT_HANDLE handle);
void SetPowerOptimization(bool restore);
void InitCmdbufInfo(void);
bool QuerySegmentInfo();
bool GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE segment_type, uint32_t &segment_id);
D3DKMT_HANDLE adapter_;
LUID adapter_luid_;
D3DKMT_HANDLE device_;
D3DKMT_HANDLE page_queue_;
D3DKMT_HANDLE page_syncobj_;
uint64_t *page_fence_addr_;
std::atomic<uint64_t> page_fence_value_;
uint32_t cmdbuf_size_;
uint32_t cmdbuf_aql_frame_size_;
static const uint32_t cmdbuf_aql_frame_num_;
uint32_t node_id_;
// device info
thunk_proxy::DeviceInfo device_info_;
std::vector<struct SegmentInfo> segment_infos_;
//CmdUtil cmd_util;
};
NTSTATUS WDDMCreateDevices(std::vector<WDDMDevice *> &devices);
} // namespace thunk
} // namespace wsl
#endif
@@ -0,0 +1,249 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_GPU_MEMORY_H_
#define _WSL_INC_WDDM_GPU_MEMORY_H_
#include <cstddef>
#include <cstdint>
#include "util/utils.h"
#include "impl/wddm/types.h"
#include "impl/wddm/thunks.h"
#include "impl/thunk_proxy/thunk_proxy.h"
namespace wsl {
namespace thunk {
class WDDMDevice;
union GpuMemoryCreateFlags {
struct {
uint64_t virtual_alloc : 1; // only allocate virtual address, without physical buffer
uint64_t physical_only : 1; // only allocate physical buffer, without virutal address
uint64_t interprocess : 1; // physical buffer need share info between exporter and importer
uint64_t locked : 1; // lock virtual address space into RAM, preventing that memory from being paged to the swap area
uint64_t physical_contiguous : 1; // contiguous physical pages
uint64_t sysmem_ipc_sig_importer : 1; // allocate system memory for IPC signal
uint64_t sysmem_ipc_sig_exporter : 1; // allocate system memory for IPC signal, prepare to export
uint64_t alloc_va : 1; // allocate va. 0 for vmem import
uint64_t blit_kernel_object : 1; // allocate executable blit kernel object
uint64_t unused : 55;
};
uint64_t reserved;
};
union GpuMemoryDescFlags {
struct {
uint32_t is_virtual : 1;
uint32_t is_shared : 1;
uint32_t is_external : 1;
uint32_t is_physical_only : 1;
uint32_t is_locked : 1;
uint32_t is_queue_referenced : 1;
uint32_t is_physical_contiguous : 1;
uint32_t is_imported_sys_memfd : 1; // 0 - ignored; 1 - va from system heap
uint32_t is_sysmem_exporter : 1; // allocate system memory for IPC signal, prepare to export
uint32_t is_va_required :1;
uint32_t is_imported_vram_vmem :1;
uint32_t is_imported_vram_ipc :1;
uint32_t is_imported_from_same_process : 3; // imported from same process, record shared cnt
uint32_t is_blit_kernel_object : 1; // blit kernel object
uint32_t unused : 16;
};
uint32_t reserved;
};
struct GpuMemoryCreateInfo {
GpuMemoryCreateInfo() {
flags.reserved = 0;
domain = thunk_proxy::kLocal;
size = 0;
alignment = 0;
mem_flags = 0;
engine_flag = 0;
va_hint = 0;
user_ptr = nullptr;
dmabuf_fd = -1;
}
GpuMemoryCreateFlags flags;
thunk_proxy::AllocDomain domain;
gpusize size;
gpusize alignment;
int mem_flags;
int engine_flag;
int dmabuf_fd; // Import from dmabuf
void *user_ptr;
gpusize va_hint;
};
struct GpuMemoryDesc {
GpuMemoryDesc() {
gpu_addr = 0;
cpu_addr = nullptr;
client_size = 0;
size = alignment = 0;
flags.reserved = 0;
mem_flags = 0;
engine_flag = 0;
handle_ape_addr = 0;
}
thunk_proxy::AllocDomain domain;
LUID adapter_luid; // Where is the backing store location
gpusize gpu_addr;
void *cpu_addr;
gpusize client_size; // user request size
gpusize size;
gpusize alignment;
gpusize handle_ape_addr;
GpuMemoryDescFlags flags;
int mem_flags;
int engine_flag;
};
struct SharedHandleInfo {
thunk_proxy::AllocDomain domain;
LUID adapter_luid;
gpusize client_size; // user request size
uint64_t size;
uint32_t flags;
int mem_flags;
pid_t pid;
gpusize gpu_addr;
};
using GpuMemoryHandle = void *;
class GpuMemory {
public:
static size_t CalcChunkNumbers(gpusize size);
ErrorCode Init(const GpuMemoryCreateInfo &create_info);
WDDMDevice *GetDevice() const { return device_; }
gpusize Size() const { return desc_.size; }
gpusize ClientSize() const { return desc_.client_size; }
uint64_t GpuAddress() const { return desc_.gpu_addr; }
void *CpuAddress() const { return desc_.cpu_addr; }
uint64_t HandleApeAddress() const { return desc_.handle_ape_addr; }
inline bool IsLocal() const { return desc_.domain == thunk_proxy::kLocal; }
inline bool IsUserMemory() const { return desc_.domain == thunk_proxy::kUserMemory; }
inline bool IsSystem() const { return desc_.domain == thunk_proxy::kSystem; }
inline bool IsSysMemFd() const { return desc_.flags.is_imported_sys_memfd; }
inline bool IsUserQueue() const { return desc_.domain == thunk_proxy::kUserQueue; }
inline bool IsPhysicalOnly() const { return desc_.flags.is_physical_only; }
inline bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous; }
inline bool IsVirtual() const { return desc_.flags.is_virtual; }
inline bool IsShared() const { return desc_.flags.is_shared; }
inline bool IsExternal() const { return desc_.flags.is_external; }
inline bool IsVaAllocated() const { return desc_.flags.is_va_required; }
inline bool IsBlitKernelObject() const { return desc_.flags.is_blit_kernel_object; }
inline uint32_t Flags() const { return desc_.flags.reserved; }
inline int GetAllocInfo() const { return desc_.mem_flags; }
inline bool IsFineGrain() const { return (desc_.mem_flags & thunk_proxy::kFineGrain); }
inline bool IsSameAdapter(const LUID &luid) const {
return (desc_.adapter_luid.HighPart == luid.HighPart &&
desc_.adapter_luid.LowPart == luid.LowPart);
}
inline void GetQueueReference() { desc_.flags.is_queue_referenced = 1; }
inline void PutQueueReference() { desc_.flags.is_queue_referenced = 0; }
inline bool IsQueueReferenced() const { return desc_.flags.is_queue_referenced; }
inline void IncSharedReference() { desc_.flags.is_imported_from_same_process++; }
inline uint32_t DecSharedReference() { return (desc_.flags.is_imported_from_same_process == 0) ? 0 : --desc_.flags.is_imported_from_same_process; }
inline bool IsSharedFromSameProcess() const { return desc_.flags.is_imported_from_same_process > 0; }
WinAllocationHandle GetAllocationHandle(size_t index) const { return alloc_handles_ptr_[index]; }
size_t NumChunks() const { return num_allocations_; }
const GpuMemoryHandle GetGpuMemoryHandle() const {
return reinterpret_cast<GpuMemoryHandle>(const_cast<GpuMemory*>(this));
}
static GpuMemory *Convert(GpuMemoryHandle handle) { return reinterpret_cast<GpuMemory *>(handle); }
ErrorCode ReserveGpuVirtualAddress(gpusize base_virt_addr, gpusize va_size, gpusize alignment);
ErrorCode FreeGpuVirtualAddress(gpusize va_start_address, gpusize va_size);
ErrorCode MapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
ErrorCode UnmapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
ErrorCode MakeResident();
ErrorCode Evict();
ErrorCode ExportPhysicalHandle(int* dmabuf_fd, uint32_t flags = SHARED_ALLOCATION_ALL_ACCESS);
ErrorCode ImportPhysicalHandle(const GpuMemoryCreateInfo &create_info, gpusize *gpu_addr = nullptr);
~GpuMemory();
protected:
explicit GpuMemory(WDDMDevice *device);
private:
ErrorCode CreatePhysicalMemory();
ErrorCode FreePhysicalMemory();
uint64_t AdjustSize(gpusize size) const;
private:
friend class WDDMDevice;
WDDMDevice *const device_;
GpuMemoryDesc desc_;
size_t num_allocations_;
WinAllocationHandle *alloc_handles_ptr_;
WinAllocationHandle alloc_handle_; // Optimization for num_allocations_ is 1
WinResourceHandle resource_; // Handle to a resource object that wraps the allocation. Used for shared resources
int mem_fd_; // IPC sigal's sys mem fd
DISALLOW_COPY_AND_ASSIGN(GpuMemory);
};
} // namespace thunk
} // namespace wsl
#endif
+370
Ver ficheiro
@@ -0,0 +1,370 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_QUEUE_H_
#define _WSL_INC_WDDM_QUEUE_H_
#include <cinttypes>
#include <condition_variable>
#include <iostream>
#include <queue>
#include <utility>
#include "impl/wddm/types.h"
#include "impl/wddm/device.h"
#include "impl/wddm/gpu_memory.h"
#include "impl/hsa/hsa_ext_amd.h"
#include "impl/hsa/amd_hsa_queue.h"
#include "impl/hsa/amd_hsa_signal.h"
#include "impl/wddm/cmd_util.h"
namespace wsl {
namespace thunk {
class Queue;
class WDDMDevice;
class WDDMQueue {
public:
WDDMQueue(WDDMDevice *device,
uint64_t cmdbuf_addr,
uint32_t cmdbuf_size,
uint32_t engine,
bool use_hws = true) :
device(device),
context(0),
queue(0),
syncobj(0),
sync_addr(NULL),
cmdbuf(0),
cmdbuf_addr(cmdbuf_addr),
cmdbuf_size(cmdbuf_size),
queue_engine(engine),
use_hws(use_hws),
prio(thunk_proxy::kNormal) {
}
virtual ~WDDMQueue() { }
virtual hsa_status_t Init(void) { return HSA_STATUS_SUCCESS; }
virtual hsa_status_t Fini(void) { return HSA_STATUS_SUCCESS; }
virtual void RingDoorbell() { }
virtual void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }
hsa_status_t SwsInit(void);
hsa_status_t SwsFini(void);
hsa_status_t SwsSubmit(uint64_t command_addr,
uint64_t command_size,
uint64_t fence_value);
hsa_status_t HwsInit(void);
hsa_status_t HwsFini(void);
hsa_status_t HwsSubmit(uint64_t command_addr,
uint64_t command_size,
uint64_t fence_value);
hsa_status_t SetPriority(hsa_amd_queue_priority_t priority);
uint64_t *GetSyncAddr(void) const { return sync_addr; }
uint64_t GetCmdbufAddr(void) const { return cmdbuf_addr; }
thunk_proxy::SchedLevel ConvertSchedLevel(hsa_amd_queue_priority_t prio) const {
switch (prio) {
case HSA_AMD_QUEUE_PRIORITY_LOW:
return thunk_proxy::kLow;
case HSA_AMD_QUEUE_PRIORITY_HIGH:
return thunk_proxy::kHigh;
case HSA_AMD_QUEUE_PRIORITY_NORMAL:
default:
return thunk_proxy::kNormal;
}
}
WDDMDevice *device;
D3DKMT_HANDLE context;
D3DKMT_HANDLE queue;
D3DKMT_HANDLE syncobj;
uint64_t *sync_addr;
GpuMemoryHandle cmdbuf;
uint64_t cmdbuf_addr;
uint32_t cmdbuf_size;
GpuMemoryHandle queue_mem;
uint64_t queue_addr;
uint32_t queue_engine;
bool use_hws;
thunk_proxy::SchedLevel prio;
};
class ComputeQueue : public WDDMQueue {
public:
ComputeQueue(WDDMDevice *device,
void *ring,
uint64_t ring_size,
std::atomic<uint64_t> *ring_wptr,
std::atomic<uint64_t> *ring_rptr,
volatile int64_t *error_addr,
uint32_t cmdbuf_size,
uint32_t engine,
bool use_hws = true);
~ComputeQueue();
virtual hsa_status_t Init(void);
virtual hsa_status_t Fini(void);
virtual hsa_status_t Submit(void);
void* GetRing(void) const { return ring; }
uint64_t GetRingSize(void) const { return ring_size; }
std::atomic<uint64_t>* GetRingWptr(void) const { return ring_wptr; }
std::atomic<uint64_t>* GetRingRptr(void) const { return ring_rptr; }
uint64_t GetAqlWriteIndex(void) const { return cmdbuf_aql_frame_write_index; }
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size; }
void* GetHsaQueueAddr(void) const { return ring; }
bool IsInvalidPacket(void) const {
uint16_t *packet = (uint16_t *)((char *)ring +
(cmdbuf_aql_frame_write_index % ring_size) * 64);
return ((*packet >> HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1))
== HSA_PACKET_TYPE_INVALID;
}
hsa_status_t Process(void);
uint64_t * GetDoorbellPtr() const { return (uint64_t *)&doorbell_signal_value_; }
void RingDoorbell();
private:
hsa_status_t KernelDispatchAqlToPm4(char *cpu, hsa_kernel_dispatch_packet_t *packet);
hsa_status_t BarrierGenericAqlToPm4(char *cpu, hsa_barrier_and_packet_t *packet, bool is_or = false);
uint64_t CalcDispatchGroups(hsa_kernel_dispatch_packet_t *packet);
uint64_t CalcDispatchWavesPerGroup(hsa_kernel_dispatch_packet_t *packet, bool wave32);
struct amd_aql_pm4_ib {
uint16_t header;
uint16_t ven_hdr;
uint32_t ib_jump_cmd[4];
uint32_t dw_cnt_remain;
uint32_t reserved[8];
hsa_signal_t completion_signal;
};
hsa_status_t VendorSpecificAqlToPm4(char *cpu, amd_aql_pm4_ib *packet);
hsa_status_t SwitchAql2PM4(void);
hsa_status_t PreSubmit(void);
hsa_status_t EndSubmit(void);
void *ring;
uint64_t ring_size;
std::atomic<uint64_t> *ring_wptr;
std::atomic<uint64_t> *ring_rptr;
// ib_start_addr is the current ib start address
uint64_t ib_start_addr;
// ib_size is the current ib size.
uint64_t ib_size;
// record the last submitted aql frame write index
uint64_t sync_point;
uint64_t cmdbuf_aql_frame_write_index;
uint32_t cmdbuf_aql_frame_size;
uint64_t *signal_addr_;
bool platform_atomic_support_;
bool needs_barrier;
bool ready_to_submit;
CmdUtil cmd_util;
private:
bool EnableProfiling() {
return AMD_HSA_BITS_GET(amd_queue_rocr_->queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING);
}
void HandleError(hsa_status_t status);
bool UpdateScratch(hsa_kernel_dispatch_packet_t *packet, bool wave32);
uint32_t UpdateIndexStride(uint32_t srd, bool wave32);
void *ScratchBase() { return scratch_base_; }
void AppendCmdbufSratchBaseOffset(int offset) {
scratch_base_offset_array_.push_back(offset);
}
bool RelocateCmdbufScratchBase(uint64_t addr);
uint32_t ScratchSizePerWave() { return scratch_size_per_wave_; }
uint64_t GetKernelObjAddr(uint64_t addr) const;
void InitScratchSRD();
GpuMemoryHandle amd_queue_mem_;
amd_queue_v2_t *amd_queue_;
amd_queue_v2_t *amd_queue_rocr_;
uint64_t doorbell_signal_value_;
volatile std::atomic<int64_t> *error_code_;
std::thread aql_to_pm4_thread_;
bool thread_stop_;
std::mutex thread_cond_lock_;
std::condition_variable thread_cond_;
static void AqlToPm4Thread(ComputeQueue *queue);
uint64_t max_scratch_waves_;
uint64_t dispatch_waves_;
uint64_t scratch_size_per_wave_;
uint64_t scratch_size_;
uint64_t total_scratch_size_;
void *scratch_base_;
uint32_t scratch_mem_alignment_size_;
GpuMemoryHandle scratch_mem_;
std::vector<int> scratch_base_offset_array_;
};
class SDMAQueue : public WDDMQueue {
public:
SDMAQueue(WDDMDevice *device,
void *ring,
uint64_t cmdbuf_size,
uint32_t engine,
bool use_hws = true);
virtual ~SDMAQueue();
hsa_status_t Init(void);
hsa_status_t Fini(void);
hsa_status_t Submit(void);
int PreparePacket(uint32_t offset, uint64_t size);
void WaitQueue(void) {
device->CpuWait(&syncobj, &rptr_next, 1, false);
}
uint64_t * GetRingWptr(void) { return &wptr_next_; }
uint64_t * GetRingRptr(void) { return WDDMQueue::GetSyncAddr(); }
uint64_t * GetDoorbellPtr() { return &doorbell_; }
void RingDoorbell();
void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }
private:
uint64_t wptr_next_;
uint64_t wptr_pre_;
uint64_t rptr_next;
uint64_t doorbell_;
std::vector<std::pair<uint64_t, uint64_t>> wptr_queue_;
uint64_t ib_size;
uint64_t ib_start_addr;
std::thread thread_;
bool thread_stop_;
std::mutex thread_cond_lock_;
std::condition_variable thread_cond_;
static void SdmaThread(SDMAQueue *queue);
struct SDMA_PKT_POLL_REGMEM {
union {
struct {
unsigned int op : 8;
unsigned int sub_op : 8;
unsigned int reserved_0 : 10;
unsigned int hdp_flush : 1;
unsigned int reserved_1 : 1;
unsigned int func : 3;
unsigned int mem_poll : 1;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
union {
struct {
unsigned int addr_31_0 : 32;
};
unsigned int DW_1_DATA;
} ADDR_LO_UNION;
union {
struct {
unsigned int addr_63_32 : 32;
};
unsigned int DW_2_DATA;
} ADDR_HI_UNION;
union {
struct {
unsigned int value : 32;
};
unsigned int DW_3_DATA;
} VALUE_UNION;
union {
struct {
unsigned int mask : 32;
};
unsigned int DW_4_DATA;
} MASK_UNION;
union {
struct {
unsigned int interval : 16;
unsigned int retry_count : 12;
unsigned int reserved_0 : 4;
};
unsigned int DW_5_DATA;
} DW5_UNION;
};
const unsigned int SDMA_OP_POLL_REGMEM = 8;
bool IsPollPacket(SDMA_PKT_POLL_REGMEM* pkt) {
return pkt->HEADER_UNION.op == SDMA_OP_POLL_REGMEM &&
pkt->HEADER_UNION.mem_poll == 1 &&
pkt->HEADER_UNION.func == 3;
}
uint32_t WrapIntoRocrRing(uint64_t idx) { return (idx & (cmdbuf_size - 1)); }
};
} // namespace thunk
} // namespace wsl
#endif
+61
Ver ficheiro
@@ -0,0 +1,61 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_STATUS_H
#define _WSL_INC_WDDM_STATUS_H
enum class ErrorCode {
Success,
DeviceLost,
UnSupported,
NotReady,
OutOfMemory,
OutOfGpuMemory,
OutOfHandleApeMemory,
Timeout,
SyscallFail,
InvalidateParams,
SameProcessSameDevice,
Unknown,
};
#endif
+233
Ver ficheiro
@@ -0,0 +1,233 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_THUNKS_H
#define _WSL_INC_WDDM_THUNKS_H
#include "impl/wddm/status.h"
#include "impl/wddm/types.h"
#include "dxcore_loader.h"
namespace wsl {
namespace thunk {
inline ErrorCode TranslateNtStatus(NTSTATUS status) {
switch (status) {
case STATUS_SUCCESS:
return ErrorCode::Success;
case STATUS_PENDING:
return ErrorCode::NotReady;
case STATUS_NO_MEMORY:
return ErrorCode::OutOfMemory;
case STATUS_DEVICE_REMOVED:
return ErrorCode::DeviceLost;
case STATUS_GRAPHICS_NO_VIDEO_MEMORY:
return ErrorCode::OutOfGpuMemory;
case STATUS_TIMEOUT:
return ErrorCode::Timeout;
case STATUS_INVALID_PARAMETER:
return ErrorCode::InvalidateParams;
default:
break;
}
return ErrorCode::Unknown;
}
namespace d3dthunk {
typedef D3DKMT_CREATEALLOCATION CreateAllocationArgs;
typedef D3DKMT_CREATECONTEXT CreateContextArgs;
typedef D3DKMT_CREATECONTEXTVIRTUAL CreateContextVirtualArgs;
typedef D3DKMT_CREATEPAGINGQUEUE CreatePagingQueueArgs;
typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT CreateSynchronizationObjectArgs;
typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT2 CreateSynchronizationObject2Args;
typedef D3DKMT_ESCAPE EscapeArgs;
typedef D3DKMT_EVICT EvictArgs;
typedef D3DKMT_FREEGPUVIRTUALADDRESS FreeGpuVirtualAddressArgs;
typedef D3DKMT_LOCK LockArgs;
typedef D3DKMT_LOCK2 Lock2Args;
typedef D3DKMT_OPENRESOURCE OpenResourceArgs;
typedef D3DKMT_OPENRESOURCEFROMNTHANDLE OpenResourceFromNtHandleArgs;
typedef D3DKMT_QUERYADAPTERINFO QueryAdapterInfoArgs;
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT SignalSynchronizationObjectArgs;
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 SignalSynchronizationObject2Args;
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU SignalSynchronizationObjectFromCpuArgs;
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU2 SignalSynchronizationObjectFromGpuArgs;
typedef D3DKMT_SUBMITCOMMAND SubmitCommandArgs;
typedef D3DKMT_UNLOCK UnlockArgs;
typedef D3DKMT_UNLOCK2 Unlock2Args;
typedef D3DKMT_UPDATEGPUVIRTUALADDRESS UpdateGpuVirtualAddressArgs;
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT WaitForSynchronizationObjectArgs;
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT2 WaitForSynchronizationObject2Args;
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU WaitForSynchronizationObjectFromCpuArgs;
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU WaitForSynchronizationObjectFromGpuArgs;
typedef D3DKMT_ACQUIREKEYEDMUTEX AcquireKeyedMutexArgs;
typedef D3DKMT_RELEASEKEYEDMUTEX ReleaseKeyedMutexArgs;
typedef D3DKMT_OPENKEYEDMUTEX OpenKeyedMutexArgs;
typedef D3DKMT_DESTROYKEYEDMUTEX DestroyKeyedMutexArgs;
typedef D3DKMT_QUERYVIDEOMEMORYINFO QueryVideoMemoryInfoArgs;
typedef D3DKMT_CREATEHWQUEUE CreateHwQueueArgs;
typedef D3DKMT_DESTROYHWQUEUE DestroyHwQueueArgs;
typedef D3DKMT_SUBMITCOMMANDTOHWQUEUE SubmitCommandToHwQueueArgs;
typedef D3DKMT_SUBMITPRESENTTOHWQUEUE SubmitPresentToHwQueueArgs;
typedef D3DKMT_SUBMITSIGNALSYNCOBJECTSTOHWQUEUE SubmitSignalSyncObjectsToHwQueueArgs;
typedef D3DKMT_SUBMITWAITFORSYNCOBJECTSTOHWQUEUE SubmitWaitForSyncObjectsToHwQueueArgs;
typedef D3DKMT_CREATESYNCFILE CreateSyncFileArgs;
inline ErrorCode MapGpuVirtualAddress(D3DDDI_MAPGPUVIRTUALADDRESS *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTMapGpuVirtualAddress(args)));
}
inline ErrorCode CreateAllocation(CreateAllocationArgs *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTCreateAllocation2(args)));
}
inline ErrorCode DestroyAllocation(
WinDeviceHandle device,
WinResourceHandle resource,
size_t num_allocations,
const WinAllocationHandle *alloc_handles) {
D3DKMT_DESTROYALLOCATION2 args{};
memset(&args, 0, sizeof(args));
args.hDevice = device;
if (resource) {
args.hResource = resource;
} else {
args.phAllocationList = alloc_handles;
args.AllocationCount = num_allocations;
}
return TranslateNtStatus(DXCORE_CALL(D3DKMTDestroyAllocation2(&args)));
}
inline ErrorCode ReserveGpuVirtualAddress(D3DDDI_RESERVEGPUVIRTUALADDRESS *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTReserveGpuVirtualAddress(args)));
}
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
gpusize size,
gpusize base_address,
gpusize *out_addr) {
D3DDDI_RESERVEGPUVIRTUALADDRESS args{};
args.hPagingQueue = handle;
args.Size = size;
args.BaseAddress = base_address;
auto code = ReserveGpuVirtualAddress(&args);
if (code == ErrorCode::Success)
*out_addr = args.VirtualAddress;
return code;
}
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
gpusize size,
gpusize minimum_address,
gpusize maximum_address,
gpusize *out_addr) {
D3DDDI_RESERVEGPUVIRTUALADDRESS args{};
args.hPagingQueue = handle;
args.Size = size;
args.MinimumAddress = minimum_address;
args.MaximumAddress = maximum_address;
auto code = ReserveGpuVirtualAddress(&args);
if (code == ErrorCode::Success)
*out_addr = args.VirtualAddress;
return code;
}
inline ErrorCode FreeGpuVirtualAddress(FreeGpuVirtualAddressArgs *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTFreeGpuVirtualAddress(args)));
}
inline ErrorCode FreeGpuVirtualAddress(WinAdapterHandle handle,
gpusize base_address,
gpusize size) {
FreeGpuVirtualAddressArgs args{};
args.hAdapter = handle;
args.Size = size;
args.BaseAddress = base_address;
return FreeGpuVirtualAddress(&args);
}
inline ErrorCode MakeResident(D3DDDI_MAKERESIDENT *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTMakeResident(args)));
}
inline ErrorCode Evict(EvictArgs *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTEvict(args)));
}
inline ErrorCode ShareObjects(size_t num_allocations,
WinResourceHandle resource,
uint32_t flags,
int* dmabuf_fd) {
OBJECT_ATTRIBUTES obj_attr;
HANDLE nt_handle;
ErrorCode ret;
InitializeObjectAttributes(&obj_attr, nullptr, OBJ_INHERIT, nullptr, nullptr);
ret = TranslateNtStatus(DXCORE_CALL(D3DKMTShareObjects(num_allocations,
&resource, &obj_attr, flags, &nt_handle)));
if (ret == ErrorCode::Success)
*dmabuf_fd = *(reinterpret_cast<int*>(&nt_handle));
else
*dmabuf_fd = -1;
return ret;
}
inline ErrorCode QueryResourceInfoFromNtHandle(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTQueryResourceInfoFromNtHandle(args)));
}
inline ErrorCode OpenResourceFromNtHandle(D3DKMT_OPENRESOURCEFROMNTHANDLE *args) {
return TranslateNtStatus(DXCORE_CALL(D3DKMTOpenResourceFromNtHandle(args)));
}
} // namespace d3dthunk
} // namespace thunk
} // namespace wsl
#endif
+101
Ver ficheiro
@@ -0,0 +1,101 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_TYPES_H_
#define _WSL_INC_WDDM_TYPES_H_
#include <cstdint>
#include <ntstatus.h>
#include "impl/thunk_proxy/wddm_types.h"
// windows wchar is 16bit, but linux is 32bit
// seems libdxcore (not dxgkrnl.ko) convert thunk windows wchar to linux one
// so only accept 32bit wchar args. note driver private data structure still
// use 16bit wchar
#define WCHAR wchar_t
#define PCWSTR const wchar_t *
#include <d3dkmthk.h>
#undef WCHAR
#undef PCWSTR
using gpusize = uint64_t; // Used to specify GPU addresses and sizes of GPU allocations
using WinAllocationHandle = D3DKMT_HANDLE;
using WinResourceHandle = D3DKMT_HANDLE;
using WinContextHandle = D3DKMT_HANDLE;
using WinDeviceHandle = D3DKMT_HANDLE;
using WinAdapterHandle = D3DKMT_HANDLE;
//reference dk/winnt.h
#define STANDARD_RIGHTS_REQUIRED (0x000F0000L)
//reference dk/ntdef.h
#define OBJ_INHERIT (0x00000002L)
typedef WCHAR *PWCHAR, *LPWCH, *PWCH;
typedef struct _UNICODE_STRING {
USHORT Length;
USHORT MaximumLength;
#ifdef MIDL_PASS
[size_is(MaximumLength / 2), length_is((Length) / 2) ] USHORT * Buffer;
#else // MIDL_PASS
_Field_size_bytes_part_opt_(MaximumLength, Length) PWCH Buffer;
#endif // MIDL_PASS
} UNICODE_STRING;
typedef UNICODE_STRING *PUNICODE_STRING;
typedef const UNICODE_STRING *PCUNICODE_STRING;
typedef struct _OBJECT_ATTRIBUTES {
ULONG Length;
HANDLE RootDirectory;
PUNICODE_STRING ObjectName;
ULONG Attributes;
PVOID SecurityDescriptor;
PVOID SecurityQualityOfService;
} OBJECT_ATTRIBUTES;
#define InitializeObjectAttributes( p, n, a, r, s ) { \
(p)->Length = sizeof( OBJECT_ATTRIBUTES ); \
(p)->RootDirectory = r; \
(p)->Attributes = a; \
(p)->ObjectName = n; \
(p)->SecurityDescriptor = s; \
(p)->SecurityQualityOfService = NULL; \
}
#endif
+86
Ver ficheiro
@@ -0,0 +1,86 @@
#ifndef _WSL_INC_WDDM_VA_MGR_H_
#define _WSL_INC_WDDM_VA_MGR_H_
#include <mutex>
#include <map>
#include "util/utils.h"
namespace wsl {
namespace thunk {
class VaMgr {
public:
VaMgr(uint64_t start, uint64_t size, uint64_t min_align);
~VaMgr();
/* Allocate `bytes` VA, if `align` is not zero, the returned address is aligned by `align`.
* If `addr` parameter is not zero, try best to allocate VA from fixed address `addr`.
*/
uint64_t Alloc(uint64_t bytes, uint64_t align, uint64_t addr = 0);
void Free(uint64_t addr);
private:
uint64_t AllocImpl(uint64_t bytes, uint64_t align);
struct Fragment {
using ptr = std::multimap<uint64_t, uint64_t>::iterator;
ptr free_list_entry_;
struct {
uint64_t size : 63;
bool is_free : 1;
};
Fragment() : size(0), is_free(false) {}
Fragment(ptr iterator, uint64_t len, bool is_free)
: free_list_entry_(iterator), size(len), is_free(is_free) {}
};
static inline Fragment make_fragment(typename Fragment::ptr iter, uint64_t len) {
return {iter, len, true};
}
inline Fragment make_fragment(uint64_t len) { return {free_list_.end(), len, false}; }
static inline bool is_free(const Fragment& f) { return f.is_free; }
void set_used(Fragment& f) {
f.is_free = false;
f.free_list_entry_ = free_list_.end();
}
static void set_free(Fragment& f, typename Fragment::ptr iter) {
f.free_list_entry_ = iter;
f.is_free = true;
}
inline void remove_free_list_entry(Fragment& frag) {
if (frag.free_list_entry_ != free_list_.end()) {
free_list_.erase(frag.free_list_entry_);
frag.free_list_entry_ = free_list_.end();
}
}
inline void add_free_fragment(uint64_t size, uint64_t base) {
auto it = free_list_.insert(std::make_pair(size, base));
frag_map_[base] = make_fragment(it, size);
}
inline void add_used_fragment(uint64_t size, uint64_t base) {
frag_map_[base] = make_fragment(size);
}
// Indexed by size
std::multimap<uint64_t, uint64_t> free_list_;
// Indexed by VA, each fragment has no overlap
std::map<uint64_t, Fragment> frag_map_;
uint64_t min_align_;
std::mutex lock_; // Mutex protecting allocation and free of va
DISALLOW_COPY_AND_ASSIGN(VaMgr);
};
} // namespace thunk
} // namespace wsl
#endif