librocdxg: add rocr header files
Signed-off-by: Flora Cui <flora.cui@amd.com>
Esse commit está contido em:
+1131
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -0,0 +1,91 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The following set of header files provides definitions for AMD GPU
|
||||
// Architecture:
|
||||
// - amd_hsa_common.h
|
||||
// - amd_hsa_elf.h
|
||||
// - amd_hsa_kernel_code.h
|
||||
// - amd_hsa_queue.h
|
||||
// - amd_hsa_signal.h
|
||||
//
|
||||
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
|
||||
// information.
|
||||
|
||||
#ifndef AMD_HSA_COMMON_H
|
||||
#define AMD_HSA_COMMON_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Descriptive version of the HSA Application Binary Interface.
|
||||
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
|
||||
|
||||
// Alignment attribute that specifies a minimum alignment (in bytes) for
|
||||
// variables of the specified type.
|
||||
#if defined(__GNUC__)
|
||||
# define __ALIGNED__(x) __attribute__((aligned(x)))
|
||||
#elif defined(_MSC_VER)
|
||||
# define __ALIGNED__(x) __declspec(align(x))
|
||||
#elif defined(RC_INVOKED)
|
||||
# define __ALIGNED__(x)
|
||||
#else
|
||||
# error
|
||||
#endif
|
||||
|
||||
// Creates enumeration entries for packed types. Enumeration entries include
|
||||
// bit shift amount, bit width, and bit mask.
|
||||
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
|
||||
name##_SHIFT = (shift), \
|
||||
name##_WIDTH = (width), \
|
||||
name = (((1 << (width)) - 1) << (shift)) \
|
||||
|
||||
// Gets bits for specified mask from specified src packed instance.
|
||||
#define AMD_HSA_BITS_GET(src, mask) \
|
||||
((src & mask) >> mask ## _SHIFT) \
|
||||
|
||||
// Sets val bits for specified mask in specified dst packed instance.
|
||||
#define AMD_HSA_BITS_SET(dst, mask, val) \
|
||||
dst &= (~(1 << mask##_SHIFT) & ~mask); \
|
||||
dst |= (((val) << mask##_SHIFT) & mask) \
|
||||
|
||||
#endif // AMD_HSA_COMMON_H
|
||||
@@ -0,0 +1,467 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Undefine the macro in case it is defined in the system elf.h.
|
||||
#undef EM_AMDGPU
|
||||
|
||||
#ifndef AMD_HSA_ELF_H
|
||||
#define AMD_HSA_ELF_H
|
||||
|
||||
// AMD GPU Specific ELF Header Enumeration Values.
|
||||
//
|
||||
// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
|
||||
// code object V1 defintions which are not part of the LLVM header. Code object
|
||||
// V1 was only supported by the Finalizer which is now deprecated and removed.
|
||||
//
|
||||
// TODO: Deprecate and remove V1 support and replace this header with using the
|
||||
// LLVM header.
|
||||
namespace ELF {
|
||||
|
||||
// Machine architectures
|
||||
// See current registered ELF machine architectures at:
|
||||
// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
|
||||
enum {
|
||||
EM_AMDGPU = 224, // AMD GPU architecture
|
||||
};
|
||||
|
||||
// OS ABI identification.
|
||||
enum {
|
||||
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
|
||||
};
|
||||
|
||||
// AMDGPU OS ABI Version identification.
|
||||
enum {
|
||||
// ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
|
||||
// was never defined for V1.
|
||||
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
|
||||
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
|
||||
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
|
||||
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
|
||||
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
|
||||
};
|
||||
|
||||
// AMDGPU specific e_flags.
|
||||
enum : unsigned {
|
||||
// Processor selection mask for EF_AMDGPU_MACH_* values.
|
||||
EF_AMDGPU_MACH = 0x0ff,
|
||||
|
||||
// Not specified processor.
|
||||
EF_AMDGPU_MACH_NONE = 0x000,
|
||||
|
||||
// AMDGCN-based processors.
|
||||
// clang-format off
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1153 = 0x058,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f,
|
||||
// clang-format on
|
||||
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
|
||||
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
|
||||
// Indicates if the trap handler is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
|
||||
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
|
||||
// Indicates if the "sramecc" target feature is enabled for all code
|
||||
// contained in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
|
||||
|
||||
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
||||
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
|
||||
// XNACK is not supported.
|
||||
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
|
||||
// XNACK is any/default/unspecified.
|
||||
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
|
||||
// XNACK is off.
|
||||
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
|
||||
// XNACK is on.
|
||||
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
|
||||
|
||||
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
|
||||
// SRAMECC is not supported.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
|
||||
// SRAMECC is any/default/unspecified.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
|
||||
// SRAMECC is off.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
|
||||
// SRAMECC is on.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
|
||||
|
||||
// Generic target versioning. This is contained in the list byte of EFLAGS.
|
||||
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
|
||||
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
|
||||
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
|
||||
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
|
||||
};
|
||||
|
||||
// ELF Relocation types for AMDGPU.
|
||||
enum : unsigned {
|
||||
R_AMDGPU_ABS32_LO = 1,
|
||||
R_AMDGPU_ABS32_HI = 2,
|
||||
R_AMDGPU_ABS64 = 3,
|
||||
R_AMDGPU_ABS32 = 6,
|
||||
R_AMDGPU_RELATIVE64 = 13,
|
||||
};
|
||||
|
||||
} // end namespace ELF
|
||||
|
||||
// ELF Section Header Flag Enumeration Values.
|
||||
#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
|
||||
|
||||
//
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
|
||||
AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
|
||||
AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
|
||||
AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
|
||||
AMDGPU_HSA_SEGMENT_LAST,
|
||||
} amdgpu_hsa_elf_segment_t;
|
||||
|
||||
// ELF Program Header Type Enumeration Values.
|
||||
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
|
||||
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
|
||||
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
|
||||
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
|
||||
|
||||
// ELF Symbol Type Enumeration Values.
|
||||
#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
|
||||
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
|
||||
#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
|
||||
|
||||
// ELF Symbol Binding Enumeration Values.
|
||||
#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
|
||||
|
||||
// ELF Symbol Other Information Creation/Retrieval.
|
||||
#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
|
||||
#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
|
||||
#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
|
||||
} amdgpu_hsa_symbol_allocation_t;
|
||||
|
||||
// ELF Symbol Allocation Enumeration Values.
|
||||
#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
|
||||
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
|
||||
#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
|
||||
#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
|
||||
AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
|
||||
AMDGPU_HSA_SYMBOL_FLAG_LAST,
|
||||
} amdgpu_hsa_symbol_flag_t;
|
||||
|
||||
// ELF Symbol Flag Enumeration Values.
|
||||
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
|
||||
|
||||
// Legacy/V1 AMD GPU Relocation Type Enumeration Values.
|
||||
#define R_AMDGPU_V1_NONE 0
|
||||
#define R_AMDGPU_V1_32_LOW 1
|
||||
#define R_AMDGPU_V1_32_HIGH 2
|
||||
#define R_AMDGPU_V1_64 3
|
||||
#define R_AMDGPU_V1_INIT_SAMPLER 4
|
||||
#define R_AMDGPU_V1_INIT_IMAGE 5
|
||||
#define R_AMDGPU_V1_RELATIVE64 13
|
||||
|
||||
// AMD GPU Note Type Enumeration Values.
|
||||
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
|
||||
#define NT_AMD_HSA_HSAIL 2
|
||||
#define NT_AMD_HSA_ISA_VERSION 3
|
||||
#define NT_AMD_HSA_PRODUCER 4
|
||||
#define NT_AMD_HSA_PRODUCER_OPTIONS 5
|
||||
#define NT_AMD_HSA_EXTENSION 6
|
||||
#define NT_AMD_HSA_ISA_NAME 11
|
||||
/* AMDGPU snapshots of runtime, agent and queues state for use in core dump */
|
||||
#define NT_AMDGPU_CORE_STATE 33
|
||||
#define NT_AMD_HSA_HLDEBUG_DEBUG 101
|
||||
#define NT_AMD_HSA_HLDEBUG_TARGET 102
|
||||
|
||||
// AMD GPU Metadata Kind Enumeration Values.
|
||||
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_METADATA_KIND_NONE = 0,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
|
||||
} amdgpu_hsa_metadata_kind_t;
|
||||
|
||||
// AMD GPU Sampler Coordinate Normalization Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
|
||||
AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
|
||||
} amdgpu_hsa_sampler_coord_t;
|
||||
|
||||
// AMD GPU Sampler Filter Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
|
||||
AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
|
||||
} amdgpu_hsa_sampler_filter_t;
|
||||
|
||||
// AMD GPU Sampler Addressing Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
|
||||
} amdgpu_hsa_sampler_addressing_t;
|
||||
|
||||
// AMD GPU Sampler Descriptor.
|
||||
typedef struct amdgpu_hsa_sampler_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_sampler_coord8_t coord;
|
||||
amdgpu_hsa_sampler_filter8_t filter;
|
||||
amdgpu_hsa_sampler_addressing8_t addressing;
|
||||
uint8_t reserved1;
|
||||
} amdgpu_hsa_sampler_descriptor_t;
|
||||
|
||||
// AMD GPU Image Geometry Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_geometry8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
|
||||
} amdgpu_hsa_image_geometry_t;
|
||||
|
||||
// AMD GPU Image Channel Order Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
|
||||
} amdgpu_hsa_image_channel_order_t;
|
||||
|
||||
// AMD GPU Image Channel Type Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
|
||||
} amdgpu_hsa_image_channel_type_t;
|
||||
|
||||
// AMD GPU Image Descriptor.
|
||||
typedef struct amdgpu_hsa_image_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_image_geometry8_t geometry;
|
||||
amdgpu_hsa_image_channel_order8_t channel_order;
|
||||
amdgpu_hsa_image_channel_type8_t channel_type;
|
||||
uint8_t reserved1;
|
||||
uint64_t width;
|
||||
uint64_t height;
|
||||
uint64_t depth;
|
||||
uint64_t array;
|
||||
} amdgpu_hsa_image_descriptor_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_code_object_version_s {
|
||||
uint32_t major_version;
|
||||
uint32_t minor_version;
|
||||
} amdgpu_hsa_note_code_object_version_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_hsail_s {
|
||||
uint32_t hsail_major_version;
|
||||
uint32_t hsail_minor_version;
|
||||
uint8_t profile;
|
||||
uint8_t machine_model;
|
||||
uint8_t default_float_round;
|
||||
} amdgpu_hsa_note_hsail_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_isa_s {
|
||||
uint16_t vendor_name_size;
|
||||
uint16_t architecture_name_size;
|
||||
uint32_t major;
|
||||
uint32_t minor;
|
||||
uint32_t stepping;
|
||||
char vendor_and_architecture_name[1];
|
||||
} amdgpu_hsa_note_isa_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_producer_s {
|
||||
uint16_t producer_name_size;
|
||||
uint16_t reserved;
|
||||
uint32_t producer_major_version;
|
||||
uint32_t producer_minor_version;
|
||||
char producer_name[1];
|
||||
} amdgpu_hsa_note_producer_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_producer_options_s {
|
||||
uint16_t producer_options_size;
|
||||
char producer_options[1];
|
||||
} amdgpu_hsa_note_producer_options_t;
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
|
||||
AMDGPU_HSA_RODATA_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_RODATA_READONLY_AGENT,
|
||||
AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
|
||||
AMDGPU_HSA_DATA_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_DATA_READONLY_AGENT,
|
||||
AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
|
||||
AMDGPU_HSA_BSS_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_BSS_READONLY_AGENT,
|
||||
AMDGPU_HSA_SECTION_LAST,
|
||||
} amdgpu_hsa_elf_section_t;
|
||||
|
||||
#endif // AMD_HSA_ELF_H
|
||||
@@ -0,0 +1,270 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_KERNEL_CODE_H
|
||||
#define AMD_HSA_KERNEL_CODE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Kernel Code Version Enumeration Values.
|
||||
typedef uint32_t amd_kernel_code_version32_t;
|
||||
enum amd_kernel_code_version_t {
|
||||
AMD_KERNEL_CODE_VERSION_MAJOR = 1,
|
||||
AMD_KERNEL_CODE_VERSION_MINOR = 1
|
||||
};
|
||||
|
||||
// AMD Machine Kind Enumeration Values.
|
||||
typedef uint16_t amd_machine_kind16_t;
|
||||
enum amd_machine_kind_t {
|
||||
AMD_MACHINE_KIND_UNDEFINED = 0,
|
||||
AMD_MACHINE_KIND_AMDGPU = 1
|
||||
};
|
||||
|
||||
// AMD Machine Version.
|
||||
typedef uint16_t amd_machine_version16_t;
|
||||
|
||||
// AMD Float Round Mode Enumeration Values.
|
||||
enum amd_float_round_mode_t {
|
||||
AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
|
||||
AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
|
||||
AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
|
||||
AMD_FLOAT_ROUND_MODE_ZERO = 3
|
||||
};
|
||||
|
||||
// AMD Float Denorm Mode Enumeration Values.
|
||||
enum amd_float_denorm_mode_t {
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
|
||||
AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
|
||||
};
|
||||
|
||||
// AMD Compute Program Resource Register One.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
|
||||
enum amd_compute_pgm_rsrc_one_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
|
||||
};
|
||||
|
||||
// AMD System VGPR Workitem ID Enumeration Values.
|
||||
enum amd_system_vgpr_workitem_id_t {
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
|
||||
};
|
||||
|
||||
// AMD Compute Program Resource Register Two.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
|
||||
enum amd_compute_pgm_rsrc_two_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
|
||||
};
|
||||
|
||||
// AMD Element Byte Size Enumeration Values.
|
||||
enum amd_element_byte_size_t {
|
||||
AMD_ELEMENT_BYTE_SIZE_2 = 0,
|
||||
AMD_ELEMENT_BYTE_SIZE_4 = 1,
|
||||
AMD_ELEMENT_BYTE_SIZE_8 = 2,
|
||||
AMD_ELEMENT_BYTE_SIZE_16 = 3
|
||||
};
|
||||
|
||||
// AMD Kernel Code Properties.
|
||||
typedef uint32_t amd_kernel_code_properties32_t;
|
||||
enum amd_kernel_code_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
|
||||
};
|
||||
|
||||
// AMD Power Of Two Enumeration Values.
|
||||
typedef uint8_t amd_powertwo8_t;
|
||||
enum amd_powertwo_t {
|
||||
AMD_POWERTWO_1 = 0,
|
||||
AMD_POWERTWO_2 = 1,
|
||||
AMD_POWERTWO_4 = 2,
|
||||
AMD_POWERTWO_8 = 3,
|
||||
AMD_POWERTWO_16 = 4,
|
||||
AMD_POWERTWO_32 = 5,
|
||||
AMD_POWERTWO_64 = 6,
|
||||
AMD_POWERTWO_128 = 7,
|
||||
AMD_POWERTWO_256 = 8
|
||||
};
|
||||
|
||||
// AMD Enabled Control Directive Enumeration Values.
|
||||
typedef uint64_t amd_enabled_control_directive64_t;
|
||||
enum amd_enabled_control_directive_t {
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
|
||||
};
|
||||
|
||||
// AMD Exception Kind Enumeration Values.
|
||||
typedef uint16_t amd_exception_kind16_t;
|
||||
enum amd_exception_kind_t {
|
||||
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
|
||||
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
|
||||
AMD_EXCEPTION_KIND_OVERFLOW = 4,
|
||||
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
|
||||
AMD_EXCEPTION_KIND_INEXACT = 16
|
||||
};
|
||||
|
||||
// AMD Control Directives.
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
|
||||
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
|
||||
amd_enabled_control_directive64_t enabled_control_directives;
|
||||
uint16_t enable_break_exceptions;
|
||||
uint16_t enable_detect_exceptions;
|
||||
uint32_t max_dynamic_group_size;
|
||||
uint64_t max_flat_grid_size;
|
||||
uint32_t max_flat_workgroup_size;
|
||||
uint8_t required_dim;
|
||||
uint8_t reserved1[3];
|
||||
uint64_t required_grid_size[3];
|
||||
uint32_t required_workgroup_size[3];
|
||||
uint8_t reserved2[60];
|
||||
} amd_control_directives_t;
|
||||
|
||||
// AMD Kernel Code.
|
||||
#define AMD_ISA_ALIGN_BYTES 256
|
||||
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
|
||||
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
|
||||
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_major;
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_minor;
|
||||
amd_machine_kind16_t amd_machine_kind;
|
||||
amd_machine_version16_t amd_machine_version_major;
|
||||
amd_machine_version16_t amd_machine_version_minor;
|
||||
amd_machine_version16_t amd_machine_version_stepping;
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
int64_t kernel_code_prefetch_byte_offset;
|
||||
uint64_t kernel_code_prefetch_byte_size;
|
||||
uint64_t max_scratch_backing_memory_byte_size;
|
||||
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
|
||||
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
|
||||
amd_kernel_code_properties32_t kernel_code_properties;
|
||||
uint32_t workitem_private_segment_byte_size;
|
||||
uint32_t workgroup_group_segment_byte_size;
|
||||
uint32_t gds_segment_byte_size;
|
||||
uint64_t kernarg_segment_byte_size;
|
||||
uint32_t workgroup_fbarrier_count;
|
||||
uint16_t wavefront_sgpr_count;
|
||||
uint16_t workitem_vgpr_count;
|
||||
uint16_t reserved_vgpr_first;
|
||||
uint16_t reserved_vgpr_count;
|
||||
uint16_t reserved_sgpr_first;
|
||||
uint16_t reserved_sgpr_count;
|
||||
uint16_t debug_wavefront_private_segment_offset_sgpr;
|
||||
uint16_t debug_private_segment_buffer_sgpr;
|
||||
amd_powertwo8_t kernarg_segment_alignment;
|
||||
amd_powertwo8_t group_segment_alignment;
|
||||
amd_powertwo8_t private_segment_alignment;
|
||||
amd_powertwo8_t wavefront_size;
|
||||
int32_t call_convention;
|
||||
uint8_t reserved1[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
amd_control_directives_t control_directives;
|
||||
} amd_kernel_code_t;
|
||||
|
||||
// TODO: this struct should be completely gone once debugger designs/implements
|
||||
// Debugger APIs.
|
||||
typedef struct amd_runtime_loader_debug_info_s {
|
||||
const void* elf_raw;
|
||||
size_t elf_size;
|
||||
const char *kernel_name;
|
||||
const void *owning_segment;
|
||||
} amd_runtime_loader_debug_info_t;
|
||||
|
||||
#endif // AMD_HSA_KERNEL_CODE_H
|
||||
@@ -0,0 +1,154 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_QUEUE_H
|
||||
#define AMD_HSA_QUEUE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Queue Properties.
|
||||
typedef uint32_t amd_queue_properties32_t;
|
||||
enum amd_queue_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
|
||||
};
|
||||
|
||||
// AMD Queue.
|
||||
#define AMD_QUEUE_ALIGN_BYTES 64
|
||||
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
|
||||
|
||||
// AMD Queue Capabilities.
|
||||
typedef uint32_t amd_queue_capabilities32_t;
|
||||
enum amd_queue_capabilities_t {
|
||||
/* This version of CP FW supports dual-scratch and async-reclaim */
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM, 0, 1),
|
||||
|
||||
/*
|
||||
* This version of ROCr supports async-reclaim and CP FW may access the
|
||||
* V2 fields.
|
||||
*/
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM, 1, 1),
|
||||
};
|
||||
|
||||
/* This is the original amd_queue_t structure. The definition is only kept
|
||||
* for reference purposes. This structure should not be used. */
|
||||
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
|
||||
hsa_queue_t hsa_queue;
|
||||
uint32_t caps;
|
||||
uint32_t reserved1[3];
|
||||
volatile uint64_t write_dispatch_id;
|
||||
uint32_t group_segment_aperture_base_hi;
|
||||
uint32_t private_segment_aperture_base_hi;
|
||||
uint32_t max_cu_id;
|
||||
uint32_t max_wave_id;
|
||||
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
|
||||
volatile uint32_t legacy_doorbell_lock;
|
||||
uint32_t reserved2[9];
|
||||
volatile uint64_t read_dispatch_id;
|
||||
uint32_t read_dispatch_id_field_base_byte_offset;
|
||||
uint32_t compute_tmpring_size;
|
||||
uint32_t scratch_resource_descriptor[4];
|
||||
uint64_t scratch_backing_memory_location;
|
||||
uint32_t reserved3[2];
|
||||
uint32_t scratch_wave64_lane_byte_size;
|
||||
amd_queue_properties32_t queue_properties;
|
||||
uint32_t reserved4[2];
|
||||
hsa_signal_t queue_inactive_signal;
|
||||
uint32_t reserved5[14];
|
||||
} amd_queue_t;
|
||||
|
||||
/*
|
||||
* AMD_QUEUE Version 2
|
||||
* amd_queue_v2_t is backwards compatible with amd_queue_t structure and can
|
||||
* be used with previous versions of CP FW. The added fields tagged as V2 are
|
||||
* ignored when running previous versions of CP FW.
|
||||
* CP FW will not try to access elements beyond the original 64-bytes
|
||||
* (sizeof(amd_queue_t)) unless the AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM bit is set.
|
||||
*/
|
||||
|
||||
#define MAX_NUM_XCC 128
|
||||
typedef struct scratch_last_used_index_xcc_s {
|
||||
volatile uint64_t main;
|
||||
volatile uint64_t alt;
|
||||
} scratch_last_used_index_xcc_t;
|
||||
|
||||
typedef struct AMD_QUEUE_ALIGN amd_queue_v2_s {
|
||||
hsa_queue_t hsa_queue;
|
||||
uint32_t caps;
|
||||
uint32_t reserved1[3];
|
||||
volatile uint64_t write_dispatch_id;
|
||||
uint32_t group_segment_aperture_base_hi;
|
||||
uint32_t private_segment_aperture_base_hi;
|
||||
uint32_t max_cu_id;
|
||||
uint32_t max_wave_id;
|
||||
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
|
||||
volatile uint32_t legacy_doorbell_lock;
|
||||
uint32_t reserved2[9];
|
||||
volatile uint64_t read_dispatch_id;
|
||||
uint32_t read_dispatch_id_field_base_byte_offset;
|
||||
uint32_t compute_tmpring_size;
|
||||
uint32_t scratch_resource_descriptor[4];
|
||||
uint64_t scratch_backing_memory_location;
|
||||
uint64_t scratch_backing_memory_byte_size;
|
||||
uint32_t scratch_wave64_lane_byte_size;
|
||||
amd_queue_properties32_t queue_properties;
|
||||
volatile uint64_t scratch_max_use_index; /* V2 */
|
||||
hsa_signal_t queue_inactive_signal;
|
||||
volatile uint64_t alt_scratch_max_use_index; /* V2 */
|
||||
uint32_t alt_scratch_resource_descriptor[4]; /* V2 */
|
||||
uint64_t alt_scratch_backing_memory_location; /* V2 */
|
||||
uint32_t alt_scratch_dispatch_limit_x; /* V2 */
|
||||
uint32_t alt_scratch_dispatch_limit_y; /* V2 */
|
||||
uint32_t alt_scratch_dispatch_limit_z; /* V2 */
|
||||
uint32_t alt_scratch_wave64_lane_byte_size; /* V2 */
|
||||
uint32_t alt_compute_tmpring_size; /* V2 */
|
||||
uint32_t reserved5;
|
||||
|
||||
scratch_last_used_index_xcc_t scratch_last_used_index[MAX_NUM_XCC];
|
||||
} amd_queue_v2_t;
|
||||
|
||||
#endif // AMD_HSA_QUEUE_H
|
||||
@@ -0,0 +1,79 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_SIGNAL_H
|
||||
#define AMD_HSA_SIGNAL_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "amd_hsa_queue.h"
|
||||
|
||||
// AMD Signal Kind Enumeration Values.
|
||||
typedef int64_t amd_signal_kind64_t;
|
||||
enum amd_signal_kind_t {
|
||||
AMD_SIGNAL_KIND_INVALID = 0,
|
||||
AMD_SIGNAL_KIND_USER = 1,
|
||||
AMD_SIGNAL_KIND_DOORBELL = -1,
|
||||
AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
|
||||
};
|
||||
|
||||
// AMD Signal.
|
||||
#define AMD_SIGNAL_ALIGN_BYTES 64
|
||||
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
|
||||
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
|
||||
amd_signal_kind64_t kind;
|
||||
union {
|
||||
volatile int64_t value;
|
||||
volatile uint64_t* hardware_doorbell_ptr;
|
||||
};
|
||||
uint64_t event_mailbox_ptr;
|
||||
uint32_t event_id;
|
||||
uint32_t reserved1;
|
||||
uint64_t start_ts;
|
||||
uint64_t end_ts;
|
||||
union {
|
||||
amd_queue_v2_t* queue_ptr;
|
||||
uint64_t reserved2;
|
||||
};
|
||||
uint32_t reserved3[2];
|
||||
} amd_signal_t;
|
||||
|
||||
#endif // AMD_HSA_SIGNAL_H
|
||||
+5752
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -0,0 +1,91 @@
|
||||
#ifndef HSA_RUNTIME_AMD_TOOL_EVENTS_H_
|
||||
#define HSA_RUNTIME_AMD_TOOL_EVENTS_H_
|
||||
|
||||
// Insert license header
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "hsa.h"
|
||||
|
||||
|
||||
typedef enum {
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE = 0,
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE =
|
||||
(1 << 0), // This scratch allocation is only valid for 1 dispatch.
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT =
|
||||
(1 << 1), // Used alternate scratch instead of main scratch
|
||||
} hsa_amd_event_scratch_alloc_flag_t;
|
||||
|
||||
typedef enum {
|
||||
HSA_AMD_TOOL_EVENT_MIN = 0,
|
||||
|
||||
// Scratch memory tracking
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END,
|
||||
|
||||
// Add new events above ^
|
||||
HSA_AMD_TOOL_EVENT_MAX
|
||||
} hsa_amd_tool_event_kind_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
} hsa_amd_tool_event_none_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
|
||||
} hsa_amd_event_scratch_alloc_start_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
|
||||
size_t size; // Amount of scratch allocated - in bytes
|
||||
size_t num_slots; // limit of number of waves
|
||||
} hsa_amd_event_scratch_alloc_end_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_free_start_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_free_end_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_async_reclaim_start_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_async_reclaim_end_t;
|
||||
|
||||
typedef union {
|
||||
const hsa_amd_tool_event_none_t* none;
|
||||
const hsa_amd_event_scratch_alloc_start_t* scratch_alloc_start;
|
||||
const hsa_amd_event_scratch_alloc_end_t* scratch_alloc_end;
|
||||
const hsa_amd_event_scratch_free_start_t* scratch_free_start;
|
||||
const hsa_amd_event_scratch_free_end_t* scratch_free_end;
|
||||
const hsa_amd_event_scratch_async_reclaim_start_t* scratch_async_reclaim_start;
|
||||
const hsa_amd_event_scratch_async_reclaim_end_t* scratch_async_reclaim_end;
|
||||
} hsa_amd_tool_event_t;
|
||||
|
||||
typedef hsa_status_t (*hsa_amd_tool_event)(hsa_amd_tool_event_t);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,585 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
|
||||
#define HSA_RUNTIME_INC_HSA_API_TRACE_H
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_api_trace_version.h"
|
||||
#ifdef AMD_INTERNAL_BUILD
|
||||
#include "hsa_ext_image.h"
|
||||
#include "hsa_ext_amd.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include "hsa_amd_tool.h"
|
||||
#include "hsa_ven_amd_pc_sampling.h"
|
||||
#else
|
||||
#include "inc/hsa_ext_image.h"
|
||||
#include "inc/hsa_ext_amd.h"
|
||||
#include "inc/hsa_ext_finalize.h"
|
||||
#include "inc/hsa_amd_tool.h"
|
||||
#include "inc/hsa_ven_amd_pc_sampling.h"
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Table MAJOR_VERSION and STEP_VERSION defines have moved to hsa_api_trace_version.h
|
||||
|
||||
// Min function used to copy Api Tables
|
||||
static inline uint32_t Min(const uint32_t a, const uint32_t b) {
|
||||
return (a > b) ? b : a;
|
||||
}
|
||||
|
||||
// Declarations of APIs intended for use only by tools.
|
||||
|
||||
// An AQL packet that can be put in an intercept queue to cause a callback to
|
||||
// be invoked when the packet is about to be submitted to the underlying
|
||||
// hardware queue. These packets are not copied to the underlying hardware
|
||||
// queue. These packets should come immediately before the regular AQL packet
|
||||
// they relate to. This implies that packet rewriters should always keep these
|
||||
// packets adjacent to the regular AQL packet that follows them.
|
||||
const uint32_t AMD_AQL_FORMAT_INTERCEPT_MARKER = 0xFE;
|
||||
|
||||
struct amd_aql_intercept_marker_s;
|
||||
|
||||
// When an intercept queue is processing rewritten packets to put them on the
|
||||
// underlying hardware queue, if it encounters a
|
||||
// AMD_AQL_FORMAT_INTERCEPT_MARKER vendor AQL packet it will call the following
|
||||
// handler. packet points to the packet, queue is the underlying hardware
|
||||
// queue, and packet_id is the packet id of the next packet to be put on the
|
||||
// underlying hardware queue. The intercept queue does not put these packets
|
||||
// onto the underlying hardware queue.
|
||||
typedef void (*amd_intercept_marker_handler)(const struct amd_aql_intercept_marker_s* packet,
|
||||
hsa_queue_t* queue, uint64_t packet_id);
|
||||
// An AQL vendor packet used by the intercept queue to mark the following
|
||||
// packet. The callback will be invoked to allow a tool to know where in the
|
||||
// underlying hardware queue the following packet will be placed. user_data can
|
||||
// be used to hold any data useful to the tool.
|
||||
typedef struct amd_aql_intercept_marker_s {
|
||||
uint16_t header; // Must have a packet type of HSA_PACKET_TYPE_VENDOR_SPECIFIC.
|
||||
uint8_t format; // Must be AMD_AQL_FORMAT_INTERCEPT_MARKER.
|
||||
uint8_t reserved[5]; // Must be 0.
|
||||
#ifdef HSA_LARGE_MODEL
|
||||
amd_intercept_marker_handler callback;
|
||||
#elif defined HSA_LITTLE_ENDIAN
|
||||
amd_intercept_marker_handler callback;
|
||||
uint32_t reserved1; // Must be 0.
|
||||
#else
|
||||
uint32_t reserved1; // Must be 0.
|
||||
amd_intercept_marker_handler callback;
|
||||
#endif
|
||||
uint64_t user_data[6];
|
||||
} amd_aql_intercept_marker_t;
|
||||
|
||||
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);
|
||||
typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
|
||||
uint64_t user_pkt_index, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer);
|
||||
hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
|
||||
hsa_amd_queue_intercept_handler callback,
|
||||
void* user_data);
|
||||
hsa_status_t hsa_amd_queue_intercept_create(
|
||||
hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
|
||||
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
|
||||
|
||||
typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
|
||||
void* data);
|
||||
hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
|
||||
void* user_data);
|
||||
|
||||
// Structure of Version used to identify an instance of Api table
|
||||
// Must be the first member (offsetof == 0) of all API tables.
|
||||
// This is the root of the table passing ABI.
|
||||
struct ApiTableVersion {
|
||||
uint32_t major_id;
|
||||
uint32_t minor_id;
|
||||
uint32_t step_id;
|
||||
uint32_t reserved;
|
||||
};
|
||||
|
||||
struct ToolsApiTable {
|
||||
ApiTableVersion version;
|
||||
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_start_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_end_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_free_start_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_free_end_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_start_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_end_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Finalizer Extension Apis
|
||||
struct FinalizerExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
|
||||
decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
|
||||
decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
|
||||
decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
|
||||
decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
|
||||
decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Image Extension Apis
|
||||
struct ImageExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
|
||||
decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
|
||||
decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
|
||||
decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
|
||||
decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
|
||||
decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
|
||||
decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
|
||||
decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
|
||||
decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
|
||||
decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
|
||||
decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
|
||||
decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
|
||||
decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
|
||||
decltype(hsa_ext_sampler_create_v2)* hsa_ext_sampler_create_v2_fn;
|
||||
|
||||
};
|
||||
|
||||
// Table to export HSA PC Sampling Extension Apis
|
||||
struct PcSamplingExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_ven_amd_pcs_iterate_configuration)* hsa_ven_amd_pcs_iterate_configuration_fn;
|
||||
decltype(hsa_ven_amd_pcs_create)* hsa_ven_amd_pcs_create_fn;
|
||||
decltype(hsa_ven_amd_pcs_create_from_id)* hsa_ven_amd_pcs_create_from_id_fn;
|
||||
decltype(hsa_ven_amd_pcs_destroy)* hsa_ven_amd_pcs_destroy_fn;
|
||||
decltype(hsa_ven_amd_pcs_start)* hsa_ven_amd_pcs_start_fn;
|
||||
decltype(hsa_ven_amd_pcs_stop)* hsa_ven_amd_pcs_stop_fn;
|
||||
decltype(hsa_ven_amd_pcs_flush)* hsa_ven_amd_pcs_flush_fn;
|
||||
};
|
||||
|
||||
|
||||
// Table to export AMD Extension Apis
|
||||
struct AmdExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
|
||||
decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
|
||||
decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
|
||||
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
|
||||
decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
|
||||
decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
|
||||
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
|
||||
decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
|
||||
decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
|
||||
decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
|
||||
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
|
||||
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
|
||||
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
|
||||
decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_on_engine)* hsa_amd_memory_async_copy_on_engine_fn;
|
||||
decltype(hsa_amd_memory_copy_engine_status)* hsa_amd_memory_copy_engine_status_fn;
|
||||
decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
|
||||
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
|
||||
decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
|
||||
decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
|
||||
decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
|
||||
decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
|
||||
decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
|
||||
decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
|
||||
decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
|
||||
decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
|
||||
decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
|
||||
decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
|
||||
decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
|
||||
decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
|
||||
decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
|
||||
decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
|
||||
decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
|
||||
decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
|
||||
decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
|
||||
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
|
||||
decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
|
||||
decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
|
||||
decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
|
||||
decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
|
||||
decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
|
||||
decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
|
||||
decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
|
||||
decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn;
|
||||
decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
|
||||
decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
|
||||
decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
|
||||
decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn;
|
||||
decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn;
|
||||
decltype(hsa_amd_vmem_address_reserve)* hsa_amd_vmem_address_reserve_fn;
|
||||
decltype(hsa_amd_vmem_address_free)* hsa_amd_vmem_address_free_fn;
|
||||
decltype(hsa_amd_vmem_handle_create)* hsa_amd_vmem_handle_create_fn;
|
||||
decltype(hsa_amd_vmem_handle_release)* hsa_amd_vmem_handle_release_fn;
|
||||
decltype(hsa_amd_vmem_map)* hsa_amd_vmem_map_fn;
|
||||
decltype(hsa_amd_vmem_unmap)* hsa_amd_vmem_unmap_fn;
|
||||
decltype(hsa_amd_vmem_set_access)* hsa_amd_vmem_set_access_fn;
|
||||
decltype(hsa_amd_vmem_get_access)* hsa_amd_vmem_get_access_fn;
|
||||
decltype(hsa_amd_vmem_export_shareable_handle)* hsa_amd_vmem_export_shareable_handle_fn;
|
||||
decltype(hsa_amd_vmem_import_shareable_handle)* hsa_amd_vmem_import_shareable_handle_fn;
|
||||
decltype(hsa_amd_vmem_retain_alloc_handle)* hsa_amd_vmem_retain_alloc_handle_fn;
|
||||
decltype(hsa_amd_vmem_get_alloc_properties_from_handle)*
|
||||
hsa_amd_vmem_get_alloc_properties_from_handle_fn;
|
||||
decltype(hsa_amd_agent_set_async_scratch_limit)* hsa_amd_agent_set_async_scratch_limit_fn;
|
||||
decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
|
||||
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
|
||||
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
|
||||
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
|
||||
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
|
||||
decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
struct CoreApiTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_init)* hsa_init_fn;
|
||||
decltype(hsa_shut_down)* hsa_shut_down_fn;
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info_fn;
|
||||
decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
|
||||
decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
|
||||
decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
|
||||
decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
|
||||
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
|
||||
decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
|
||||
decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
|
||||
decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
|
||||
decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
|
||||
decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
|
||||
decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
|
||||
decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;
|
||||
decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
|
||||
decltype(hsa_region_get_info)* hsa_region_get_info_fn;
|
||||
decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
|
||||
decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
|
||||
decltype(hsa_memory_register)* hsa_memory_register_fn;
|
||||
decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
|
||||
decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
|
||||
decltype(hsa_memory_free)* hsa_memory_free_fn;
|
||||
decltype(hsa_memory_copy)* hsa_memory_copy_fn;
|
||||
decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
|
||||
decltype(hsa_signal_create)* hsa_signal_create_fn;
|
||||
decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
|
||||
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
|
||||
decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
|
||||
decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
|
||||
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
|
||||
decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
|
||||
decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
|
||||
decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
|
||||
decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
|
||||
decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
|
||||
decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
|
||||
decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
|
||||
decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
|
||||
decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
|
||||
decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
|
||||
decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
|
||||
decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
|
||||
decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
|
||||
decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
|
||||
decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
|
||||
decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
|
||||
decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
|
||||
decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
|
||||
decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
|
||||
decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
|
||||
decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
|
||||
decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
|
||||
decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
|
||||
decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
|
||||
decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
|
||||
decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
|
||||
decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
|
||||
decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;
|
||||
|
||||
//===--- Instruction Set Architecture -----------------------------------===//
|
||||
|
||||
decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
|
||||
|
||||
//===--- Code Objects (deprecated) --------------------------------------===//
|
||||
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
|
||||
|
||||
//===--- Executable -----------------------------------------------------===//
|
||||
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_create)* hsa_executable_create_fn;
|
||||
decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
|
||||
decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
|
||||
decltype(hsa_executable_global_variable_define)*
|
||||
hsa_executable_global_variable_define_fn;
|
||||
decltype(hsa_executable_agent_global_variable_define)*
|
||||
hsa_executable_agent_global_variable_define_fn;
|
||||
decltype(hsa_executable_readonly_variable_define)*
|
||||
hsa_executable_readonly_variable_define_fn;
|
||||
decltype(hsa_executable_validate)* hsa_executable_validate_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
|
||||
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
|
||||
|
||||
//===--- Runtime Notifications ------------------------------------------===//
|
||||
|
||||
decltype(hsa_status_string)* hsa_status_string_fn;
|
||||
|
||||
// Start HSA v1.1 additions
|
||||
decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
|
||||
decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
|
||||
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
|
||||
decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
|
||||
decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
|
||||
decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
|
||||
decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
|
||||
decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
|
||||
decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
|
||||
decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
|
||||
decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
|
||||
decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;
|
||||
|
||||
//===--- Instruction Set Architecture - HSA v1.1 additions --------------===//
|
||||
|
||||
decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
|
||||
decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
|
||||
decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
|
||||
decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
|
||||
decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
|
||||
decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;
|
||||
|
||||
//===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//
|
||||
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_get_symbol_from_name)*
|
||||
hsa_code_object_get_symbol_from_name_fn;
|
||||
|
||||
//===--- Executable - HSA v1.1 additions --------------------------------===//
|
||||
|
||||
decltype(hsa_code_object_reader_create_from_file)*
|
||||
hsa_code_object_reader_create_from_file_fn;
|
||||
decltype(hsa_code_object_reader_create_from_memory)*
|
||||
hsa_code_object_reader_create_from_memory_fn;
|
||||
decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
|
||||
decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
|
||||
decltype(hsa_executable_load_program_code_object)*
|
||||
hsa_executable_load_program_code_object_fn;
|
||||
decltype(hsa_executable_load_agent_code_object)*
|
||||
hsa_executable_load_agent_code_object_fn;
|
||||
decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
|
||||
decltype(hsa_executable_get_symbol_by_name)*
|
||||
hsa_executable_get_symbol_by_name_fn;
|
||||
decltype(hsa_executable_iterate_agent_symbols)*
|
||||
hsa_executable_iterate_agent_symbols_fn;
|
||||
decltype(hsa_executable_iterate_program_symbols)*
|
||||
hsa_executable_iterate_program_symbols_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Apis from Core Runtime, Amd Extensions
|
||||
// Finalizer and Images
|
||||
struct HsaApiTable {
|
||||
|
||||
// Version of Hsa Api Table
|
||||
ApiTableVersion version;
|
||||
|
||||
// Table of function pointers to HSA Core Runtime
|
||||
CoreApiTable* core_;
|
||||
|
||||
// Table of function pointers to AMD extensions
|
||||
AmdExtTable* amd_ext_;
|
||||
|
||||
// Table of function pointers to HSA Finalizer Extension
|
||||
FinalizerExtTable* finalizer_ext_;
|
||||
|
||||
// Table of function pointers to HSA Image Extension
|
||||
ImageExtTable* image_ext_;
|
||||
|
||||
// Table of function pointers for tools to use
|
||||
ToolsApiTable* tools_;
|
||||
|
||||
// Table of function pointers to AMD PC Sampling Extension
|
||||
PcSamplingExtTable* pc_sampling_ext_;
|
||||
};
|
||||
|
||||
// Structure containing instances of different api tables
|
||||
struct HsaApiTableContainer {
|
||||
HsaApiTable root;
|
||||
CoreApiTable core;
|
||||
AmdExtTable amd_ext;
|
||||
FinalizerExtTable finalizer_ext;
|
||||
ImageExtTable image_ext;
|
||||
ToolsApiTable tools;
|
||||
PcSamplingExtTable pc_sampling_ext;
|
||||
|
||||
// Default initialization of a container instance
|
||||
HsaApiTableContainer() {
|
||||
root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
|
||||
root.version.minor_id = sizeof(HsaApiTable);
|
||||
root.version.step_id = HSA_API_TABLE_STEP_VERSION;
|
||||
|
||||
core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
|
||||
core.version.minor_id = sizeof(CoreApiTable);
|
||||
core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
|
||||
root.core_ = &core;
|
||||
|
||||
amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
|
||||
amd_ext.version.minor_id = sizeof(AmdExtTable);
|
||||
amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
|
||||
root.amd_ext_ = &amd_ext;
|
||||
|
||||
finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
|
||||
finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
|
||||
finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
|
||||
root.finalizer_ext_ = &finalizer_ext;
|
||||
|
||||
image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
|
||||
image_ext.version.minor_id = sizeof(ImageExtTable);
|
||||
image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
|
||||
root.image_ext_ = &image_ext;
|
||||
|
||||
tools.version.major_id = HSA_TOOLS_API_TABLE_MAJOR_VERSION;
|
||||
tools.version.minor_id = sizeof(ToolsApiTable);
|
||||
tools.version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;
|
||||
root.tools_ = &tools;
|
||||
|
||||
pc_sampling_ext.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
|
||||
pc_sampling_ext.version.minor_id = sizeof(PcSamplingExtTable);
|
||||
pc_sampling_ext.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;
|
||||
root.pc_sampling_ext_ = &pc_sampling_ext;
|
||||
}
|
||||
};
|
||||
|
||||
// Api to copy function pointers of a table
|
||||
static
|
||||
void inline copyApi(void* src, void* dest, size_t size) {
|
||||
assert(size >= sizeof(ApiTableVersion));
|
||||
memcpy((char*)src + sizeof(ApiTableVersion),
|
||||
(char*)dest + sizeof(ApiTableVersion),
|
||||
(size - sizeof(ApiTableVersion)));
|
||||
}
|
||||
|
||||
// Copy Api child tables if valid.
|
||||
static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
|
||||
if (src->major_id && (dest->major_id == src->major_id)) {
|
||||
dest->step_id = src->step_id;
|
||||
dest->minor_id = Min(dest->minor_id, src->minor_id);
|
||||
copyApi(dest, src, dest->minor_id);
|
||||
} else {
|
||||
dest->major_id = 0;
|
||||
dest->minor_id = 0;
|
||||
dest->step_id = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy constructor for all Api tables. The function assumes the
|
||||
// user has initialized an instance of tables container correctly
|
||||
// for the Major, Minor and Stepping Ids of Root and Child Api tables.
|
||||
// The function will overwrite the value of Minor Id by taking the
|
||||
// minimum of source and destination parameters. It will also overwrite
|
||||
// the stepping Id with value from source parameter.
|
||||
static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
|
||||
// Verify Major Id of source and destination tables match
|
||||
if (dest->version.major_id != src->version.major_id) {
|
||||
dest->version.major_id = 0;
|
||||
dest->version.minor_id = 0;
|
||||
dest->version.step_id = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize the stepping id and minor id of root table. For the
|
||||
// minor id which encodes struct size, take the minimum of source
|
||||
// and destination parameters
|
||||
dest->version.step_id = src->version.step_id;
|
||||
dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);
|
||||
|
||||
// Copy child tables if present
|
||||
if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
|
||||
copyElement(&dest->core_->version, &src->core_->version);
|
||||
if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
|
||||
if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
|
||||
if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->image_ext_->version, &src->image_ext_->version);
|
||||
if ((offsetof(HsaApiTable, tools_) < dest->version.minor_id))
|
||||
copyElement(&dest->tools_->version, &src->tools_->version);
|
||||
if ((offsetof(HsaApiTable, pc_sampling_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->pc_sampling_ext_->version, &src->pc_sampling_ext_->version);
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,70 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
||||
#define HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
||||
|
||||
// CODE IN THIS FILE **MUST** BE C-COMPATIBLE
|
||||
|
||||
// Major Ids of the Api tables exported by Hsa Core Runtime
|
||||
#define HSA_API_TABLE_MAJOR_VERSION 0x03
|
||||
#define HSA_CORE_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01
|
||||
#define HSA_TOOLS_API_TABLE_MAJOR_VERSION 0x01
|
||||
#define HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION 0x01
|
||||
|
||||
// Step Ids of the Api tables exported by Hsa Core Runtime
|
||||
#define HSA_API_TABLE_STEP_VERSION 0x01
|
||||
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x07
|
||||
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
|
||||
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
|
||||
#define HSA_IMAGE_EXT_API_TABLE_STEP_VERSION HSA_IMAGE_API_TABLE_STEP_VERSION
|
||||
#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_TOOLS_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_PC_SAMPLING_API_TABLE_STEP_VERSION 0x00
|
||||
|
||||
#endif // HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
||||
+3675
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -0,0 +1,531 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#undef HSA_API
|
||||
#ifdef HSA_EXPORT_FINALIZER
|
||||
#define HSA_API HSA_API_EXPORT
|
||||
#else
|
||||
#define HSA_API HSA_API_IMPORT
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
struct BrigModuleHeader;
|
||||
typedef struct BrigModuleHeader* BrigModule_t;
|
||||
|
||||
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Enumeration constants added to ::hsa_status_t by this extension.
|
||||
*/
|
||||
enum {
|
||||
/**
|
||||
* The HSAIL program is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
|
||||
/**
|
||||
* The HSAIL module is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
|
||||
/**
|
||||
* Machine model or profile of the HSAIL module do not match the machine model
|
||||
* or profile of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
|
||||
/**
|
||||
* The HSAIL module is already a part of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
|
||||
/**
|
||||
* Compatibility mismatch between symbol declaration and symbol definition.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
|
||||
/**
|
||||
* The finalization encountered an error while finalizing a kernel or
|
||||
* indirect function.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
|
||||
/**
|
||||
* Mismatch between a directive in the control directive structure and in
|
||||
* the HSAIL kernel.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup ext-alt-finalizer-program Finalization Program
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
|
||||
* the definition of the BrigModule_t type.
|
||||
*/
|
||||
typedef BrigModule_t hsa_ext_module_t;
|
||||
|
||||
/**
|
||||
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
|
||||
* modules that collectively define functions and variables used by kernels and
|
||||
* indirect functions.
|
||||
*/
|
||||
typedef struct hsa_ext_program_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
*/
|
||||
uint64_t handle;
|
||||
} hsa_ext_program_t;
|
||||
|
||||
/**
|
||||
* @brief Create an empty HSAIL program.
|
||||
*
|
||||
* @param[in] machine_model Machine model used in the HSAIL program.
|
||||
*
|
||||
* @param[in] profile Profile used in the HSAIL program.
|
||||
*
|
||||
* @param[in] default_float_rounding_mode Default float rounding mode used in
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[out] program Memory location where the HSA runtime stores the newly
|
||||
* created HSAIL program handle.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
|
||||
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
|
||||
* @p program is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_create(
|
||||
hsa_machine_model_t machine_model,
|
||||
hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options,
|
||||
hsa_ext_program_t *program);
|
||||
|
||||
/**
|
||||
* @brief Destroy a HSAIL program.
|
||||
*
|
||||
* @details The HSAIL program handle becomes invalid after it has been
|
||||
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
|
||||
* still valid after the HSAIL program has been destroyed, and can be used as
|
||||
* intended. Resources allocated outside and associated with the HSAIL program
|
||||
* (such as HSAIL modules that are added to the HSAIL program) can be released
|
||||
* after the finalization program has been destroyed.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_destroy(
|
||||
hsa_ext_program_t program);
|
||||
|
||||
/**
|
||||
* @brief Add a HSAIL module to an existing HSAIL program.
|
||||
*
|
||||
* @details The HSA runtime does not perform a deep copy of the HSAIL module
|
||||
* upon addition. Instead, it stores a pointer to the HSAIL module. The
|
||||
* ownership of the HSAIL module belongs to the application, which must ensure
|
||||
* that @p module is not released before destroying the HSAIL program.
|
||||
*
|
||||
* The HSAIL module is successfully added to the HSAIL program if @p module is
|
||||
* valid, if all the declarations and definitions for the same symbol are
|
||||
* compatible, and if @p module specify machine model and profile that matches
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] module HSAIL module. The application can add the same HSAIL module
|
||||
* to @p program at most once. The HSAIL module must specify the same machine
|
||||
* model and profile as @p program. If the floating-mode rounding mode of @p
|
||||
* module is not default, then it should match that of @p program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
|
||||
* module does not match machine model of @p program, or the profile of @p
|
||||
* module does not match profile of @p program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
|
||||
* already a part of the HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
|
||||
* definition compatibility mismatch. See the symbol compatibility rules in the
|
||||
* HSA Programming Reference Manual.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_add_module(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
/**
|
||||
* @brief Iterate over the HSAIL modules in a program, and invoke an
|
||||
* application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per HSAIL module in the
|
||||
* program. The HSA runtime passes three arguments to the callback: the program,
|
||||
* a HSAIL module, and the application data. If @p callback returns a status
|
||||
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
|
||||
* stops and ::hsa_ext_program_iterate_modules returns that status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
/**
|
||||
* @brief HSAIL program attributes.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Machine model specified when the HSAIL program was created. The type
|
||||
* of this attribute is ::hsa_machine_model_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
|
||||
/**
|
||||
* Profile specified when the HSAIL program was created. The type of
|
||||
* this attribute is ::hsa_profile_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
|
||||
/**
|
||||
* Default float rounding mode specified when the HSAIL program was
|
||||
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
|
||||
} hsa_ext_program_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behaviour is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* HSAIL program attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_get_info(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
/**
|
||||
* @brief Finalizer-determined call convention.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Finalizer-determined call convention.
|
||||
*/
|
||||
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
|
||||
} hsa_ext_finalizer_call_convention_t;
|
||||
|
||||
/**
|
||||
* @brief Control directives specify low-level information about the
|
||||
* finalization process.
|
||||
*/
|
||||
typedef struct hsa_ext_control_directives_s {
|
||||
/**
|
||||
* Bitset indicating which control directives are enabled. The bit assigned to
|
||||
* a control directive is determined by the corresponding value in
|
||||
* BrigControlDirective.
|
||||
*
|
||||
* If a control directive is disabled, its corresponding field value (if any)
|
||||
* must be 0. Control directives that are only present or absent (such as
|
||||
* partial workgroups) have no corresponding field as the presence of the bit
|
||||
* in this mask is sufficient.
|
||||
*/
|
||||
uint64_t control_directives_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
|
||||
* assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t break_exceptions_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
|
||||
* bit assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t detect_exceptions_mask;
|
||||
/**
|
||||
* Maximum size (in bytes) of dynamic group memory that will be allocated by
|
||||
* the application for any dispatch of the kernel. If the kernel contains a
|
||||
* maxdynamicsize control directive, the two values should match.
|
||||
*/
|
||||
uint32_t max_dynamic_group_size;
|
||||
/**
|
||||
* Maximum number of grid work-items that will be used by the application to
|
||||
* launch the kernel. If the kernel contains a maxflatgridsize control
|
||||
* directive, the value of @a max_flat_grid_size must not be greater than the
|
||||
* value of the directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_grid_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint64_t max_flat_grid_size;
|
||||
/**
|
||||
* Maximum number of work-group work-items that will be used by the
|
||||
* application to launch the kernel. If the kernel contains a
|
||||
* maxflatworkgroupsize control directive, the value of @a
|
||||
* max_flat_workgroup_size must not be greater than the value of the
|
||||
* directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_workgroup_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint32_t max_flat_workgroup_size;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint32_t reserved1;
|
||||
/**
|
||||
* Grid size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredgridsize control directive, the
|
||||
* dimensions should match.
|
||||
*
|
||||
* The specified grid size must be consistent with @a required_workgroup_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
|
||||
* only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
uint64_t required_grid_size[3];
|
||||
/**
|
||||
* Work-group size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredworkgroupsize control directive,
|
||||
* the dimensions should match.
|
||||
*
|
||||
* The specified work-group size must be consistent with @a required_grid_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
|
||||
* hold only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
hsa_dim3_t required_workgroup_size;
|
||||
/**
|
||||
* Number of dimensions that will be used by the application to launch the
|
||||
* kernel. If the kernel contains a requireddim control directive, the two
|
||||
* values should match.
|
||||
*
|
||||
* The specified dimensions must be consistent with @a required_grid_size and
|
||||
* @a required_workgroup_size. This invariant must hold only if all the
|
||||
* corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
|
||||
* control_directives_mask, this field must be 1, 2, or 3.
|
||||
*/
|
||||
uint8_t required_dim;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint8_t reserved2[75];
|
||||
} hsa_ext_control_directives_t;
|
||||
|
||||
/**
|
||||
* @brief Finalize an HSAIL program for a given instruction set architecture.
|
||||
*
|
||||
* @details Finalize all of the kernels and indirect functions that belong to
|
||||
* the same HSAIL program for a specific instruction set architecture (ISA). The
|
||||
* transitive closure of all functions specified by call or scall must be
|
||||
* defined. Kernels and indirect functions that are being finalized must be
|
||||
* defined. Kernels and indirect functions that are referenced in kernels and
|
||||
* indirect functions being finalized may or may not be defined, but must be
|
||||
* declared. All the global/readonly segment variables that are referenced in
|
||||
* kernels and indirect functions being finalized may or may not be defined, but
|
||||
* must be declared.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] isa Instruction set architecture to finalize for.
|
||||
*
|
||||
* @param[in] call_convention A call convention used in a finalization. Must
|
||||
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
|
||||
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
|
||||
* isa (not inclusive).
|
||||
*
|
||||
* @param[in] control_directives Low-level control directives that influence
|
||||
* the finalization process.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[in] code_object_type Type of code object to produce.
|
||||
*
|
||||
* @param[out] code_object Code object generated by the Finalizer, which
|
||||
* contains the machine code for the kernels and indirect functions in the HSAIL
|
||||
* program. The code object is independent of the HSAIL module that was used to
|
||||
* generate it.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
|
||||
* the control directive structure and in the HSAIL kernel mismatch, or if the
|
||||
* same directive is used with a different value in one of the functions used by
|
||||
* this kernel.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
|
||||
* encountered an error while compiling a kernel or an indirect function.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_finalize(
|
||||
hsa_ext_program_t program,
|
||||
hsa_isa_t isa,
|
||||
int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives,
|
||||
const char *options,
|
||||
hsa_code_object_type_t code_object_type,
|
||||
hsa_code_object_t *code_object);
|
||||
|
||||
/** @} */
|
||||
|
||||
#define hsa_ext_finalizer_1_00
|
||||
|
||||
typedef struct hsa_ext_finalizer_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ext_program_create)(
|
||||
hsa_machine_model_t machine_model, hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options, hsa_ext_program_t *program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_iterate_modules)(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module, void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_get_info)(
|
||||
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_finalize)(
|
||||
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives, const char *options,
|
||||
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
|
||||
} hsa_ext_finalizer_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -0,0 +1,488 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
||||
#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "hsa.h"
|
||||
|
||||
#define HSA_AQLPROFILE_VERSION_MAJOR 2
|
||||
#define HSA_AQLPROFILE_VERSION_MINOR 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Library version
|
||||
uint32_t hsa_ven_amd_aqlprofile_version_major();
|
||||
uint32_t hsa_ven_amd_aqlprofile_version_minor();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// Library API:
|
||||
// The library provides helper methods for instantiation of
|
||||
// the profile context object and for populating of the start
|
||||
// and stop AQL packets. The profile object contains a profiling
|
||||
// events list and needed for profiling buffers descriptors,
|
||||
// a command buffer and an output data buffer. To check if there
|
||||
// was an error the library methods return a status code. Also
|
||||
// the library provides methods for querying required buffers
|
||||
// attributes, to validate the event attributes and to get profiling
|
||||
// output data.
|
||||
//
|
||||
// Returned status:
|
||||
// hsa_status_t – HSA status codes are used from hsa.h header
|
||||
//
|
||||
// Supported profiling features:
|
||||
//
|
||||
// Supported profiling events
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
|
||||
} hsa_ven_amd_aqlprofile_event_type_t;
|
||||
|
||||
// Supported performance counters (PMC) blocks
|
||||
// The block ID is the same for a block instances set, for example
|
||||
// each block instance from the TCC block set, TCC0, TCC1, …, TCCN
|
||||
// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCKS_TCC.
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
|
||||
// Memory related blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
|
||||
// System blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
|
||||
// GFX10 added blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A = 26,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C = 27,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A = 28,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C = 29,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR = 30,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS = 31,
|
||||
|
||||
// UMC & MMEA System Blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC = 32,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA = 33,
|
||||
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
|
||||
} hsa_ven_amd_aqlprofile_block_name_t;
|
||||
|
||||
// PMC event object structure
|
||||
// ‘counter_id’ value is specified in GFXIPs perfcounter user guides
|
||||
// which is the counters select value, “Performance Counters Selection”
|
||||
// chapter.
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_block_name_t block_name;
|
||||
uint32_t block_index;
|
||||
uint32_t counter_id;
|
||||
} hsa_ven_amd_aqlprofile_event_t;
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
||||
hsa_agent_t agent, // HSA handle for the profiling GPU
|
||||
const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer on validated event
|
||||
bool* result); // [out] True if the event valid, False otherwise
|
||||
|
||||
// Profiling parameters
|
||||
// All parameters are generic and if not applicable for a specific
|
||||
// profile configuration then error status will be returned.
|
||||
typedef enum {
|
||||
/**
|
||||
* Select the target compute unit (wgp) for profiling.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
|
||||
/**
|
||||
* VMID Mask
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
|
||||
/**
|
||||
* Shader engine mask for selection.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
|
||||
/**
|
||||
* Set SIMD Mask (GFX9) or SIMD ID for collection (Navi)
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION = 8,
|
||||
/**
|
||||
* Set true for occupancy collection only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE = 9,
|
||||
/**
|
||||
* ATT collection max data size, in MB. Shared among shader engines.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE = 10,
|
||||
/**
|
||||
* Mask of which compute units to generate perfcounters. GFX9 only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK = 240,
|
||||
/**
|
||||
* Select collection period for perfcounters. GFX9 only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL = 241,
|
||||
/**
|
||||
* Select perfcounter ID (SQ block) for collection. GFX9 only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME = 242,
|
||||
} hsa_ven_amd_aqlprofile_parameter_name_t;
|
||||
|
||||
// Profile parameter object
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
|
||||
uint32_t value;
|
||||
} hsa_ven_amd_aqlprofile_parameter_t;
|
||||
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0 = 0,
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1,
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2,
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
|
||||
} hsa_ven_amd_aqlprofile_att_marker_channel_t;
|
||||
|
||||
//
|
||||
// Profile context object:
|
||||
// The library provides a profile object structure which contains
|
||||
// the events array, a buffer for the profiling start/stop commands
|
||||
// and a buffer for the output data.
|
||||
// The buffers are specified by the buffer descriptors and allocated
|
||||
// by the application. The buffers allocation attributes, the command
|
||||
// buffer size, the PMC output buffer size as well as profiling output
|
||||
// data can be get using the generic get profile info helper _get_info.
|
||||
//
|
||||
// Buffer descriptor
|
||||
typedef struct {
|
||||
void* ptr;
|
||||
uint32_t size;
|
||||
} hsa_ven_amd_aqlprofile_descriptor_t;
|
||||
|
||||
// Profile context object structure, contains profiling events list and
|
||||
// needed for profiling buffers descriptors, a command buffer and
|
||||
// an output data buffer
|
||||
typedef struct {
|
||||
hsa_agent_t agent; // GFXIP handle
|
||||
hsa_ven_amd_aqlprofile_event_type_t type; // Events type
|
||||
const hsa_ven_amd_aqlprofile_event_t* events; // Events array
|
||||
uint32_t event_count; // Events count
|
||||
const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array
|
||||
uint32_t parameter_count; // Parameters count
|
||||
hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer
|
||||
hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands
|
||||
} hsa_ven_amd_aqlprofile_profile_t;
|
||||
|
||||
//
|
||||
// AQL packets populating methods:
|
||||
// The helper methods to populate provided by the application START and
|
||||
// STOP AQL packets which the application is required to submit before and
|
||||
// after profiled GPU task packets respectively.
|
||||
//
|
||||
// AQL Vendor Specific packet which carries a PM4 command
|
||||
typedef struct {
|
||||
uint16_t header;
|
||||
uint16_t pm4_command[27];
|
||||
hsa_signal_t completion_signal;
|
||||
} hsa_ext_amd_aql_pm4_packet_t;
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
// Only 'pm4_command' fields of the packet are set and the application
|
||||
// is responsible to set Vendor Specific header type a completion signal
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_start(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile, // [in,out] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet
|
||||
|
||||
// Method to populate the provided AQL packet with profiling stop commands
|
||||
// Only 'pm4_command' fields of the packet are set and the application
|
||||
// is responsible to set Vendor Specific header type and a completion signal
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_stop(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet
|
||||
|
||||
// Method to populate the provided AQL packet with profiling read commands
|
||||
// Only 'pm4_command' fields of the packet are set and the application
|
||||
// is responsible to set Vendor Specific header type and a completion signal
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_read(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet); // [out] profile stop AQL packet
|
||||
|
||||
// Legacy devices, PM4 profiling packet size
|
||||
const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
|
||||
// Legacy devices, converting the profiling AQL packet to PM4 packet blob
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet
|
||||
void* data); // [out] PM4 packet blob
|
||||
|
||||
// Method to add a marker (correlation ID) into the ATT buffer.
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile, // [in,out] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_marker_packet, // [out] profile marker AQL packet
|
||||
uint32_t data, // [in] Data to be inserted
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel); // [in] Comm channel
|
||||
|
||||
//
|
||||
// Get profile info:
|
||||
// Generic method for getting various profile info including profile buffers
|
||||
// attributes like the command buffer size and the profiling PMC results.
|
||||
// It’s implied that all counters are 64bit values.
|
||||
//
|
||||
// Profile generic output data:
|
||||
typedef struct {
|
||||
uint32_t sample_id; // PMC sample or trace buffer index
|
||||
union {
|
||||
struct {
|
||||
hsa_ven_amd_aqlprofile_event_t event; // PMC event
|
||||
uint64_t result; // PMC result
|
||||
} pmc_data;
|
||||
hsa_ven_amd_aqlprofile_descriptor_t trace_data; // Trace output data descriptor
|
||||
};
|
||||
} hsa_ven_amd_aqlprofile_info_data_t;
|
||||
|
||||
// ID query type
|
||||
typedef struct {
|
||||
const char* name;
|
||||
uint32_t id;
|
||||
uint32_t instance_count;
|
||||
} hsa_ven_amd_aqlprofile_id_query_t;
|
||||
|
||||
// Profile attributes
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
|
||||
// in info_data object
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3, // get_info returns trace buffer ptr/size
|
||||
// in info_data object
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
|
||||
// by name string using _id_query_t
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
|
||||
// counters enable command buffer
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
|
||||
// counters disable command buffer
|
||||
} hsa_ven_amd_aqlprofile_info_type_t;
|
||||
|
||||
|
||||
// Definition of output data iterator callback
|
||||
typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
|
||||
hsa_ven_amd_aqlprofile_info_type_t info_type, // [in] data type, PMC or trace data
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data, // [in] info_data object
|
||||
void* callback_data); // [in,out] data passed to the callback
|
||||
|
||||
// Method for getting the profile info
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_get_info(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute
|
||||
void* value); // [in,out] returned value
|
||||
|
||||
// Method for iterating the events output data
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data
|
||||
void* data); // [in,out] data passed to the callback
|
||||
|
||||
// Return error string
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_error_string(
|
||||
const char** str); // [out] pointer on the error string
|
||||
|
||||
/**
|
||||
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
|
||||
*/
|
||||
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_eventname_callback_t)(int id, const char* name);
|
||||
/**
|
||||
* @brief Iterate over all possible event coordinate IDs and their names.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
||||
* @param position A counting sequence indicating callback number.
|
||||
* @param id Coordinate ID as in _iterate_event_ids.
|
||||
* @param extent Coordinate extent indicating maximum allowed instances.
|
||||
* @param coordinate The coordinate, in the range [0,extent-1].
|
||||
* @param name Coordinate name as in _iterate_event_ids.
|
||||
* @param userdata Userdata returned from _iterate_event_coord function.
|
||||
*/
|
||||
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_coordinate_callback_t)(
|
||||
int position,
|
||||
int id,
|
||||
int extent,
|
||||
int coordinate,
|
||||
const char* name,
|
||||
void* userdata
|
||||
);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
||||
* @param[in] agent HSA agent.
|
||||
* @param[in] event The event ID and block ID to iterate for.
|
||||
* @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
|
||||
* @param[in] callback Callback function to return the coordinates.
|
||||
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
|
||||
hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id,
|
||||
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
|
||||
void* userdata
|
||||
);
|
||||
|
||||
/**
|
||||
* @brief Extension version.
|
||||
*/
|
||||
#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
|
||||
#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
|
||||
|
||||
#ifdef HSA_LARGE_MODEL
|
||||
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
|
||||
#else
|
||||
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Extension function table.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
|
||||
uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
|
||||
uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
|
||||
const char** str);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ven_amd_aqlprofile_event_t* event,
|
||||
bool* result);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
|
||||
void* data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute,
|
||||
void* value);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback,
|
||||
void* data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_ids)(
|
||||
hsa_ven_amd_aqlprofile_eventname_callback_t
|
||||
);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_coord)(
|
||||
hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id,
|
||||
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
|
||||
void* userdata
|
||||
);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_att_marker)(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_packet,
|
||||
uint32_t data,
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
||||
);
|
||||
} hsa_ven_amd_aqlprofile_1_00_pfn_t;
|
||||
|
||||
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
||||
@@ -0,0 +1,667 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA AMD extension for additional loader functionality.
|
||||
|
||||
#ifndef HSA_VEN_AMD_LOADER_H
|
||||
#define HSA_VEN_AMD_LOADER_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/**
|
||||
* @brief Queries equivalent host address for given @p device_address, and
|
||||
* records it in @p host_address.
|
||||
*
|
||||
*
|
||||
* @details Contents of memory pointed to by @p host_address would be identical
|
||||
* to contents of memory pointed to by @p device_address. Only difference
|
||||
* between the two is host accessibility: @p host_address is always accessible
|
||||
* from host, @p device_address might not be accessible from host.
|
||||
*
|
||||
* If @p device_address already points to host accessible memory, then the value
|
||||
* of @p device_address is simply copied into @p host_address.
|
||||
*
|
||||
* The lifetime of @p host_address is the same as the lifetime of @p
|
||||
* device_address, and both lifetimes are limited by the lifetime of the
|
||||
* executable that is managing these addresses.
|
||||
*
|
||||
*
|
||||
* @param[in] device_address Device address to query equivalent host address
|
||||
* for.
|
||||
*
|
||||
* @param[out] host_address Pointer to application-allocated buffer to record
|
||||
* queried equivalent host address in.
|
||||
*
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
|
||||
* null, or @p host_address is null.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_host_address(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
/**
|
||||
* @brief The storage type of the code object that is backing loaded memory
|
||||
* segment.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Loaded memory segment is not backed by any code object (anonymous), as the
|
||||
* case would be with BSS (uninitialized data).
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
|
||||
/**
|
||||
* Loaded memory segment is backed by the code object that is stored in the
|
||||
* file.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
|
||||
/**
|
||||
* Loaded memory segment is backed by the code object that is stored in the
|
||||
* memory.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
|
||||
} hsa_ven_amd_loader_code_object_storage_type_t;
|
||||
|
||||
/**
|
||||
* @brief Loaded memory segment descriptor.
|
||||
*
|
||||
*
|
||||
* @details Loaded memory segment descriptor describes underlying loaded memory
|
||||
* segment. Loaded memory segment is created/allocated by the executable during
|
||||
* the loading of the code object that is backing underlying memory segment.
|
||||
*
|
||||
* The lifetime of underlying memory segment is limited by the lifetime of the
|
||||
* executable that is managing underlying memory segment.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
|
||||
/**
|
||||
* Agent underlying memory segment is allocated on. If the code object that is
|
||||
* backing underlying memory segment is program code object, then 0.
|
||||
*/
|
||||
hsa_agent_t agent;
|
||||
/**
|
||||
* Executable that is managing this underlying memory segment.
|
||||
*/
|
||||
hsa_executable_t executable;
|
||||
/**
|
||||
* Storage type of the code object that is backing underlying memory segment.
|
||||
*/
|
||||
hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
|
||||
* filepath to the code object;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
|
||||
* accessible pointer to the first byte of the code object.
|
||||
*/
|
||||
const void *code_object_storage_base;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
|
||||
* the filepath to the code object (including null-terminating character);
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
|
||||
* bytes, of the memory occupied by the code object.
|
||||
*/
|
||||
size_t code_object_storage_size;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
||||
* - other, then offset, in bytes, from the beginning of the code object to
|
||||
* the first byte in the code object data is copied from.
|
||||
*/
|
||||
size_t code_object_storage_offset;
|
||||
/**
|
||||
* Starting address of the underlying memory segment.
|
||||
*/
|
||||
const void *segment_base;
|
||||
/**
|
||||
* Size, in bytes, of the underlying memory segment.
|
||||
*/
|
||||
size_t segment_size;
|
||||
} hsa_ven_amd_loader_segment_descriptor_t;
|
||||
|
||||
/**
|
||||
* @brief Either queries loaded memory segment descriptors, or total number of
|
||||
* loaded memory segment descriptors.
|
||||
*
|
||||
*
|
||||
* @details If @p segment_descriptors is not null and @p num_segment_descriptors
|
||||
* points to number that exactly matches total number of loaded memory segment
|
||||
* descriptors, then queries loaded memory segment descriptors, and records them
|
||||
* in @p segment_descriptors. If @p segment_descriptors is null and @p
|
||||
* num_segment_descriptors points to zero, then queries total number of loaded
|
||||
* memory segment descriptors, and records it in @p num_segment_descriptors. In
|
||||
* all other cases returns appropriate error code (see below).
|
||||
*
|
||||
* The caller of this function is responsible for the allocation/deallocation
|
||||
* and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
|
||||
*
|
||||
* The lifetime of loaded memory segments that are described by queried loaded
|
||||
* memory segment descriptors is limited by the lifetime of the executable that
|
||||
* is managing loaded memory segments.
|
||||
*
|
||||
* Queried loaded memory segment descriptors are always self-consistent: they
|
||||
* describe a complete set of loaded memory segments that are being backed by
|
||||
* fully loaded code objects that are present at the time (i.e. this function
|
||||
* is blocked until all executable manipulations are fully complete).
|
||||
*
|
||||
*
|
||||
* @param[out] segment_descriptors Pointer to application-allocated buffer to
|
||||
* record queried loaded memory segment descriptors in. Can be null if @p
|
||||
* num_segment_descriptors points to zero.
|
||||
*
|
||||
* @param[in,out] num_segment_descriptors Pointer to application-allocated
|
||||
* buffer that contains either total number of loaded memory segment descriptors
|
||||
* or zero.
|
||||
*
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
|
||||
* while @p num_segment_descriptors points to non-zero number, @p
|
||||
* segment_descriptors is not null while @p num_segment_descriptors points to
|
||||
* zero, or @p num_segment_descriptors is null.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
|
||||
* does not point to number that exactly matches total number of loaded memory
|
||||
* segment descriptors.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
/**
|
||||
* @brief Obtains the handle of executable to which the device address belongs.
|
||||
*
|
||||
* @details This method should not be used to obtain executable handle by using
|
||||
* a host address. The executable returned is expected to be alive until its
|
||||
* destroyed by the user.
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
|
||||
* is no exectuable found for this kernel code object.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_executable(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Iterate over the loaded code objects in an executable, and invoke
|
||||
* an application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] executable Executable.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per loaded code object. The
|
||||
* HSA runtime passes three arguments to the callback: the executable, a
|
||||
* loaded code object, and the application data. If @p callback returns a
|
||||
* status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
|
||||
* traversal stops and
|
||||
* ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
|
||||
* status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
/**
|
||||
* @brief Loaded code object kind.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Program code object.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
|
||||
/**
|
||||
* Agent code object.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
|
||||
} hsa_ven_amd_loader_loaded_code_object_kind_t;
|
||||
|
||||
/**
|
||||
* @brief Loaded code object attributes.
|
||||
*/
|
||||
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
|
||||
/**
|
||||
* The executable in which this loaded code object is loaded. The
|
||||
* type of this attribute is ::hsa_executable_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
|
||||
/**
|
||||
* The kind of this loaded code object. The type of this attribute is
|
||||
* ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
|
||||
/**
|
||||
* The agent on which this loaded code object is loaded. The
|
||||
* value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
|
||||
* attribute is ::hsa_agent_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
|
||||
/**
|
||||
* The storage type of the code object reader used to load the loaded code object.
|
||||
* The type of this attribute is ::uint32_t interpreted as a
|
||||
* ::hsa_ven_amd_loader_code_object_storage_type_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
|
||||
/**
|
||||
* The memory address of the first byte of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
||||
* attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
|
||||
/**
|
||||
* The memory size in bytes of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
||||
* attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
|
||||
/**
|
||||
* The file descriptor of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
|
||||
* attribute is ::int.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
|
||||
/**
|
||||
* The signed byte address difference of the memory address at which the code
|
||||
* object is loaded minus the virtual address specified in the code object
|
||||
* that is loaded. The value of this attribute is only defined if the
|
||||
* executable in which the code object is loaded is froozen. The type of this
|
||||
* attribute is ::int64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
|
||||
/**
|
||||
* The base memory address at which the code object is loaded. This is the
|
||||
* base address of the allocation for the lowest addressed segment of the code
|
||||
* object that is loaded. Note that any non-loaded segments before the first
|
||||
* loaded segment are ignored. The value of this attribute is only defined if
|
||||
* the executable in which the code object is loaded is froozen. The type of
|
||||
* this attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
|
||||
/**
|
||||
* The byte size of the loaded code objects contiguous memory allocation. The
|
||||
* value of this attribute is only defined if the executable in which the code
|
||||
* object is loaded is froozen. The type of this attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
|
||||
/**
|
||||
* The length of the URI in bytes, not including the NUL terminator. The type
|
||||
* of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
|
||||
/**
|
||||
* The URI name from which the code object was loaded. The type of this
|
||||
* attribute is a NUL terminated \p char* with the length equal to the value
|
||||
* of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
|
||||
* The URI name syntax is defined by the following BNF syntax:
|
||||
*
|
||||
* code_object_uri ::== file_uri | memory_uri
|
||||
* file_uri ::== "file://" file_path [ range_specifier ]
|
||||
* memory_uri ::== "memory://" process_id range_specifier
|
||||
* range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
|
||||
* file_path ::== URI_ENCODED_OS_FILE_PATH
|
||||
* process_id ::== DECIMAL_NUMBER
|
||||
* number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
|
||||
*
|
||||
* ``number`` is a C integral literal where hexadecimal values are prefixed by
|
||||
* "0x" or "0X", and octal values by "0".
|
||||
*
|
||||
* ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
|
||||
* In URI encoding, every character that is not in the regular expression
|
||||
* ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
|
||||
* proceeded by "%". Directories in the path are separated by "/".
|
||||
*
|
||||
* ``offset`` is a 0-based byte offset to the start of the code object. For a
|
||||
* file URI, it is from the start of the file specified by the ``file_path``,
|
||||
* and if omitted defaults to 0. For a memory URI, it is the memory address
|
||||
* and is required.
|
||||
*
|
||||
* ``size`` is the number of bytes in the code object. For a file URI, if
|
||||
* omitted it defaults to the size of the file. It is required for a memory
|
||||
* URI.
|
||||
*
|
||||
* ``process_id`` is the identity of the process owning the memory. For Linux
|
||||
* it is the C unsigned integral decimal literal for the process ID (PID).
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* file:///dir1/dir2/file1
|
||||
* file:///dir3/dir4/file2#offset=0x2000&size=3000
|
||||
* memory://1234#offset=0x20000&size=3000
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
|
||||
} hsa_ven_amd_loader_loaded_code_object_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given loaded code
|
||||
* object.
|
||||
*
|
||||
* @param[in] loaded_code_object Loaded code object.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behavior is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* loaded code object attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Create a code object reader to operate on a file with size and offset.
|
||||
*
|
||||
* @param[in] file File descriptor. The file must have been opened by
|
||||
* application with at least read permissions prior calling this function. The
|
||||
* file must contain a vendor-specific code object.
|
||||
*
|
||||
* The file is owned and managed by the application; the lifetime of the file
|
||||
* descriptor must exceed that of any associated code object reader.
|
||||
*
|
||||
* @param[in] size Size of the code object embedded in @p file.
|
||||
*
|
||||
* @param[in] offset 0-based offset relative to the beginning of the @p file
|
||||
* that denotes the beginning of the code object embedded within the @p file.
|
||||
*
|
||||
* @param[out] code_object_reader Memory location to store the newly created
|
||||
* code object reader handle. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
|
||||
* read permissions. This condition may also be reported as
|
||||
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
|
||||
* ::hsa_executable_load_agent_code_object function.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
|
||||
* do not form a valid code object. If file size is 0. Or offset > file size.
|
||||
* This condition may also be reported as
|
||||
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
|
||||
* ::hsa_executable_load_agent_code_object function.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
|
||||
* allocate the required resources.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
|
||||
*/
|
||||
hsa_status_t
|
||||
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Iterate over the available executables, and invoke an
|
||||
* application-defined callback on every iteration. While
|
||||
* ::hsa_ven_amd_loader_iterate_executables is executing any calls to
|
||||
* ::hsa_executable_create, ::hsa_executable_create_alt, or
|
||||
* ::hsa_executable_destroy will be blocked.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per executable. The HSA
|
||||
* runtime passes two arguments to the callback: the executable and the
|
||||
* application data. If @p callback returns a status other than
|
||||
* ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
|
||||
* ::hsa_ven_amd_loader_iterate_executables returns that status value. If
|
||||
* @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
|
||||
* ::hsa_executable_destroy then the behavior is undefined.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t
|
||||
hsa_ven_amd_loader_iterate_executables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Extension version.
|
||||
*/
|
||||
#define hsa_ven_amd_loader 001003
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.00.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
} hsa_ven_amd_loader_1_00_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.01.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
} hsa_ven_amd_loader_1_01_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.02.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
} hsa_ven_amd_loader_1_02_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.03.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_iterate_executables)(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
} hsa_ven_amd_loader_1_03_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* HSA_VEN_AMD_LOADER_H */
|
||||
@@ -0,0 +1,416 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_VEN_AMD_PC_SAMPLING_H
|
||||
#define HSA_VEN_AMD_PC_SAMPLING_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief HSA AMD Vendor PC Sampling APIs
|
||||
* EXPERIMENTAL: All PC Sampling APIs are currently in an experimental phase and the APIs may be
|
||||
* modified extensively in the future
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief PC Sampling sample data for hosttrap sampling method
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t pc;
|
||||
uint64_t exec_mask;
|
||||
uint32_t workgroup_id_x;
|
||||
uint32_t workgroup_id_y;
|
||||
uint32_t workgroup_id_z;
|
||||
uint32_t wave_in_wg : 6;
|
||||
uint32_t chiplet : 3; // Currently not used
|
||||
uint32_t reserved : 23;
|
||||
uint32_t hw_id;
|
||||
uint32_t reserved0;
|
||||
uint64_t reserved1;
|
||||
uint64_t timestamp;
|
||||
uint64_t correlation_id;
|
||||
} perf_sample_hosttrap_v1_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling sample data for stochastic sampling method
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t pc;
|
||||
uint64_t exec_mask;
|
||||
uint32_t workgroup_id_x;
|
||||
uint32_t workgroup_id_y;
|
||||
uint32_t workgroup_id_z;
|
||||
uint32_t wave_in_wg : 6;
|
||||
uint32_t chiplet : 3; // Currently not used
|
||||
uint32_t reserved : 23;
|
||||
uint32_t hw_id;
|
||||
uint32_t perf_snapshot_data;
|
||||
uint32_t perf_snapshot_data1;
|
||||
uint32_t perf_snapshot_data2;
|
||||
uint64_t timestamp;
|
||||
uint64_t correlation_id;
|
||||
} perf_sample_snapshot_v1_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling method kinds
|
||||
*/
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1,
|
||||
HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
|
||||
} hsa_ven_amd_pcs_method_kind_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling interval unit type
|
||||
*/
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS,
|
||||
HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES,
|
||||
HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS
|
||||
} hsa_ven_amd_pcs_units_t;
|
||||
|
||||
/**
|
||||
* @brief HSA callback function to perform the copy onto a destination buffer
|
||||
*
|
||||
* If data_size is 0, HSA will stop current copy operation and keep remaining data in internal
|
||||
* buffers. Remaining contents of HSA internal buffers will be included in next
|
||||
* hsa_ven_amd_pcs_data_ready_callback_t. HSA internal buffers can also be drained by calling
|
||||
* hsa_ven_amd_pcs_flush.
|
||||
*
|
||||
* @param[in] hsa_callback_data private data to pass back to HSA. Provided in
|
||||
* hsa_ven_amd_pcs_data_ready_callback_t
|
||||
*
|
||||
* @param[in] data_size size of destination buffer in bytes.
|
||||
* @param[in] destination destination buffer
|
||||
* @retval TBD: but could be used to indicate that there is no more data to be read.
|
||||
* Or indicate an error and abort of current copy operations
|
||||
*/
|
||||
typedef hsa_status_t (*hsa_ven_amd_pcs_data_copy_callback_t)(void* hsa_callback_data,
|
||||
size_t data_size, void* destination);
|
||||
|
||||
/**
|
||||
* @brief HSA callback function to to indicate that there is data ready to be copied
|
||||
*
|
||||
* When the client receives this callback, the client should call back @p data_copy_callback for HSA
|
||||
* to perform the copy operation into an available buffer. @p data_copy_callback can be called back
|
||||
* multiple times with smaller @p data_size to split the copy operation.
|
||||
*
|
||||
* This callback must not call ::hsa_ven_amd_pcs_flush.
|
||||
*
|
||||
* @param[in] client_callback_data client private data passed in via
|
||||
* hsa_ven_amd_pcs_create/hsa_ven_amd_pcs_create_from_id
|
||||
* @param[in] data_size size of data available to be copied
|
||||
* @param[in] lost_sample_count number of lost samples since last call to
|
||||
* hsa_ven_amd_pcs_data_ready_callback_t.
|
||||
* @param[in] data_copy_callback callback function for HSA to perform the actual copy
|
||||
* @param[in] hsa_callback_data private data to pass back to HSA
|
||||
*/
|
||||
typedef void (*hsa_ven_amd_pcs_data_ready_callback_t)(
|
||||
void* client_callback_data, size_t data_size, size_t lost_sample_count,
|
||||
hsa_ven_amd_pcs_data_copy_callback_t data_copy_callback, void* hsa_callback_data);
|
||||
|
||||
/**
|
||||
* @brief Opaque handle representing a sampling session.
|
||||
* Two sessions having same handle value represent the same session
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t handle;
|
||||
} hsa_ven_amd_pcs_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling configuration flag options
|
||||
*/
|
||||
typedef enum {
|
||||
/* The interval for this sampling method have to be a power of 2 */
|
||||
HSA_VEN_AMD_PCS_CONFIGURATION_FLAGS_INTERVAL_POWER_OF_2 = (1 << 0)
|
||||
} hsa_ven_amd_pcs_configuration_flags_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling method information
|
||||
* Used to provide client with list of supported PC Sampling methods
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_ven_amd_pcs_method_kind_t method;
|
||||
hsa_ven_amd_pcs_units_t units;
|
||||
size_t min_interval;
|
||||
size_t max_interval;
|
||||
uint64_t flags;
|
||||
} hsa_ven_amd_pcs_configuration_t;
|
||||
|
||||
/**
|
||||
* @brief Callback function to iterate through list of supported PC Sampling configurations
|
||||
*
|
||||
* @param[in] configuration one entry for supported PC Sampling method and configuration options
|
||||
* @param[in] callback_data client private callback data that was passed in when calling
|
||||
* hsa_ven_amd_pcs_iterate_configuration
|
||||
*/
|
||||
typedef hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration_callback_t)(
|
||||
const hsa_ven_amd_pcs_configuration_t* configuration, void* callback_data);
|
||||
|
||||
/**
|
||||
* @brief Iterate through list of current supported PC Sampling configurations for this @p agent
|
||||
*
|
||||
* HSA will callback @p configuration_callback for each currently available PC Sampling
|
||||
* configuration. The list of currently available configurations may not be the complete list of
|
||||
* configurations supported on the @p agent. The list of currently available configurations may be
|
||||
* reduced if the @p agent is currently handling other PC sampling sessions.
|
||||
*
|
||||
* @param[in] agent target agent
|
||||
* @param[in] configuration_callback callback function to iterate through list of configurations
|
||||
* @param[in] callback_data client private callback data
|
||||
**/
|
||||
hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
|
||||
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
||||
void* callback_data);
|
||||
|
||||
/**
|
||||
* @brief Create a PC Sampling session on @p agent
|
||||
*
|
||||
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
|
||||
* parameters must be a legal configuration value, as described by the
|
||||
* hsa_ven_amd_pcs_configuration_t configurations passed to the callbacks of
|
||||
* hsa_ven_amd_pcs_iterate_configuration for this @p agent.
|
||||
* A successfull call may restrict the list of possible PC sampling methods available to subsequent
|
||||
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
|
||||
* on what types of PC sampling they can perform concurrently.
|
||||
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
|
||||
* The session will be in a stopped/inactive state after this call
|
||||
*
|
||||
* @param[in] agent target agent
|
||||
* @param[in] method method to use
|
||||
* @param[in] units sampling units
|
||||
* @param[in] interval sampling interval in @p units
|
||||
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
|
||||
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
|
||||
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
|
||||
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
|
||||
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
|
||||
* buffers are full. The value of latency can be 0.
|
||||
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
|
||||
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
|
||||
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
|
||||
* @param[in] data_ready_callback client callback function that will be called when:
|
||||
* 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
|
||||
* within @p latency period.
|
||||
* OR
|
||||
* 2. When hsa_ven_amd_pcs_flush is called.
|
||||
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
|
||||
* is called.
|
||||
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
|
||||
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS session created successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
|
||||
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
|
||||
* cannot handle the type requested.
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
|
||||
* @retval ::HSA_STATUS_ERROR Unexpected error
|
||||
**/
|
||||
hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
|
||||
size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
||||
void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates a PC Sampling session on @p agent. Assumes that the caller provides the
|
||||
* @p pcs_id generated by the previous call to the underlying driver that reserved PC sampling
|
||||
* on the @p agent.
|
||||
*
|
||||
* Similar to the @ref hsa_ven_amd_pcs_create with the difference that it inherits an existing
|
||||
* PC sampling session that was previously created in the underlying driver.
|
||||
*
|
||||
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
|
||||
* parameters must be a legal configuration value, and match the parameters that we used to create
|
||||
* the underlying PC Sampling session in the underlying driver.
|
||||
* A successfull call may restrict the list of possible PC sampling methods available to subsequent
|
||||
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
|
||||
* on what types of PC sampling they can perform concurrently.
|
||||
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
|
||||
* The session will be in a stopped/inactive state after this call
|
||||
*
|
||||
* @param[in] pcs_id ID that uniquely identifies the PC sampling session within underlying driver
|
||||
* @param[in] agent target agent
|
||||
* @param[in] method method to use
|
||||
* @param[in] units sampling units
|
||||
* @param[in] interval sampling interval in @p units
|
||||
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
|
||||
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
|
||||
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
|
||||
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
|
||||
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
|
||||
* buffers are full. The value of latency can be 0.
|
||||
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
|
||||
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
|
||||
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
|
||||
* @param[in] data_ready_callback client callback function that will be called when:
|
||||
* 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
|
||||
* within @p latency period.
|
||||
* OR
|
||||
* 2. When hsa_ven_amd_pcs_flush is called.
|
||||
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
|
||||
* is called.
|
||||
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
|
||||
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS session created successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
|
||||
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
|
||||
* cannot handle the type requested.
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
|
||||
* @retval ::HSA_STATUS_ERROR Unexpected error
|
||||
**/
|
||||
hsa_status_t hsa_ven_amd_pcs_create_from_id(
|
||||
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
|
||||
hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Free a PC Sampling session on @p agent
|
||||
*
|
||||
* Free all the resources allocated for a PC Sampling session on @p agent
|
||||
* Internal buffers for this session will be lost.
|
||||
* If the session was active, the session will be stopped before it is destroyed.
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session destroyed successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
* @retval ::HSA_STATUS_ERROR unexpected error
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Start a PC Sampling session
|
||||
*
|
||||
* Activate a PC Sampling session that was previous created.
|
||||
* The session with be in a active state after this call
|
||||
* If the session was already active, this will result in a no-op and will return HSA_STATUS_SUCCESS
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session started successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
* @retval ::HSA_STATUS_ERROR unexpected error
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Stop a PC Sampling session
|
||||
*
|
||||
* Stop a session that is currently active
|
||||
* After a session is stopped HSA may still have some PC Sampling data in its internal buffers.
|
||||
* The internal buffers can be drained using hsa_ven_amd_pcs_flush. If the internal
|
||||
* buffers are not drained and the session is started again, the internal buffers will be available
|
||||
* on the next data_ready_callback.
|
||||
* If the session was already inactive, this will result in a no-op and will return
|
||||
* HSA_STATUS_SUCCESS
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session stopped successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Flush internal buffers for a PC Sampling session
|
||||
*
|
||||
* Drain internal buffers for a PC Sampling session. If internal buffers have available data,
|
||||
* this trigger a data_ready_callback.
|
||||
*
|
||||
* The function blocks until all PC samples associated with the @p pc_sampling session
|
||||
* generated prior to the function call have been communicated by invocations of
|
||||
* @p data_ready_callback having completed execution.
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session flushed successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
#define hsa_ven_amd_pc_sampling_1_00
|
||||
|
||||
/**
|
||||
* @brief The function pointer table for the PC Sampling v1.00 extension. Can be returned by
|
||||
* ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_pc_sampling_1_00_pfn_t {
|
||||
hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration)(
|
||||
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
||||
void* callback_data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_create)(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval,
|
||||
size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
||||
void* client_callback_data,
|
||||
hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_create_from_id)(
|
||||
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
|
||||
hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_destroy)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_start)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_stop)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_flush)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
} hsa_ven_amd_pc_sampling_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // end extern "C" block
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif /* HSA_VEN_AMD_PC_SAMPLING_H */
|
||||
Referência em uma Nova Issue
Bloquear um usuário