Add 'projects/rocr-runtime/libhsakmt/include/impl/' from commit 'c34ec1e52fcb52da248c00207ebe646197ea9d3e'
git-subtree-dir: projects/rocr-runtime/libhsakmt/include/impl git-subtree-mainline:55f7d39fa5git-subtree-split:c34ec1e52f
Dieser Commit ist enthalten in:
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -0,0 +1,91 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The following set of header files provides definitions for AMD GPU
|
||||
// Architecture:
|
||||
// - amd_hsa_common.h
|
||||
// - amd_hsa_elf.h
|
||||
// - amd_hsa_kernel_code.h
|
||||
// - amd_hsa_queue.h
|
||||
// - amd_hsa_signal.h
|
||||
//
|
||||
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
|
||||
// information.
|
||||
|
||||
#ifndef AMD_HSA_COMMON_H
|
||||
#define AMD_HSA_COMMON_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Descriptive version of the HSA Application Binary Interface.
|
||||
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"
|
||||
|
||||
// Alignment attribute that specifies a minimum alignment (in bytes) for
|
||||
// variables of the specified type.
|
||||
#if defined(__GNUC__)
|
||||
# define __ALIGNED__(x) __attribute__((aligned(x)))
|
||||
#elif defined(_MSC_VER)
|
||||
# define __ALIGNED__(x) __declspec(align(x))
|
||||
#elif defined(RC_INVOKED)
|
||||
# define __ALIGNED__(x)
|
||||
#else
|
||||
# error
|
||||
#endif
|
||||
|
||||
// Creates enumeration entries for packed types. Enumeration entries include
|
||||
// bit shift amount, bit width, and bit mask.
|
||||
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \
|
||||
name##_SHIFT = (shift), \
|
||||
name##_WIDTH = (width), \
|
||||
name = (((1 << (width)) - 1) << (shift)) \
|
||||
|
||||
// Gets bits for specified mask from specified src packed instance.
|
||||
#define AMD_HSA_BITS_GET(src, mask) \
|
||||
((src & mask) >> mask ## _SHIFT) \
|
||||
|
||||
// Sets val bits for specified mask in specified dst packed instance.
|
||||
#define AMD_HSA_BITS_SET(dst, mask, val) \
|
||||
dst &= (~(1 << mask##_SHIFT) & ~mask); \
|
||||
dst |= (((val) << mask##_SHIFT) & mask) \
|
||||
|
||||
#endif // AMD_HSA_COMMON_H
|
||||
@@ -0,0 +1,467 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Undefine the macro in case it is defined in the system elf.h.
|
||||
#undef EM_AMDGPU
|
||||
|
||||
#ifndef AMD_HSA_ELF_H
|
||||
#define AMD_HSA_ELF_H
|
||||
|
||||
// AMD GPU Specific ELF Header Enumeration Values.
|
||||
//
|
||||
// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
|
||||
// code object V1 defintions which are not part of the LLVM header. Code object
|
||||
// V1 was only supported by the Finalizer which is now deprecated and removed.
|
||||
//
|
||||
// TODO: Deprecate and remove V1 support and replace this header with using the
|
||||
// LLVM header.
|
||||
namespace ELF {
|
||||
|
||||
// Machine architectures
|
||||
// See current registered ELF machine architectures at:
|
||||
// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
|
||||
enum {
|
||||
EM_AMDGPU = 224, // AMD GPU architecture
|
||||
};
|
||||
|
||||
// OS ABI identification.
|
||||
enum {
|
||||
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
|
||||
};
|
||||
|
||||
// AMDGPU OS ABI Version identification.
|
||||
enum {
|
||||
// ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
|
||||
// was never defined for V1.
|
||||
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
|
||||
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
|
||||
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
|
||||
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
|
||||
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
|
||||
};
|
||||
|
||||
// AMDGPU specific e_flags.
|
||||
enum : unsigned {
|
||||
// Processor selection mask for EF_AMDGPU_MACH_* values.
|
||||
EF_AMDGPU_MACH = 0x0ff,
|
||||
|
||||
// Not specified processor.
|
||||
EF_AMDGPU_MACH_NONE = 0x000,
|
||||
|
||||
// AMDGCN-based processors.
|
||||
// clang-format off
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1153 = 0x058,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f,
|
||||
// clang-format on
|
||||
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
|
||||
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
|
||||
// Indicates if the trap handler is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
|
||||
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
|
||||
// Indicates if the "sramecc" target feature is enabled for all code
|
||||
// contained in the object.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
|
||||
|
||||
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
||||
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
|
||||
// XNACK is not supported.
|
||||
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
|
||||
// XNACK is any/default/unspecified.
|
||||
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
|
||||
// XNACK is off.
|
||||
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
|
||||
// XNACK is on.
|
||||
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
|
||||
|
||||
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
|
||||
//
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
|
||||
// SRAMECC is not supported.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
|
||||
// SRAMECC is any/default/unspecified.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
|
||||
// SRAMECC is off.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
|
||||
// SRAMECC is on.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
|
||||
|
||||
// Generic target versioning. This is contained in the list byte of EFLAGS.
|
||||
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
|
||||
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
|
||||
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
|
||||
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
|
||||
};
|
||||
|
||||
// ELF Relocation types for AMDGPU.
|
||||
enum : unsigned {
|
||||
R_AMDGPU_ABS32_LO = 1,
|
||||
R_AMDGPU_ABS32_HI = 2,
|
||||
R_AMDGPU_ABS64 = 3,
|
||||
R_AMDGPU_ABS32 = 6,
|
||||
R_AMDGPU_RELATIVE64 = 13,
|
||||
};
|
||||
|
||||
} // end namespace ELF
|
||||
|
||||
// ELF Section Header Flag Enumeration Values.
|
||||
#define SHF_AMDGPU_HSA_GLOBAL (0x00100000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_CODE (0x00400000 & SHF_MASKOS)
|
||||
#define SHF_AMDGPU_HSA_AGENT (0x00800000 & SHF_MASKOS)
|
||||
|
||||
//
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
|
||||
AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
|
||||
AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
|
||||
AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
|
||||
AMDGPU_HSA_SEGMENT_LAST,
|
||||
} amdgpu_hsa_elf_segment_t;
|
||||
|
||||
// ELF Program Header Type Enumeration Values.
|
||||
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
|
||||
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
|
||||
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
|
||||
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)
|
||||
|
||||
// ELF Symbol Type Enumeration Values.
|
||||
#define STT_AMDGPU_HSA_KERNEL (STT_LOOS + 0)
|
||||
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
|
||||
#define STT_AMDGPU_HSA_METADATA (STT_LOOS + 2)
|
||||
|
||||
// ELF Symbol Binding Enumeration Values.
|
||||
#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)
|
||||
|
||||
// ELF Symbol Other Information Creation/Retrieval.
|
||||
#define ELF64_ST_AMDGPU_ALLOCATION(o) (((o) >> 2) & 0x3)
|
||||
#define ELF64_ST_AMDGPU_FLAGS(o) ((o) >> 4)
|
||||
#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) + (((a) & 0x3) << 2) + ((v) & 0x3))
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
|
||||
AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
|
||||
} amdgpu_hsa_symbol_allocation_t;
|
||||
|
||||
// ELF Symbol Allocation Enumeration Values.
|
||||
#define STA_AMDGPU_HSA_DEFAULT AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
|
||||
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
|
||||
#define STA_AMDGPU_HSA_GLOBAL_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
|
||||
#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
|
||||
AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
|
||||
AMDGPU_HSA_SYMBOL_FLAG_LAST,
|
||||
} amdgpu_hsa_symbol_flag_t;
|
||||
|
||||
// ELF Symbol Flag Enumeration Values.
|
||||
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST
|
||||
|
||||
// Legacy/V1 AMD GPU Relocation Type Enumeration Values.
|
||||
#define R_AMDGPU_V1_NONE 0
|
||||
#define R_AMDGPU_V1_32_LOW 1
|
||||
#define R_AMDGPU_V1_32_HIGH 2
|
||||
#define R_AMDGPU_V1_64 3
|
||||
#define R_AMDGPU_V1_INIT_SAMPLER 4
|
||||
#define R_AMDGPU_V1_INIT_IMAGE 5
|
||||
#define R_AMDGPU_V1_RELATIVE64 13
|
||||
|
||||
// AMD GPU Note Type Enumeration Values.
|
||||
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
|
||||
#define NT_AMD_HSA_HSAIL 2
|
||||
#define NT_AMD_HSA_ISA_VERSION 3
|
||||
#define NT_AMD_HSA_PRODUCER 4
|
||||
#define NT_AMD_HSA_PRODUCER_OPTIONS 5
|
||||
#define NT_AMD_HSA_EXTENSION 6
|
||||
#define NT_AMD_HSA_ISA_NAME 11
|
||||
/* AMDGPU snapshots of runtime, agent and queues state for use in core dump */
|
||||
#define NT_AMDGPU_CORE_STATE 33
|
||||
#define NT_AMD_HSA_HLDEBUG_DEBUG 101
|
||||
#define NT_AMD_HSA_HLDEBUG_TARGET 102
|
||||
|
||||
// AMD GPU Metadata Kind Enumeration Values.
|
||||
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_METADATA_KIND_NONE = 0,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
|
||||
AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
|
||||
} amdgpu_hsa_metadata_kind_t;
|
||||
|
||||
// AMD GPU Sampler Coordinate Normalization Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
|
||||
AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
|
||||
} amdgpu_hsa_sampler_coord_t;
|
||||
|
||||
// AMD GPU Sampler Filter Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
|
||||
AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
|
||||
} amdgpu_hsa_sampler_filter_t;
|
||||
|
||||
// AMD GPU Sampler Addressing Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
|
||||
AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
|
||||
} amdgpu_hsa_sampler_addressing_t;
|
||||
|
||||
// AMD GPU Sampler Descriptor.
|
||||
typedef struct amdgpu_hsa_sampler_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_sampler_coord8_t coord;
|
||||
amdgpu_hsa_sampler_filter8_t filter;
|
||||
amdgpu_hsa_sampler_addressing8_t addressing;
|
||||
uint8_t reserved1;
|
||||
} amdgpu_hsa_sampler_descriptor_t;
|
||||
|
||||
// AMD GPU Image Geometry Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_geometry8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
|
||||
AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
|
||||
} amdgpu_hsa_image_geometry_t;
|
||||
|
||||
// AMD GPU Image Channel Order Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
|
||||
} amdgpu_hsa_image_channel_order_t;
|
||||
|
||||
// AMD GPU Image Channel Type Enumeration Values.
|
||||
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
|
||||
typedef enum {
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
|
||||
AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
|
||||
} amdgpu_hsa_image_channel_type_t;
|
||||
|
||||
// AMD GPU Image Descriptor.
|
||||
typedef struct amdgpu_hsa_image_descriptor_s {
|
||||
uint16_t size;
|
||||
amdgpu_hsa_metadata_kind16_t kind;
|
||||
amdgpu_hsa_image_geometry8_t geometry;
|
||||
amdgpu_hsa_image_channel_order8_t channel_order;
|
||||
amdgpu_hsa_image_channel_type8_t channel_type;
|
||||
uint8_t reserved1;
|
||||
uint64_t width;
|
||||
uint64_t height;
|
||||
uint64_t depth;
|
||||
uint64_t array;
|
||||
} amdgpu_hsa_image_descriptor_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_code_object_version_s {
|
||||
uint32_t major_version;
|
||||
uint32_t minor_version;
|
||||
} amdgpu_hsa_note_code_object_version_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_hsail_s {
|
||||
uint32_t hsail_major_version;
|
||||
uint32_t hsail_minor_version;
|
||||
uint8_t profile;
|
||||
uint8_t machine_model;
|
||||
uint8_t default_float_round;
|
||||
} amdgpu_hsa_note_hsail_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_isa_s {
|
||||
uint16_t vendor_name_size;
|
||||
uint16_t architecture_name_size;
|
||||
uint32_t major;
|
||||
uint32_t minor;
|
||||
uint32_t stepping;
|
||||
char vendor_and_architecture_name[1];
|
||||
} amdgpu_hsa_note_isa_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_producer_s {
|
||||
uint16_t producer_name_size;
|
||||
uint16_t reserved;
|
||||
uint32_t producer_major_version;
|
||||
uint32_t producer_minor_version;
|
||||
char producer_name[1];
|
||||
} amdgpu_hsa_note_producer_t;
|
||||
|
||||
typedef struct amdgpu_hsa_note_producer_options_s {
|
||||
uint16_t producer_options_size;
|
||||
char producer_options[1];
|
||||
} amdgpu_hsa_note_producer_options_t;
|
||||
|
||||
typedef enum {
|
||||
AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
|
||||
AMDGPU_HSA_RODATA_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_RODATA_READONLY_AGENT,
|
||||
AMDGPU_HSA_DATA_GLOBAL_PROGRAM,
|
||||
AMDGPU_HSA_DATA_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_DATA_READONLY_AGENT,
|
||||
AMDGPU_HSA_BSS_GLOBAL_PROGRAM,
|
||||
AMDGPU_HSA_BSS_GLOBAL_AGENT,
|
||||
AMDGPU_HSA_BSS_READONLY_AGENT,
|
||||
AMDGPU_HSA_SECTION_LAST,
|
||||
} amdgpu_hsa_elf_section_t;
|
||||
|
||||
#endif // AMD_HSA_ELF_H
|
||||
@@ -0,0 +1,270 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_KERNEL_CODE_H
|
||||
#define AMD_HSA_KERNEL_CODE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Kernel Code Version Enumeration Values.
|
||||
typedef uint32_t amd_kernel_code_version32_t;
|
||||
enum amd_kernel_code_version_t {
|
||||
AMD_KERNEL_CODE_VERSION_MAJOR = 1,
|
||||
AMD_KERNEL_CODE_VERSION_MINOR = 1
|
||||
};
|
||||
|
||||
// AMD Machine Kind Enumeration Values.
|
||||
typedef uint16_t amd_machine_kind16_t;
|
||||
enum amd_machine_kind_t {
|
||||
AMD_MACHINE_KIND_UNDEFINED = 0,
|
||||
AMD_MACHINE_KIND_AMDGPU = 1
|
||||
};
|
||||
|
||||
// AMD Machine Version.
|
||||
typedef uint16_t amd_machine_version16_t;
|
||||
|
||||
// AMD Float Round Mode Enumeration Values.
|
||||
enum amd_float_round_mode_t {
|
||||
AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
|
||||
AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
|
||||
AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
|
||||
AMD_FLOAT_ROUND_MODE_ZERO = 3
|
||||
};
|
||||
|
||||
// AMD Float Denorm Mode Enumeration Values.
|
||||
enum amd_float_denorm_mode_t {
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
|
||||
AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
|
||||
AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
|
||||
};
|
||||
|
||||
// AMD Compute Program Resource Register One.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
|
||||
enum amd_compute_pgm_rsrc_one_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
|
||||
};
|
||||
|
||||
// AMD System VGPR Workitem ID Enumeration Values.
|
||||
enum amd_system_vgpr_workitem_id_t {
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
|
||||
AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
|
||||
};
|
||||
|
||||
// AMD Compute Program Resource Register Two.
|
||||
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
|
||||
enum amd_compute_pgm_rsrc_two_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
|
||||
};
|
||||
|
||||
// AMD Element Byte Size Enumeration Values.
|
||||
enum amd_element_byte_size_t {
|
||||
AMD_ELEMENT_BYTE_SIZE_2 = 0,
|
||||
AMD_ELEMENT_BYTE_SIZE_4 = 1,
|
||||
AMD_ELEMENT_BYTE_SIZE_8 = 2,
|
||||
AMD_ELEMENT_BYTE_SIZE_16 = 3
|
||||
};
|
||||
|
||||
// AMD Kernel Code Properties.
|
||||
typedef uint32_t amd_kernel_code_properties32_t;
|
||||
enum amd_kernel_code_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
|
||||
};
|
||||
|
||||
// AMD Power Of Two Enumeration Values.
|
||||
typedef uint8_t amd_powertwo8_t;
|
||||
enum amd_powertwo_t {
|
||||
AMD_POWERTWO_1 = 0,
|
||||
AMD_POWERTWO_2 = 1,
|
||||
AMD_POWERTWO_4 = 2,
|
||||
AMD_POWERTWO_8 = 3,
|
||||
AMD_POWERTWO_16 = 4,
|
||||
AMD_POWERTWO_32 = 5,
|
||||
AMD_POWERTWO_64 = 6,
|
||||
AMD_POWERTWO_128 = 7,
|
||||
AMD_POWERTWO_256 = 8
|
||||
};
|
||||
|
||||
// AMD Enabled Control Directive Enumeration Values.
|
||||
typedef uint64_t amd_enabled_control_directive64_t;
|
||||
enum amd_enabled_control_directive_t {
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
|
||||
AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
|
||||
};
|
||||
|
||||
// AMD Exception Kind Enumeration Values.
|
||||
typedef uint16_t amd_exception_kind16_t;
|
||||
enum amd_exception_kind_t {
|
||||
AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
|
||||
AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
|
||||
AMD_EXCEPTION_KIND_OVERFLOW = 4,
|
||||
AMD_EXCEPTION_KIND_UNDERFLOW = 8,
|
||||
AMD_EXCEPTION_KIND_INEXACT = 16
|
||||
};
|
||||
|
||||
// AMD Control Directives.
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
|
||||
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
|
||||
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
|
||||
amd_enabled_control_directive64_t enabled_control_directives;
|
||||
uint16_t enable_break_exceptions;
|
||||
uint16_t enable_detect_exceptions;
|
||||
uint32_t max_dynamic_group_size;
|
||||
uint64_t max_flat_grid_size;
|
||||
uint32_t max_flat_workgroup_size;
|
||||
uint8_t required_dim;
|
||||
uint8_t reserved1[3];
|
||||
uint64_t required_grid_size[3];
|
||||
uint32_t required_workgroup_size[3];
|
||||
uint8_t reserved2[60];
|
||||
} amd_control_directives_t;
|
||||
|
||||
// AMD Kernel Code.
|
||||
#define AMD_ISA_ALIGN_BYTES 256
|
||||
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
|
||||
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
|
||||
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_major;
|
||||
amd_kernel_code_version32_t amd_kernel_code_version_minor;
|
||||
amd_machine_kind16_t amd_machine_kind;
|
||||
amd_machine_version16_t amd_machine_version_major;
|
||||
amd_machine_version16_t amd_machine_version_minor;
|
||||
amd_machine_version16_t amd_machine_version_stepping;
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
int64_t kernel_code_prefetch_byte_offset;
|
||||
uint64_t kernel_code_prefetch_byte_size;
|
||||
uint64_t max_scratch_backing_memory_byte_size;
|
||||
amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
|
||||
amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
|
||||
amd_kernel_code_properties32_t kernel_code_properties;
|
||||
uint32_t workitem_private_segment_byte_size;
|
||||
uint32_t workgroup_group_segment_byte_size;
|
||||
uint32_t gds_segment_byte_size;
|
||||
uint64_t kernarg_segment_byte_size;
|
||||
uint32_t workgroup_fbarrier_count;
|
||||
uint16_t wavefront_sgpr_count;
|
||||
uint16_t workitem_vgpr_count;
|
||||
uint16_t reserved_vgpr_first;
|
||||
uint16_t reserved_vgpr_count;
|
||||
uint16_t reserved_sgpr_first;
|
||||
uint16_t reserved_sgpr_count;
|
||||
uint16_t debug_wavefront_private_segment_offset_sgpr;
|
||||
uint16_t debug_private_segment_buffer_sgpr;
|
||||
amd_powertwo8_t kernarg_segment_alignment;
|
||||
amd_powertwo8_t group_segment_alignment;
|
||||
amd_powertwo8_t private_segment_alignment;
|
||||
amd_powertwo8_t wavefront_size;
|
||||
int32_t call_convention;
|
||||
uint8_t reserved1[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
amd_control_directives_t control_directives;
|
||||
} amd_kernel_code_t;
|
||||
|
||||
// TODO: this struct should be completely gone once debugger designs/implements
|
||||
// Debugger APIs.
|
||||
typedef struct amd_runtime_loader_debug_info_s {
|
||||
const void* elf_raw;
|
||||
size_t elf_size;
|
||||
const char *kernel_name;
|
||||
const void *owning_segment;
|
||||
} amd_runtime_loader_debug_info_t;
|
||||
|
||||
#endif // AMD_HSA_KERNEL_CODE_H
|
||||
@@ -0,0 +1,154 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_QUEUE_H
|
||||
#define AMD_HSA_QUEUE_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "hsa.h"
|
||||
|
||||
// AMD Queue Properties.
|
||||
typedef uint32_t amd_queue_properties32_t;
|
||||
enum amd_queue_properties_t {
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
|
||||
};
|
||||
|
||||
// AMD Queue.
|
||||
#define AMD_QUEUE_ALIGN_BYTES 64
|
||||
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
|
||||
|
||||
// AMD Queue Capabilities.
|
||||
typedef uint32_t amd_queue_capabilities32_t;
|
||||
enum amd_queue_capabilities_t {
|
||||
/* This version of CP FW supports dual-scratch and async-reclaim */
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM, 0, 1),
|
||||
|
||||
/*
|
||||
* This version of ROCr supports async-reclaim and CP FW may access the
|
||||
* V2 fields.
|
||||
*/
|
||||
AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM, 1, 1),
|
||||
};
|
||||
|
||||
/* This is the original amd_queue_t structure. The definition is only kept
|
||||
* for reference purposes. This structure should not be used. */
|
||||
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
|
||||
hsa_queue_t hsa_queue;
|
||||
uint32_t caps;
|
||||
uint32_t reserved1[3];
|
||||
volatile uint64_t write_dispatch_id;
|
||||
uint32_t group_segment_aperture_base_hi;
|
||||
uint32_t private_segment_aperture_base_hi;
|
||||
uint32_t max_cu_id;
|
||||
uint32_t max_wave_id;
|
||||
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
|
||||
volatile uint32_t legacy_doorbell_lock;
|
||||
uint32_t reserved2[9];
|
||||
volatile uint64_t read_dispatch_id;
|
||||
uint32_t read_dispatch_id_field_base_byte_offset;
|
||||
uint32_t compute_tmpring_size;
|
||||
uint32_t scratch_resource_descriptor[4];
|
||||
uint64_t scratch_backing_memory_location;
|
||||
uint32_t reserved3[2];
|
||||
uint32_t scratch_wave64_lane_byte_size;
|
||||
amd_queue_properties32_t queue_properties;
|
||||
uint32_t reserved4[2];
|
||||
hsa_signal_t queue_inactive_signal;
|
||||
uint32_t reserved5[14];
|
||||
} amd_queue_t;
|
||||
|
||||
/*
|
||||
* AMD_QUEUE Version 2
|
||||
* amd_queue_v2_t is backwards compatible with amd_queue_t structure and can
|
||||
* be used with previous versions of CP FW. The added fields tagged as V2 are
|
||||
* ignored when running previous versions of CP FW.
|
||||
* CP FW will not try to access elements beyond the original 64-bytes
|
||||
* (sizeof(amd_queue_t)) unless the AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM bit is set.
|
||||
*/
|
||||
|
||||
#define MAX_NUM_XCC 128
|
||||
typedef struct scratch_last_used_index_xcc_s {
|
||||
volatile uint64_t main;
|
||||
volatile uint64_t alt;
|
||||
} scratch_last_used_index_xcc_t;
|
||||
|
||||
typedef struct AMD_QUEUE_ALIGN amd_queue_v2_s {
|
||||
hsa_queue_t hsa_queue;
|
||||
uint32_t caps;
|
||||
uint32_t reserved1[3];
|
||||
volatile uint64_t write_dispatch_id;
|
||||
uint32_t group_segment_aperture_base_hi;
|
||||
uint32_t private_segment_aperture_base_hi;
|
||||
uint32_t max_cu_id;
|
||||
uint32_t max_wave_id;
|
||||
volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
|
||||
volatile uint32_t legacy_doorbell_lock;
|
||||
uint32_t reserved2[9];
|
||||
volatile uint64_t read_dispatch_id;
|
||||
uint32_t read_dispatch_id_field_base_byte_offset;
|
||||
uint32_t compute_tmpring_size;
|
||||
uint32_t scratch_resource_descriptor[4];
|
||||
uint64_t scratch_backing_memory_location;
|
||||
uint64_t scratch_backing_memory_byte_size;
|
||||
uint32_t scratch_wave64_lane_byte_size;
|
||||
amd_queue_properties32_t queue_properties;
|
||||
volatile uint64_t scratch_max_use_index; /* V2 */
|
||||
hsa_signal_t queue_inactive_signal;
|
||||
volatile uint64_t alt_scratch_max_use_index; /* V2 */
|
||||
uint32_t alt_scratch_resource_descriptor[4]; /* V2 */
|
||||
uint64_t alt_scratch_backing_memory_location; /* V2 */
|
||||
uint32_t alt_scratch_dispatch_limit_x; /* V2 */
|
||||
uint32_t alt_scratch_dispatch_limit_y; /* V2 */
|
||||
uint32_t alt_scratch_dispatch_limit_z; /* V2 */
|
||||
uint32_t alt_scratch_wave64_lane_byte_size; /* V2 */
|
||||
uint32_t alt_compute_tmpring_size; /* V2 */
|
||||
uint32_t reserved5;
|
||||
|
||||
scratch_last_used_index_xcc_t scratch_last_used_index[MAX_NUM_XCC];
|
||||
} amd_queue_v2_t;
|
||||
|
||||
#endif // AMD_HSA_QUEUE_H
|
||||
@@ -0,0 +1,79 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef AMD_HSA_SIGNAL_H
|
||||
#define AMD_HSA_SIGNAL_H
|
||||
|
||||
#include "amd_hsa_common.h"
|
||||
#include "amd_hsa_queue.h"
|
||||
|
||||
// AMD Signal Kind Enumeration Values.
|
||||
typedef int64_t amd_signal_kind64_t;
|
||||
enum amd_signal_kind_t {
|
||||
AMD_SIGNAL_KIND_INVALID = 0,
|
||||
AMD_SIGNAL_KIND_USER = 1,
|
||||
AMD_SIGNAL_KIND_DOORBELL = -1,
|
||||
AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
|
||||
};
|
||||
|
||||
// AMD Signal.
|
||||
#define AMD_SIGNAL_ALIGN_BYTES 64
|
||||
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
|
||||
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
|
||||
amd_signal_kind64_t kind;
|
||||
union {
|
||||
volatile int64_t value;
|
||||
volatile uint64_t* hardware_doorbell_ptr;
|
||||
};
|
||||
uint64_t event_mailbox_ptr;
|
||||
uint32_t event_id;
|
||||
uint32_t reserved1;
|
||||
uint64_t start_ts;
|
||||
uint64_t end_ts;
|
||||
union {
|
||||
amd_queue_v2_t* queue_ptr;
|
||||
uint64_t reserved2;
|
||||
};
|
||||
uint32_t reserved3[2];
|
||||
} amd_signal_t;
|
||||
|
||||
#endif // AMD_HSA_SIGNAL_H
|
||||
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef HSA_RUNTIME_AMD_TOOL_EVENTS_H_
|
||||
#define HSA_RUNTIME_AMD_TOOL_EVENTS_H_
|
||||
|
||||
// Insert license header
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "hsa.h"
|
||||
|
||||
|
||||
typedef enum {
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE = 0,
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE =
|
||||
(1 << 0), // This scratch allocation is only valid for 1 dispatch.
|
||||
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT =
|
||||
(1 << 1), // Used alternate scratch instead of main scratch
|
||||
} hsa_amd_event_scratch_alloc_flag_t;
|
||||
|
||||
typedef enum {
|
||||
HSA_AMD_TOOL_EVENT_MIN = 0,
|
||||
|
||||
// Scratch memory tracking
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START,
|
||||
HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END,
|
||||
|
||||
// Add new events above ^
|
||||
HSA_AMD_TOOL_EVENT_MAX
|
||||
} hsa_amd_tool_event_kind_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
} hsa_amd_tool_event_none_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
|
||||
} hsa_amd_event_scratch_alloc_start_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
uint64_t dispatch_id; // Dispatch ID of the AQL packet that needs more scratch memory
|
||||
size_t size; // Amount of scratch allocated - in bytes
|
||||
size_t num_slots; // limit of number of waves
|
||||
} hsa_amd_event_scratch_alloc_end_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_free_start_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_free_end_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_async_reclaim_start_t;
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_tool_event_kind_t kind;
|
||||
const hsa_queue_t* queue;
|
||||
hsa_amd_event_scratch_alloc_flag_t flags;
|
||||
} hsa_amd_event_scratch_async_reclaim_end_t;
|
||||
|
||||
typedef union {
|
||||
const hsa_amd_tool_event_none_t* none;
|
||||
const hsa_amd_event_scratch_alloc_start_t* scratch_alloc_start;
|
||||
const hsa_amd_event_scratch_alloc_end_t* scratch_alloc_end;
|
||||
const hsa_amd_event_scratch_free_start_t* scratch_free_start;
|
||||
const hsa_amd_event_scratch_free_end_t* scratch_free_end;
|
||||
const hsa_amd_event_scratch_async_reclaim_start_t* scratch_async_reclaim_start;
|
||||
const hsa_amd_event_scratch_async_reclaim_end_t* scratch_async_reclaim_end;
|
||||
} hsa_amd_tool_event_t;
|
||||
|
||||
typedef hsa_status_t (*hsa_amd_tool_event)(hsa_amd_tool_event_t);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,587 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
|
||||
#define HSA_RUNTIME_INC_HSA_API_TRACE_H
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_api_trace_version.h"
|
||||
#ifdef AMD_INTERNAL_BUILD
|
||||
#include "hsa_ext_image.h"
|
||||
#include "hsa_ext_amd.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include "hsa_amd_tool.h"
|
||||
#include "hsa_ven_amd_pc_sampling.h"
|
||||
#else
|
||||
#include "inc/hsa_ext_image.h"
|
||||
#include "inc/hsa_ext_amd.h"
|
||||
#include "inc/hsa_ext_finalize.h"
|
||||
#include "inc/hsa_amd_tool.h"
|
||||
#include "inc/hsa_ven_amd_pc_sampling.h"
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Table MAJOR_VERSION and STEP_VERSION defines have moved to hsa_api_trace_version.h
|
||||
|
||||
// Min function used to copy Api Tables
|
||||
static inline uint32_t Min(const uint32_t a, const uint32_t b) {
|
||||
return (a > b) ? b : a;
|
||||
}
|
||||
|
||||
// Declarations of APIs intended for use only by tools.
|
||||
|
||||
// An AQL packet that can be put in an intercept queue to cause a callback to
|
||||
// be invoked when the packet is about to be submitted to the underlying
|
||||
// hardware queue. These packets are not copied to the underlying hardware
|
||||
// queue. These packets should come immediately before the regular AQL packet
|
||||
// they relate to. This implies that packet rewriters should always keep these
|
||||
// packets adjacent to the regular AQL packet that follows them.
|
||||
const uint32_t AMD_AQL_FORMAT_INTERCEPT_MARKER = 0xFE;
|
||||
|
||||
struct amd_aql_intercept_marker_s;
|
||||
|
||||
// When an intercept queue is processing rewritten packets to put them on the
|
||||
// underlying hardware queue, if it encounters a
|
||||
// AMD_AQL_FORMAT_INTERCEPT_MARKER vendor AQL packet it will call the following
|
||||
// handler. packet points to the packet, queue is the underlying hardware
|
||||
// queue, and packet_id is the packet id of the next packet to be put on the
|
||||
// underlying hardware queue. The intercept queue does not put these packets
|
||||
// onto the underlying hardware queue.
|
||||
typedef void (*amd_intercept_marker_handler)(const struct amd_aql_intercept_marker_s* packet,
|
||||
hsa_queue_t* queue, uint64_t packet_id);
|
||||
// An AQL vendor packet used by the intercept queue to mark the following
|
||||
// packet. The callback will be invoked to allow a tool to know where in the
|
||||
// underlying hardware queue the following packet will be placed. user_data can
|
||||
// be used to hold any data useful to the tool.
|
||||
typedef struct amd_aql_intercept_marker_s {
|
||||
uint16_t header; // Must have a packet type of HSA_PACKET_TYPE_VENDOR_SPECIFIC.
|
||||
uint8_t format; // Must be AMD_AQL_FORMAT_INTERCEPT_MARKER.
|
||||
uint8_t reserved[5]; // Must be 0.
|
||||
#ifdef HSA_LARGE_MODEL
|
||||
amd_intercept_marker_handler callback;
|
||||
#elif defined HSA_LITTLE_ENDIAN
|
||||
amd_intercept_marker_handler callback;
|
||||
uint32_t reserved1; // Must be 0.
|
||||
#else
|
||||
uint32_t reserved1; // Must be 0.
|
||||
amd_intercept_marker_handler callback;
|
||||
#endif
|
||||
uint64_t user_data[6];
|
||||
} amd_aql_intercept_marker_t;
|
||||
|
||||
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);
|
||||
typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
|
||||
uint64_t user_pkt_index, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer);
|
||||
hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
|
||||
hsa_amd_queue_intercept_handler callback,
|
||||
void* user_data);
|
||||
hsa_status_t hsa_amd_queue_intercept_create(
|
||||
hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
|
||||
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
|
||||
|
||||
typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
|
||||
void* data);
|
||||
hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
|
||||
void* user_data);
|
||||
|
||||
// Structure of Version used to identify an instance of Api table
|
||||
// Must be the first member (offsetof == 0) of all API tables.
|
||||
// This is the root of the table passing ABI.
|
||||
struct ApiTableVersion {
|
||||
uint32_t major_id;
|
||||
uint32_t minor_id;
|
||||
uint32_t step_id;
|
||||
uint32_t reserved;
|
||||
};
|
||||
|
||||
struct ToolsApiTable {
|
||||
ApiTableVersion version;
|
||||
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_start_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_end_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_free_start_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_free_end_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_start_fn;
|
||||
hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_end_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Finalizer Extension Apis
|
||||
struct FinalizerExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
|
||||
decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
|
||||
decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
|
||||
decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
|
||||
decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
|
||||
decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Image Extension Apis
|
||||
struct ImageExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
|
||||
decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
|
||||
decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
|
||||
decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
|
||||
decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
|
||||
decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
|
||||
decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
|
||||
decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
|
||||
decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
|
||||
decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
|
||||
decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
|
||||
decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
|
||||
decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
|
||||
decltype(hsa_ext_sampler_create_v2)* hsa_ext_sampler_create_v2_fn;
|
||||
|
||||
};
|
||||
|
||||
// Table to export HSA PC Sampling Extension Apis
|
||||
struct PcSamplingExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_ven_amd_pcs_iterate_configuration)* hsa_ven_amd_pcs_iterate_configuration_fn;
|
||||
decltype(hsa_ven_amd_pcs_create)* hsa_ven_amd_pcs_create_fn;
|
||||
decltype(hsa_ven_amd_pcs_create_from_id)* hsa_ven_amd_pcs_create_from_id_fn;
|
||||
decltype(hsa_ven_amd_pcs_destroy)* hsa_ven_amd_pcs_destroy_fn;
|
||||
decltype(hsa_ven_amd_pcs_start)* hsa_ven_amd_pcs_start_fn;
|
||||
decltype(hsa_ven_amd_pcs_stop)* hsa_ven_amd_pcs_stop_fn;
|
||||
decltype(hsa_ven_amd_pcs_flush)* hsa_ven_amd_pcs_flush_fn;
|
||||
};
|
||||
|
||||
|
||||
// Table to export AMD Extension Apis
|
||||
struct AmdExtTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
|
||||
decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
|
||||
decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
|
||||
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
|
||||
decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
|
||||
decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
|
||||
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
|
||||
decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
|
||||
decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
|
||||
decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
|
||||
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
|
||||
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
|
||||
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
|
||||
decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_on_engine)* hsa_amd_memory_async_copy_on_engine_fn;
|
||||
decltype(hsa_amd_memory_copy_engine_status)* hsa_amd_memory_copy_engine_status_fn;
|
||||
decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
|
||||
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
|
||||
decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
|
||||
decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
|
||||
decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
|
||||
decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
|
||||
decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
|
||||
decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
|
||||
decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
|
||||
decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
|
||||
decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
|
||||
decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
|
||||
decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
|
||||
decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
|
||||
decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
|
||||
decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
|
||||
decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
|
||||
decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
|
||||
decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
|
||||
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
|
||||
decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
|
||||
decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
|
||||
decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
|
||||
decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
|
||||
decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
|
||||
decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
|
||||
decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
|
||||
decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn;
|
||||
decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
|
||||
decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
|
||||
decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
|
||||
decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn;
|
||||
decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn;
|
||||
decltype(hsa_amd_vmem_address_reserve)* hsa_amd_vmem_address_reserve_fn;
|
||||
decltype(hsa_amd_vmem_address_free)* hsa_amd_vmem_address_free_fn;
|
||||
decltype(hsa_amd_vmem_handle_create)* hsa_amd_vmem_handle_create_fn;
|
||||
decltype(hsa_amd_vmem_handle_release)* hsa_amd_vmem_handle_release_fn;
|
||||
decltype(hsa_amd_vmem_map)* hsa_amd_vmem_map_fn;
|
||||
decltype(hsa_amd_vmem_unmap)* hsa_amd_vmem_unmap_fn;
|
||||
decltype(hsa_amd_vmem_set_access)* hsa_amd_vmem_set_access_fn;
|
||||
decltype(hsa_amd_vmem_get_access)* hsa_amd_vmem_get_access_fn;
|
||||
decltype(hsa_amd_vmem_export_shareable_handle)* hsa_amd_vmem_export_shareable_handle_fn;
|
||||
decltype(hsa_amd_vmem_import_shareable_handle)* hsa_amd_vmem_import_shareable_handle_fn;
|
||||
decltype(hsa_amd_vmem_retain_alloc_handle)* hsa_amd_vmem_retain_alloc_handle_fn;
|
||||
decltype(hsa_amd_vmem_get_alloc_properties_from_handle)*
|
||||
hsa_amd_vmem_get_alloc_properties_from_handle_fn;
|
||||
decltype(hsa_amd_agent_set_async_scratch_limit)* hsa_amd_agent_set_async_scratch_limit_fn;
|
||||
decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
|
||||
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
|
||||
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
|
||||
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
|
||||
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
|
||||
decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn;
|
||||
decltype(hsa_amd_ais_file_write)* hsa_amd_ais_file_write_fn;
|
||||
decltype(hsa_amd_ais_file_read)* hsa_amd_ais_file_read_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
struct CoreApiTable {
|
||||
ApiTableVersion version;
|
||||
decltype(hsa_init)* hsa_init_fn;
|
||||
decltype(hsa_shut_down)* hsa_shut_down_fn;
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info_fn;
|
||||
decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
|
||||
decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
|
||||
decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
|
||||
decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;
|
||||
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
|
||||
decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
|
||||
decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
|
||||
decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
|
||||
decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
|
||||
decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
|
||||
decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
|
||||
decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;
|
||||
decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
|
||||
decltype(hsa_region_get_info)* hsa_region_get_info_fn;
|
||||
decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
|
||||
decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
|
||||
decltype(hsa_memory_register)* hsa_memory_register_fn;
|
||||
decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
|
||||
decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
|
||||
decltype(hsa_memory_free)* hsa_memory_free_fn;
|
||||
decltype(hsa_memory_copy)* hsa_memory_copy_fn;
|
||||
decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;
|
||||
decltype(hsa_signal_create)* hsa_signal_create_fn;
|
||||
decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
|
||||
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
|
||||
decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
|
||||
decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
|
||||
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
|
||||
decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
|
||||
decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
|
||||
decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
|
||||
decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
|
||||
decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
|
||||
decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
|
||||
decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
|
||||
decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
|
||||
decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
|
||||
decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
|
||||
decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
|
||||
decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
|
||||
decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
|
||||
decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
|
||||
decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
|
||||
decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
|
||||
decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
|
||||
decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
|
||||
decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
|
||||
decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
|
||||
decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
|
||||
decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
|
||||
decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
|
||||
decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
|
||||
decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
|
||||
decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
|
||||
decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
|
||||
decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;
|
||||
|
||||
//===--- Instruction Set Architecture -----------------------------------===//
|
||||
|
||||
decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;
|
||||
|
||||
//===--- Code Objects (deprecated) --------------------------------------===//
|
||||
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;
|
||||
|
||||
//===--- Executable -----------------------------------------------------===//
|
||||
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_create)* hsa_executable_create_fn;
|
||||
decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
|
||||
decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
|
||||
decltype(hsa_executable_global_variable_define)*
|
||||
hsa_executable_global_variable_define_fn;
|
||||
decltype(hsa_executable_agent_global_variable_define)*
|
||||
hsa_executable_agent_global_variable_define_fn;
|
||||
decltype(hsa_executable_readonly_variable_define)*
|
||||
hsa_executable_readonly_variable_define_fn;
|
||||
decltype(hsa_executable_validate)* hsa_executable_validate_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
|
||||
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;
|
||||
|
||||
//===--- Runtime Notifications ------------------------------------------===//
|
||||
|
||||
decltype(hsa_status_string)* hsa_status_string_fn;
|
||||
|
||||
// Start HSA v1.1 additions
|
||||
decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
|
||||
decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
|
||||
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
|
||||
decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
|
||||
decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
|
||||
decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
|
||||
decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
|
||||
decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
|
||||
decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
|
||||
decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
|
||||
decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
|
||||
decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;
|
||||
|
||||
//===--- Instruction Set Architecture - HSA v1.1 additions --------------===//
|
||||
|
||||
decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
|
||||
decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
|
||||
decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
|
||||
decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
|
||||
decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
|
||||
decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;
|
||||
|
||||
//===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//
|
||||
|
||||
// Deprecated since v1.1.
|
||||
decltype(hsa_code_object_get_symbol_from_name)*
|
||||
hsa_code_object_get_symbol_from_name_fn;
|
||||
|
||||
//===--- Executable - HSA v1.1 additions --------------------------------===//
|
||||
|
||||
decltype(hsa_code_object_reader_create_from_file)*
|
||||
hsa_code_object_reader_create_from_file_fn;
|
||||
decltype(hsa_code_object_reader_create_from_memory)*
|
||||
hsa_code_object_reader_create_from_memory_fn;
|
||||
decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
|
||||
decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
|
||||
decltype(hsa_executable_load_program_code_object)*
|
||||
hsa_executable_load_program_code_object_fn;
|
||||
decltype(hsa_executable_load_agent_code_object)*
|
||||
hsa_executable_load_agent_code_object_fn;
|
||||
decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
|
||||
decltype(hsa_executable_get_symbol_by_name)*
|
||||
hsa_executable_get_symbol_by_name_fn;
|
||||
decltype(hsa_executable_iterate_agent_symbols)*
|
||||
hsa_executable_iterate_agent_symbols_fn;
|
||||
decltype(hsa_executable_iterate_program_symbols)*
|
||||
hsa_executable_iterate_program_symbols_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Apis from Core Runtime, Amd Extensions
|
||||
// Finalizer and Images
|
||||
struct HsaApiTable {
|
||||
|
||||
// Version of Hsa Api Table
|
||||
ApiTableVersion version;
|
||||
|
||||
// Table of function pointers to HSA Core Runtime
|
||||
CoreApiTable* core_;
|
||||
|
||||
// Table of function pointers to AMD extensions
|
||||
AmdExtTable* amd_ext_;
|
||||
|
||||
// Table of function pointers to HSA Finalizer Extension
|
||||
FinalizerExtTable* finalizer_ext_;
|
||||
|
||||
// Table of function pointers to HSA Image Extension
|
||||
ImageExtTable* image_ext_;
|
||||
|
||||
// Table of function pointers for tools to use
|
||||
ToolsApiTable* tools_;
|
||||
|
||||
// Table of function pointers to AMD PC Sampling Extension
|
||||
PcSamplingExtTable* pc_sampling_ext_;
|
||||
};
|
||||
|
||||
// Structure containing instances of different api tables
|
||||
struct HsaApiTableContainer {
|
||||
HsaApiTable root;
|
||||
CoreApiTable core;
|
||||
AmdExtTable amd_ext;
|
||||
FinalizerExtTable finalizer_ext;
|
||||
ImageExtTable image_ext;
|
||||
ToolsApiTable tools;
|
||||
PcSamplingExtTable pc_sampling_ext;
|
||||
|
||||
// Default initialization of a container instance
|
||||
HsaApiTableContainer() {
|
||||
root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
|
||||
root.version.minor_id = sizeof(HsaApiTable);
|
||||
root.version.step_id = HSA_API_TABLE_STEP_VERSION;
|
||||
|
||||
core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
|
||||
core.version.minor_id = sizeof(CoreApiTable);
|
||||
core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
|
||||
root.core_ = &core;
|
||||
|
||||
amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
|
||||
amd_ext.version.minor_id = sizeof(AmdExtTable);
|
||||
amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
|
||||
root.amd_ext_ = &amd_ext;
|
||||
|
||||
finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
|
||||
finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
|
||||
finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
|
||||
root.finalizer_ext_ = &finalizer_ext;
|
||||
|
||||
image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
|
||||
image_ext.version.minor_id = sizeof(ImageExtTable);
|
||||
image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
|
||||
root.image_ext_ = &image_ext;
|
||||
|
||||
tools.version.major_id = HSA_TOOLS_API_TABLE_MAJOR_VERSION;
|
||||
tools.version.minor_id = sizeof(ToolsApiTable);
|
||||
tools.version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;
|
||||
root.tools_ = &tools;
|
||||
|
||||
pc_sampling_ext.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
|
||||
pc_sampling_ext.version.minor_id = sizeof(PcSamplingExtTable);
|
||||
pc_sampling_ext.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;
|
||||
root.pc_sampling_ext_ = &pc_sampling_ext;
|
||||
}
|
||||
};
|
||||
|
||||
// Api to copy function pointers of a table
|
||||
static
|
||||
void inline copyApi(void* src, void* dest, size_t size) {
|
||||
assert(size >= sizeof(ApiTableVersion));
|
||||
memcpy((char*)src + sizeof(ApiTableVersion),
|
||||
(char*)dest + sizeof(ApiTableVersion),
|
||||
(size - sizeof(ApiTableVersion)));
|
||||
}
|
||||
|
||||
// Copy Api child tables if valid.
|
||||
static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
|
||||
if (src->major_id && (dest->major_id == src->major_id)) {
|
||||
dest->step_id = src->step_id;
|
||||
dest->minor_id = Min(dest->minor_id, src->minor_id);
|
||||
copyApi(dest, src, dest->minor_id);
|
||||
} else {
|
||||
dest->major_id = 0;
|
||||
dest->minor_id = 0;
|
||||
dest->step_id = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy constructor for all Api tables. The function assumes the
|
||||
// user has initialized an instance of tables container correctly
|
||||
// for the Major, Minor and Stepping Ids of Root and Child Api tables.
|
||||
// The function will overwrite the value of Minor Id by taking the
|
||||
// minimum of source and destination parameters. It will also overwrite
|
||||
// the stepping Id with value from source parameter.
|
||||
static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
|
||||
// Verify Major Id of source and destination tables match
|
||||
if (dest->version.major_id != src->version.major_id) {
|
||||
dest->version.major_id = 0;
|
||||
dest->version.minor_id = 0;
|
||||
dest->version.step_id = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize the stepping id and minor id of root table. For the
|
||||
// minor id which encodes struct size, take the minimum of source
|
||||
// and destination parameters
|
||||
dest->version.step_id = src->version.step_id;
|
||||
dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);
|
||||
|
||||
// Copy child tables if present
|
||||
if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
|
||||
copyElement(&dest->core_->version, &src->core_->version);
|
||||
if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
|
||||
if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
|
||||
if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->image_ext_->version, &src->image_ext_->version);
|
||||
if ((offsetof(HsaApiTable, tools_) < dest->version.minor_id))
|
||||
copyElement(&dest->tools_->version, &src->tools_->version);
|
||||
if ((offsetof(HsaApiTable, pc_sampling_ext_) < dest->version.minor_id))
|
||||
copyElement(&dest->pc_sampling_ext_->version, &src->pc_sampling_ext_->version);
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,70 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
||||
#define HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
||||
|
||||
// CODE IN THIS FILE **MUST** BE C-COMPATIBLE
|
||||
|
||||
// Major Ids of the Api tables exported by Hsa Core Runtime
|
||||
#define HSA_API_TABLE_MAJOR_VERSION 0x03
|
||||
#define HSA_CORE_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_IMAGE_API_TABLE_MAJOR_VERSION 0x02
|
||||
#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION 0x01
|
||||
#define HSA_TOOLS_API_TABLE_MAJOR_VERSION 0x01
|
||||
#define HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION 0x01
|
||||
|
||||
// Step Ids of the Api tables exported by Hsa Core Runtime
|
||||
#define HSA_API_TABLE_STEP_VERSION 0x01
|
||||
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x08
|
||||
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
|
||||
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
|
||||
#define HSA_IMAGE_EXT_API_TABLE_STEP_VERSION HSA_IMAGE_API_TABLE_STEP_VERSION
|
||||
#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_TOOLS_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_PC_SAMPLING_API_TABLE_STEP_VERSION 0x00
|
||||
|
||||
#endif // HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
|
||||
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -0,0 +1,531 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#undef HSA_API
|
||||
#ifdef HSA_EXPORT_FINALIZER
|
||||
#define HSA_API HSA_API_EXPORT
|
||||
#else
|
||||
#define HSA_API HSA_API_IMPORT
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
struct BrigModuleHeader;
|
||||
typedef struct BrigModuleHeader* BrigModule_t;
|
||||
|
||||
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Enumeration constants added to ::hsa_status_t by this extension.
|
||||
*/
|
||||
enum {
|
||||
/**
|
||||
* The HSAIL program is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
|
||||
/**
|
||||
* The HSAIL module is invalid.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
|
||||
/**
|
||||
* Machine model or profile of the HSAIL module do not match the machine model
|
||||
* or profile of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
|
||||
/**
|
||||
* The HSAIL module is already a part of the HSAIL program.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
|
||||
/**
|
||||
* Compatibility mismatch between symbol declaration and symbol definition.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
|
||||
/**
|
||||
* The finalization encountered an error while finalizing a kernel or
|
||||
* indirect function.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
|
||||
/**
|
||||
* Mismatch between a directive in the control directive structure and in
|
||||
* the HSAIL kernel.
|
||||
*/
|
||||
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup ext-alt-finalizer-program Finalization Program
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
|
||||
* the definition of the BrigModule_t type.
|
||||
*/
|
||||
typedef BrigModule_t hsa_ext_module_t;
|
||||
|
||||
/**
|
||||
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
|
||||
* modules that collectively define functions and variables used by kernels and
|
||||
* indirect functions.
|
||||
*/
|
||||
typedef struct hsa_ext_program_s {
|
||||
/**
|
||||
* Opaque handle.
|
||||
*/
|
||||
uint64_t handle;
|
||||
} hsa_ext_program_t;
|
||||
|
||||
/**
|
||||
* @brief Create an empty HSAIL program.
|
||||
*
|
||||
* @param[in] machine_model Machine model used in the HSAIL program.
|
||||
*
|
||||
* @param[in] profile Profile used in the HSAIL program.
|
||||
*
|
||||
* @param[in] default_float_rounding_mode Default float rounding mode used in
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[out] program Memory location where the HSA runtime stores the newly
|
||||
* created HSAIL program handle.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
|
||||
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
|
||||
* @p program is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_create(
|
||||
hsa_machine_model_t machine_model,
|
||||
hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options,
|
||||
hsa_ext_program_t *program);
|
||||
|
||||
/**
|
||||
* @brief Destroy a HSAIL program.
|
||||
*
|
||||
* @details The HSAIL program handle becomes invalid after it has been
|
||||
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
|
||||
* still valid after the HSAIL program has been destroyed, and can be used as
|
||||
* intended. Resources allocated outside and associated with the HSAIL program
|
||||
* (such as HSAIL modules that are added to the HSAIL program) can be released
|
||||
* after the finalization program has been destroyed.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_destroy(
|
||||
hsa_ext_program_t program);
|
||||
|
||||
/**
|
||||
* @brief Add a HSAIL module to an existing HSAIL program.
|
||||
*
|
||||
* @details The HSA runtime does not perform a deep copy of the HSAIL module
|
||||
* upon addition. Instead, it stores a pointer to the HSAIL module. The
|
||||
* ownership of the HSAIL module belongs to the application, which must ensure
|
||||
* that @p module is not released before destroying the HSAIL program.
|
||||
*
|
||||
* The HSAIL module is successfully added to the HSAIL program if @p module is
|
||||
* valid, if all the declarations and definitions for the same symbol are
|
||||
* compatible, and if @p module specify machine model and profile that matches
|
||||
* the HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] module HSAIL module. The application can add the same HSAIL module
|
||||
* to @p program at most once. The HSAIL module must specify the same machine
|
||||
* model and profile as @p program. If the floating-mode rounding mode of @p
|
||||
* module is not default, then it should match that of @p program.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
|
||||
* module does not match machine model of @p program, or the profile of @p
|
||||
* module does not match profile of @p program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
|
||||
* already a part of the HSAIL program.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
|
||||
* definition compatibility mismatch. See the symbol compatibility rules in the
|
||||
* HSA Programming Reference Manual.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_add_module(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
/**
|
||||
* @brief Iterate over the HSAIL modules in a program, and invoke an
|
||||
* application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per HSAIL module in the
|
||||
* program. The HSA runtime passes three arguments to the callback: the program,
|
||||
* a HSAIL module, and the application data. If @p callback returns a status
|
||||
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
|
||||
* stops and ::hsa_ext_program_iterate_modules returns that status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
|
||||
void* data),
|
||||
void* data);
|
||||
|
||||
/**
|
||||
* @brief HSAIL program attributes.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Machine model specified when the HSAIL program was created. The type
|
||||
* of this attribute is ::hsa_machine_model_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
|
||||
/**
|
||||
* Profile specified when the HSAIL program was created. The type of
|
||||
* this attribute is ::hsa_profile_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
|
||||
/**
|
||||
* Default float rounding mode specified when the HSAIL program was
|
||||
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
|
||||
*/
|
||||
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
|
||||
} hsa_ext_program_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given HSAIL program.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behaviour is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* HSAIL program attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_get_info(
|
||||
hsa_ext_program_t program,
|
||||
hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
/**
|
||||
* @brief Finalizer-determined call convention.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Finalizer-determined call convention.
|
||||
*/
|
||||
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
|
||||
} hsa_ext_finalizer_call_convention_t;
|
||||
|
||||
/**
|
||||
* @brief Control directives specify low-level information about the
|
||||
* finalization process.
|
||||
*/
|
||||
typedef struct hsa_ext_control_directives_s {
|
||||
/**
|
||||
* Bitset indicating which control directives are enabled. The bit assigned to
|
||||
* a control directive is determined by the corresponding value in
|
||||
* BrigControlDirective.
|
||||
*
|
||||
* If a control directive is disabled, its corresponding field value (if any)
|
||||
* must be 0. Control directives that are only present or absent (such as
|
||||
* partial workgroups) have no corresponding field as the presence of the bit
|
||||
* in this mask is sufficient.
|
||||
*/
|
||||
uint64_t control_directives_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
|
||||
* assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains a enablebreakexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t break_exceptions_mask;
|
||||
/**
|
||||
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
|
||||
* bit assigned to an HSAIL exception is determined by the corresponding value
|
||||
* in BrigExceptionsMask. If the kernel contains a enabledetectexceptions
|
||||
* control directive, the finalizer uses the union of the two masks.
|
||||
*/
|
||||
uint16_t detect_exceptions_mask;
|
||||
/**
|
||||
* Maximum size (in bytes) of dynamic group memory that will be allocated by
|
||||
* the application for any dispatch of the kernel. If the kernel contains a
|
||||
* maxdynamicsize control directive, the two values should match.
|
||||
*/
|
||||
uint32_t max_dynamic_group_size;
|
||||
/**
|
||||
* Maximum number of grid work-items that will be used by the application to
|
||||
* launch the kernel. If the kernel contains a maxflatgridsize control
|
||||
* directive, the value of @a max_flat_grid_size must not be greater than the
|
||||
* value of the directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_grid_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint64_t max_flat_grid_size;
|
||||
/**
|
||||
* Maximum number of work-group work-items that will be used by the
|
||||
* application to launch the kernel. If the kernel contains a
|
||||
* maxflatworkgroupsize control directive, the value of @a
|
||||
* max_flat_workgroup_size must not be greater than the value of the
|
||||
* directive, and takes precedence.
|
||||
*
|
||||
* The value specified for maximum absolute grid size must be greater than or
|
||||
* equal to the product of the values specified by @a required_workgroup_size.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, this field must be greater than 0.
|
||||
*/
|
||||
uint32_t max_flat_workgroup_size;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint32_t reserved1;
|
||||
/**
|
||||
* Grid size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredgridsize control directive, the
|
||||
* dimensions should match.
|
||||
*
|
||||
* The specified grid size must be consistent with @a required_workgroup_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
|
||||
* only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
uint64_t required_grid_size[3];
|
||||
/**
|
||||
* Work-group size that will be used by the application in any dispatch of the
|
||||
* kernel. If the kernel contains a requiredworkgroupsize control directive,
|
||||
* the dimensions should match.
|
||||
*
|
||||
* The specified work-group size must be consistent with @a required_grid_size
|
||||
* and @a required_dim. Also, the product of the three dimensions must not
|
||||
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
|
||||
* hold only if all the corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
|
||||
* control_directives_mask, the three dimension values must be greater than 0.
|
||||
*/
|
||||
hsa_dim3_t required_workgroup_size;
|
||||
/**
|
||||
* Number of dimensions that will be used by the application to launch the
|
||||
* kernel. If the kernel contains a requireddim control directive, the two
|
||||
* values should match.
|
||||
*
|
||||
* The specified dimensions must be consistent with @a required_grid_size and
|
||||
* @a required_workgroup_size. This invariant must hold only if all the
|
||||
* corresponding control directives are enabled.
|
||||
*
|
||||
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
|
||||
* control_directives_mask, this field must be 1, 2, or 3.
|
||||
*/
|
||||
uint8_t required_dim;
|
||||
/**
|
||||
* Reserved. Must be 0.
|
||||
*/
|
||||
uint8_t reserved2[75];
|
||||
} hsa_ext_control_directives_t;
|
||||
|
||||
/**
|
||||
* @brief Finalize an HSAIL program for a given instruction set architecture.
|
||||
*
|
||||
* @details Finalize all of the kernels and indirect functions that belong to
|
||||
* the same HSAIL program for a specific instruction set architecture (ISA). The
|
||||
* transitive closure of all functions specified by call or scall must be
|
||||
* defined. Kernels and indirect functions that are being finalized must be
|
||||
* defined. Kernels and indirect functions that are referenced in kernels and
|
||||
* indirect functions being finalized may or may not be defined, but must be
|
||||
* declared. All the global/readonly segment variables that are referenced in
|
||||
* kernels and indirect functions being finalized may or may not be defined, but
|
||||
* must be declared.
|
||||
*
|
||||
* @param[in] program HSAIL program.
|
||||
*
|
||||
* @param[in] isa Instruction set architecture to finalize for.
|
||||
*
|
||||
* @param[in] call_convention A call convention used in a finalization. Must
|
||||
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
|
||||
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
|
||||
* isa (not inclusive).
|
||||
*
|
||||
* @param[in] control_directives Low-level control directives that influence
|
||||
* the finalization process.
|
||||
*
|
||||
* @param[in] options Vendor-specific options. May be NULL.
|
||||
*
|
||||
* @param[in] code_object_type Type of code object to produce.
|
||||
*
|
||||
* @param[out] code_object Code object generated by the Finalizer, which
|
||||
* contains the machine code for the kernels and indirect functions in the HSAIL
|
||||
* program. The code object is independent of the HSAIL module that was used to
|
||||
* generate it.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
||||
* resources required for the operation.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
|
||||
* the control directive structure and in the HSAIL kernel mismatch, or if the
|
||||
* same directive is used with a different value in one of the functions used by
|
||||
* this kernel.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
|
||||
* encountered an error while compiling a kernel or an indirect function.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_program_finalize(
|
||||
hsa_ext_program_t program,
|
||||
hsa_isa_t isa,
|
||||
int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives,
|
||||
const char *options,
|
||||
hsa_code_object_type_t code_object_type,
|
||||
hsa_code_object_t *code_object);
|
||||
|
||||
/** @} */
|
||||
|
||||
#define hsa_ext_finalizer_1_00
|
||||
|
||||
typedef struct hsa_ext_finalizer_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ext_program_create)(
|
||||
hsa_machine_model_t machine_model, hsa_profile_t profile,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
||||
const char *options, hsa_ext_program_t *program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_iterate_modules)(
|
||||
hsa_ext_program_t program,
|
||||
hsa_status_t (*callback)(hsa_ext_program_t program,
|
||||
hsa_ext_module_t module, void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_get_info)(
|
||||
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t (*hsa_ext_program_finalize)(
|
||||
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
|
||||
hsa_ext_control_directives_t control_directives, const char *options,
|
||||
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
|
||||
} hsa_ext_finalizer_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
||||
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -0,0 +1,488 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
||||
#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "hsa.h"
|
||||
|
||||
#define HSA_AQLPROFILE_VERSION_MAJOR 2
|
||||
#define HSA_AQLPROFILE_VERSION_MINOR 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Library version
|
||||
uint32_t hsa_ven_amd_aqlprofile_version_major();
|
||||
uint32_t hsa_ven_amd_aqlprofile_version_minor();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// Library API:
|
||||
// The library provides helper methods for instantiation of
|
||||
// the profile context object and for populating of the start
|
||||
// and stop AQL packets. The profile object contains a profiling
|
||||
// events list and needed for profiling buffers descriptors,
|
||||
// a command buffer and an output data buffer. To check if there
|
||||
// was an error the library methods return a status code. Also
|
||||
// the library provides methods for querying required buffers
|
||||
// attributes, to validate the event attributes and to get profiling
|
||||
// output data.
|
||||
//
|
||||
// Returned status:
|
||||
// hsa_status_t – HSA status codes are used from hsa.h header
|
||||
//
|
||||
// Supported profiling features:
|
||||
//
|
||||
// Supported profiling events
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
|
||||
} hsa_ven_amd_aqlprofile_event_type_t;
|
||||
|
||||
// Supported performance counters (PMC) blocks
|
||||
// The block ID is the same for a block instances set, for example
|
||||
// each block instance from the TCC block set, TCC0, TCC1, …, TCCN
|
||||
// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCKS_TCC.
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
|
||||
// Memory related blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
|
||||
// System blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
|
||||
// GFX10 added blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A = 26,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C = 27,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A = 28,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C = 29,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR = 30,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS = 31,
|
||||
|
||||
// UMC & MMEA System Blocks
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC = 32,
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA = 33,
|
||||
|
||||
HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
|
||||
} hsa_ven_amd_aqlprofile_block_name_t;
|
||||
|
||||
// PMC event object structure
|
||||
// ‘counter_id’ value is specified in GFXIPs perfcounter user guides
|
||||
// which is the counters select value, “Performance Counters Selection”
|
||||
// chapter.
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_block_name_t block_name;
|
||||
uint32_t block_index;
|
||||
uint32_t counter_id;
|
||||
} hsa_ven_amd_aqlprofile_event_t;
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
||||
hsa_agent_t agent, // HSA handle for the profiling GPU
|
||||
const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer on validated event
|
||||
bool* result); // [out] True if the event valid, False otherwise
|
||||
|
||||
// Profiling parameters
|
||||
// All parameters are generic and if not applicable for a specific
|
||||
// profile configuration then error status will be returned.
|
||||
typedef enum {
|
||||
/**
|
||||
* Select the target compute unit (wgp) for profiling.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
|
||||
/**
|
||||
* VMID Mask
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
|
||||
/**
|
||||
* Shader engine mask for selection.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
|
||||
/**
|
||||
* Legacy. Deprecated.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
|
||||
/**
|
||||
* Set SIMD Mask (GFX9) or SIMD ID for collection (Navi)
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION = 8,
|
||||
/**
|
||||
* Set true for occupancy collection only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE = 9,
|
||||
/**
|
||||
* ATT collection max data size, in MB. Shared among shader engines.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE = 10,
|
||||
/**
|
||||
* Mask of which compute units to generate perfcounters. GFX9 only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK = 240,
|
||||
/**
|
||||
* Select collection period for perfcounters. GFX9 only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL = 241,
|
||||
/**
|
||||
* Select perfcounter ID (SQ block) for collection. GFX9 only.
|
||||
*/
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME = 242,
|
||||
} hsa_ven_amd_aqlprofile_parameter_name_t;
|
||||
|
||||
// Profile parameter object
|
||||
typedef struct {
|
||||
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
|
||||
uint32_t value;
|
||||
} hsa_ven_amd_aqlprofile_parameter_t;
|
||||
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0 = 0,
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1,
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2,
|
||||
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
|
||||
} hsa_ven_amd_aqlprofile_att_marker_channel_t;
|
||||
|
||||
//
|
||||
// Profile context object:
|
||||
// The library provides a profile object structure which contains
|
||||
// the events array, a buffer for the profiling start/stop commands
|
||||
// and a buffer for the output data.
|
||||
// The buffers are specified by the buffer descriptors and allocated
|
||||
// by the application. The buffers allocation attributes, the command
|
||||
// buffer size, the PMC output buffer size as well as profiling output
|
||||
// data can be get using the generic get profile info helper _get_info.
|
||||
//
|
||||
// Buffer descriptor
|
||||
typedef struct {
|
||||
void* ptr;
|
||||
uint32_t size;
|
||||
} hsa_ven_amd_aqlprofile_descriptor_t;
|
||||
|
||||
// Profile context object structure, contains profiling events list and
|
||||
// needed for profiling buffers descriptors, a command buffer and
|
||||
// an output data buffer
|
||||
typedef struct {
|
||||
hsa_agent_t agent; // GFXIP handle
|
||||
hsa_ven_amd_aqlprofile_event_type_t type; // Events type
|
||||
const hsa_ven_amd_aqlprofile_event_t* events; // Events array
|
||||
uint32_t event_count; // Events count
|
||||
const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array
|
||||
uint32_t parameter_count; // Parameters count
|
||||
hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer
|
||||
hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands
|
||||
} hsa_ven_amd_aqlprofile_profile_t;
|
||||
|
||||
//
|
||||
// AQL packets populating methods:
|
||||
// The helper methods to populate provided by the application START and
|
||||
// STOP AQL packets which the application is required to submit before and
|
||||
// after profiled GPU task packets respectively.
|
||||
//
|
||||
// AQL Vendor Specific packet which carries a PM4 command
|
||||
typedef struct {
|
||||
uint16_t header;
|
||||
uint16_t pm4_command[27];
|
||||
hsa_signal_t completion_signal;
|
||||
} hsa_ext_amd_aql_pm4_packet_t;
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
// Only 'pm4_command' fields of the packet are set and the application
|
||||
// is responsible to set Vendor Specific header type a completion signal
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_start(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile, // [in,out] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet
|
||||
|
||||
// Method to populate the provided AQL packet with profiling stop commands
|
||||
// Only 'pm4_command' fields of the packet are set and the application
|
||||
// is responsible to set Vendor Specific header type and a completion signal
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_stop(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet
|
||||
|
||||
// Method to populate the provided AQL packet with profiling read commands
|
||||
// Only 'pm4_command' fields of the packet are set and the application
|
||||
// is responsible to set Vendor Specific header type and a completion signal
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_read(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet); // [out] profile stop AQL packet
|
||||
|
||||
// Legacy devices, PM4 profiling packet size
|
||||
const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
|
||||
// Legacy devices, converting the profiling AQL packet to PM4 packet blob
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet
|
||||
void* data); // [out] PM4 packet blob
|
||||
|
||||
// Method to add a marker (correlation ID) into the ATT buffer.
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile, // [in,out] profile context object
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_marker_packet, // [out] profile marker AQL packet
|
||||
uint32_t data, // [in] Data to be inserted
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel); // [in] Comm channel
|
||||
|
||||
//
|
||||
// Get profile info:
|
||||
// Generic method for getting various profile info including profile buffers
|
||||
// attributes like the command buffer size and the profiling PMC results.
|
||||
// It’s implied that all counters are 64bit values.
|
||||
//
|
||||
// Profile generic output data:
|
||||
typedef struct {
|
||||
uint32_t sample_id; // PMC sample or trace buffer index
|
||||
union {
|
||||
struct {
|
||||
hsa_ven_amd_aqlprofile_event_t event; // PMC event
|
||||
uint64_t result; // PMC result
|
||||
} pmc_data;
|
||||
hsa_ven_amd_aqlprofile_descriptor_t trace_data; // Trace output data descriptor
|
||||
};
|
||||
} hsa_ven_amd_aqlprofile_info_data_t;
|
||||
|
||||
// ID query type
|
||||
typedef struct {
|
||||
const char* name;
|
||||
uint32_t id;
|
||||
uint32_t instance_count;
|
||||
} hsa_ven_amd_aqlprofile_id_query_t;
|
||||
|
||||
// Profile attributes
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
|
||||
// in info_data object
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3, // get_info returns trace buffer ptr/size
|
||||
// in info_data object
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
|
||||
// by name string using _id_query_t
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
|
||||
// counters enable command buffer
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
|
||||
// counters disable command buffer
|
||||
} hsa_ven_amd_aqlprofile_info_type_t;
|
||||
|
||||
|
||||
// Definition of output data iterator callback
|
||||
typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
|
||||
hsa_ven_amd_aqlprofile_info_type_t info_type, // [in] data type, PMC or trace data
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data, // [in] info_data object
|
||||
void* callback_data); // [in,out] data passed to the callback
|
||||
|
||||
// Method for getting the profile info
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_get_info(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute
|
||||
void* value); // [in,out] returned value
|
||||
|
||||
// Method for iterating the events output data
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data
|
||||
void* data); // [in,out] data passed to the callback
|
||||
|
||||
// Return error string
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_error_string(
|
||||
const char** str); // [out] pointer on the error string
|
||||
|
||||
/**
|
||||
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
|
||||
*/
|
||||
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_eventname_callback_t)(int id, const char* name);
|
||||
/**
|
||||
* @brief Iterate over all possible event coordinate IDs and their names.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
||||
* @param position A counting sequence indicating callback number.
|
||||
* @param id Coordinate ID as in _iterate_event_ids.
|
||||
* @param extent Coordinate extent indicating maximum allowed instances.
|
||||
* @param coordinate The coordinate, in the range [0,extent-1].
|
||||
* @param name Coordinate name as in _iterate_event_ids.
|
||||
* @param userdata Userdata returned from _iterate_event_coord function.
|
||||
*/
|
||||
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_coordinate_callback_t)(
|
||||
int position,
|
||||
int id,
|
||||
int extent,
|
||||
int coordinate,
|
||||
const char* name,
|
||||
void* userdata
|
||||
);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
||||
* @param[in] agent HSA agent.
|
||||
* @param[in] event The event ID and block ID to iterate for.
|
||||
* @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
|
||||
* @param[in] callback Callback function to return the coordinates.
|
||||
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
|
||||
hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id,
|
||||
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
|
||||
void* userdata
|
||||
);
|
||||
|
||||
/**
|
||||
* @brief Extension version.
|
||||
*/
|
||||
#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
|
||||
#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
|
||||
|
||||
#ifdef HSA_LARGE_MODEL
|
||||
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
|
||||
#else
|
||||
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Extension function table.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
|
||||
uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
|
||||
uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
|
||||
const char** str);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
|
||||
hsa_agent_t agent,
|
||||
const hsa_ven_amd_aqlprofile_event_t* event,
|
||||
bool* result);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
|
||||
void* data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute,
|
||||
void* value);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback,
|
||||
void* data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_ids)(
|
||||
hsa_ven_amd_aqlprofile_eventname_callback_t
|
||||
);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_coord)(
|
||||
hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id,
|
||||
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
|
||||
void* userdata
|
||||
);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_aqlprofile_att_marker)(
|
||||
hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ext_amd_aql_pm4_packet_t* aql_packet,
|
||||
uint32_t data,
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
||||
);
|
||||
} hsa_ven_amd_aqlprofile_1_00_pfn_t;
|
||||
|
||||
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
||||
@@ -0,0 +1,667 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// HSA AMD extension for additional loader functionality.
|
||||
|
||||
#ifndef HSA_VEN_AMD_LOADER_H
|
||||
#define HSA_VEN_AMD_LOADER_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/**
|
||||
* @brief Queries equivalent host address for given @p device_address, and
|
||||
* records it in @p host_address.
|
||||
*
|
||||
*
|
||||
* @details Contents of memory pointed to by @p host_address would be identical
|
||||
* to contents of memory pointed to by @p device_address. Only difference
|
||||
* between the two is host accessibility: @p host_address is always accessible
|
||||
* from host, @p device_address might not be accessible from host.
|
||||
*
|
||||
* If @p device_address already points to host accessible memory, then the value
|
||||
* of @p device_address is simply copied into @p host_address.
|
||||
*
|
||||
* The lifetime of @p host_address is the same as the lifetime of @p
|
||||
* device_address, and both lifetimes are limited by the lifetime of the
|
||||
* executable that is managing these addresses.
|
||||
*
|
||||
*
|
||||
* @param[in] device_address Device address to query equivalent host address
|
||||
* for.
|
||||
*
|
||||
* @param[out] host_address Pointer to application-allocated buffer to record
|
||||
* queried equivalent host address in.
|
||||
*
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
|
||||
* null, or @p host_address is null.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_host_address(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
/**
|
||||
* @brief The storage type of the code object that is backing loaded memory
|
||||
* segment.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Loaded memory segment is not backed by any code object (anonymous), as the
|
||||
* case would be with BSS (uninitialized data).
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
|
||||
/**
|
||||
* Loaded memory segment is backed by the code object that is stored in the
|
||||
* file.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
|
||||
/**
|
||||
* Loaded memory segment is backed by the code object that is stored in the
|
||||
* memory.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
|
||||
} hsa_ven_amd_loader_code_object_storage_type_t;
|
||||
|
||||
/**
|
||||
* @brief Loaded memory segment descriptor.
|
||||
*
|
||||
*
|
||||
* @details Loaded memory segment descriptor describes underlying loaded memory
|
||||
* segment. Loaded memory segment is created/allocated by the executable during
|
||||
* the loading of the code object that is backing underlying memory segment.
|
||||
*
|
||||
* The lifetime of underlying memory segment is limited by the lifetime of the
|
||||
* executable that is managing underlying memory segment.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
|
||||
/**
|
||||
* Agent underlying memory segment is allocated on. If the code object that is
|
||||
* backing underlying memory segment is program code object, then 0.
|
||||
*/
|
||||
hsa_agent_t agent;
|
||||
/**
|
||||
* Executable that is managing this underlying memory segment.
|
||||
*/
|
||||
hsa_executable_t executable;
|
||||
/**
|
||||
* Storage type of the code object that is backing underlying memory segment.
|
||||
*/
|
||||
hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
|
||||
* filepath to the code object;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
|
||||
* accessible pointer to the first byte of the code object.
|
||||
*/
|
||||
const void *code_object_storage_base;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
|
||||
* the filepath to the code object (including null-terminating character);
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
|
||||
* bytes, of the memory occupied by the code object.
|
||||
*/
|
||||
size_t code_object_storage_size;
|
||||
/**
|
||||
* If the storage type of the code object that is backing underlying memory
|
||||
* segment is:
|
||||
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
||||
* - other, then offset, in bytes, from the beginning of the code object to
|
||||
* the first byte in the code object data is copied from.
|
||||
*/
|
||||
size_t code_object_storage_offset;
|
||||
/**
|
||||
* Starting address of the underlying memory segment.
|
||||
*/
|
||||
const void *segment_base;
|
||||
/**
|
||||
* Size, in bytes, of the underlying memory segment.
|
||||
*/
|
||||
size_t segment_size;
|
||||
} hsa_ven_amd_loader_segment_descriptor_t;
|
||||
|
||||
/**
|
||||
* @brief Either queries loaded memory segment descriptors, or total number of
|
||||
* loaded memory segment descriptors.
|
||||
*
|
||||
*
|
||||
* @details If @p segment_descriptors is not null and @p num_segment_descriptors
|
||||
* points to number that exactly matches total number of loaded memory segment
|
||||
* descriptors, then queries loaded memory segment descriptors, and records them
|
||||
* in @p segment_descriptors. If @p segment_descriptors is null and @p
|
||||
* num_segment_descriptors points to zero, then queries total number of loaded
|
||||
* memory segment descriptors, and records it in @p num_segment_descriptors. In
|
||||
* all other cases returns appropriate error code (see below).
|
||||
*
|
||||
* The caller of this function is responsible for the allocation/deallocation
|
||||
* and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
|
||||
*
|
||||
* The lifetime of loaded memory segments that are described by queried loaded
|
||||
* memory segment descriptors is limited by the lifetime of the executable that
|
||||
* is managing loaded memory segments.
|
||||
*
|
||||
* Queried loaded memory segment descriptors are always self-consistent: they
|
||||
* describe a complete set of loaded memory segments that are being backed by
|
||||
* fully loaded code objects that are present at the time (i.e. this function
|
||||
* is blocked until all executable manipulations are fully complete).
|
||||
*
|
||||
*
|
||||
* @param[out] segment_descriptors Pointer to application-allocated buffer to
|
||||
* record queried loaded memory segment descriptors in. Can be null if @p
|
||||
* num_segment_descriptors points to zero.
|
||||
*
|
||||
* @param[in,out] num_segment_descriptors Pointer to application-allocated
|
||||
* buffer that contains either total number of loaded memory segment descriptors
|
||||
* or zero.
|
||||
*
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
|
||||
* while @p num_segment_descriptors points to non-zero number, @p
|
||||
* segment_descriptors is not null while @p num_segment_descriptors points to
|
||||
* zero, or @p num_segment_descriptors is null.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
|
||||
* does not point to number that exactly matches total number of loaded memory
|
||||
* segment descriptors.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
/**
|
||||
* @brief Obtains the handle of executable to which the device address belongs.
|
||||
*
|
||||
* @details This method should not be used to obtain executable handle by using
|
||||
* a host address. The executable returned is expected to be alive until its
|
||||
* destroyed by the user.
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
||||
*
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
|
||||
* is no exectuable found for this kernel code object.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_query_executable(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Iterate over the loaded code objects in an executable, and invoke
|
||||
* an application-defined callback on every iteration.
|
||||
*
|
||||
* @param[in] executable Executable.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per loaded code object. The
|
||||
* HSA runtime passes three arguments to the callback: the executable, a
|
||||
* loaded code object, and the application data. If @p callback returns a
|
||||
* status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
|
||||
* traversal stops and
|
||||
* ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
|
||||
* status value.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
/**
|
||||
* @brief Loaded code object kind.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* Program code object.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
|
||||
/**
|
||||
* Agent code object.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
|
||||
} hsa_ven_amd_loader_loaded_code_object_kind_t;
|
||||
|
||||
/**
|
||||
* @brief Loaded code object attributes.
|
||||
*/
|
||||
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
|
||||
/**
|
||||
* The executable in which this loaded code object is loaded. The
|
||||
* type of this attribute is ::hsa_executable_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
|
||||
/**
|
||||
* The kind of this loaded code object. The type of this attribute is
|
||||
* ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
|
||||
/**
|
||||
* The agent on which this loaded code object is loaded. The
|
||||
* value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
|
||||
* attribute is ::hsa_agent_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
|
||||
/**
|
||||
* The storage type of the code object reader used to load the loaded code object.
|
||||
* The type of this attribute is ::uint32_t interpreted as a
|
||||
* ::hsa_ven_amd_loader_code_object_storage_type_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
|
||||
/**
|
||||
* The memory address of the first byte of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
||||
* attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
|
||||
/**
|
||||
* The memory size in bytes of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
||||
* attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
|
||||
/**
|
||||
* The file descriptor of the code object that was loaaded.
|
||||
* The value of this attribute is only defined if
|
||||
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
||||
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
|
||||
* attribute is ::int.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
|
||||
/**
|
||||
* The signed byte address difference of the memory address at which the code
|
||||
* object is loaded minus the virtual address specified in the code object
|
||||
* that is loaded. The value of this attribute is only defined if the
|
||||
* executable in which the code object is loaded is froozen. The type of this
|
||||
* attribute is ::int64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
|
||||
/**
|
||||
* The base memory address at which the code object is loaded. This is the
|
||||
* base address of the allocation for the lowest addressed segment of the code
|
||||
* object that is loaded. Note that any non-loaded segments before the first
|
||||
* loaded segment are ignored. The value of this attribute is only defined if
|
||||
* the executable in which the code object is loaded is froozen. The type of
|
||||
* this attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
|
||||
/**
|
||||
* The byte size of the loaded code objects contiguous memory allocation. The
|
||||
* value of this attribute is only defined if the executable in which the code
|
||||
* object is loaded is froozen. The type of this attribute is ::uint64_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
|
||||
/**
|
||||
* The length of the URI in bytes, not including the NUL terminator. The type
|
||||
* of this attribute is uint32_t.
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
|
||||
/**
|
||||
* The URI name from which the code object was loaded. The type of this
|
||||
* attribute is a NUL terminated \p char* with the length equal to the value
|
||||
* of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
|
||||
* The URI name syntax is defined by the following BNF syntax:
|
||||
*
|
||||
* code_object_uri ::== file_uri | memory_uri
|
||||
* file_uri ::== "file://" file_path [ range_specifier ]
|
||||
* memory_uri ::== "memory://" process_id range_specifier
|
||||
* range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
|
||||
* file_path ::== URI_ENCODED_OS_FILE_PATH
|
||||
* process_id ::== DECIMAL_NUMBER
|
||||
* number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
|
||||
*
|
||||
* ``number`` is a C integral literal where hexadecimal values are prefixed by
|
||||
* "0x" or "0X", and octal values by "0".
|
||||
*
|
||||
* ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
|
||||
* In URI encoding, every character that is not in the regular expression
|
||||
* ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
|
||||
* proceeded by "%". Directories in the path are separated by "/".
|
||||
*
|
||||
* ``offset`` is a 0-based byte offset to the start of the code object. For a
|
||||
* file URI, it is from the start of the file specified by the ``file_path``,
|
||||
* and if omitted defaults to 0. For a memory URI, it is the memory address
|
||||
* and is required.
|
||||
*
|
||||
* ``size`` is the number of bytes in the code object. For a file URI, if
|
||||
* omitted it defaults to the size of the file. It is required for a memory
|
||||
* URI.
|
||||
*
|
||||
* ``process_id`` is the identity of the process owning the memory. For Linux
|
||||
* it is the C unsigned integral decimal literal for the process ID (PID).
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* file:///dir1/dir2/file1
|
||||
* file:///dir3/dir4/file2#offset=0x2000&size=3000
|
||||
* memory://1234#offset=0x20000&size=3000
|
||||
*/
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
|
||||
} hsa_ven_amd_loader_loaded_code_object_info_t;
|
||||
|
||||
/**
|
||||
* @brief Get the current value of an attribute for a given loaded code
|
||||
* object.
|
||||
*
|
||||
* @param[in] loaded_code_object Loaded code object.
|
||||
*
|
||||
* @param[in] attribute Attribute to query.
|
||||
*
|
||||
* @param[out] value Pointer to an application-allocated buffer where to store
|
||||
* the value of the attribute. If the buffer passed by the application is not
|
||||
* large enough to hold the value of @p attribute, the behavior is undefined.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
|
||||
* invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
||||
* loaded code object attribute, or @p value is NULL.
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Create a code object reader to operate on a file with size and offset.
|
||||
*
|
||||
* @param[in] file File descriptor. The file must have been opened by
|
||||
* application with at least read permissions prior calling this function. The
|
||||
* file must contain a vendor-specific code object.
|
||||
*
|
||||
* The file is owned and managed by the application; the lifetime of the file
|
||||
* descriptor must exceed that of any associated code object reader.
|
||||
*
|
||||
* @param[in] size Size of the code object embedded in @p file.
|
||||
*
|
||||
* @param[in] offset 0-based offset relative to the beginning of the @p file
|
||||
* that denotes the beginning of the code object embedded within the @p file.
|
||||
*
|
||||
* @param[out] code_object_reader Memory location to store the newly created
|
||||
* code object reader handle. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
|
||||
* read permissions. This condition may also be reported as
|
||||
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
|
||||
* ::hsa_executable_load_agent_code_object function.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
|
||||
* do not form a valid code object. If file size is 0. Or offset > file size.
|
||||
* This condition may also be reported as
|
||||
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
|
||||
* ::hsa_executable_load_agent_code_object function.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
|
||||
* allocate the required resources.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
|
||||
*/
|
||||
hsa_status_t
|
||||
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Iterate over the available executables, and invoke an
|
||||
* application-defined callback on every iteration. While
|
||||
* ::hsa_ven_amd_loader_iterate_executables is executing any calls to
|
||||
* ::hsa_executable_create, ::hsa_executable_create_alt, or
|
||||
* ::hsa_executable_destroy will be blocked.
|
||||
*
|
||||
* @param[in] callback Callback to be invoked once per executable. The HSA
|
||||
* runtime passes two arguments to the callback: the executable and the
|
||||
* application data. If @p callback returns a status other than
|
||||
* ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
|
||||
* ::hsa_ven_amd_loader_iterate_executables returns that status value. If
|
||||
* @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
|
||||
* ::hsa_executable_destroy then the behavior is undefined.
|
||||
*
|
||||
* @param[in] data Application data that is passed to @p callback on every
|
||||
* iteration. May be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
||||
*/
|
||||
hsa_status_t
|
||||
hsa_ven_amd_loader_iterate_executables(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/**
|
||||
* @brief Extension version.
|
||||
*/
|
||||
#define hsa_ven_amd_loader 001003
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.00.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
} hsa_ven_amd_loader_1_00_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.01.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
} hsa_ven_amd_loader_1_01_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.02.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
} hsa_ven_amd_loader_1_02_pfn_t;
|
||||
|
||||
/**
|
||||
* @brief Extension function table version 1.03.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
||||
const void *device_address,
|
||||
const void **host_address);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
||||
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
||||
size_t *num_segment_descriptors);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
||||
const void *device_address,
|
||||
hsa_executable_t *executable);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
||||
hsa_executable_t executable,
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void *data),
|
||||
void *data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
||||
void *value);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
||||
hsa_file_t file,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
hsa_code_object_reader_t *code_object_reader);
|
||||
|
||||
hsa_status_t
|
||||
(*hsa_ven_amd_loader_iterate_executables)(
|
||||
hsa_status_t (*callback)(
|
||||
hsa_executable_t executable,
|
||||
void *data),
|
||||
void *data);
|
||||
} hsa_ven_amd_loader_1_03_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* HSA_VEN_AMD_LOADER_H */
|
||||
@@ -0,0 +1,416 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_VEN_AMD_PC_SAMPLING_H
|
||||
#define HSA_VEN_AMD_PC_SAMPLING_H
|
||||
|
||||
#include "hsa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief HSA AMD Vendor PC Sampling APIs
|
||||
* EXPERIMENTAL: All PC Sampling APIs are currently in an experimental phase and the APIs may be
|
||||
* modified extensively in the future
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief PC Sampling sample data for hosttrap sampling method
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t pc;
|
||||
uint64_t exec_mask;
|
||||
uint32_t workgroup_id_x;
|
||||
uint32_t workgroup_id_y;
|
||||
uint32_t workgroup_id_z;
|
||||
uint32_t wave_in_wg : 6;
|
||||
uint32_t chiplet : 3; // Currently not used
|
||||
uint32_t reserved : 23;
|
||||
uint32_t hw_id;
|
||||
uint32_t reserved0;
|
||||
uint64_t reserved1;
|
||||
uint64_t timestamp;
|
||||
uint64_t correlation_id;
|
||||
} perf_sample_hosttrap_v1_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling sample data for stochastic sampling method
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t pc;
|
||||
uint64_t exec_mask;
|
||||
uint32_t workgroup_id_x;
|
||||
uint32_t workgroup_id_y;
|
||||
uint32_t workgroup_id_z;
|
||||
uint32_t wave_in_wg : 6;
|
||||
uint32_t chiplet : 3; // Currently not used
|
||||
uint32_t reserved : 23;
|
||||
uint32_t hw_id;
|
||||
uint32_t perf_snapshot_data;
|
||||
uint32_t perf_snapshot_data1;
|
||||
uint32_t perf_snapshot_data2;
|
||||
uint64_t timestamp;
|
||||
uint64_t correlation_id;
|
||||
} perf_sample_snapshot_v1_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling method kinds
|
||||
*/
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1,
|
||||
HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
|
||||
} hsa_ven_amd_pcs_method_kind_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling interval unit type
|
||||
*/
|
||||
typedef enum {
|
||||
HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS,
|
||||
HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES,
|
||||
HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS
|
||||
} hsa_ven_amd_pcs_units_t;
|
||||
|
||||
/**
|
||||
* @brief HSA callback function to perform the copy onto a destination buffer
|
||||
*
|
||||
* If data_size is 0, HSA will stop current copy operation and keep remaining data in internal
|
||||
* buffers. Remaining contents of HSA internal buffers will be included in next
|
||||
* hsa_ven_amd_pcs_data_ready_callback_t. HSA internal buffers can also be drained by calling
|
||||
* hsa_ven_amd_pcs_flush.
|
||||
*
|
||||
* @param[in] hsa_callback_data private data to pass back to HSA. Provided in
|
||||
* hsa_ven_amd_pcs_data_ready_callback_t
|
||||
*
|
||||
* @param[in] data_size size of destination buffer in bytes.
|
||||
* @param[in] destination destination buffer
|
||||
* @retval TBD: but could be used to indicate that there is no more data to be read.
|
||||
* Or indicate an error and abort of current copy operations
|
||||
*/
|
||||
typedef hsa_status_t (*hsa_ven_amd_pcs_data_copy_callback_t)(void* hsa_callback_data,
|
||||
size_t data_size, void* destination);
|
||||
|
||||
/**
|
||||
* @brief HSA callback function to to indicate that there is data ready to be copied
|
||||
*
|
||||
* When the client receives this callback, the client should call back @p data_copy_callback for HSA
|
||||
* to perform the copy operation into an available buffer. @p data_copy_callback can be called back
|
||||
* multiple times with smaller @p data_size to split the copy operation.
|
||||
*
|
||||
* This callback must not call ::hsa_ven_amd_pcs_flush.
|
||||
*
|
||||
* @param[in] client_callback_data client private data passed in via
|
||||
* hsa_ven_amd_pcs_create/hsa_ven_amd_pcs_create_from_id
|
||||
* @param[in] data_size size of data available to be copied
|
||||
* @param[in] lost_sample_count number of lost samples since last call to
|
||||
* hsa_ven_amd_pcs_data_ready_callback_t.
|
||||
* @param[in] data_copy_callback callback function for HSA to perform the actual copy
|
||||
* @param[in] hsa_callback_data private data to pass back to HSA
|
||||
*/
|
||||
typedef void (*hsa_ven_amd_pcs_data_ready_callback_t)(
|
||||
void* client_callback_data, size_t data_size, size_t lost_sample_count,
|
||||
hsa_ven_amd_pcs_data_copy_callback_t data_copy_callback, void* hsa_callback_data);
|
||||
|
||||
/**
|
||||
* @brief Opaque handle representing a sampling session.
|
||||
* Two sessions having same handle value represent the same session
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t handle;
|
||||
} hsa_ven_amd_pcs_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling configuration flag options
|
||||
*/
|
||||
typedef enum {
|
||||
/* The interval for this sampling method have to be a power of 2 */
|
||||
HSA_VEN_AMD_PCS_CONFIGURATION_FLAGS_INTERVAL_POWER_OF_2 = (1 << 0)
|
||||
} hsa_ven_amd_pcs_configuration_flags_t;
|
||||
|
||||
/**
|
||||
* @brief PC Sampling method information
|
||||
* Used to provide client with list of supported PC Sampling methods
|
||||
*/
|
||||
typedef struct {
|
||||
hsa_ven_amd_pcs_method_kind_t method;
|
||||
hsa_ven_amd_pcs_units_t units;
|
||||
size_t min_interval;
|
||||
size_t max_interval;
|
||||
uint64_t flags;
|
||||
} hsa_ven_amd_pcs_configuration_t;
|
||||
|
||||
/**
|
||||
* @brief Callback function to iterate through list of supported PC Sampling configurations
|
||||
*
|
||||
* @param[in] configuration one entry for supported PC Sampling method and configuration options
|
||||
* @param[in] callback_data client private callback data that was passed in when calling
|
||||
* hsa_ven_amd_pcs_iterate_configuration
|
||||
*/
|
||||
typedef hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration_callback_t)(
|
||||
const hsa_ven_amd_pcs_configuration_t* configuration, void* callback_data);
|
||||
|
||||
/**
|
||||
* @brief Iterate through list of current supported PC Sampling configurations for this @p agent
|
||||
*
|
||||
* HSA will callback @p configuration_callback for each currently available PC Sampling
|
||||
* configuration. The list of currently available configurations may not be the complete list of
|
||||
* configurations supported on the @p agent. The list of currently available configurations may be
|
||||
* reduced if the @p agent is currently handling other PC sampling sessions.
|
||||
*
|
||||
* @param[in] agent target agent
|
||||
* @param[in] configuration_callback callback function to iterate through list of configurations
|
||||
* @param[in] callback_data client private callback data
|
||||
**/
|
||||
hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
|
||||
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
||||
void* callback_data);
|
||||
|
||||
/**
|
||||
* @brief Create a PC Sampling session on @p agent
|
||||
*
|
||||
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
|
||||
* parameters must be a legal configuration value, as described by the
|
||||
* hsa_ven_amd_pcs_configuration_t configurations passed to the callbacks of
|
||||
* hsa_ven_amd_pcs_iterate_configuration for this @p agent.
|
||||
* A successfull call may restrict the list of possible PC sampling methods available to subsequent
|
||||
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
|
||||
* on what types of PC sampling they can perform concurrently.
|
||||
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
|
||||
* The session will be in a stopped/inactive state after this call
|
||||
*
|
||||
* @param[in] agent target agent
|
||||
* @param[in] method method to use
|
||||
* @param[in] units sampling units
|
||||
* @param[in] interval sampling interval in @p units
|
||||
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
|
||||
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
|
||||
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
|
||||
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
|
||||
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
|
||||
* buffers are full. The value of latency can be 0.
|
||||
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
|
||||
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
|
||||
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
|
||||
* @param[in] data_ready_callback client callback function that will be called when:
|
||||
* 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
|
||||
* within @p latency period.
|
||||
* OR
|
||||
* 2. When hsa_ven_amd_pcs_flush is called.
|
||||
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
|
||||
* is called.
|
||||
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
|
||||
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS session created successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
|
||||
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
|
||||
* cannot handle the type requested.
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
|
||||
* @retval ::HSA_STATUS_ERROR Unexpected error
|
||||
**/
|
||||
hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
|
||||
size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
||||
void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates a PC Sampling session on @p agent. Assumes that the caller provides the
|
||||
* @p pcs_id generated by the previous call to the underlying driver that reserved PC sampling
|
||||
* on the @p agent.
|
||||
*
|
||||
* Similar to the @ref hsa_ven_amd_pcs_create with the difference that it inherits an existing
|
||||
* PC sampling session that was previously created in the underlying driver.
|
||||
*
|
||||
* Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
|
||||
* parameters must be a legal configuration value, and match the parameters that we used to create
|
||||
* the underlying PC Sampling session in the underlying driver.
|
||||
* A successfull call may restrict the list of possible PC sampling methods available to subsequent
|
||||
* calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
|
||||
* on what types of PC sampling they can perform concurrently.
|
||||
* For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
|
||||
* The session will be in a stopped/inactive state after this call
|
||||
*
|
||||
* @param[in] pcs_id ID that uniquely identifies the PC sampling session within underlying driver
|
||||
* @param[in] agent target agent
|
||||
* @param[in] method method to use
|
||||
* @param[in] units sampling units
|
||||
* @param[in] interval sampling interval in @p units
|
||||
* @param[in] latency expected latency in microseconds for client to provide a buffer for the data
|
||||
* copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
|
||||
* buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
|
||||
* how many samples are received within @p latency and call @p data_ready_callback ahead of time so
|
||||
* that the client has @p latency time to allocate the buffer before the HSA-runtime internal
|
||||
* buffers are full. The value of latency can be 0.
|
||||
* @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
|
||||
* HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
|
||||
* perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
|
||||
* @param[in] data_ready_callback client callback function that will be called when:
|
||||
* 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
|
||||
* within @p latency period.
|
||||
* OR
|
||||
* 2. When hsa_ven_amd_pcs_flush is called.
|
||||
* @param[in] client_callback_data client private data to be provided back when data_ready_callback
|
||||
* is called.
|
||||
* @param[out] pc_sampling PC sampling session handle used to reference this session when calling
|
||||
* hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS session created successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
|
||||
* @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
|
||||
* cannot handle the type requested.
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
|
||||
* @retval ::HSA_STATUS_ERROR Unexpected error
|
||||
**/
|
||||
hsa_status_t hsa_ven_amd_pcs_create_from_id(
|
||||
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
|
||||
hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Free a PC Sampling session on @p agent
|
||||
*
|
||||
* Free all the resources allocated for a PC Sampling session on @p agent
|
||||
* Internal buffers for this session will be lost.
|
||||
* If the session was active, the session will be stopped before it is destroyed.
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session destroyed successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
* @retval ::HSA_STATUS_ERROR unexpected error
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Start a PC Sampling session
|
||||
*
|
||||
* Activate a PC Sampling session that was previous created.
|
||||
* The session with be in a active state after this call
|
||||
* If the session was already active, this will result in a no-op and will return HSA_STATUS_SUCCESS
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session started successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
* @retval ::HSA_STATUS_ERROR unexpected error
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Stop a PC Sampling session
|
||||
*
|
||||
* Stop a session that is currently active
|
||||
* After a session is stopped HSA may still have some PC Sampling data in its internal buffers.
|
||||
* The internal buffers can be drained using hsa_ven_amd_pcs_flush. If the internal
|
||||
* buffers are not drained and the session is started again, the internal buffers will be available
|
||||
* on the next data_ready_callback.
|
||||
* If the session was already inactive, this will result in a no-op and will return
|
||||
* HSA_STATUS_SUCCESS
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session stopped successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
/**
|
||||
* @brief Flush internal buffers for a PC Sampling session
|
||||
*
|
||||
* Drain internal buffers for a PC Sampling session. If internal buffers have available data,
|
||||
* this trigger a data_ready_callback.
|
||||
*
|
||||
* The function blocks until all PC samples associated with the @p pc_sampling session
|
||||
* generated prior to the function call have been communicated by invocations of
|
||||
* @p data_ready_callback having completed execution.
|
||||
*
|
||||
* @param[in] pc_sampling PC sampling session handle
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Session flushed successfully
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
|
||||
*/
|
||||
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
#define hsa_ven_amd_pc_sampling_1_00
|
||||
|
||||
/**
|
||||
* @brief The function pointer table for the PC Sampling v1.00 extension. Can be returned by
|
||||
* ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
|
||||
*/
|
||||
typedef struct hsa_ven_amd_pc_sampling_1_00_pfn_t {
|
||||
hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration)(
|
||||
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
||||
void* callback_data);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_create)(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval,
|
||||
size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
|
||||
void* client_callback_data,
|
||||
hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_create_from_id)(
|
||||
uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
|
||||
hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
|
||||
hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
|
||||
hsa_ven_amd_pcs_t* pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_destroy)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_start)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_stop)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
hsa_status_t (*hsa_ven_amd_pcs_flush)(hsa_ven_amd_pcs_t pc_sampling);
|
||||
|
||||
} hsa_ven_amd_pc_sampling_1_00_pfn_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // end extern "C" block
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif /* HSA_VEN_AMD_PC_SAMPLING_H */
|
||||
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -0,0 +1,363 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// This file is used only for open source cmake builds, if we hardcode the
|
||||
// register values in amd_aql_queue.cpp then this file won't be required. For
|
||||
// now we are using this file where register details are spelled out in the
|
||||
// structs/unions below.
|
||||
#ifndef _WSL_INC_REGISTERS_H_
|
||||
#define _WSL_INC_REGISTERS_H_
|
||||
|
||||
typedef enum SQ_RSRC_BUF_TYPE {
|
||||
SQ_RSRC_BUF = 0x00000000,
|
||||
SQ_RSRC_BUF_RSVD_1 = 0x00000001,
|
||||
SQ_RSRC_BUF_RSVD_2 = 0x00000002,
|
||||
SQ_RSRC_BUF_RSVD_3 = 0x00000003,
|
||||
} SQ_RSRC_BUF_TYPE;
|
||||
|
||||
typedef enum BUF_DATA_FORMAT {
|
||||
BUF_DATA_FORMAT_INVALID = 0x00000000,
|
||||
BUF_DATA_FORMAT_8 = 0x00000001,
|
||||
BUF_DATA_FORMAT_16 = 0x00000002,
|
||||
BUF_DATA_FORMAT_8_8 = 0x00000003,
|
||||
BUF_DATA_FORMAT_32 = 0x00000004,
|
||||
BUF_DATA_FORMAT_16_16 = 0x00000005,
|
||||
BUF_DATA_FORMAT_10_11_11 = 0x00000006,
|
||||
BUF_DATA_FORMAT_11_11_10 = 0x00000007,
|
||||
BUF_DATA_FORMAT_10_10_10_2 = 0x00000008,
|
||||
BUF_DATA_FORMAT_2_10_10_10 = 0x00000009,
|
||||
BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a,
|
||||
BUF_DATA_FORMAT_32_32 = 0x0000000b,
|
||||
BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c,
|
||||
BUF_DATA_FORMAT_32_32_32 = 0x0000000d,
|
||||
BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e,
|
||||
BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f,
|
||||
} BUF_DATA_FORMAT;
|
||||
|
||||
typedef enum BUF_NUM_FORMAT {
|
||||
BUF_NUM_FORMAT_UNORM = 0x00000000,
|
||||
BUF_NUM_FORMAT_SNORM = 0x00000001,
|
||||
BUF_NUM_FORMAT_USCALED = 0x00000002,
|
||||
BUF_NUM_FORMAT_SSCALED = 0x00000003,
|
||||
BUF_NUM_FORMAT_UINT = 0x00000004,
|
||||
BUF_NUM_FORMAT_SINT = 0x00000005,
|
||||
BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x00000006,
|
||||
BUF_NUM_FORMAT_RESERVED_6__VI = 0x00000006,
|
||||
BUF_NUM_FORMAT_FLOAT = 0x00000007,
|
||||
} BUF_NUM_FORMAT;
|
||||
|
||||
typedef enum BUF_FORMAT {
|
||||
BUF_FORMAT_32_UINT = 0x00000014,
|
||||
} BUF_FORMAT;
|
||||
|
||||
typedef enum SQ_SEL_XYZW01 {
|
||||
SQ_SEL_0 = 0x00000000,
|
||||
SQ_SEL_1 = 0x00000001,
|
||||
SQ_SEL_RESERVED_0 = 0x00000002,
|
||||
SQ_SEL_RESERVED_1 = 0x00000003,
|
||||
SQ_SEL_X = 0x00000004,
|
||||
SQ_SEL_Y = 0x00000005,
|
||||
SQ_SEL_Z = 0x00000006,
|
||||
SQ_SEL_W = 0x00000007,
|
||||
} SQ_SEL_XYZW01;
|
||||
|
||||
union COMPUTE_TMPRING_SIZE {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int WAVES : 12;
|
||||
unsigned int WAVESIZE : 13;
|
||||
unsigned int : 7;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int : 7;
|
||||
unsigned int WAVESIZE : 13;
|
||||
unsigned int WAVES : 12;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
union COMPUTE_TMPRING_SIZE_GFX11 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int WAVES : 12;
|
||||
unsigned int WAVESIZE : 15;
|
||||
unsigned int : 5;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int : 5;
|
||||
unsigned int WAVESIZE : 15;
|
||||
unsigned int WAVES : 12;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
union COMPUTE_TMPRING_SIZE_GFX12 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int WAVES : 12;
|
||||
unsigned int WAVESIZE : 18;
|
||||
unsigned int : 2;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int : 2;
|
||||
unsigned int WAVESIZE : 18;
|
||||
unsigned int WAVES : 12;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
union SQ_BUF_RSRC_WORD0 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int BASE_ADDRESS : 32;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int BASE_ADDRESS : 32;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
|
||||
union SQ_BUF_RSRC_WORD1 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int BASE_ADDRESS_HI : 16;
|
||||
unsigned int STRIDE : 14;
|
||||
unsigned int CACHE_SWIZZLE : 1;
|
||||
unsigned int SWIZZLE_ENABLE : 1;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int SWIZZLE_ENABLE : 1;
|
||||
unsigned int CACHE_SWIZZLE : 1;
|
||||
unsigned int STRIDE : 14;
|
||||
unsigned int BASE_ADDRESS_HI : 16;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
union SQ_BUF_RSRC_WORD1_GFX11 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int BASE_ADDRESS_HI : 16;
|
||||
unsigned int STRIDE : 14;
|
||||
unsigned int SWIZZLE_ENABLE : 2;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int SWIZZLE_ENABLE : 2;
|
||||
unsigned int STRIDE : 14;
|
||||
unsigned int BASE_ADDRESS_HI : 16;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
|
||||
union SQ_BUF_RSRC_WORD2 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int NUM_RECORDS : 32;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int NUM_RECORDS : 32;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
|
||||
union SQ_BUF_RSRC_WORD3 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int DST_SEL_X : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int NUM_FORMAT : 3;
|
||||
unsigned int DATA_FORMAT : 4;
|
||||
unsigned int ELEMENT_SIZE : 2;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int ATC__CI__VI : 1;
|
||||
unsigned int HASH_ENABLE : 1;
|
||||
unsigned int HEAP : 1;
|
||||
unsigned int MTYPE__CI__VI : 3;
|
||||
unsigned int TYPE : 2;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int TYPE : 2;
|
||||
unsigned int MTYPE__CI__VI : 3;
|
||||
unsigned int HEAP : 1;
|
||||
unsigned int HASH_ENABLE : 1;
|
||||
unsigned int ATC__CI__VI : 1;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int ELEMENT_SIZE : 2;
|
||||
unsigned int DATA_FORMAT : 4;
|
||||
unsigned int NUM_FORMAT : 3;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_X : 3;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
union SQ_BUF_RSRC_WORD3_GFX10 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int DST_SEL_X : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int FORMAT : 7;
|
||||
unsigned int RESERVED1 : 2;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int RESOURCE_LEVEL : 1;
|
||||
unsigned int RESERVED2 : 3;
|
||||
unsigned int OOB_SELECT : 2;
|
||||
unsigned int TYPE : 2;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int TYPE : 2;
|
||||
unsigned int OOB_SELECT : 2;
|
||||
unsigned int RESERVED2 : 3;
|
||||
unsigned int RESOURCE_LEVEL : 1;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int RESERVED1 : 2;
|
||||
unsigned int FORMAT : 7;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_X : 3;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
|
||||
// From V# Table
|
||||
union SQ_BUF_RSRC_WORD3_GFX11 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int DST_SEL_X : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int FORMAT : 6;
|
||||
unsigned int RESERVED1 : 3;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int RESERVED2 : 4;
|
||||
unsigned int OOB_SELECT : 2;
|
||||
unsigned int TYPE : 2;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int TYPE : 2;
|
||||
unsigned int OOB_SELECT : 2;
|
||||
unsigned int RESERVED2 : 4;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int RESERVED1 : 3;
|
||||
unsigned int FORMAT : 6;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_X : 3;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
// From V# Table
|
||||
union SQ_BUF_RSRC_WORD3_GFX12 {
|
||||
struct {
|
||||
#if defined(LITTLEENDIAN_CPU)
|
||||
unsigned int DST_SEL_X : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int FORMAT : 6;
|
||||
unsigned int RESERVED1 : 3;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int WRITE_COMPRESS_ENABLE : 1;
|
||||
unsigned int COMPRESSION_EN : 1;
|
||||
unsigned int COMPRESSION_ACCESS_MODE : 2;
|
||||
unsigned int OOB_SELECT : 2;
|
||||
unsigned int TYPE : 2;
|
||||
#elif defined(BIGENDIAN_CPU)
|
||||
unsigned int TYPE : 2;
|
||||
unsigned int OOB_SELECT : 2;
|
||||
unsigned int COMPRESSION_ACCESS_MODE : 2;
|
||||
unsigned int COMPRESSION_EN : 1;
|
||||
unsigned int WRITE_COMPRESS_ENABLE : 1;
|
||||
unsigned int ADD_TID_ENABLE : 1;
|
||||
unsigned int INDEX_STRIDE : 2;
|
||||
unsigned int RESERVED1 : 3;
|
||||
unsigned int FORMAT : 6;
|
||||
unsigned int DST_SEL_W : 3;
|
||||
unsigned int DST_SEL_Z : 3;
|
||||
unsigned int DST_SEL_Y : 3;
|
||||
unsigned int DST_SEL_X : 3;
|
||||
#endif
|
||||
} bitfields, bits;
|
||||
unsigned int u32All;
|
||||
signed int i32All;
|
||||
float f32All;
|
||||
};
|
||||
#endif // header guard
|
||||
@@ -0,0 +1,122 @@
|
||||
#ifndef _WSL_INC_THUNK_PROXY_H_
|
||||
#define _WSL_INC_THUNK_PROXY_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace thunk_proxy {
|
||||
enum AllocDomain {
|
||||
kSystem,
|
||||
kLocal,
|
||||
kUserMemory,
|
||||
kUserQueue,
|
||||
kDomainCount,
|
||||
};
|
||||
|
||||
enum MemFlag {
|
||||
kFineGrain = (1ULL << 0),
|
||||
kKernarg = (1ULL << 1),
|
||||
};
|
||||
|
||||
enum EngineFlag {
|
||||
KCOMPUTE0 = (1ULL << 0),
|
||||
KDRMDMA = (1ULL << 1),
|
||||
KDRMDMA1 = (1ULL << 2),
|
||||
};
|
||||
|
||||
enum SchedLevel {
|
||||
kLow = 0,
|
||||
kNormal = 1,
|
||||
kHigh = 2,
|
||||
};
|
||||
|
||||
struct HwsInfo {
|
||||
union {
|
||||
struct {
|
||||
uint32_t gfxHwsEnabled : 1;
|
||||
uint32_t computeHwsEnabled : 1;
|
||||
uint32_t dmaHwsEnabled : 1;
|
||||
uint32_t dma1HwsEnabled : 1;
|
||||
uint32_t reserved : 28;
|
||||
} hwsMask;
|
||||
uint32_t osHwsEnableFlags;
|
||||
};
|
||||
uint64_t engineOrdinalMask; // Indicates which engines (by ordinal) support MES HWS
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int major;
|
||||
int minor;
|
||||
int stepping;
|
||||
bool is_dgpu;
|
||||
char product_name[MAX_PATH];
|
||||
uint64_t uuid;
|
||||
uint32_t family;
|
||||
uint32_t device_id;
|
||||
uint32_t wavefront_size;
|
||||
uint32_t compute_unit_count;
|
||||
uint32_t max_engine_clock_mhz;
|
||||
uint32_t watch_points_num;
|
||||
uint32_t pci_bus_addr;
|
||||
uint32_t memory_bus_width;
|
||||
uint32_t max_memory_clock_mhz;
|
||||
uint64_t gpu_counter_frequency;
|
||||
uint32_t wave_per_cu;
|
||||
uint32_t simd_per_cu;
|
||||
uint32_t max_scratch_slots_per_cu;
|
||||
uint32_t num_shader_engine;
|
||||
uint32_t shader_array_per_shader_engine;
|
||||
uint32_t domain;
|
||||
uint32_t num_gws;
|
||||
uint32_t asic_revision;
|
||||
uint64_t local_visible_heap_size;
|
||||
uint64_t local_invisible_heap_size;
|
||||
uint64_t non_local_heap_size;
|
||||
uint64_t private_aperture_base;
|
||||
uint64_t private_aperture_size;
|
||||
uint64_t shared_aperture_base;
|
||||
uint64_t shared_aperture_size;
|
||||
uint32_t user_queue_size;
|
||||
uint32_t lds_size;
|
||||
uint32_t big_page_alignment_size;
|
||||
uint32_t hw_big_page_min_alignment_size;
|
||||
uint32_t hw_big_page_alignment_size;
|
||||
bool enable_big_page_alignment;
|
||||
uint32_t mec_fw_version;
|
||||
uint32_t sdma_fw_version;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
uint32_t l3_cache_size;
|
||||
uint32_t gl2_cacheline_size;
|
||||
uint32_t num_cp_queues;
|
||||
HwsInfo hwsInfo;
|
||||
std::vector<int> sdma_schedid;
|
||||
uint32_t compute_schedid;
|
||||
bool state_shadowing_by_cpfw;
|
||||
bool platform_atomic_support;
|
||||
void *adapter_info;
|
||||
uint32_t kmd_version;
|
||||
} DeviceInfo;
|
||||
|
||||
int EngineOrdinal(int engine, DeviceInfo *device_info);
|
||||
bool GetHwsEnabled(int engine, DeviceInfo *device_info);
|
||||
bool ShouldDisableGpuTimeout(int engine, DeviceInfo *device_info);
|
||||
bool ParseAdapterInfo(D3DKMT_HANDLE adapter, DeviceInfo *device_info);
|
||||
bool QueryAdapterSupported(unsigned int device_id);
|
||||
|
||||
uint32_t QueueEngine2EngineFlag(uint32_t queue_engine);
|
||||
void SetAllocationInfo(void *data, uint64_t size, AllocDomain domain,
|
||||
uint64_t addr, uint32_t mem_flags, uint32_t engine_flag, const DeviceInfo &device_info);
|
||||
void GetAllocPrivDataSize(int *priv_drv_data_size, int *priv_alloc_data_size);
|
||||
void FillinAllocPrivDrvData(void *drv_priv, int priv_alloc_data_size);
|
||||
|
||||
int GetSubmitPrivDataSize();
|
||||
void FillinSubmitPrivData(void *priv_data, D3DKMT_HANDLE queue, uint64_t command_addr,
|
||||
uint64_t command_size, bool is_hw_queue);
|
||||
int GetHwQueuePrivDataSize();
|
||||
void FillinHwQueuePrivData(void *priv_data, bool FwManagedGfxState, SchedLevel level = kNormal);
|
||||
int GetContextPrivDataSize();
|
||||
void FillinContextPrivData(void *priv_data, bool FwManagedGfxState);
|
||||
int GetPowerOptPrivDataSize();
|
||||
void FillinPowerOptPrivData(void *priv_data, bool restore);
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,169 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
|
||||
#define _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <no_sal2.h>
|
||||
|
||||
typedef uint32_t UINT, *UINT_PTR;
|
||||
typedef int32_t INT32;
|
||||
typedef int32_t LONG;
|
||||
typedef uint32_t ULONG, *ULONG_PTR;
|
||||
typedef int64_t LONGLONG;
|
||||
typedef int64_t LONG64;
|
||||
typedef uint64_t ULONGLONG;
|
||||
typedef uint64_t ULONG64, *ULONG64_PTR;
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint16_t WORD;
|
||||
typedef uint32_t DWORD;
|
||||
typedef int32_t BOOL;
|
||||
typedef int32_t NTSTATUS;
|
||||
typedef uint16_t USHORT;
|
||||
typedef uint16_t UINT16;
|
||||
typedef uint32_t UINT32;
|
||||
typedef uint64_t UINT64;
|
||||
typedef int32_t INT;
|
||||
typedef uint64_t SIZE_T;
|
||||
typedef void VOID;
|
||||
typedef float FLOAT;
|
||||
typedef char CHAR;
|
||||
typedef unsigned char UCHAR;
|
||||
typedef UCHAR BOOLEAN;
|
||||
typedef int16_t WCHAR;
|
||||
typedef void *HANDLE;
|
||||
typedef void *PVOID;
|
||||
typedef void *LPVOID;
|
||||
typedef const int16_t *PCWSTR;
|
||||
|
||||
#define ULONG ULONG
|
||||
#define ULONG_PTR ULONG_PTR
|
||||
#define USHORT USHORT
|
||||
|
||||
#define DECLARE_HANDLE(name) struct name##__{int unused;}; typedef struct name##__ *name
|
||||
#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1]
|
||||
|
||||
DECLARE_HANDLE(HWND);
|
||||
DECLARE_HANDLE(HDC);
|
||||
DECLARE_HANDLE(PALETTEENTRY);
|
||||
|
||||
typedef struct tagPOINT {
|
||||
LONG x;
|
||||
LONG y;
|
||||
} POINT;
|
||||
|
||||
typedef struct tagRECT {
|
||||
LONG left;
|
||||
LONG top;
|
||||
LONG right;
|
||||
LONG bottom;
|
||||
} RECT;
|
||||
|
||||
typedef struct tagRECTL {
|
||||
LONG left;
|
||||
LONG top;
|
||||
LONG right;
|
||||
LONG bottom;
|
||||
} RECTL;
|
||||
|
||||
typedef union _LARGE_INTEGER {
|
||||
struct {
|
||||
DWORD LowPart;
|
||||
DWORD HighPart;
|
||||
} u;
|
||||
LONGLONG QuadPart;
|
||||
} LARGE_INTEGER;
|
||||
|
||||
typedef LARGE_INTEGER *PLARGE_INTEGER;
|
||||
|
||||
typedef union _ULARGE_INTEGER {
|
||||
struct {
|
||||
ULONG LowPart;
|
||||
ULONG HighPart;
|
||||
} DUMMYSTRUCTNAME;
|
||||
struct {
|
||||
ULONG LowPart;
|
||||
ULONG HighPart;
|
||||
} u;
|
||||
ULONGLONG QuadPart;
|
||||
} ULARGE_INTEGER;
|
||||
|
||||
typedef ULARGE_INTEGER *PULARGE_INTEGER;
|
||||
|
||||
typedef struct _LUID {
|
||||
ULONG LowPart;
|
||||
LONG HighPart;
|
||||
} LUID, *PLUID;
|
||||
|
||||
typedef enum _DEVICE_POWER_STATE {
|
||||
PowerDeviceUnspecified = 0,
|
||||
PowerDeviceD0,
|
||||
PowerDeviceD1,
|
||||
PowerDeviceD2,
|
||||
PowerDeviceD3,
|
||||
PowerDeviceMaximum
|
||||
} DEVICE_POWER_STATE, *PDEVICE_POWER_STATE;
|
||||
|
||||
#define _Check_return_
|
||||
#define APIENTRY
|
||||
#define CONST const
|
||||
#define IN
|
||||
#define OUT
|
||||
#define FAR
|
||||
#define MAX_PATH 260
|
||||
#define __stdcall
|
||||
|
||||
#ifndef GUID_DEFINED
|
||||
#define GUID_DEFINED
|
||||
typedef struct _GUID {
|
||||
uint32_t Data1;
|
||||
uint16_t Data2;
|
||||
uint16_t Data3;
|
||||
uint8_t Data4[ 8 ];
|
||||
} GUID;
|
||||
#endif
|
||||
|
||||
#include <guiddef.h>
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,82 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */
|
||||
|
||||
#ifndef _WSL_INC_WDDM_CMD_UTIL_H_
|
||||
#define _WSL_INC_WDDM_CMD_UTIL_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "impl/hsa/hsa.h"
|
||||
#include "impl/hsa/amd_hsa_queue.h"
|
||||
#include "impl/hsa/amd_hsa_kernel_code.h"
|
||||
#include "impl/pm4_cmds.h"
|
||||
#include "util/utils.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
struct DispatchInfo {
|
||||
uint8_t major;
|
||||
hsa_kernel_dispatch_packet_t *pPacket;
|
||||
void *pEntry;
|
||||
const amd_kernel_code_t *pKernelObject;
|
||||
uint32_t ldsBlks;
|
||||
amd_queue_v2_t *pAmdQueue;
|
||||
bool wave32;
|
||||
uint32_t srd;
|
||||
void *pScratchBase;
|
||||
uint32_t scratchSizePerWave;
|
||||
uint32_t scratchBaseOffset[2];
|
||||
uint32_t offsetCnt;
|
||||
};
|
||||
|
||||
class CmdUtil {
|
||||
public:
|
||||
CmdUtil() {};
|
||||
~CmdUtil() {};
|
||||
|
||||
static size_t BuildCopyData(
|
||||
uint64_t *pDstAddr,
|
||||
void *pBuffer,
|
||||
uint32_t dstSel = dst_sel__mec_copy_data__tc_l2,
|
||||
uint32_t dstCachePolicy = dst_cache_policy__mec_copy_data__stream,
|
||||
uint32_t srcSel = src_sel__mec_copy_data__gpu_clock_count,
|
||||
uint32_t srcCachePolicy = src_cache_policy__mec_copy_data__lru,
|
||||
uint32_t countSel = count_sel__mec_copy_data__64_bits_of_data,
|
||||
uint32_t wrConfirm = wr_confirm__mec_copy_data__wait_for_confirmation);
|
||||
|
||||
static size_t BuildBarrier(
|
||||
void *pBuffer,
|
||||
uint32_t eventIndex = event_index__mec_event_write__cs_partial_flush,
|
||||
uint32_t eventType = CS_PARTIAL_FLUSH);
|
||||
|
||||
static size_t BuildWriteData64Command(
|
||||
void *pBuffer,
|
||||
uint64_t* write_addr,
|
||||
uint64_t write_value);
|
||||
|
||||
static size_t BuildAcquireMem(
|
||||
uint8_t major,
|
||||
void *pBuffer);
|
||||
|
||||
static size_t BuildScratch(
|
||||
void *pScratchBase,
|
||||
void *pBuffer);
|
||||
|
||||
static size_t BuildComputeShaderParams(
|
||||
void *pBuffer);
|
||||
|
||||
static size_t BuildDispatch(
|
||||
struct DispatchInfo *pInfo,
|
||||
void *pBuffer);
|
||||
|
||||
static size_t BuildAtomicMem(
|
||||
uint64_t *pAddr,
|
||||
uint32_t atomic,
|
||||
void *pBuffer,
|
||||
uint32_t cachePolicy = cache_policy__mec_atomic_mem__stream,
|
||||
uint64_t srcData = 1);
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,246 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_DEVICE_H_
|
||||
#define _WSL_INC_WDDM_DEVICE_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <ntstatus.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/thunk_proxy/thunk_proxy.h"
|
||||
#include "impl/wddm/va_mgr.h"
|
||||
#include "impl/wddm/status.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/gpu_memory.h"
|
||||
#include "impl/wddm/cmd_util.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
//class Queue;
|
||||
class WDDMQueue;
|
||||
|
||||
// WSL2 hyperv GPADL protocol limitation
|
||||
#define MAX_USERPTR_BLOCK_SIZE 0xf0000000
|
||||
#define START_NON_CANONICAL_ADDR (1ULL << 47)
|
||||
#define END_NON_CANONICAL_ADDR (~0UL - (1UL << 47))
|
||||
#define IS_OVERLAPPING(start1, size1, start2, size2) \
|
||||
((start1 < (start2 + size2)) && (start2 < (start1 + size1)))
|
||||
|
||||
struct SegmentInfo {
|
||||
uint32_t segment_id;
|
||||
uint32_t segment_type; // 0=aperture, 1=gpu memory, 2=system memory
|
||||
bool aperture;
|
||||
bool system_memory;
|
||||
uint64_t commit_limit;
|
||||
|
||||
SegmentInfo()
|
||||
: segment_id(0), segment_type(0), aperture(false),
|
||||
system_memory(false), commit_limit(0) {}
|
||||
};
|
||||
|
||||
class WDDMDevice {
|
||||
public:
|
||||
static constexpr size_t GpuMemoryChunkSize = 2 * (1ULL << 30); // 2 GB
|
||||
|
||||
WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid, uint32_t node_id);
|
||||
~WDDMDevice();
|
||||
|
||||
int NodeId() const { return node_id_; }
|
||||
int Major() { return device_info_.major; }
|
||||
int Minor() { return device_info_.minor; }
|
||||
int Stepping() { return device_info_.stepping; }
|
||||
bool IsDgpu() { return device_info_.is_dgpu; }
|
||||
const char *ProductName() { return device_info_.product_name; }
|
||||
uint64_t Uuid() { return device_info_.uuid; }
|
||||
uint32_t GfxFamily() { return device_info_.family; }
|
||||
uint32_t DeviceId() { return device_info_.device_id; }
|
||||
uint32_t WavefrontSize() { return device_info_.wavefront_size; }
|
||||
uint32_t ComputeUnitCount() { return device_info_.compute_unit_count; }
|
||||
uint32_t MaxEngineClockMhz() { return device_info_.max_engine_clock_mhz; }
|
||||
uint32_t WatchPointsNum() { return device_info_.watch_points_num; }
|
||||
uint32_t PciBusAddr() { return device_info_.pci_bus_addr; }
|
||||
|
||||
uint32_t MemoryBusWidth() { return device_info_.memory_bus_width; }
|
||||
uint32_t MaxMemoryClockMhz() { return device_info_.max_memory_clock_mhz; }
|
||||
uint32_t WavePerCu() { return device_info_.wave_per_cu; }
|
||||
uint32_t SimdPerCu() { return device_info_.simd_per_cu; }
|
||||
uint32_t MaxScratchSlotsPerCu() { return device_info_.max_scratch_slots_per_cu; }
|
||||
uint32_t NumShaderEngine() { return device_info_.num_shader_engine; }
|
||||
uint32_t ShaderArrayPerShaderEngine() { return device_info_.shader_array_per_shader_engine; }
|
||||
uint32_t NumSdmaEngine() { return device_info_.sdma_schedid.size(); }
|
||||
uint32_t Domain() { return device_info_.domain; }
|
||||
uint32_t NumGws() { return device_info_.num_gws; }
|
||||
uint32_t AsicRevision() { return device_info_.asic_revision; }
|
||||
uint64_t LocalHeapSize() { return device_info_.local_visible_heap_size + device_info_.local_invisible_heap_size; }
|
||||
uint64_t LocalVisibleHeapSize() { return device_info_.local_visible_heap_size; }
|
||||
uint64_t LocalInvisibleHeapSize() { return device_info_.local_invisible_heap_size; }
|
||||
uint64_t NonLocalHeapSize() { return device_info_.non_local_heap_size; }
|
||||
uint64_t PrivateApertureBase() { return device_info_.private_aperture_base; }
|
||||
uint64_t PrivateApertureSize() { return device_info_.private_aperture_size; }
|
||||
uint64_t SharedApertureBase() { return device_info_.shared_aperture_base; }
|
||||
uint64_t SharedApertureSize() { return device_info_.shared_aperture_size; }
|
||||
uint32_t LdsSize() { return device_info_.lds_size; }
|
||||
uint64_t GPUCounterFrequency() { return device_info_.gpu_counter_frequency; }
|
||||
uint32_t GetSwsQueueSize(void) const { return device_info_.user_queue_size; }
|
||||
uint32_t GetMecFwVersion() { return device_info_.mec_fw_version; }
|
||||
uint32_t GetSdmaFwVersion() { return device_info_.sdma_fw_version; }
|
||||
uint32_t GetL1CacheSize() { return device_info_.l1_cache_size; }
|
||||
uint32_t GetL2CacheSize() { return device_info_.l2_cache_size; }
|
||||
uint32_t GetL3CacheSize() { return device_info_.l3_cache_size; }
|
||||
uint32_t Gl2CacheLineSize() { return device_info_.gl2_cacheline_size; }
|
||||
bool SupportStateShadowingByCpFw(void) const { return device_info_.state_shadowing_by_cpfw; }
|
||||
bool SupportPlatformAtomic(void) const { return device_info_.platform_atomic_support; }
|
||||
uint32_t GetSdmaEngine(uint32_t idx) {
|
||||
assert(idx < NumSdmaEngine());
|
||||
return device_info_.sdma_schedid[idx];
|
||||
}
|
||||
uint32_t GetComputeEngine() { return device_info_.compute_schedid; }
|
||||
|
||||
uint64_t VramAvail();
|
||||
|
||||
void GetClockCounters(uint64_t *gpu, uint64_t *cpu);
|
||||
uint32_t GetNumCpQueues() { return device_info_.num_cp_queues; }
|
||||
|
||||
bool CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr);
|
||||
void DestroySyncobj(D3DKMT_HANDLE handle);
|
||||
|
||||
bool CreateQueue(WDDMQueue *queue);
|
||||
void DestroyQueue(WDDMQueue *queue);
|
||||
bool CreateHwQueue(WDDMQueue *queue);
|
||||
bool DestroyHwQueue(WDDMQueue *queue);
|
||||
bool SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr,
|
||||
uint64_t command_size, uint64_t fence_value);
|
||||
bool SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr,
|
||||
uint64_t command_size, uint64_t fence_value);
|
||||
|
||||
bool WaitPagingFence(WDDMQueue *queue) {
|
||||
uint64_t value = page_fence_value_;
|
||||
|
||||
if (*page_fence_addr_ < value &&
|
||||
!GpuWait(queue, &page_syncobj_, &value, 1))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs,
|
||||
uint64_t *values, int count);
|
||||
bool GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs,
|
||||
uint64_t *value, int count);
|
||||
bool CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value,
|
||||
int count, bool wait_any);
|
||||
bool WaitOnPagingFenceFromCpu();
|
||||
|
||||
uint32_t LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt);
|
||||
uint32_t GetCmdbufSize(void) const { return cmdbuf_size_; }
|
||||
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size_; }
|
||||
static uint32_t GetAqlFrameNum(void) { return cmdbuf_aql_frame_num_; }
|
||||
|
||||
// Both legacy HWS and stage 1 HWS use KMD to alloc use queue memory,
|
||||
// return false by default
|
||||
bool AllocUserQueueMemFromUMD(void) const { return false; }
|
||||
|
||||
bool IsHwsEnabled(int engine) {
|
||||
return thunk_proxy::GetHwsEnabled(engine, &device_info_);
|
||||
}
|
||||
|
||||
void UpdatePageFence(uint64_t fence_value);
|
||||
|
||||
D3DKMT_HANDLE PagingQueue() const { return page_queue_; }
|
||||
D3DKMT_HANDLE PagingFence() const { return page_syncobj_; }
|
||||
D3DKMT_HANDLE DeviceHandle() const { return device_; }
|
||||
LUID GetLuid() const { return adapter_luid_; }
|
||||
D3DKMT_HANDLE GetAdapter() const { return adapter_; }
|
||||
|
||||
const thunk_proxy::DeviceInfo& DeviceInfo() const { return device_info_; }
|
||||
|
||||
ErrorCode CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem, gpusize *gpu_va = nullptr);
|
||||
|
||||
private:
|
||||
bool ParseDeviceInfo(void);
|
||||
void DestroyDeviceInfo(void);
|
||||
bool CreateDevice(void);
|
||||
bool DestroyDevice(void);
|
||||
bool CreatePagingQueue(void);
|
||||
bool DestroyPagingQueue(void);
|
||||
void *Lock(D3DKMT_HANDLE handle);
|
||||
bool Unlock(D3DKMT_HANDLE handle);
|
||||
bool CreateContext(int engine, D3DKMT_HANDLE *handle);
|
||||
bool DestroyContext(D3DKMT_HANDLE handle);
|
||||
|
||||
void SetPowerOptimization(bool restore);
|
||||
void InitCmdbufInfo(void);
|
||||
|
||||
bool QuerySegmentInfo();
|
||||
bool GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE segment_type, uint32_t &segment_id);
|
||||
|
||||
D3DKMT_HANDLE adapter_;
|
||||
LUID adapter_luid_;
|
||||
D3DKMT_HANDLE device_;
|
||||
|
||||
D3DKMT_HANDLE page_queue_;
|
||||
D3DKMT_HANDLE page_syncobj_;
|
||||
uint64_t *page_fence_addr_;
|
||||
std::atomic<uint64_t> page_fence_value_;
|
||||
|
||||
uint32_t cmdbuf_size_;
|
||||
uint32_t cmdbuf_aql_frame_size_;
|
||||
static const uint32_t cmdbuf_aql_frame_num_;
|
||||
uint32_t node_id_;
|
||||
// device info
|
||||
thunk_proxy::DeviceInfo device_info_;
|
||||
std::vector<struct SegmentInfo> segment_infos_;
|
||||
//CmdUtil cmd_util;
|
||||
};
|
||||
|
||||
NTSTATUS WDDMCreateDevices(std::vector<WDDMDevice *> &devices);
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,249 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_GPU_MEMORY_H_
|
||||
#define _WSL_INC_WDDM_GPU_MEMORY_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include "util/utils.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/thunks.h"
|
||||
#include "impl/thunk_proxy/thunk_proxy.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
class WDDMDevice;
|
||||
|
||||
union GpuMemoryCreateFlags {
|
||||
struct {
|
||||
uint64_t virtual_alloc : 1; // only allocate virtual address, without physical buffer
|
||||
uint64_t physical_only : 1; // only allocate physical buffer, without virutal address
|
||||
uint64_t interprocess : 1; // physical buffer need share info between exporter and importer
|
||||
uint64_t locked : 1; // lock virtual address space into RAM, preventing that memory from being paged to the swap area
|
||||
uint64_t physical_contiguous : 1; // contiguous physical pages
|
||||
uint64_t sysmem_ipc_sig_importer : 1; // allocate system memory for IPC signal
|
||||
uint64_t sysmem_ipc_sig_exporter : 1; // allocate system memory for IPC signal, prepare to export
|
||||
uint64_t alloc_va : 1; // allocate va. 0 for vmem import
|
||||
uint64_t blit_kernel_object : 1; // allocate executable blit kernel object
|
||||
uint64_t unused : 55;
|
||||
};
|
||||
uint64_t reserved;
|
||||
};
|
||||
|
||||
union GpuMemoryDescFlags {
|
||||
struct {
|
||||
uint32_t is_virtual : 1;
|
||||
uint32_t is_shared : 1;
|
||||
uint32_t is_external : 1;
|
||||
uint32_t is_physical_only : 1;
|
||||
uint32_t is_locked : 1;
|
||||
uint32_t is_queue_referenced : 1;
|
||||
uint32_t is_physical_contiguous : 1;
|
||||
uint32_t is_imported_sys_memfd : 1; // 0 - ignored; 1 - va from system heap
|
||||
uint32_t is_sysmem_exporter : 1; // allocate system memory for IPC signal, prepare to export
|
||||
uint32_t is_va_required :1;
|
||||
uint32_t is_imported_vram_vmem :1;
|
||||
uint32_t is_imported_vram_ipc :1;
|
||||
uint32_t is_imported_from_same_process : 3; // imported from same process, record shared cnt
|
||||
uint32_t is_blit_kernel_object : 1; // blit kernel object
|
||||
uint32_t unused : 16;
|
||||
};
|
||||
|
||||
uint32_t reserved;
|
||||
};
|
||||
|
||||
struct GpuMemoryCreateInfo {
|
||||
GpuMemoryCreateInfo() {
|
||||
flags.reserved = 0;
|
||||
domain = thunk_proxy::kLocal;
|
||||
size = 0;
|
||||
alignment = 0;
|
||||
mem_flags = 0;
|
||||
engine_flag = 0;
|
||||
va_hint = 0;
|
||||
user_ptr = nullptr;
|
||||
dmabuf_fd = -1;
|
||||
}
|
||||
|
||||
GpuMemoryCreateFlags flags;
|
||||
thunk_proxy::AllocDomain domain;
|
||||
gpusize size;
|
||||
gpusize alignment;
|
||||
int mem_flags;
|
||||
int engine_flag;
|
||||
int dmabuf_fd; // Import from dmabuf
|
||||
|
||||
void *user_ptr;
|
||||
gpusize va_hint;
|
||||
};
|
||||
|
||||
struct GpuMemoryDesc {
|
||||
GpuMemoryDesc() {
|
||||
gpu_addr = 0;
|
||||
cpu_addr = nullptr;
|
||||
client_size = 0;
|
||||
size = alignment = 0;
|
||||
flags.reserved = 0;
|
||||
mem_flags = 0;
|
||||
engine_flag = 0;
|
||||
handle_ape_addr = 0;
|
||||
}
|
||||
|
||||
thunk_proxy::AllocDomain domain;
|
||||
LUID adapter_luid; // Where is the backing store location
|
||||
gpusize gpu_addr;
|
||||
void *cpu_addr;
|
||||
gpusize client_size; // user request size
|
||||
gpusize size;
|
||||
gpusize alignment;
|
||||
gpusize handle_ape_addr;
|
||||
|
||||
GpuMemoryDescFlags flags;
|
||||
int mem_flags;
|
||||
int engine_flag;
|
||||
};
|
||||
|
||||
struct SharedHandleInfo {
|
||||
thunk_proxy::AllocDomain domain;
|
||||
LUID adapter_luid;
|
||||
gpusize client_size; // user request size
|
||||
uint64_t size;
|
||||
uint32_t flags;
|
||||
int mem_flags;
|
||||
pid_t pid;
|
||||
gpusize gpu_addr;
|
||||
};
|
||||
|
||||
using GpuMemoryHandle = void *;
|
||||
|
||||
class GpuMemory {
|
||||
public:
|
||||
static size_t CalcChunkNumbers(gpusize size);
|
||||
|
||||
ErrorCode Init(const GpuMemoryCreateInfo &create_info);
|
||||
|
||||
WDDMDevice *GetDevice() const { return device_; }
|
||||
gpusize Size() const { return desc_.size; }
|
||||
gpusize ClientSize() const { return desc_.client_size; }
|
||||
uint64_t GpuAddress() const { return desc_.gpu_addr; }
|
||||
void *CpuAddress() const { return desc_.cpu_addr; }
|
||||
uint64_t HandleApeAddress() const { return desc_.handle_ape_addr; }
|
||||
|
||||
inline bool IsLocal() const { return desc_.domain == thunk_proxy::kLocal; }
|
||||
inline bool IsUserMemory() const { return desc_.domain == thunk_proxy::kUserMemory; }
|
||||
inline bool IsSystem() const { return desc_.domain == thunk_proxy::kSystem; }
|
||||
inline bool IsSysMemFd() const { return desc_.flags.is_imported_sys_memfd; }
|
||||
inline bool IsUserQueue() const { return desc_.domain == thunk_proxy::kUserQueue; }
|
||||
inline bool IsPhysicalOnly() const { return desc_.flags.is_physical_only; }
|
||||
inline bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous; }
|
||||
inline bool IsVirtual() const { return desc_.flags.is_virtual; }
|
||||
inline bool IsShared() const { return desc_.flags.is_shared; }
|
||||
inline bool IsExternal() const { return desc_.flags.is_external; }
|
||||
inline bool IsVaAllocated() const { return desc_.flags.is_va_required; }
|
||||
inline bool IsBlitKernelObject() const { return desc_.flags.is_blit_kernel_object; }
|
||||
|
||||
inline uint32_t Flags() const { return desc_.flags.reserved; }
|
||||
inline int GetAllocInfo() const { return desc_.mem_flags; }
|
||||
inline bool IsFineGrain() const { return (desc_.mem_flags & thunk_proxy::kFineGrain); }
|
||||
inline bool IsSameAdapter(const LUID &luid) const {
|
||||
return (desc_.adapter_luid.HighPart == luid.HighPart &&
|
||||
desc_.adapter_luid.LowPart == luid.LowPart);
|
||||
}
|
||||
inline void GetQueueReference() { desc_.flags.is_queue_referenced = 1; }
|
||||
inline void PutQueueReference() { desc_.flags.is_queue_referenced = 0; }
|
||||
inline bool IsQueueReferenced() const { return desc_.flags.is_queue_referenced; }
|
||||
inline void IncSharedReference() { desc_.flags.is_imported_from_same_process++; }
|
||||
inline uint32_t DecSharedReference() { return (desc_.flags.is_imported_from_same_process == 0) ? 0 : --desc_.flags.is_imported_from_same_process; }
|
||||
inline bool IsSharedFromSameProcess() const { return desc_.flags.is_imported_from_same_process > 0; }
|
||||
|
||||
WinAllocationHandle GetAllocationHandle(size_t index) const { return alloc_handles_ptr_[index]; }
|
||||
size_t NumChunks() const { return num_allocations_; }
|
||||
|
||||
const GpuMemoryHandle GetGpuMemoryHandle() const {
|
||||
return reinterpret_cast<GpuMemoryHandle>(const_cast<GpuMemory*>(this));
|
||||
}
|
||||
|
||||
static GpuMemory *Convert(GpuMemoryHandle handle) { return reinterpret_cast<GpuMemory *>(handle); }
|
||||
|
||||
ErrorCode ReserveGpuVirtualAddress(gpusize base_virt_addr, gpusize va_size, gpusize alignment);
|
||||
ErrorCode FreeGpuVirtualAddress(gpusize va_start_address, gpusize va_size);
|
||||
|
||||
ErrorCode MapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
|
||||
ErrorCode UnmapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
|
||||
|
||||
ErrorCode MakeResident();
|
||||
ErrorCode Evict();
|
||||
|
||||
ErrorCode ExportPhysicalHandle(int* dmabuf_fd, uint32_t flags = SHARED_ALLOCATION_ALL_ACCESS);
|
||||
ErrorCode ImportPhysicalHandle(const GpuMemoryCreateInfo &create_info, gpusize *gpu_addr = nullptr);
|
||||
~GpuMemory();
|
||||
protected:
|
||||
explicit GpuMemory(WDDMDevice *device);
|
||||
private:
|
||||
ErrorCode CreatePhysicalMemory();
|
||||
ErrorCode FreePhysicalMemory();
|
||||
|
||||
uint64_t AdjustSize(gpusize size) const;
|
||||
private:
|
||||
friend class WDDMDevice;
|
||||
|
||||
WDDMDevice *const device_;
|
||||
|
||||
GpuMemoryDesc desc_;
|
||||
|
||||
size_t num_allocations_;
|
||||
WinAllocationHandle *alloc_handles_ptr_;
|
||||
WinAllocationHandle alloc_handle_; // Optimization for num_allocations_ is 1
|
||||
|
||||
WinResourceHandle resource_; // Handle to a resource object that wraps the allocation. Used for shared resources
|
||||
|
||||
int mem_fd_; // IPC sigal's sys mem fd
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(GpuMemory);
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,370 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifndef _WSL_INC_WDDM_QUEUE_H_
|
||||
#define _WSL_INC_WDDM_QUEUE_H_
|
||||
|
||||
#include <cinttypes>
|
||||
#include <condition_variable>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/device.h"
|
||||
#include "impl/wddm/gpu_memory.h"
|
||||
#include "impl/hsa/hsa_ext_amd.h"
|
||||
#include "impl/hsa/amd_hsa_queue.h"
|
||||
#include "impl/hsa/amd_hsa_signal.h"
|
||||
#include "impl/wddm/cmd_util.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
class Queue;
|
||||
class WDDMDevice;
|
||||
|
||||
class WDDMQueue {
|
||||
public:
|
||||
WDDMQueue(WDDMDevice *device,
|
||||
uint64_t cmdbuf_addr,
|
||||
uint32_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true) :
|
||||
device(device),
|
||||
context(0),
|
||||
queue(0),
|
||||
syncobj(0),
|
||||
sync_addr(NULL),
|
||||
cmdbuf(0),
|
||||
cmdbuf_addr(cmdbuf_addr),
|
||||
cmdbuf_size(cmdbuf_size),
|
||||
queue_engine(engine),
|
||||
use_hws(use_hws),
|
||||
prio(thunk_proxy::kNormal) {
|
||||
|
||||
}
|
||||
|
||||
virtual ~WDDMQueue() { }
|
||||
|
||||
virtual hsa_status_t Init(void) { return HSA_STATUS_SUCCESS; }
|
||||
virtual hsa_status_t Fini(void) { return HSA_STATUS_SUCCESS; }
|
||||
virtual void RingDoorbell() { }
|
||||
virtual void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }
|
||||
|
||||
hsa_status_t SwsInit(void);
|
||||
hsa_status_t SwsFini(void);
|
||||
hsa_status_t SwsSubmit(uint64_t command_addr,
|
||||
uint64_t command_size,
|
||||
uint64_t fence_value);
|
||||
|
||||
hsa_status_t HwsInit(void);
|
||||
hsa_status_t HwsFini(void);
|
||||
hsa_status_t HwsSubmit(uint64_t command_addr,
|
||||
uint64_t command_size,
|
||||
uint64_t fence_value);
|
||||
hsa_status_t SetPriority(hsa_amd_queue_priority_t priority);
|
||||
|
||||
uint64_t *GetSyncAddr(void) const { return sync_addr; }
|
||||
uint64_t GetCmdbufAddr(void) const { return cmdbuf_addr; }
|
||||
|
||||
thunk_proxy::SchedLevel ConvertSchedLevel(hsa_amd_queue_priority_t prio) const {
|
||||
switch (prio) {
|
||||
case HSA_AMD_QUEUE_PRIORITY_LOW:
|
||||
return thunk_proxy::kLow;
|
||||
case HSA_AMD_QUEUE_PRIORITY_HIGH:
|
||||
return thunk_proxy::kHigh;
|
||||
case HSA_AMD_QUEUE_PRIORITY_NORMAL:
|
||||
default:
|
||||
return thunk_proxy::kNormal;
|
||||
}
|
||||
}
|
||||
|
||||
WDDMDevice *device;
|
||||
|
||||
D3DKMT_HANDLE context;
|
||||
D3DKMT_HANDLE queue;
|
||||
|
||||
D3DKMT_HANDLE syncobj;
|
||||
uint64_t *sync_addr;
|
||||
|
||||
GpuMemoryHandle cmdbuf;
|
||||
uint64_t cmdbuf_addr;
|
||||
uint32_t cmdbuf_size;
|
||||
|
||||
GpuMemoryHandle queue_mem;
|
||||
uint64_t queue_addr;
|
||||
|
||||
uint32_t queue_engine;
|
||||
|
||||
bool use_hws;
|
||||
thunk_proxy::SchedLevel prio;
|
||||
};
|
||||
|
||||
class ComputeQueue : public WDDMQueue {
|
||||
public:
|
||||
ComputeQueue(WDDMDevice *device,
|
||||
void *ring,
|
||||
uint64_t ring_size,
|
||||
std::atomic<uint64_t> *ring_wptr,
|
||||
std::atomic<uint64_t> *ring_rptr,
|
||||
volatile int64_t *error_addr,
|
||||
uint32_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true);
|
||||
|
||||
~ComputeQueue();
|
||||
|
||||
virtual hsa_status_t Init(void);
|
||||
virtual hsa_status_t Fini(void);
|
||||
virtual hsa_status_t Submit(void);
|
||||
|
||||
void* GetRing(void) const { return ring; }
|
||||
uint64_t GetRingSize(void) const { return ring_size; }
|
||||
std::atomic<uint64_t>* GetRingWptr(void) const { return ring_wptr; }
|
||||
std::atomic<uint64_t>* GetRingRptr(void) const { return ring_rptr; }
|
||||
|
||||
uint64_t GetAqlWriteIndex(void) const { return cmdbuf_aql_frame_write_index; }
|
||||
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size; }
|
||||
void* GetHsaQueueAddr(void) const { return ring; }
|
||||
|
||||
bool IsInvalidPacket(void) const {
|
||||
uint16_t *packet = (uint16_t *)((char *)ring +
|
||||
(cmdbuf_aql_frame_write_index % ring_size) * 64);
|
||||
return ((*packet >> HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1))
|
||||
== HSA_PACKET_TYPE_INVALID;
|
||||
}
|
||||
|
||||
hsa_status_t Process(void);
|
||||
uint64_t * GetDoorbellPtr() const { return (uint64_t *)&doorbell_signal_value_; }
|
||||
void RingDoorbell();
|
||||
private:
|
||||
hsa_status_t KernelDispatchAqlToPm4(char *cpu, hsa_kernel_dispatch_packet_t *packet);
|
||||
hsa_status_t BarrierGenericAqlToPm4(char *cpu, hsa_barrier_and_packet_t *packet, bool is_or = false);
|
||||
|
||||
uint64_t CalcDispatchGroups(hsa_kernel_dispatch_packet_t *packet);
|
||||
uint64_t CalcDispatchWavesPerGroup(hsa_kernel_dispatch_packet_t *packet, bool wave32);
|
||||
|
||||
struct amd_aql_pm4_ib {
|
||||
uint16_t header;
|
||||
uint16_t ven_hdr;
|
||||
uint32_t ib_jump_cmd[4];
|
||||
uint32_t dw_cnt_remain;
|
||||
uint32_t reserved[8];
|
||||
hsa_signal_t completion_signal;
|
||||
};
|
||||
hsa_status_t VendorSpecificAqlToPm4(char *cpu, amd_aql_pm4_ib *packet);
|
||||
hsa_status_t SwitchAql2PM4(void);
|
||||
|
||||
hsa_status_t PreSubmit(void);
|
||||
hsa_status_t EndSubmit(void);
|
||||
|
||||
void *ring;
|
||||
uint64_t ring_size;
|
||||
std::atomic<uint64_t> *ring_wptr;
|
||||
std::atomic<uint64_t> *ring_rptr;
|
||||
|
||||
// ib_start_addr is the current ib start address
|
||||
uint64_t ib_start_addr;
|
||||
|
||||
// ib_size is the current ib size.
|
||||
uint64_t ib_size;
|
||||
|
||||
// record the last submitted aql frame write index
|
||||
uint64_t sync_point;
|
||||
|
||||
uint64_t cmdbuf_aql_frame_write_index;
|
||||
uint32_t cmdbuf_aql_frame_size;
|
||||
|
||||
uint64_t *signal_addr_;
|
||||
bool platform_atomic_support_;
|
||||
bool needs_barrier;
|
||||
bool ready_to_submit;
|
||||
|
||||
CmdUtil cmd_util;
|
||||
|
||||
private:
|
||||
bool EnableProfiling() {
|
||||
return AMD_HSA_BITS_GET(amd_queue_rocr_->queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING);
|
||||
}
|
||||
void HandleError(hsa_status_t status);
|
||||
bool UpdateScratch(hsa_kernel_dispatch_packet_t *packet, bool wave32);
|
||||
|
||||
uint32_t UpdateIndexStride(uint32_t srd, bool wave32);
|
||||
|
||||
void *ScratchBase() { return scratch_base_; }
|
||||
|
||||
void AppendCmdbufSratchBaseOffset(int offset) {
|
||||
scratch_base_offset_array_.push_back(offset);
|
||||
}
|
||||
|
||||
bool RelocateCmdbufScratchBase(uint64_t addr);
|
||||
|
||||
uint32_t ScratchSizePerWave() { return scratch_size_per_wave_; }
|
||||
uint64_t GetKernelObjAddr(uint64_t addr) const;
|
||||
void InitScratchSRD();
|
||||
GpuMemoryHandle amd_queue_mem_;
|
||||
amd_queue_v2_t *amd_queue_;
|
||||
amd_queue_v2_t *amd_queue_rocr_;
|
||||
uint64_t doorbell_signal_value_;
|
||||
volatile std::atomic<int64_t> *error_code_;
|
||||
std::thread aql_to_pm4_thread_;
|
||||
bool thread_stop_;
|
||||
std::mutex thread_cond_lock_;
|
||||
std::condition_variable thread_cond_;
|
||||
static void AqlToPm4Thread(ComputeQueue *queue);
|
||||
|
||||
uint64_t max_scratch_waves_;
|
||||
uint64_t dispatch_waves_;
|
||||
uint64_t scratch_size_per_wave_;
|
||||
uint64_t scratch_size_;
|
||||
uint64_t total_scratch_size_;
|
||||
void *scratch_base_;
|
||||
uint32_t scratch_mem_alignment_size_;
|
||||
GpuMemoryHandle scratch_mem_;
|
||||
|
||||
std::vector<int> scratch_base_offset_array_;
|
||||
};
|
||||
|
||||
class SDMAQueue : public WDDMQueue {
|
||||
public:
|
||||
SDMAQueue(WDDMDevice *device,
|
||||
void *ring,
|
||||
uint64_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true);
|
||||
|
||||
virtual ~SDMAQueue();
|
||||
|
||||
hsa_status_t Init(void);
|
||||
hsa_status_t Fini(void);
|
||||
hsa_status_t Submit(void);
|
||||
|
||||
int PreparePacket(uint32_t offset, uint64_t size);
|
||||
|
||||
void WaitQueue(void) {
|
||||
device->CpuWait(&syncobj, &rptr_next, 1, false);
|
||||
}
|
||||
|
||||
uint64_t * GetRingWptr(void) { return &wptr_next_; }
|
||||
uint64_t * GetRingRptr(void) { return WDDMQueue::GetSyncAddr(); }
|
||||
uint64_t * GetDoorbellPtr() { return &doorbell_; }
|
||||
void RingDoorbell();
|
||||
void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }
|
||||
|
||||
private:
|
||||
uint64_t wptr_next_;
|
||||
uint64_t wptr_pre_;
|
||||
uint64_t rptr_next;
|
||||
uint64_t doorbell_;
|
||||
std::vector<std::pair<uint64_t, uint64_t>> wptr_queue_;
|
||||
uint64_t ib_size;
|
||||
uint64_t ib_start_addr;
|
||||
|
||||
std::thread thread_;
|
||||
bool thread_stop_;
|
||||
std::mutex thread_cond_lock_;
|
||||
std::condition_variable thread_cond_;
|
||||
static void SdmaThread(SDMAQueue *queue);
|
||||
|
||||
struct SDMA_PKT_POLL_REGMEM {
|
||||
union {
|
||||
struct {
|
||||
unsigned int op : 8;
|
||||
unsigned int sub_op : 8;
|
||||
unsigned int reserved_0 : 10;
|
||||
unsigned int hdp_flush : 1;
|
||||
unsigned int reserved_1 : 1;
|
||||
unsigned int func : 3;
|
||||
unsigned int mem_poll : 1;
|
||||
};
|
||||
unsigned int DW_0_DATA;
|
||||
} HEADER_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int addr_31_0 : 32;
|
||||
};
|
||||
unsigned int DW_1_DATA;
|
||||
} ADDR_LO_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int addr_63_32 : 32;
|
||||
};
|
||||
unsigned int DW_2_DATA;
|
||||
} ADDR_HI_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int value : 32;
|
||||
};
|
||||
unsigned int DW_3_DATA;
|
||||
} VALUE_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int mask : 32;
|
||||
};
|
||||
unsigned int DW_4_DATA;
|
||||
} MASK_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int interval : 16;
|
||||
unsigned int retry_count : 12;
|
||||
unsigned int reserved_0 : 4;
|
||||
};
|
||||
unsigned int DW_5_DATA;
|
||||
} DW5_UNION;
|
||||
};
|
||||
const unsigned int SDMA_OP_POLL_REGMEM = 8;
|
||||
bool IsPollPacket(SDMA_PKT_POLL_REGMEM* pkt) {
|
||||
return pkt->HEADER_UNION.op == SDMA_OP_POLL_REGMEM &&
|
||||
pkt->HEADER_UNION.mem_poll == 1 &&
|
||||
pkt->HEADER_UNION.func == 3;
|
||||
}
|
||||
uint32_t WrapIntoRocrRing(uint64_t idx) { return (idx & (cmdbuf_size - 1)); }
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,61 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_STATUS_H
|
||||
#define _WSL_INC_WDDM_STATUS_H
|
||||
|
||||
enum class ErrorCode {
|
||||
Success,
|
||||
DeviceLost,
|
||||
UnSupported,
|
||||
NotReady,
|
||||
OutOfMemory,
|
||||
OutOfGpuMemory,
|
||||
OutOfHandleApeMemory,
|
||||
Timeout,
|
||||
SyscallFail,
|
||||
InvalidateParams,
|
||||
SameProcessSameDevice,
|
||||
Unknown,
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,233 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_THUNKS_H
|
||||
#define _WSL_INC_WDDM_THUNKS_H
|
||||
|
||||
#include "impl/wddm/status.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "dxcore_loader.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
inline ErrorCode TranslateNtStatus(NTSTATUS status) {
|
||||
switch (status) {
|
||||
case STATUS_SUCCESS:
|
||||
return ErrorCode::Success;
|
||||
case STATUS_PENDING:
|
||||
return ErrorCode::NotReady;
|
||||
case STATUS_NO_MEMORY:
|
||||
return ErrorCode::OutOfMemory;
|
||||
case STATUS_DEVICE_REMOVED:
|
||||
return ErrorCode::DeviceLost;
|
||||
case STATUS_GRAPHICS_NO_VIDEO_MEMORY:
|
||||
return ErrorCode::OutOfGpuMemory;
|
||||
case STATUS_TIMEOUT:
|
||||
return ErrorCode::Timeout;
|
||||
case STATUS_INVALID_PARAMETER:
|
||||
return ErrorCode::InvalidateParams;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return ErrorCode::Unknown;
|
||||
}
|
||||
|
||||
namespace d3dthunk {
|
||||
|
||||
typedef D3DKMT_CREATEALLOCATION CreateAllocationArgs;
|
||||
typedef D3DKMT_CREATECONTEXT CreateContextArgs;
|
||||
typedef D3DKMT_CREATECONTEXTVIRTUAL CreateContextVirtualArgs;
|
||||
typedef D3DKMT_CREATEPAGINGQUEUE CreatePagingQueueArgs;
|
||||
typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT CreateSynchronizationObjectArgs;
|
||||
typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT2 CreateSynchronizationObject2Args;
|
||||
typedef D3DKMT_ESCAPE EscapeArgs;
|
||||
typedef D3DKMT_EVICT EvictArgs;
|
||||
typedef D3DKMT_FREEGPUVIRTUALADDRESS FreeGpuVirtualAddressArgs;
|
||||
typedef D3DKMT_LOCK LockArgs;
|
||||
typedef D3DKMT_LOCK2 Lock2Args;
|
||||
typedef D3DKMT_OPENRESOURCE OpenResourceArgs;
|
||||
typedef D3DKMT_OPENRESOURCEFROMNTHANDLE OpenResourceFromNtHandleArgs;
|
||||
typedef D3DKMT_QUERYADAPTERINFO QueryAdapterInfoArgs;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT SignalSynchronizationObjectArgs;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 SignalSynchronizationObject2Args;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU SignalSynchronizationObjectFromCpuArgs;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU2 SignalSynchronizationObjectFromGpuArgs;
|
||||
typedef D3DKMT_SUBMITCOMMAND SubmitCommandArgs;
|
||||
typedef D3DKMT_UNLOCK UnlockArgs;
|
||||
typedef D3DKMT_UNLOCK2 Unlock2Args;
|
||||
typedef D3DKMT_UPDATEGPUVIRTUALADDRESS UpdateGpuVirtualAddressArgs;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT WaitForSynchronizationObjectArgs;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT2 WaitForSynchronizationObject2Args;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU WaitForSynchronizationObjectFromCpuArgs;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU WaitForSynchronizationObjectFromGpuArgs;
|
||||
typedef D3DKMT_ACQUIREKEYEDMUTEX AcquireKeyedMutexArgs;
|
||||
typedef D3DKMT_RELEASEKEYEDMUTEX ReleaseKeyedMutexArgs;
|
||||
typedef D3DKMT_OPENKEYEDMUTEX OpenKeyedMutexArgs;
|
||||
typedef D3DKMT_DESTROYKEYEDMUTEX DestroyKeyedMutexArgs;
|
||||
typedef D3DKMT_QUERYVIDEOMEMORYINFO QueryVideoMemoryInfoArgs;
|
||||
typedef D3DKMT_CREATEHWQUEUE CreateHwQueueArgs;
|
||||
typedef D3DKMT_DESTROYHWQUEUE DestroyHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITCOMMANDTOHWQUEUE SubmitCommandToHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITPRESENTTOHWQUEUE SubmitPresentToHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITSIGNALSYNCOBJECTSTOHWQUEUE SubmitSignalSyncObjectsToHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITWAITFORSYNCOBJECTSTOHWQUEUE SubmitWaitForSyncObjectsToHwQueueArgs;
|
||||
typedef D3DKMT_CREATESYNCFILE CreateSyncFileArgs;
|
||||
|
||||
inline ErrorCode MapGpuVirtualAddress(D3DDDI_MAPGPUVIRTUALADDRESS *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTMapGpuVirtualAddress(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode CreateAllocation(CreateAllocationArgs *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTCreateAllocation2(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode DestroyAllocation(
|
||||
WinDeviceHandle device,
|
||||
WinResourceHandle resource,
|
||||
size_t num_allocations,
|
||||
const WinAllocationHandle *alloc_handles) {
|
||||
|
||||
D3DKMT_DESTROYALLOCATION2 args{};
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.hDevice = device;
|
||||
if (resource) {
|
||||
args.hResource = resource;
|
||||
} else {
|
||||
args.phAllocationList = alloc_handles;
|
||||
args.AllocationCount = num_allocations;
|
||||
}
|
||||
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTDestroyAllocation2(&args)));
|
||||
}
|
||||
|
||||
inline ErrorCode ReserveGpuVirtualAddress(D3DDDI_RESERVEGPUVIRTUALADDRESS *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTReserveGpuVirtualAddress(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
|
||||
gpusize size,
|
||||
gpusize base_address,
|
||||
gpusize *out_addr) {
|
||||
D3DDDI_RESERVEGPUVIRTUALADDRESS args{};
|
||||
args.hPagingQueue = handle;
|
||||
args.Size = size;
|
||||
args.BaseAddress = base_address;
|
||||
|
||||
auto code = ReserveGpuVirtualAddress(&args);
|
||||
if (code == ErrorCode::Success)
|
||||
*out_addr = args.VirtualAddress;
|
||||
return code;
|
||||
}
|
||||
|
||||
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
|
||||
gpusize size,
|
||||
gpusize minimum_address,
|
||||
gpusize maximum_address,
|
||||
gpusize *out_addr) {
|
||||
D3DDDI_RESERVEGPUVIRTUALADDRESS args{};
|
||||
args.hPagingQueue = handle;
|
||||
args.Size = size;
|
||||
args.MinimumAddress = minimum_address;
|
||||
args.MaximumAddress = maximum_address;
|
||||
|
||||
auto code = ReserveGpuVirtualAddress(&args);
|
||||
if (code == ErrorCode::Success)
|
||||
*out_addr = args.VirtualAddress;
|
||||
return code;
|
||||
}
|
||||
|
||||
inline ErrorCode FreeGpuVirtualAddress(FreeGpuVirtualAddressArgs *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTFreeGpuVirtualAddress(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode FreeGpuVirtualAddress(WinAdapterHandle handle,
|
||||
gpusize base_address,
|
||||
gpusize size) {
|
||||
FreeGpuVirtualAddressArgs args{};
|
||||
args.hAdapter = handle;
|
||||
args.Size = size;
|
||||
args.BaseAddress = base_address;
|
||||
return FreeGpuVirtualAddress(&args);
|
||||
}
|
||||
|
||||
inline ErrorCode MakeResident(D3DDDI_MAKERESIDENT *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTMakeResident(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode Evict(EvictArgs *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTEvict(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode ShareObjects(size_t num_allocations,
|
||||
WinResourceHandle resource,
|
||||
uint32_t flags,
|
||||
int* dmabuf_fd) {
|
||||
OBJECT_ATTRIBUTES obj_attr;
|
||||
HANDLE nt_handle;
|
||||
ErrorCode ret;
|
||||
|
||||
InitializeObjectAttributes(&obj_attr, nullptr, OBJ_INHERIT, nullptr, nullptr);
|
||||
ret = TranslateNtStatus(DXCORE_CALL(D3DKMTShareObjects(num_allocations,
|
||||
&resource, &obj_attr, flags, &nt_handle)));
|
||||
if (ret == ErrorCode::Success)
|
||||
*dmabuf_fd = *(reinterpret_cast<int*>(&nt_handle));
|
||||
else
|
||||
*dmabuf_fd = -1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline ErrorCode QueryResourceInfoFromNtHandle(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTQueryResourceInfoFromNtHandle(args)));
|
||||
}
|
||||
|
||||
inline ErrorCode OpenResourceFromNtHandle(D3DKMT_OPENRESOURCEFROMNTHANDLE *args) {
|
||||
return TranslateNtStatus(DXCORE_CALL(D3DKMTOpenResourceFromNtHandle(args)));
|
||||
}
|
||||
|
||||
} // namespace d3dthunk
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,101 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_TYPES_H_
|
||||
#define _WSL_INC_WDDM_TYPES_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <ntstatus.h>
|
||||
#include "impl/thunk_proxy/wddm_types.h"
|
||||
// windows wchar is 16bit, but linux is 32bit
|
||||
// seems libdxcore (not dxgkrnl.ko) convert thunk windows wchar to linux one
|
||||
// so only accept 32bit wchar args. note driver private data structure still
|
||||
// use 16bit wchar
|
||||
#define WCHAR wchar_t
|
||||
#define PCWSTR const wchar_t *
|
||||
#include <d3dkmthk.h>
|
||||
#undef WCHAR
|
||||
#undef PCWSTR
|
||||
|
||||
using gpusize = uint64_t; // Used to specify GPU addresses and sizes of GPU allocations
|
||||
using WinAllocationHandle = D3DKMT_HANDLE;
|
||||
using WinResourceHandle = D3DKMT_HANDLE;
|
||||
using WinContextHandle = D3DKMT_HANDLE;
|
||||
using WinDeviceHandle = D3DKMT_HANDLE;
|
||||
using WinAdapterHandle = D3DKMT_HANDLE;
|
||||
|
||||
//reference dk/winnt.h
|
||||
#define STANDARD_RIGHTS_REQUIRED (0x000F0000L)
|
||||
|
||||
//reference dk/ntdef.h
|
||||
#define OBJ_INHERIT (0x00000002L)
|
||||
typedef WCHAR *PWCHAR, *LPWCH, *PWCH;
|
||||
typedef struct _UNICODE_STRING {
|
||||
USHORT Length;
|
||||
USHORT MaximumLength;
|
||||
#ifdef MIDL_PASS
|
||||
[size_is(MaximumLength / 2), length_is((Length) / 2) ] USHORT * Buffer;
|
||||
#else // MIDL_PASS
|
||||
_Field_size_bytes_part_opt_(MaximumLength, Length) PWCH Buffer;
|
||||
#endif // MIDL_PASS
|
||||
} UNICODE_STRING;
|
||||
typedef UNICODE_STRING *PUNICODE_STRING;
|
||||
typedef const UNICODE_STRING *PCUNICODE_STRING;
|
||||
|
||||
typedef struct _OBJECT_ATTRIBUTES {
|
||||
ULONG Length;
|
||||
HANDLE RootDirectory;
|
||||
PUNICODE_STRING ObjectName;
|
||||
ULONG Attributes;
|
||||
PVOID SecurityDescriptor;
|
||||
PVOID SecurityQualityOfService;
|
||||
} OBJECT_ATTRIBUTES;
|
||||
#define InitializeObjectAttributes( p, n, a, r, s ) { \
|
||||
(p)->Length = sizeof( OBJECT_ATTRIBUTES ); \
|
||||
(p)->RootDirectory = r; \
|
||||
(p)->Attributes = a; \
|
||||
(p)->ObjectName = n; \
|
||||
(p)->SecurityDescriptor = s; \
|
||||
(p)->SecurityQualityOfService = NULL; \
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,86 @@
|
||||
#ifndef _WSL_INC_WDDM_VA_MGR_H_
|
||||
#define _WSL_INC_WDDM_VA_MGR_H_
|
||||
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include "util/utils.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
class VaMgr {
|
||||
public:
|
||||
VaMgr(uint64_t start, uint64_t size, uint64_t min_align);
|
||||
~VaMgr();
|
||||
|
||||
/* Allocate `bytes` VA, if `align` is not zero, the returned address is aligned by `align`.
|
||||
* If `addr` parameter is not zero, try best to allocate VA from fixed address `addr`.
|
||||
*/
|
||||
uint64_t Alloc(uint64_t bytes, uint64_t align, uint64_t addr = 0);
|
||||
|
||||
void Free(uint64_t addr);
|
||||
|
||||
private:
|
||||
uint64_t AllocImpl(uint64_t bytes, uint64_t align);
|
||||
|
||||
struct Fragment {
|
||||
using ptr = std::multimap<uint64_t, uint64_t>::iterator;
|
||||
ptr free_list_entry_;
|
||||
|
||||
struct {
|
||||
uint64_t size : 63;
|
||||
bool is_free : 1;
|
||||
};
|
||||
|
||||
Fragment() : size(0), is_free(false) {}
|
||||
Fragment(ptr iterator, uint64_t len, bool is_free)
|
||||
: free_list_entry_(iterator), size(len), is_free(is_free) {}
|
||||
};
|
||||
|
||||
static inline Fragment make_fragment(typename Fragment::ptr iter, uint64_t len) {
|
||||
return {iter, len, true};
|
||||
}
|
||||
|
||||
inline Fragment make_fragment(uint64_t len) { return {free_list_.end(), len, false}; }
|
||||
|
||||
static inline bool is_free(const Fragment& f) { return f.is_free; }
|
||||
void set_used(Fragment& f) {
|
||||
f.is_free = false;
|
||||
f.free_list_entry_ = free_list_.end();
|
||||
}
|
||||
static void set_free(Fragment& f, typename Fragment::ptr iter) {
|
||||
f.free_list_entry_ = iter;
|
||||
f.is_free = true;
|
||||
}
|
||||
|
||||
inline void remove_free_list_entry(Fragment& frag) {
|
||||
if (frag.free_list_entry_ != free_list_.end()) {
|
||||
free_list_.erase(frag.free_list_entry_);
|
||||
frag.free_list_entry_ = free_list_.end();
|
||||
}
|
||||
}
|
||||
|
||||
inline void add_free_fragment(uint64_t size, uint64_t base) {
|
||||
auto it = free_list_.insert(std::make_pair(size, base));
|
||||
frag_map_[base] = make_fragment(it, size);
|
||||
}
|
||||
|
||||
inline void add_used_fragment(uint64_t size, uint64_t base) {
|
||||
frag_map_[base] = make_fragment(size);
|
||||
}
|
||||
// Indexed by size
|
||||
std::multimap<uint64_t, uint64_t> free_list_;
|
||||
// Indexed by VA, each fragment has no overlap
|
||||
std::map<uint64_t, Fragment> frag_map_;
|
||||
|
||||
uint64_t min_align_;
|
||||
|
||||
std::mutex lock_; // Mutex protecting allocation and free of va
|
||||
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(VaMgr);
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
#endif
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren