Add definitions of HSA functions and types

Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>


[ROCm/ROCR-Runtime commit: 813af2b89a]
Šī revīzija ir iekļauta:
Oded Gabbay
2014-07-29 10:55:52 +03:00
vecāks 5935d171b6
revīzija 5b7d807b42
2 mainīti faili ar 1416 papildinājumiem un 0 dzēšanām
@@ -0,0 +1,565 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _HSAKMT_H_
#define _HSAKMT_H_
#include "hsakmttypes.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Opens the HSA kernel driver for user-kernel mode communication.
 *
 * On Windows, this obtains a handle to the KFD's AMDKFDIO device object,
 * which is responsible for user-kernel communication; the thunk library uses
 * that handle internally to send device I/O controls to the HSA kernel driver.
 * On Linux, this opens the "/dev/kfd" device file to establish a
 * communication path to the kernel.
 *
 * No other thunk library function may be called until the user-kernel
 * communication channel has been opened.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void);
/**
 * Closes the user-kernel communication path.
 *
 * On Windows, the handle obtained by hsaKmtOpenKFD() is closed; once
 * hsaKmtCloseKFD() has executed successfully, no other communication with
 * the kernel driver is possible. If the call fails, the user-kernel
 * communication path may or may not still be active, depending on the
 * failure reason.
 * On Linux, the function closes the "/dev/kfd" device file.
 *
 * No further communication with the kernel driver is allowed until
 * hsaKmtOpenKFD() is called again.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void);
/**
 * Returns the user-kernel interface version supported by KFD.
 *
 * Higher major numbers usually add new features to KFD and may break
 * user-kernel compatibility; higher minor numbers define additional
 * functionality associated within a major number.
 * The calling software should validate that it meets the minimum interface
 * version as described in the API specification.
 *
 * @param VersionInfo [out] receives the supported kernel interface version
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetVersion(
    HsaVersionInfo *VersionInfo     /* OUT */
);
/**
 * Takes a "snapshot" of the topology information within the KFD, so that
 * nothing changes during the enumeration process.
 *
 * The snapshot is released with hsaKmtReleaseSystemProperties().
 *
 * @param SystemProperties [out] receives the system-level topology properties
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(
    HsaSystemProperties *SystemProperties   /* OUT */
);
/**
 * Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void);
/**
 * Retrieves the discoverable sub-properties for a given HSA node.
 *
 * The values returned allow the application or runtime to size the
 * management structures necessary to store the information.
 *
 * @param NodeId         [in]  the HSA node to query
 * @param NodeProperties [out] receives the properties of that node
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(
    HSAuint32          NodeId,          /* IN  */
    HsaNodeProperties *NodeProperties   /* OUT */
);
/**
 * Retrieves the memory properties of a specific HSA node.
 *
 * The memory pointer passed as MemoryProperties is sized as
 * NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
 * hsaKmtGetNodeProperties() call.
 *
 * Some of the data returned is optional. Not all implementations may return
 * all parameters in the HsaMemoryProperties.
 *
 * @param NodeId           [in]  the HSA node to query
 * @param NumBanks         [in]  number of entries MemoryProperties can hold
 * @param MemoryProperties [out] caller-provided array of NumBanks entries
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(
    HSAuint32            NodeId,            /* IN  */
    HSAuint32            NumBanks,          /* IN  */
    HsaMemoryProperties *MemoryProperties   /* OUT */
);
/**
 * Retrieves the cache properties of a specific HSA node and processor ID.
 *
 * ProcessorId refers to either a CPU core or a SIMD unit as enumerated
 * earlier via the hsaKmtGetNodeProperties() call.
 * The memory pointer passed as CacheProperties is sized as
 * NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
 * hsaKmtGetNodeProperties() call.
 *
 * The data returned is optional. Not all implementations may return all
 * parameters in the CacheProperties.
 *
 * @param NodeId          [in]  the HSA node to query
 * @param ProcessorId     [in]  CPU core or SIMD unit on that node
 * @param NumCaches       [in]  number of entries CacheProperties can hold
 * @param CacheProperties [out] caller-provided array of NumCaches entries
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(
    HSAuint32           NodeId,             /* IN  */
    HSAuint32           ProcessorId,        /* IN  */
    HSAuint32           NumCaches,          /* IN  */
    HsaCacheProperties *CacheProperties     /* OUT */
);
/**
 * Retrieves the HSA IO affinity properties of a specific HSA node.
 *
 * The memory pointer passed as IoLinkProperties is sized as
 * NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
 * hsaKmtGetNodeProperties() call.
 *
 * The data returned is optional. Not all implementations may return all
 * parameters in the IoLinkProperties.
 *
 * @param NodeId           [in]  the HSA node to query
 * @param NumIoLinks       [in]  number of entries IoLinkProperties can hold
 * @param IoLinkProperties [out] caller-provided array of NumIoLinks entries
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(
    HSAuint32            NodeId,            /* IN  */
    HSAuint32            NumIoLinks,        /* IN  */
    HsaIoLinkProperties *IoLinkProperties   /* OUT */
);
/**
 * Creates an operating system event associated with an HSA event ID.
 *
 * @param EventDesc   [in]  descriptor of the event to create
 * @param ManualReset [in]  NOTE(review): presumably selects manual-reset
 *                          behaviour for the event - confirm with the
 *                          implementation
 * @param IsSignaled  [in]  NOTE(review): presumably the initial signaled
 *                          state of the event - confirm with the
 *                          implementation
 * @param Event       [out] receives a pointer to the created event
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(
    HsaEventDescriptor *EventDesc,      /* IN  */
    bool                ManualReset,    /* IN  */
    bool                IsSignaled,     /* IN  */
    HsaEvent          **Event           /* OUT */
);
/**
 * Destroys an operating system event associated with an HSA event ID.
 *
 * @param Event [in] the event to destroy
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(
    HsaEvent *Event     /* IN */
);
/**
 * Sets the specified event object to the signaled state.
 *
 * @param Event [in] the event to signal
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(
    HsaEvent *Event     /* IN */
);
/**
 * Sets the specified event object to the non-signaled state.
 *
 * @param Event [in] the event to reset
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(
    HsaEvent *Event     /* IN */
);
/**
 * Queries the state of the specified event object.
 *
 * @param Event [in] the event whose state is queried
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(
    HsaEvent *Event     /* IN */
);
/**
 * Checks the current state of the event object. If the object's state is
 * non-signaled, the calling thread enters the wait state.
 *
 * The function returns when one of the following occurs:
 *   - The specified event object is in the signaled state.
 *   - The time-out interval elapses.
 *
 * @param Event        [in] the event to wait on
 * @param Milliseconds [in] the time-out interval
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(
    HsaEvent  *Event,           /* IN */
    HSAuint32  Milliseconds     /* IN */
);
/**
 * Checks the current state of multiple event objects.
 *
 * The function returns when one of the following occurs:
 *   - Either any one or all of the specified objects are in the signaled
 *     state:
 *       - if "WaitOnAll" is "true", the function returns when the state of
 *         all objects in the array is signaled;
 *       - if "WaitOnAll" is "false", the function returns when the state of
 *         any one of the objects is set to signaled.
 *   - The time-out interval elapses.
 *
 * @param Events       [in] array of events to wait on
 * @param NumEvents    [in] number of entries in Events
 * @param WaitOnAll    [in] wait for all events (true) or any one (false)
 * @param Milliseconds [in] the time-out interval
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(
    HsaEvent  *Events[],        /* IN */
    HSAuint32  NumEvents,       /* IN */
    bool       WaitOnAll,       /* IN */
    HSAuint32  Milliseconds     /* IN */
);
/**
 * New TEMPORARY function definition - to be used only on the
 * "Trinity + Southern Islands" platform.
 * If used on other platforms the function will return HSAKMT_STATUS_ERROR.
 *
 * @param QueueId     [in]  the queue to report on
 * @param QueueReport [out] receives the queue report
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReportQueue(
    HSA_QUEUEID     QueueId,        /* IN  */
    HsaQueueReport *QueueReport     /* OUT */
);
/**
 * Creates a GPU queue with user-mode access rights.
 *
 * @param NodeId           [in]  the HSA node the queue is created on
 * @param Type             [in]  queue type, see HSA_QUEUE_TYPE
 * @param QueuePercentage  [in]
 * @param Priority         [in]  queue priority, see HSA_QUEUE_PRIORITY
 * @param QueueAddress     [in]
 * @param QueueSizeInBytes [in]
 * @param Event            [in]
 * @param QueueResource    [out] receives the resources of the created queue
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(
    HSAuint32           NodeId,             /* IN  */
    HSA_QUEUE_TYPE      Type,               /* IN  */
    HSAuint32           QueuePercentage,    /* IN  */
    HSA_QUEUE_PRIORITY  Priority,           /* IN  */
    void               *QueueAddress,       /* IN  */
    HSAuint64           QueueSizeInBytes,   /* IN  */
    HsaEvent           *Event,              /* IN  */
    HsaQueueResource   *QueueResource       /* OUT */
);
/**
 * Updates a queue.
 *
 * @param QueueId         [in] the queue to update
 * @param QueuePercentage [in]
 * @param Priority        [in] queue priority, see HSA_QUEUE_PRIORITY
 * @param QueueAddress    [in]
 * @param QueueSize       [in]
 * @param Event           [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(
    HSA_QUEUEID         QueueId,            /* IN */
    HSAuint32           QueuePercentage,    /* IN */
    HSA_QUEUE_PRIORITY  Priority,           /* IN */
    void               *QueueAddress,       /* IN */
    HSAuint64           QueueSize,          /* IN */
    HsaEvent           *Event               /* IN */
);
/**
 * Destroys a queue.
 *
 * @param QueueId [in] the queue to destroy
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(
    HSA_QUEUEID QueueId     /* IN */
);
/**
 * Allows an HSA process to set/change the default and alternate memory
 * coherency, before starting to dispatch.
 *
 * @param Node                   [in]
 * @param DefaultPolicy          [in]
 * @param AlternatePolicy        [in]
 * @param MemoryAddressAlternate [in] page-aligned
 * @param MemorySizeInBytes      [in] page-aligned
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(
    HSAuint32  Node,                    /* IN */
    HSAuint32  DefaultPolicy,           /* IN */
    HSAuint32  AlternatePolicy,         /* IN */
    void      *MemoryAddressAlternate,  /* IN (page-aligned) */
    HSAuint64  MemorySizeInBytes        /* IN (page-aligned) */
);
/**
 * Allocates a memory buffer that may be accessed by the GPU.
 *
 * @param PreferredNode [in]  preferred node for the allocation
 * @param SizeInBytes   [in]  allocation size, a multiple of the page size
 * @param MemFlags      [in]  allocation flags, see HsaMemFlags
 * @param MemoryAddress [out] receives the page-aligned buffer address
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(
    HSAuint32    PreferredNode,     /* IN  */
    HSAuint64    SizeInBytes,       /* IN  (multiple of page size) */
    HsaMemFlags  MemFlags,          /* IN  */
    void       **MemoryAddress      /* OUT (page-aligned) */
);
/**
 * Frees a memory buffer.
 *
 * @param MemoryAddress [in] page-aligned address of the buffer
 * @param SizeInBytes   [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(
    void      *MemoryAddress,   /* IN (page-aligned) */
    HSAuint64  SizeInBytes      /* IN */
);
/**
 * Registers with KFD a memory buffer that may be accessed by the GPU.
 * This function will never be required for Linux.
 *
 * @param MemoryAddress     [in] page-aligned address of the buffer
 * @param MemorySizeInBytes [in] page-aligned size of the buffer
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(
    void      *MemoryAddress,       /* IN (page-aligned) */
    HSAuint64  MemorySizeInBytes    /* IN (page-aligned) */
);
/**
 * Unregisters with KFD a memory buffer.
 * This function will never be required for Linux.
 *
 * @param MemoryAddress [in] address of the buffer to unregister
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(
    void *MemoryAddress     /* IN */
);
/**
 * Ensures that the memory is resident and can be accessed by the GPU.
 * Not implemented yet.
 *
 * @param MemoryAddress     [in]  page-aligned address of the memory
 * @param MemorySizeInBytes [in]  page-aligned size of the memory
 * @param AlternateVAGPU    [out] page-aligned
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(
    void      *MemoryAddress,       /* IN  (page-aligned) */
    HSAuint64  MemorySizeInBytes,   /* IN  (page-aligned) */
    HSAuint64 *AlternateVAGPU       /* OUT (page-aligned) */
);
/**
 * Releases the residency of the memory.
 * Not implemented yet.
 *
 * @param MemoryAddress [in] page-aligned address of the memory
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(
    void *MemoryAddress     /* IN (page-aligned) */
);
/**
 * Notifies the kernel driver that a process wants to use GPU debugging
 * facilities.
 *
 * @param NodeId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(
    HSAuint32 NodeId    /* IN */
);
/**
 * Detaches the debugger process from the HW debug established by the
 * hsaKmtDbgRegister() API.
 *
 * @param NodeId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(
    HSAuint32 NodeId    /* IN */
);
/**
 * Controls a wavefront.
 *
 * @param NodeId         [in]
 * @param Operand        [in] the wave operation, see HSA_DBG_WAVEOP
 * @param Mode           [in] the wave mode, see HSA_DBG_WAVEMODE
 * @param TrapId         [in]
 * @param DbgWaveMsgRing [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(
    HSAuint32          NodeId,          /* IN */
    HSA_DBG_WAVEOP     Operand,         /* IN */
    HSA_DBG_WAVEMODE   Mode,            /* IN */
    HSAuint32          TrapId,          /* IN */
    HsaDbgWaveMessage *DbgWaveMsgRing   /* IN */
);
/**
 * Sets watch points on memory address ranges to generate exception events
 * when the watched addresses are accessed.
 *
 * @param NodeId         [in]
 * @param NumWatchPoints [in]
 * @param WatchMode      [in] see HSA_DBG_WATCH_MODE
 * @param WatchAddress   [in]
 * @param WatchMask      [in] optional
 * @param WatchEvent     [in] optional
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(
    HSAuint32           NodeId,             /* IN */
    HSAuint32           NumWatchPoints,     /* IN */
    HSA_DBG_WATCH_MODE  WatchMode[],        /* IN */
    void               *WatchAddress[],     /* IN */
    HSAuint64           WatchMask[],        /* IN, optional */
    HsaEvent           *WatchEvent[]        /* IN, optional */
);
/**
 * Gets the GPU and CPU clock counters for a particular node.
 *
 * @param NodeId   [in]  the node to query
 * @param Counters [out] receives the clock counters
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(
    HSAuint32         NodeId,       /* IN  */
    HsaClockCounters *Counters      /* OUT */
);
/**
 * Retrieves information on the available HSA counters.
 *
 * @param NodeId            [in]
 * @param CounterProperties [out] receives a pointer to the counter properties
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(
    HSAuint32              NodeId,              /* IN  */
    HsaCounterProperties **CounterProperties    /* OUT */
);
/**
 * Registers a set of (HW) counters to be used for tracing/profiling.
 *
 * @param NodeId           [in]
 * @param NumberOfCounters [in]
 * @param Counters         [in]
 * @param TraceRoot        [out]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(
    HSAuint32        NodeId,            /* IN  */
    HSAuint32        NumberOfCounters,  /* IN  */
    HsaCounter      *Counters,          /* IN  */
    HsaPmcTraceRoot *TraceRoot          /* OUT */
);
/**
 * Unregisters a set of (HW) counters used for tracing/profiling.
 *
 * @param NodeId  [in]
 * @param TraceId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(
    HSAuint32  NodeId,      /* IN */
    HSATraceId TraceId      /* IN */
);
/**
 * Allows a user mode process to get exclusive access to the defined set of
 * (HW) counters used for tracing/profiling.
 *
 * @param NodeId  [in]
 * @param TraceId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(
    HSAuint32  NodeId,      /* IN */
    HSATraceId TraceId      /* IN */
);
/**
 * Allows a user mode process to release exclusive access to the defined set
 * of (HW) counters used for tracing/profiling.
 *
 * @param NodeId  [in]
 * @param TraceId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(
    HSAuint32  NodeId,      /* IN */
    HSATraceId TraceId      /* IN */
);
/**
 * Starts a tracing operation on a previously established set of performance
 * counters.
 *
 * @param TraceId              [in]
 * @param TraceBuffer          [in] page-aligned
 * @param TraceBufferSizeBytes [in] page-aligned
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStartTrace(
    HSATraceId  TraceId,                /* IN */
    void       *TraceBuffer,            /* IN (page aligned) */
    HSAuint64   TraceBufferSizeBytes    /* IN (page aligned) */
);
/**
 * Forces an update of all the counters that a previously started trace
 * operation has registered.
 *
 * @param TraceId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcQueryTrace(
    HSATraceId TraceId      /* IN */
);
/**
 * Stops a tracing operation on a previously established set of performance
 * counters.
 *
 * @param TraceId [in]
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(
    HSATraceId TraceId      /* IN */
);
#ifdef __cplusplus
} //extern "C"
#endif
#endif //_HSAKMT_H_
@@ -0,0 +1,851 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _HSAKMTTYPES_H_
#define _HSAKMTTYPES_H_
// The definitions and the thunk API are version specific; the version
// numbers are defined here.
#define HSAKMT_VERSION_MAJOR    0
#define HSAKMT_VERSION_MINOR    99
#ifdef __cplusplus
extern "C" {
#endif
//
// Platform-specific calling convention (HSAKMTAPI) and fixed-width integer
// types used throughout the thunk interface.
//
#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)

    // The thunk entry points use __stdcall whenever _WIN32 is defined.
    #if defined(_WIN32)
        #define HSAKMTAPI  __stdcall
    #else
        #define HSAKMTAPI
    #endif

    typedef unsigned char      HSAuint8;
    // Explicitly signed: the signedness of plain "char" is implementation
    // defined, so "char" alone does not guarantee a signed 8-bit type.
    typedef signed char        HSAint8;
    typedef unsigned short     HSAuint16;
    typedef signed short       HSAint16;
    typedef unsigned __int32   HSAuint32;
    typedef signed __int32     HSAint32;
    typedef signed __int64     HSAint64;
    typedef unsigned __int64   HSAuint64;

#elif defined(__linux__)

    #include <stdbool.h>
    #include <stdint.h>

    #define HSAKMTAPI

    typedef uint8_t            HSAuint8;
    typedef int8_t             HSAint8;
    typedef uint16_t           HSAuint16;
    typedef int16_t            HSAint16;
    typedef uint32_t           HSAuint32;
    typedef int32_t            HSAint32;
    typedef int64_t            HSAint64;
    typedef uint64_t           HSAuint64;

#endif
// Generic handle type, and the 64-bit identifier used to refer to a queue.
typedef void       *HSA_HANDLE;
typedef HSAuint64   HSA_QUEUEID;
// This pragma forces structure alignment to 4 bytes, so that the compiler
// does not add extra padding when a 64-bit binary is generated.
#pragma pack(push, hsakmttypes_h, 4)
//
// HSA status codes returned by the KFD interfaces.
//
typedef enum _HSAKMT_STATUS
{
    // Operation successful
    HSAKMT_STATUS_SUCCESS                      = 0,
    // General error return if not otherwise specified
    HSAKMT_STATUS_ERROR                        = 1,
    // User mode component is not compatible with the kernel HSA driver
    HSAKMT_STATUS_DRIVER_MISMATCH              = 2,
    // KFD identifies input parameters as invalid
    HSAKMT_STATUS_INVALID_PARAMETER            = 3,
    // KFD identifies the handle parameter as invalid
    HSAKMT_STATUS_INVALID_HANDLE               = 4,
    // KFD identifies the node or unit parameter as invalid
    HSAKMT_STATUS_INVALID_NODE_UNIT            = 5,
    // No memory available (when allocating queues or memory)
    HSAKMT_STATUS_NO_MEMORY                    = 6,
    // A buffer needed to handle a request is too small
    HSAKMT_STATUS_BUFFER_TOO_SMALL             = 7,

    // KFD function is not implemented for this set of parameters
    HSAKMT_STATUS_NOT_IMPLEMENTED              = 10,
    // KFD function is not supported on this node
    HSAKMT_STATUS_NOT_SUPPORTED                = 11,
    // KFD function is not available currently on this node
    // (but may be at a later time)
    HSAKMT_STATUS_UNAVAILABLE                  = 12,

    // KFD driver path not opened
    HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20,
    // User-kernel mode communication failure
    HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR   = 21,
    // KFD driver path already opened
    HSAKMT_STATUS_KERNEL_ALREADY_OPENED        = 22,
    // ATS/PRI 1.1 (Address Translation Services) not available
    // (IOMMU driver not installed or not available)
    HSAKMT_STATUS_HSAMMU_UNAVAILABLE           = 23,

    // The wait operation failed
    HSAKMT_STATUS_WAIT_FAILURE                 = 30,
    // The wait operation timed out
    HSAKMT_STATUS_WAIT_TIMEOUT                 = 31,

    // Memory buffer already registered
    HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED    = 35,
    // Memory buffer not registered
    HSAKMT_STATUS_MEMORY_NOT_REGISTERED        = 36,
    // Memory parameter not aligned
    HSAKMT_STATUS_MEMORY_ALIGNMENT             = 37,
} HSAKMT_STATUS;
//
// HSA KFD interface version information.
// Calling software has to validate that it meets the minimum interface
// version as described in the API specification.
// All future structures will be extended in a backward compatible fashion.
//
typedef struct _HsaVersionInfo
{
    // Supported kernel interface major version
    HSAuint32 KernelInterfaceMajorVersion;
    // Supported kernel interface minor version
    HSAuint32 KernelInterfaceMinorVersion;
} HsaVersionInfo;
//
// HSA Topology Discovery Infrastructure structure definitions.
// The infrastructure implementation is based on the design specified in the
// Kernel HSA Driver ADD. The discoverable data is retrieved from ACPI
// structures in the platform infrastructure, as defined in the
// "Heterogeneous System Architecture Detail Topology" specification.
//
// The following structure is returned as output of a call to
// hsaKmtAcquireSystemProperties(). When the call is made within a process
// context, a "snapshot" of the topology information is taken within the KFD
// to avoid any changes during the enumeration process.
// The snapshot is released when hsaKmtReleaseSystemProperties() is called,
// or when the process exits or is terminated.
//
typedef struct _HsaSystemProperties
{
    // The number of "H-NUMA" memory nodes. Each node represents a
    // discoverable node of the system. All other enumeration is done on a
    // per-node basis.
    HSAuint32 NumNodes;

    // Identifies the HSA platform, reflects the OEMID in the CRAT
    HSAuint32 PlatformOem;

    // HSA platform ID, reflects the OEM TableID in the CRAT
    HSAuint32 PlatformId;

    // HSA platform revision, reflects the Platform Table Revision ID
    HSAuint32 PlatformRev;
} HsaSystemProperties;
// Node capability flags, readable either as one 32-bit Value or through the
// individual bit-fields in ui32.
typedef union
{
    HSAuint32 Value;
    struct
    {
        // The node may be removed by some system action (event will be sent)
        unsigned int HotPluggable        : 1;
        // This node has an ATS/PRI 1.1 compatible translation agent in the
        // system (e.g. IOMMUv2)
        unsigned int HSAMMUPresent       : 1;
        // This HSA node's GPU function is also used for OS primary graphics
        // render (= UI)
        unsigned int SharedWithGraphics  : 1;
        // This node's GPU requires the queue size to be a power of 2 value
        unsigned int QueueSizePowerOfTwo : 1;
        // This node's GPU requires the queue size to be less than 4GB
        unsigned int QueueSize32bit      : 1;
        // This node's GPU supports notification on Queue Idle
        unsigned int QueueIdleEvent      : 1;
        // This node's GPU has a limited VA range for the platform (typically
        // 40 bit). Affects shared VM use for 64-bit apps
        unsigned int VALimit             : 1;
        // Indicates if watchpoints are available on the node
        unsigned int WatchPointsSupported: 1;
        // Base-2 logarithm of the number of watchpoints available; to
        // determine the number use 2^value
        unsigned int WatchPointsTotalBits: 4;
        unsigned int Reserved            : 20;
    } ui32;
} HSA_CAPABILITY;
//
// HSA node properties. This structure is the output parameter of
// hsaKmtGetNodeProperties(). The application or runtime can use the
// information herein to size the topology management structures.
// Unless there is some very unusual setup, there is at most one "GPU" device
// (with a certain number of throughput compute units (= SIMDs)) associated
// with an H-NUMA node.
//

// Number of elements in HsaNodeProperties::MarketingName
#define HSA_PUBLIC_NAME_SIZE    128

typedef struct _HsaNodeProperties
{
    // # of latency (= CPU) cores present on this HSA node. This value is 0
    // for an HSA node with no such cores, e.g. a "discrete HSA GPU"
    HSAuint32       NumCPUCores;

    // # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a node.
    // This value is 0 if no FCompute cores are present (e.g. pure "CPU node")
    HSAuint32       NumFComputeCores;

    // # of discoverable memory bank affinity properties on this "H-NUMA" node
    HSAuint32       NumMemoryBanks;

    // # of discoverable cache affinity properties on this "H-NUMA" node
    HSAuint32       NumCaches;

    // # of discoverable IO link affinity properties of this node connecting
    // to other nodes
    HSAuint32       NumIOLinks;

    // Low value of the logical processor ID of the latency (= CPU) cores
    // available on this node
    HSAuint32       CComputeIdLo;

    // Low value of the logical processor ID of the throughput (= GPU) units
    // available on this node
    HSAuint32       FComputeIdLo;

    // See HSA_CAPABILITY
    HSA_CAPABILITY  Capability;

    // Max. number of launched waves per SIMD. If NumFComputeCores is 0, this
    // value is ignored
    HSAuint32       MaxWavesPerSIMD;

    // Size of Local Data Store in Kilobytes per SIMD Wavefront
    HSAuint32       LDSSizeInKB;

    // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts
    HSAuint32       GDSSizeInKB;

    // Number of SIMD cores per wavefront executed, typically 64; may be 32
    // or a different value for some HSA based architectures
    HSAuint32       WaveFrontSize;

    // Number of Shader Banks or Shader Engines, typical values are 1 or 2
    HSAuint32       NumShaderBanks;

    // Number of SIMD arrays per engine
    HSAuint32       NumArrays;

    // Number of Compute Units (CU) per SIMD array
    HSAuint32       NumCUPerArray;

    // Number of SIMD representing a Compute Unit (CU)
    HSAuint32       NumSIMDPerCU;

    // Number of temp. memory ("scratch") wave slots available to access,
    // may be 0 if HW has no restrictions
    HSAuint32       MaxSlotsScratchCU;

    // Identifier (rev) of the GPU uEngine or Firmware, may be 0
    HSAuint32       EngineId;

    // GPU vendor id; 0 on latency (= CPU)-only nodes
    HSAuint16       VendorId;

    // GPU device id; 0 on latency (= CPU)-only nodes
    HSAuint16       DeviceId;

    // GPU BDF (Bus/Device/function number) - identifies the device location
    // in the overall system
    HSAuint32       LocationId;

    // Local memory size
    HSAuint64       LocalMemSize;

    // Maximum engine clocks for the GPU (FCompute) and the CPU (CCompute)
    // function, including any boost capabilities
    HSAuint32       MaxEngineClockMhzFCompute;
    HSAuint32       MaxEngineClockMhzCCompute;

    // Public name of the "device" on the node (board or APU name).
    // Unicode string
    HSAuint16       MarketingName[HSA_PUBLIC_NAME_SIZE];
} HsaNodeProperties;
// Kind of heap a memory range belongs to (see HsaMemoryProperties::HeapType).
typedef enum _HSA_HEAPTYPE
{
    HSA_HEAPTYPE_SYSTEM               = 0,
    // CPU "visible" part of GPU device local memory (for discrete GPU)
    HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC  = 1,
    // CPU "invisible" part of GPU device local memory (for discrete GPU).
    // All HSA accessible memory is per definition "CPU visible";
    // "private memory" is relevant for graphics interop only.
    HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE = 2,
    // GPU internal memory (GDS)
    HSA_HEAPTYPE_GPU_GDS              = 3,
    // GPU internal memory (LDS)
    HSA_HEAPTYPE_GPU_LDS              = 4,
    // GPU special memory (scratch)
    HSA_HEAPTYPE_GPU_SCRATCH          = 5,

    // Count of the heap types listed above
    HSA_HEAPTYPE_NUMHEAPTYPES,
    HSA_HEAPTYPE_SIZE                 = 0xFFFFFFFF
} HSA_HEAPTYPE;
// Memory property flags, readable either as one 32-bit MemoryProperty value
// or through the individual bit-fields in ui32.
typedef union
{
    HSAuint32 MemoryProperty;
    struct
    {
        // The memory may be removed by some system action; memory should be
        // used for temporary data
        unsigned int HotPluggable : 1;
        // Memory content is preserved across a power-off cycle
        unsigned int NonVolatile  : 1;
        unsigned int Reserved     : 30;
    } ui32;
} HSA_MEMORYPROPERTY;
//
// Discoverable HSA memory properties.
// This structure is the output parameter of the
// hsaKmtGetNodeMemoryProperties() function.
//
typedef struct _HsaMemoryProperties
{
    // System or frame buffer, see HSA_HEAPTYPE
    HSA_HEAPTYPE        HeapType;

    // Physical memory size of the memory range in bytes, available either
    // as one 64-bit value or as its lower/higher 32-bit halves
    union
    {
        HSAuint64       SizeInBytes;
        struct
        {
            HSAuint32   SizeInBytesLow;     // lower 32 bit
            HSAuint32   SizeInBytesHigh;    // higher 32 bit
        } ui32;
    };

    // See HSA_MEMORYPROPERTY
    HSA_MEMORYPROPERTY  Flags;

    // Memory width - the number of parallel bits of the memory interface
    HSAuint32           Width;

    // Memory clock for the memory; this allows computing the available
    // bandwidth to the memory when needed
    HSAuint32           MemoryClockMax;

    // If set to a value != 0, indicates the virtual base address of the
    // memory in process virtual space
    HSAuint64           VirtualBaseAddress;
} HsaMemoryProperties;
//
// Discoverable cache properties (optional).
// The HsaCacheProperties structure is the output parameter of the
// hsaKmtGetNodeCacheProperties() function.
// Any of the parameters may be 0 (= not defined).
//

// Number of elements in the SiblingMap arrays
#define HSA_CPU_SIBLINGS        256

// NOTE(review): presumably a wildcard processor ID meaning "all processors"
// - confirm against the API specification
#define HSA_PROCESSORID_ALL     0xFFFFFFFF
// Cache type flags, readable either as one 32-bit Value or through the
// individual bit-fields in ui32.
typedef union
{
    HSAuint32 Value;
    struct
    {
        unsigned int Data        : 1;
        unsigned int Instruction : 1;
        unsigned int CPU         : 1;
        unsigned int HSACU       : 1;
        unsigned int Reserved    : 28;
    } ui32;
} HsaCacheType;
// Properties of one cache, as filled in by hsaKmtGetNodeCacheProperties().
// The struct tag is spelled "_HaCacheProperties" (without the 's'); it is
// kept as-is so that existing references to the tag keep compiling.
typedef struct _HaCacheProperties
{
    // Identifies the processor number
    HSAuint32       ProcessorIdLow;
    // Integer representing the level: 1, 2, 3, 4, etc.
    HSAuint32       CacheLevel;
    // Size of the cache
    HSAuint32       CacheSize;
    // Cache line size in bytes
    HSAuint32       CacheLineSize;
    // Cache lines per cache tag
    HSAuint32       CacheLinesPerTag;
    // Cache associativity
    HSAuint32       CacheAssociativity;
    // Cache latency in ns
    HSAuint32       CacheLatency;
    // See HsaCacheType
    HsaCacheType    CacheType;
    HSAuint32       SiblingMap[HSA_CPU_SIBLINGS];
} HsaCacheProperties;
//
// Discoverable CPU compute properties (optional).
// This structure is the output parameter of the
// hsaKmtGetCComputeProperties() function.
// Any of the parameters may be 0 (= not defined).
//
// NOTE(review): hsaKmtGetCComputeProperties() is not among the prototypes
// declared in hsakmt.h - confirm whether the function exists.
//
typedef struct _HsaCComputeProperties
{
    HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
} HsaCComputeProperties;
//
// Discoverable IoLink properties (optional).
// The HsaIoLinkProperties structure is the output parameter of the
// hsaKmtGetNodeIoLinkProperties() function.
// Any of the parameters may be 0 (= not defined).
//
typedef enum _HSA_IOLINKTYPE
{
    HSA_IOLINKTYPE_UNDEFINED      = 0,
    HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
    HSA_IOLINKTYPE_PCIEXPRESS     = 2,
    HSA_IOLINKTYPE_AMBA           = 3,
    HSA_IOLINKTYPE_MIPI           = 4,
    HSA_IOLINKTYPE_OTHER          = 5,

    // Count of the link types listed above
    HSA_IOLINKTYPE_NUMIOLINKTYPES,
    HSA_IOLINKTYPE_SIZE           = 0xFFFFFFFF
} HSA_IOLINKTYPE;
// IO link override flags, readable either as one 32-bit LinkProperty value
// or through the individual bit-fields in ui32.
typedef union
{
    HSAuint32 LinkProperty;
    struct
    {
        // Bus link properties are determined by this structure, not by the
        // HSA_IOLINKTYPE. The other flags are valid only if this bit is set
        // to one
        unsigned int Override       : 1;
        // The link doesn't support coherent transactions; memory accesses
        // across must not be set to "host cacheable"!
        unsigned int NonCoherent    : 1;
        // The link doesn't support 32bit-wide atomic transactions
        unsigned int NoAtomics32bit : 1;
        // The link doesn't support 64bit-wide atomic transactions
        unsigned int NoAtomics64bit : 1;
        unsigned int Reserved       : 28;
    } ui32;
} HSA_LINKPROPERTY;
// Properties of one IO link between two nodes, as filled in by
// hsaKmtGetNodeIoLinkProperties().
typedef struct _HsaIoLinkProperties
{
    // See HSA_IOLINKTYPE
    HSA_IOLINKTYPE      IoLinkType;
    // Bus interface version (optional)
    HSAuint32           VersionMajor;
    HSAuint32           VersionMinor;
    HSAuint32           NodeFrom;
    HSAuint32           NodeTo;
    // Weight factor (derived from CDIT)
    HSAuint32           Weight;
    // Minimum cost of time to transfer (rounded to ns)
    HSAuint32           MinimumLatency;
    // Maximum cost of time to transfer (rounded to ns)
    HSAuint32           MaximumLatency;
    // Minimum interface bandwidth in MB/s
    HSAuint32           MinimumBandwidth;
    // Maximum interface bandwidth in MB/s
    HSAuint32           MaximumBandwidth;
    // Recommended transfer size to reach maximum bandwidth, in bytes
    HSAuint32           RecTransferSize;
    // Override flags (may be active for specific platforms)
    HSA_LINKPROPERTY    Flags;
} HsaIoLinkProperties;
//
// Memory allocation definitions for the KFD HSA interface.
// The flags are readable either through the bit-fields in ui32 or as one
// 32-bit Value.
//
typedef struct _HsaMemFlags
{
    union
    {
        struct
        {
            // default = 0: pageable memory
            unsigned int NonPaged    : 1;

            // see HSA_CACHING_TYPE
            unsigned int CachePolicy : 2;

            // default = 0: Read/Write memory
            unsigned int ReadOnly    : 1;

            // see HSA_PAGE_SIZE
            unsigned int PageSize    : 2;

            // default = 0: GPU access only
            unsigned int HostAccess  : 1;

            // default = 0: if specific memory is not available on the node
            // (e.g. on discrete GPU local), allocation may fall back to
            // system memory node 0 memory (= always available).
            // Otherwise no allocation is possible.
            unsigned int NoSubstitute: 1;

            // default = 0: If set, the allocation will occur in the GDS heap.
            // HostAccess must be 0, all other flags (except NoSubstitute)
            // should be 0 when setting this entry to 1. GDS allocation may
            // fail due to limited resources. Application code is required to
            // work without any allocated GDS memory using regular memory.
            // Allocation fails on any node without GPU function.
            unsigned int GDSMemory   : 1;

            // default = 0: If set, the allocation will occur in the GPU
            // "scratch area". HostAccess must be 0, all other flags (except
            // NoSubstitute) should be 0 when setting this entry to 1.
            // Scratch allocation may fail due to limited resources.
            // Application code is required to work without any allocation.
            // Allocation fails on any node without GPU function.
            unsigned int Scratch     : 1;

            unsigned int Reserved    : 22;
        } ui32;
        HSAuint32 Value;
    };
} HsaMemFlags;
// Caching policy values (referenced by HsaMemFlags::CachePolicy).
typedef enum _HSA_CACHING_TYPE
{
    HSA_CACHING_CACHED        = 0,
    HSA_CACHING_NONCACHED     = 1,
    HSA_CACHING_WRITECOMBINED = 2,
    HSA_CACHING_RESERVED      = 3,

    // Count of the caching types listed above
    HSA_CACHING_NUM_CACHING,
    HSA_CACHING_SIZE          = 0xFFFFFFFF
} HSA_CACHING_TYPE;
// Page size values (referenced by HsaMemFlags::PageSize).
typedef enum _HSA_PAGE_SIZE
{
    HSA_PAGE_SIZE_4KB  = 0,
    // 64KB pages, not generally available in systems
    HSA_PAGE_SIZE_64KB = 1,
    HSA_PAGE_SIZE_2MB  = 2,
    // 1GB pages, not generally available in systems
    HSA_PAGE_SIZE_1GB  = 3,
} HSA_PAGE_SIZE;
// Device kinds; MAX_HSA_DEVICE equals the number of kinds listed.
typedef enum _HSA_DEVICE
{
    HSA_DEVICE_CPU = 0,
    HSA_DEVICE_GPU = 1,
    MAX_HSA_DEVICE = 2
} HSA_DEVICE;
// Queue priority levels, from MINIMUM (-3) through NORMAL (0) to MAXIMUM (3).
//
// NOTE(review): this enum combines negative enumerators with 0xFFFFFFFF, so
// a compiler may have to choose an underlying type wider than 32 bits to
// represent both - confirm sizeof(HSA_QUEUE_PRIORITY) on the target
// toolchains.
typedef enum _HSA_QUEUE_PRIORITY
{
    HSA_QUEUE_PRIORITY_MINIMUM      = -3,
    HSA_QUEUE_PRIORITY_LOW          = -2,
    HSA_QUEUE_PRIORITY_BELOW_NORMAL = -1,
    HSA_QUEUE_PRIORITY_NORMAL       = 0,
    HSA_QUEUE_PRIORITY_ABOVE_NORMAL = 1,
    HSA_QUEUE_PRIORITY_HIGH         = 2,
    HSA_QUEUE_PRIORITY_MAXIMUM      = 3,

    // Implicitly MAXIMUM + 1 (= 4)
    HSA_QUEUE_PRIORITY_NUM_PRIORITY,
    HSA_QUEUE_PRIORITY_SIZE         = 0xFFFFFFFF
} HSA_QUEUE_PRIORITY;
// Queue types that can be requested from hsaKmtCreateQueue().
typedef enum _HSA_QUEUE_TYPE
{
    // AMD PM4 compatible Compute Queue
    HSA_QUEUE_COMPUTE               = 1,
    // SDMA Queue, used for data transport and format conversion
    // (e.g. (de-)tiling, etc).
    HSA_QUEUE_SDMA                  = 2,
    // reserved, for HSA multimedia decode queue
    HSA_QUEUE_MULTIMEDIA_DECODE     = 3,
    // reserved, for HSA multimedia encode queue
    HSA_QUEUE_MULTIMEDIA_ENCODE     = 4,

    // The following values indicate a queue type permitted to reference OS
    // graphics resources through the interoperation API. See [5] "HSA
    // Graphics Interoperation specification" for more details on the use of
    // such resources.

    // AMD PM4 compatible Compute Queue
    HSA_QUEUE_COMPUTE_OS            = 11,
    // SDMA Queue, used for data transport and format conversion
    // (e.g. (de-)tiling, etc).
    HSA_QUEUE_SDMA_OS               = 12,
    // reserved, for HSA multimedia decode queue
    HSA_QUEUE_MULTIMEDIA_DECODE_OS  = 13,
    // reserved, for HSA multimedia encode queue
    HSA_QUEUE_MULTIMEDIA_ENCODE_OS  = 14,

    // HSA AQL packet compatible Compute Queue
    HSA_QUEUE_COMPUTE_AQL           = 21,
    // HSA AQL packet compatible DMA Queue
    HSA_QUEUE_DMA_AQL               = 22,

    // more types in the future

    // aligns to 32bit enum
    HSA_QUEUE_TYPE_SIZE             = 0xFFFFFFFF
} HSA_QUEUE_TYPE;
/**
 * Resources of a queue; this is the output parameter of hsaKmtCreateQueue().
 * Each anonymous union exposes the same location as a 32-bit pointer, a
 * 64-bit (AQL) pointer, or a raw 64-bit value.
 */
typedef struct _HsaQueueResource
{
    /** queue ID */
    HSA_QUEUEID     QueueId;

    /** Doorbell address to notify HW of a new dispatch */
    union
    {
        HSAuint32  *Queue_DoorBell;
        HSAuint64  *Queue_DoorBell_aql;
        HSAuint64   QueueDoorBell;
    };

    /** virtual address to notify HW of queue write ptr value */
    union
    {
        HSAuint32  *Queue_write_ptr;
        HSAuint64  *Queue_write_ptr_aql;
        HSAuint64   QueueWptrValue;
    };

    /** virtual address updated by HW to indicate current read location */
    union
    {
        HSAuint32  *Queue_read_ptr;
        HSAuint64  *Queue_read_ptr_aql;
        HSAuint64   QueueRptrValue;
    };
} HsaQueueResource;
// TEMPORARY structure definition - to be used only on the
// "Trinity + Southern Islands" platform.
// This is the output parameter of hsaKmtReportQueue().
typedef struct _HsaQueueReport
{
    // Required on SI to dispatch IB in primary ring
    HSAuint32   VMID;
    // Virtual address of UM mapped compute ring
    void       *QueueAddress;
    // Size of the UM mapped compute ring
    HSAuint64   QueueSize;
} HsaQueueReport;
// Wavefront operations accepted by hsaKmtDbgWavefrontControl().
typedef enum _HSA_DBG_WAVEOP
{
    // Halts a wavefront
    HSA_DBG_WAVEOP_HALT   = 1,
    // Resumes a wavefront
    HSA_DBG_WAVEOP_RESUME = 2,
    // Kills a wavefront
    HSA_DBG_WAVEOP_KILL   = 3,
    // Causes wavefront to enter debug mode
    HSA_DBG_WAVEOP_DEBUG  = 4,
    // Causes wavefront to take a trap
    HSA_DBG_WAVEOP_TRAP   = 5,

    // Same value as HSA_DBG_WAVEOP_TRAP, the highest operation
    HSA_DBG_NUM_WAVEOP    = 5,
    HSA_DBG_MAX_WAVEOP    = 0xFFFFFFFF
} HSA_DBG_WAVEOP;
// Wavefront addressing modes accepted by hsaKmtDbgWavefrontControl().
typedef enum _HSA_DBG_WAVEMODE
{
    // Send command to a single wave
    HSA_DBG_WAVEMODE_SINGLE               = 0,

    // Broadcast to all wavefronts of all processes is not supported for HSA
    // user mode

    // Send to waves within current process
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS    = 2,
    // Send to waves within current process on CU
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,

    // Same value as HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU, the highest mode
    HSA_DBG_NUM_WAVEMODE                  = 3,
    HSA_DBG_MAX_WAVEMODE                  = 0xFFFFFFFF
} HSA_DBG_WAVEMODE;
// Kinds of debug wave message.
typedef enum _HSA_DBG_WAVEMSG_TYPE
{
    HSA_DBG_WAVEMSG_AUTO  = 0,
    HSA_DBG_WAVEMSG_USER  = 1,
    HSA_DBG_WAVEMSG_ERROR = 2,

    HSA_DBG_NUM_WAVEMSG,               // implicit value: one past the last type (3)
    HSA_DBG_MAX_WAVEMSG   = 0xFFFFFFFF // 32-bit sentinel, not a message type
} HSA_DBG_WAVEMSG_TYPE;
// Memory access kinds a debug watch can be set to match.
typedef enum _HSA_DBG_WATCH_MODE
{
    HSA_DBG_WATCH_READ    = 0,          // read operations only
    HSA_DBG_WATCH_NONREAD = 1,          // write or atomic operations only
    HSA_DBG_WATCH_ATOMIC  = 2,          // atomic operations only
    HSA_DBG_WATCH_ALL     = 3,          // read, write or atomic operations

    HSA_DBG_WATCH_NUM,                  // implicit value: one past the last mode (4)
    HSA_DBG_WATCH_SIZE    = 0xFFFFFFFF  // 32-bit sentinel, not a watch mode
} HSA_DBG_WATCH_MODE;
//This structure is hardware specific and may change in the future
// "Gen2" form of the AMD debug wave message: one 32-bit value followed by a
// reserved 32-bit field. The encoding of Value is not described in this file.
typedef struct _HsaDbgWaveMsgAMDGen2
{
HSAuint32 Value; // message value (hardware specific)
HSAuint32 Reserved2; // reserved
} HsaDbgWaveMsgAMDGen2;
// Union over the hardware-generation specific debug wave message layouts.
// Only the Gen2 layout exists today; further generations would be added as
// additional union members.
typedef union _HsaDbgWaveMessageAMD
{
HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; // Gen2 message layout
//for future HsaDbgWaveMsgAMDGen3;
} HsaDbgWaveMessageAMD;
// A debug wave message: the hardware-specific message plus a pointer to
// host-accessible data associated with it.
typedef struct _HsaDbgWaveMessage
{
void* MemoryVA; // ptr to associated host-accessible data
HsaDbgWaveMessageAMD DbgWaveMsg; // hardware-specific message payload
} HsaDbgWaveMessage;
//
// HSA sync primitive, Event and HW Exception notification API definitions
// The API functions allow the runtime to define a so-called sync-primitive, a SW object
// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
// through a defined GPU interrupt. A syncvar is a process virtual memory location of
// a certain size that can be accessed by CPU and GPU shader code within the process to set
// and query the content within that memory. The definition of the content is determined by
// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
// The syncvar values may be commonly written through a PM4 WRITE_DATA packet in the
// user mode instruction stream.
// The OS scheduler event is typically associated and signaled by an interrupt issued by
// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
// surfaced by the KFD by this mechanism, too.
//
// these are the new definitions for events
// Kinds of event that can be created and signaled.
typedef enum _HSA_EVENTTYPE
{
    HSA_EVENTTYPE_SIGNAL            = 0,  // user-mode generated GPU signal
    HSA_EVENTTYPE_NODECHANGE        = 1,  // HSA node change (attach/detach)
    HSA_EVENTTYPE_DEVICESTATECHANGE = 2,  // HSA device state change (start/stop)
    HSA_EVENTTYPE_HW_EXCEPTION      = 3,  // GPU shader exception event
    HSA_EVENTTYPE_SYSTEM_EVENT      = 4,  // GPU SYSCALL with parameter info
    HSA_EVENTTYPE_DEBUG_EVENT       = 5,  // GPU signal for debugging
    HSA_EVENTTYPE_PROFILE_EVENT     = 6,  // GPU signal for profiling
    HSA_EVENTTYPE_QUEUE_EVENT       = 7,  // GPU signal queue idle state (EOP pm4)
    // ... new event types go here, before HSA_EVENTTYPE_MAXID

    HSA_EVENTTYPE_MAXID,                  // implicit value: one past the last type (8)
    HSA_EVENTTYPE_TYPE_SIZE         = 0xFFFFFFFF // 32-bit sentinel, not an event type
} HSA_EVENTTYPE;
// Identifier of an event; stored in HsaEvent.EventId.
typedef HSAuint32 HSA_EVENTID;
//
// Subdefinitions for various event types: Syncvar
//
// Describes a syncvar: the location of the user-mode data and its size.
// The inner union gives the location either as a pointer or as a 64-bit
// integer occupying the same storage.
typedef struct _HsaSyncVar
{
union
{
void* UserData; //pointer to user mode data
HSAuint64 UserDataPtrValue; //64bit compatibility of value
} SyncVar;
HSAuint64 SyncVarSize; // size of the syncvar (presumably in bytes - confirm)
} HsaSyncVar;
//
// Subdefinitions for various event types: NodeChange
//
// Direction of a node change reported with a node-change event
// (carried in HsaNodeChange.Flags).
typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
{
    HSA_EVENTTYPE_NODECHANGE_ADD    = 0,          // node added
    HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,          // node removed
    HSA_EVENTTYPE_NODECHANGE_SIZE   = 0xFFFFFFFF  // 32-bit sentinel, not a flag
} HSA_EVENTTYPE_NODECHANGE_FLAGS;
// Payload of a node-change event (the NodeChangeState member of
// HsaEventData.EventData).
typedef struct _HsaNodeChange
{
HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; // HSA node added/removed on the platform
} HsaNodeChange;
//
// Sub-definitions for various event types: DeviceStateChange
//
// New state reported with a device-state-change event
// (carried in HsaDeviceStateChange.Flags).
// Note the spelling: the enumerators say DEVICESTATUSCHANGE while the type
// itself says DEVICESTATECHANGE.
typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
{
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,          // device started (and available)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP  = 1,          // device stopped (i.e. unavailable)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE  = 0xFFFFFFFF  // 32-bit sentinel, not a flag
} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
// Payload of a device-state-change event (the DeviceState member of
// HsaEventData.EventData): which device on which node started or stopped.
typedef struct _HsaDeviceStateChange
{
HSAuint32 NodeId; // F-NUMA node that contains the device
HSA_DEVICE Device; // device type: GPU or CPU
HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; // event flags
} HsaDeviceStateChange;
// Data carried by an event: the event type, a type-dependent payload, and
// bookkeeping fields reserved for the KFD and the thunk.
// Which EventData member is meaningful depends on EventType, as noted on each
// member below.
typedef struct _HsaEventData
{
HSA_EVENTTYPE EventType; //event type
union
{
// return data associated with HSA_EVENTTYPE_SIGNAL and other events
HsaSyncVar SyncVar;
// data associated with HSA_EVENTTYPE_NODECHANGE
HsaNodeChange NodeChangeState;
// data associated with HSA_EVENTTYPE_DEVICESTATECHANGE
HsaDeviceStateChange DeviceState;
} EventData;
// the following data entries are internal to the KFD & thunk itself.
HSAuint64 HWData1; // internal thunk store for Event data (OsEventHandle)
HSAuint64 HWData2; // internal thunk store for Event data (HWAddress)
HSAuint32 HWData3; // internal thunk store for Event data (HWData)
} HsaEventData;
// Describes an event to be allocated: its type, the node that is the event
// source, and the syncvar to associate with it.
typedef struct _HsaEventDescriptor
{
HSA_EVENTTYPE EventType; // event type to allocate
HSAuint32 NodeId; // H-NUMA node containing GPU device that is event source
HsaSyncVar SyncVar; // user mode syncvar data; SyncVar.SyncVar.UserDataPtrValue may be NULL
} HsaEventDescriptor;
// An event object: its identifier together with its type, payload and
// internal bookkeeping data.
typedef struct _HsaEvent
{
HSA_EVENTID EventId; // identifier of this event
HsaEventData EventData; // event type, payload and KFD/thunk-internal fields
} HsaEvent;
// Special timeout values for waiting on events.
// Note the spelling: the enum tag is _HsaEventTimeout while the typedef name
// is HsaEventTimeOut.
typedef enum _HsaEventTimeout
{
    HSA_EVENTTIMEOUT_IMMEDIATE = 0,          // do not wait
    HSA_EVENTTIMEOUT_INFINITE  = 0xFFFFFFFF  // wait without a time limit
} HsaEventTimeOut;
// A set of clock counter readings plus the system clock frequency.
// NOTE(review): the tick units of the GPU and CPU counters are not stated in
// this file; only the system clock has an accompanying frequency - confirm
// against the producer before converting values to time.
typedef struct _HsaClockCounters
{
HSAuint64 GPUClockCounter; // GPU clock counter value
HSAuint64 CPUClockCounter; // CPU clock counter value
HSAuint64 SystemClockCounter; // system clock counter value
HSAuint64 SystemClockFrequencyHz; // system clock frequency, in Hz
} HsaClockCounters;
// HSA_UUID / HSA_DEFINE_UUID: identifier type and the macro used to define
// named identifiers of that type.
// If DEFINE_GUID is not already defined, a local HSA_UUID struct is declared
// and HSA_DEFINE_UUID(name, ...) expands to a `static const HSA_UUID name`
// definition, so every translation unit including this header gets its own
// copy of each identifier. Otherwise both names map onto the existing
// GUID / DEFINE_GUID.
#ifndef DEFINE_GUID
typedef struct _HSA_UUID
{
HSAuint32 Data1; // first group of the identifier
HSAuint16 Data2; // second group
HSAuint16 Data3; // third group
HSAuint8 Data4[8]; // remaining eight bytes
} HSA_UUID;
#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
#else
#define HSA_UUID GUID
#define HSA_DEFINE_UUID DEFINE_GUID
#endif
// Identifiers of the profilable blocks. Each comment gives the identifier in
// registry format; the macro arguments spell out the same value.
// GUID that identifies the GPU Shader Sequencer (SQ) block
// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);
// GUID that identifies the GPU Memory Controller (MC) block
// {13900B57-4956-4D98-81D0-68521937F59C}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);
// GUID that identifies the IOMMUv2 HW device
// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);
// GUID that identifies the KFD
// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);
// Kinds of profiling counter, distinguished by who may access them
// (privileged/KFD only vs. user queue) and how values are delivered
// (immediate vs. streaming).
typedef enum _HSA_PROFILE_TYPE
{
    // immediate access counter (KFD access only)
    HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0,
    // streaming counter, HW continuously writes to memory on updates
    // (KFD access only)
    HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1,
    // user-queue accessible counter
    HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE    = 2,
    // user-queue accessible counter
    HSA_PROFILE_TYPE_NONPRIV_STREAMING    = 3,
    // ... new profile types go here, before HSA_PROFILE_TYPE_NUM

    HSA_PROFILE_TYPE_NUM,                          // implicit value: one past the last type (4)
    HSA_PROFILE_TYPE_SIZE                 = 0xFFFFFFFF // in order to align to 32-bit value
} HSA_PROFILE_TYPE;
// Property flags of a profiling counter.
// The anonymous union exposes the flags either as individual bit-fields
// (ui32) or as one 32-bit word (Value). The bit-field widths add up to 32
// (1 + 1 + 1 + 1 + 28).
// NOTE(review): which bit of Value each field occupies depends on the
// compiler's bit-field allocation order - confirm before testing Value
// against literal masks.
typedef struct _HsaCounterFlags
{
union
{
struct
{
unsigned int Global : 1; // counter is global
// (not tied to VMID/WAVE/CU, ...)
unsigned int Resettable : 1; // counter can be reset by SW
// (always to 0?)
unsigned int ReadOnly : 1; // counter is read-only
// (but may be reset, if indicated)
unsigned int Stream : 1; // counter has streaming capability
// (after trigger, updates buffer)
unsigned int Reserved : 28; // remaining bits, reserved
} ui32;
HSAuint32 Value; // all flags viewed as a single 32-bit word
};
} HsaCounterFlags;
// Description of one profiling counter: its type, where it lives, how many
// of its bits are relevant, and its property flags.
typedef struct _HsaCounter
{
HSA_PROFILE_TYPE Type; // specifies the counter type
HSAuint64 CounterId; // indicates counter register offset
HSAuint32 CounterSizeInBits; // indicates relevant counter bits
HSAuint64 CounterMask; // bitmask for counter value (if applicable)
HsaCounterFlags Flags; // Property flags (see HsaCounterFlags)
HSAuint32 BlockIndex; // identifies block the counter belongs to,
// value may be 0 to NumBlocks
} HsaCounter;
// Properties of one profilable block and the counters it provides.
// Counters is declared with one element but marks the start of an array of
// NumCounters elements, so the structure is variable-length and sizeof()
// accounts for the first counter only.
typedef struct _HsaCounterBlockProperties
{
HSA_UUID BlockId; // specifies the block location
HSAuint32 NumCounters; // How many counters are available?
// (sizes Counters[] array below)
HSAuint32 NumConcurrent; // How many counter slots are available
// in block?
HsaCounter Counters[1]; // Start of counter array
// (NumCounters elements total)
} HsaCounterBlockProperties;
// Top-level description of the profilable blocks.
// Blocks is declared with one element but marks the start of NumBlocks
// entries.
// NOTE(review): each HsaCounterBlockProperties is itself variable-length
// (its Counters array holds NumCounters elements), so entries after the first
// presumably cannot be reached with plain Blocks[i] indexing - confirm the
// layout used by the producer.
typedef struct _HsaCounterProperties
{
HSAuint32 NumBlocks; // How many profilable blocks are available?
// (sizes Blocks[] array below)
HSAuint32 NumConcurrent; // How many block slots can be queried
// concurrently by HW?
HsaCounterBlockProperties Blocks[1]; // Start of block array
// (NumBlocks elements total)
} HsaCounterProperties;
// Identifier of a trace; stored in HsaPmcTraceRoot.TraceId.
typedef HSAuint64 HSATraceId;
// Root information of a performance-counter (PMC) trace: the minimum trace
// buffer size, the number of passes, and the trace identifier.
typedef struct _HsaPmcTraceRoot
{
HSAuint64 TraceBufferMinSizeBytes;// minimum trace buffer size in bytes (page aligned)
HSAuint32 NumberOfPasses; // number of passes (semantics not described in this file)
HSATraceId TraceId; // identifier of the trace
} HsaPmcTraceRoot;
#pragma pack(pop, hsakmttypes_h)
#ifdef __cplusplus
} //extern "C"
#endif
#endif //_HSAKMTTYPES_H_