Add definitions of HSA functions and types
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
This commit is contained in:
@@ -0,0 +1,565 @@
|
||||
/*
|
||||
* Copyright © 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including
|
||||
* the next paragraph) shall be included in all copies or substantial
|
||||
* portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _HSAKMT_H_
|
||||
#define _HSAKMT_H_
|
||||
|
||||
#include "hsakmttypes.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
"Opens" the HSA kernel driver for user-kernel mode communication.
|
||||
|
||||
On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
|
||||
is responsible for user-kernel communication, this handle is used internally by
|
||||
the thunk library to send device I/O control to the HSA kernel driver.
|
||||
No other thunk library function may be called unless the user-kernel communication
|
||||
channel is opened first.
|
||||
|
||||
On Linux this call opens the "/dev/kfd" device file to establish a communication
|
||||
path to the kernel.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtOpenKFD( void );
|
||||
|
||||
/**
|
||||
"Closes" the user-kernel communication path.
|
||||
|
||||
On Windows, the handle obtained by the hsaKmtOpenKFD() function is closed;
|
||||
no other communication with the kernel driver is possible after the successful
|
||||
execution of the saKmdCloseKFD() function. Depending on the failure reason,
|
||||
the user-kernel communication path may or may not be still active.
|
||||
|
||||
On Linux the function closes the "dev/kfd" device file.
|
||||
No further communication to the kernel driver is allowed until hsaKmtOpenKFD()
|
||||
function is called again.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCloseKFD( void );
|
||||
|
||||
|
||||
/**
|
||||
Returns the user-kernel interface version supported by KFD.
|
||||
Higher major numbers usually add new features to KFD and may break user-kernel
|
||||
compatibility; higher minor numbers define additional functionality associated
|
||||
within a major number.
|
||||
The calling software should validate that it meets the minimum interface version
|
||||
as described in the API specification.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetVersion(
|
||||
HsaVersionInfo* VersionInfo //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
The function takes a "snapshot" of the topology information within the KFD
|
||||
to avoid any changes during the enumeration process.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAcquireSystemProperties(
|
||||
HsaSystemProperties* SystemProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties()
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtReleaseSystemProperties( void ) ;
|
||||
|
||||
/**
|
||||
Retrieves the discoverable sub-properties for a given HSA
|
||||
node. The parameters returned allow the application or runtime to size the
|
||||
management structures necessary to store the information.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeProperties(
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaNodeProperties* NodeProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the memory properties of a specific HSA node.
|
||||
the memory pointer passed as MemoryProperties is sized as
|
||||
NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
|
||||
hsaKmtGetNodeProperties() call.
|
||||
|
||||
Some of the data returned is optional. Not all implementations may return all
|
||||
parameters in the hsaMemoryProperties.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeMemoryProperties(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumBanks, //IN
|
||||
HsaMemoryProperties* MemoryProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the cache properties of a specific HSA node and processor ID.
|
||||
ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
|
||||
via the hsaKmtGetNodeProperties() call.
|
||||
The memory pointer passed as CacheProperties is sized as
|
||||
NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
|
||||
hsaKmtGetNodeProperties() call.
|
||||
|
||||
The data returned is optional. Not all implementations may return all
|
||||
parameters in the CacheProperties.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeCacheProperties(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 ProcessorId, //IN
|
||||
HSAuint32 NumCaches, //IN
|
||||
HsaCacheProperties* CacheProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the HSA IO affinity properties of a specific HSA node.
|
||||
the memory pointer passed as Properties is sized as
|
||||
NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
|
||||
hsaKmtGetNodeProperties() call.
|
||||
|
||||
The data returned is optional. Not all implementations may return all
|
||||
parameters in the IoLinkProperties.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeIoLinkProperties(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumIoLinks, //IN
|
||||
HsaIoLinkProperties* IoLinkProperties //OUT
|
||||
);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Creates an operating system event associated with a HSA event ID
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCreateEvent(
|
||||
HsaEventDescriptor* EventDesc, //IN
|
||||
bool ManualReset, //IN
|
||||
bool IsSignaled, //IN
|
||||
HsaEvent** Event //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Destroys an operating system event associated with a HSA event ID
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDestroyEvent(
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Sets the specified event object to the signaled state
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetEvent(
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Sets the specified event object to the non-signaled state
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtResetEvent(
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Queries the state of the specified event object
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtQueryEventState(
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Checks the current state of the event object. If the object's state is
|
||||
nonsignaled, the calling thread enters the wait state.
|
||||
|
||||
The function returns when one of the following occurs:
|
||||
- The specified event object is in the signaled state.
|
||||
- The time-out interval elapses.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtWaitOnEvent(
|
||||
HsaEvent* Event, //IN
|
||||
HSAuint32 Milliseconds //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Checks the current state of multiple event objects.
|
||||
|
||||
The function returns when one of the following occurs:
|
||||
- Either any one or all of the specified objects are in the signaled state
|
||||
- if "WaitOnAll" is "true" the function returns when the state of all
|
||||
objects in array is signaled
|
||||
- if "WaitOnAll" is "false" the function returns when the state of any
|
||||
one of the objects is set to signaled
|
||||
- The time-out interval elapses.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtWaitOnMultipleEvents(
|
||||
HsaEvent* Events[], //IN
|
||||
HSAuint32 NumEvents, //IN
|
||||
bool WaitOnAll, //IN
|
||||
HSAuint32 Milliseconds //IN
|
||||
);
|
||||
|
||||
/**
|
||||
new TEMPORARY function definition - to be used only on "Triniti + Southern Islands" platform
|
||||
If used on other platforms the function will return HSAKMT_STATUS_ERROR
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtReportQueue(
|
||||
HSA_QUEUEID QueueId, //IN
|
||||
HsaQueueReport* QueueReport //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Creates a GPU queue with user-mode access rights
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCreateQueue(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSA_QUEUE_TYPE Type, //IN
|
||||
HSAuint32 QueuePercentage, //IN
|
||||
HSA_QUEUE_PRIORITY Priority, //IN
|
||||
void* QueueAddress, //IN
|
||||
HSAuint64 QueueSizeInBytes, //IN
|
||||
HsaEvent* Event, //IN
|
||||
HsaQueueResource* QueueResource //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Updates a queue
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUpdateQueue(
|
||||
HSA_QUEUEID QueueId, //IN
|
||||
HSAuint32 QueuePercentage,//IN
|
||||
HSA_QUEUE_PRIORITY Priority, //IN
|
||||
void* QueueAddress, //IN
|
||||
HSAuint64 QueueSize, //IN
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Destroys a queue
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDestroyQueue(
|
||||
HSA_QUEUEID QueueId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetMemoryPolicy(
|
||||
HSAuint32 Node, //IN
|
||||
HSAuint32 DefaultPolicy, //IN
|
||||
HSAuint32 AlternatePolicy, //IN
|
||||
void* MemoryAddressAlternate, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes //IN (page-aligned)
|
||||
);
|
||||
/**
|
||||
Allocates a memory buffer that may be accessed by the GPU
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAllocMemory(
|
||||
HSAuint32 PreferredNode, //IN
|
||||
HSAuint64 SizeInBytes, //IN (multiple of page size)
|
||||
HsaMemFlags MemFlags, //IN
|
||||
void** MemoryAddress //OUT (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Frees a memory buffer
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtFreeMemory(
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 SizeInBytes //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Registers with KFD a memory buffer that may be accessed by the GPU
|
||||
This function will never be required for Linux
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterMemory(
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes //IN (page-aligned)
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
Unregisters with KFD a memory buffer
|
||||
This function will never be required for Linux
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDeregisterMemory(
|
||||
void* MemoryAddress //IN
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
Ensures that the memory is resident and can be accessed by GPU
|
||||
Not implemented yet
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtMapMemoryToGPU(
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes, //IN (page-aligned)
|
||||
HSAuint64* AlternateVAGPU //OUT (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Releases the residency of the memory
|
||||
Not implemented yet
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUnmapMemoryToGPU(
|
||||
void* MemoryAddress //IN (page-aligned)
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
Notifies the kernel driver that a process wants to use GPU debugging facilities
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgRegister(
|
||||
HSAuint32 NodeId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Detaches the debugger process from the HW debug established by hsaKmtDbgRegister() API
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgUnregister(
|
||||
HSAuint32 NodeId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Controls a wavefront
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgWavefrontControl(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSA_DBG_WAVEOP Operand, //IN
|
||||
HSA_DBG_WAVEMODE Mode, //IN
|
||||
HSAuint32 TrapId, //IN
|
||||
HsaDbgWaveMessage* DbgWaveMsgRing //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Sets watch points on memory address ranges to generate exception events when the
|
||||
watched addresses are accessed
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgAddressWatch(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumWatchPoints, //IN
|
||||
HSA_DBG_WATCH_MODE WatchMode[], //IN
|
||||
void* WatchAddress[], //IN
|
||||
HSAuint64 WatchMask[], //IN, optional
|
||||
HsaEvent* WatchEvent[] //IN, optional
|
||||
);
|
||||
|
||||
/**
|
||||
Gets GPU and CPU clock counters for particular Node
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetClockCounters(
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaClockCounters* Counters //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves information on the available HSA counters
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcGetCounterProperties(
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaCounterProperties** CounterProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Registers a set of (HW) counters to be used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcRegisterTrace(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumberOfCounters, //IN
|
||||
HsaCounter* Counters, //IN
|
||||
HsaPmcTraceRoot* TraceRoot //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Unregisters a set of (HW) counters used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcUnregisterTrace(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Allows a user mode process to get exclusive access to the defined set of (HW) counters
|
||||
used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcAcquireTraceAccess(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Allows a user mode process to release exclusive access to the defined set of (HW) counters
|
||||
used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcReleaseTraceAccess(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Starts tracing operation on a previously established set of performance counters
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcStartTrace(
|
||||
HSATraceId TraceId, //IN
|
||||
void* TraceBuffer, //IN (page aligned)
|
||||
HSAuint64 TraceBufferSizeBytes //IN (page aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Forces an update of all the counters that a previously started trace operation has registered
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcQueryTrace(
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Stops tracing operation on a previously established set of performance counters
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcStopTrace(
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
#endif //_HSAKMT_H_
|
||||
|
||||
@@ -0,0 +1,851 @@
|
||||
/*
|
||||
* Copyright © 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including
|
||||
* the next paragraph) shall be included in all copies or substantial
|
||||
* portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _HSAKMTTYPES_H_
|
||||
#define _HSAKMTTYPES_H_
|
||||
|
||||
//the definitions and THUNK API are version specific - define the version numbers here
|
||||
#define HSAKMT_VERSION_MAJOR 0
|
||||
#define HSAKMT_VERSION_MINOR 99
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define HSAKMTAPI __stdcall
|
||||
#else
|
||||
#define HSAKMTAPI
|
||||
#endif
|
||||
|
||||
typedef unsigned char HSAuint8;
|
||||
typedef char HSAint8;
|
||||
typedef unsigned short HSAuint16;
|
||||
typedef signed short HSAint16;
|
||||
typedef unsigned __int32 HSAuint32;
|
||||
typedef signed __int64 HSAint64;
|
||||
typedef unsigned __int64 HSAuint64;
|
||||
|
||||
#elif defined(__linux__)
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define HSAKMTAPI
|
||||
|
||||
typedef uint8_t HSAuint8;
|
||||
typedef int8_t HSAint8;
|
||||
typedef uint16_t HSAuint16;
|
||||
typedef int16_t HSAint16;
|
||||
typedef uint32_t HSAuint32;
|
||||
typedef int64_t HSAint64;
|
||||
typedef uint64_t HSAuint64;
|
||||
|
||||
#endif
|
||||
|
||||
typedef void* HSA_HANDLE;
|
||||
typedef HSAuint64 HSA_QUEUEID;
|
||||
|
||||
// This is included in order to force the alignments to be 4 bytes so that
|
||||
// it avoids extra padding added by the compiler when a 64-bit binary is generated.
|
||||
#pragma pack(push, hsakmttypes_h, 4)
|
||||
|
||||
//
|
||||
// HSA STATUS codes returned by the KFD Interfaces
|
||||
//
|
||||
|
||||
typedef enum _HSAKMT_STATUS
|
||||
{
|
||||
HSAKMT_STATUS_SUCCESS = 0, // Operation successful
|
||||
HSAKMT_STATUS_ERROR = 1, // General error return if not otherwise specified
|
||||
HSAKMT_STATUS_DRIVER_MISMATCH = 2, // User mode component is not compatible with kernel HSA driver
|
||||
|
||||
HSAKMT_STATUS_INVALID_PARAMETER = 3, // KFD identifies input parameters invalid
|
||||
HSAKMT_STATUS_INVALID_HANDLE = 4, // KFD identifies handle parameter invalid
|
||||
HSAKMT_STATUS_INVALID_NODE_UNIT = 5, // KFD identifies node or unit parameter invalid
|
||||
|
||||
HSAKMT_STATUS_NO_MEMORY = 6, // No memory available (when allocating queues or memory)
|
||||
HSAKMT_STATUS_BUFFER_TOO_SMALL = 7, // A buffer needed to handle a request is too small
|
||||
|
||||
HSAKMT_STATUS_NOT_IMPLEMENTED = 10, // KFD function is not implemented for this set of paramters
|
||||
HSAKMT_STATUS_NOT_SUPPORTED = 11, // KFD function is not supported on this node
|
||||
HSAKMT_STATUS_UNAVAILABLE = 12, // KFD function is not available currently on this node (but
|
||||
// may be at a later time)
|
||||
|
||||
HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20, // KFD driver path not opened
|
||||
HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR = 21, // user-kernel mode communication failure
|
||||
HSAKMT_STATUS_KERNEL_ALREADY_OPENED = 22, // KFD driver path already opened
|
||||
HSAKMT_STATUS_HSAMMU_UNAVAILABLE = 23, // ATS/PRI 1.1 (Address Translation Services) not available
|
||||
// (IOMMU driver not installed or not-available)
|
||||
|
||||
HSAKMT_STATUS_WAIT_FAILURE = 30, // The wait operation failed
|
||||
HSAKMT_STATUS_WAIT_TIMEOUT = 31, // The wait operation timed out
|
||||
|
||||
HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED = 35, // Memory buffer already registered
|
||||
HSAKMT_STATUS_MEMORY_NOT_REGISTERED = 36, // Memory buffer not registered
|
||||
HSAKMT_STATUS_MEMORY_ALIGNMENT = 37, // Memory parameter not aligned
|
||||
|
||||
} HSAKMT_STATUS;
|
||||
|
||||
//
|
||||
// HSA KFD interface version information. Calling software has to validate that it meets
|
||||
// the minimum interface version as described in the API specification.
|
||||
// All future structures will be extended in a backward compatible fashion.
|
||||
//
|
||||
|
||||
typedef struct _HsaVersionInfo
|
||||
{
|
||||
HSAuint32 KernelInterfaceMajorVersion; // supported kernel interface major version
|
||||
HSAuint32 KernelInterfaceMinorVersion; // supported kernel interface minor version
|
||||
} HsaVersionInfo;
|
||||
|
||||
//
|
||||
// HSA Topology Discovery Infrastructure structure definitions.
|
||||
// The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD
|
||||
// The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined
|
||||
// in the "Heterogeneous System Architecture Detail Topology" specification.
|
||||
//
|
||||
// The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output.
|
||||
// When the call is made within a process context, a "snapshot" of the topology information
|
||||
// is taken within the KFD to avoid any changes during the enumeration process.
|
||||
// The Snapshot is released when hsaKmtReleaseSystemProperties() is called
|
||||
// or when the process exits or is terminated.
|
||||
//
|
||||
|
||||
typedef struct _HsaSystemProperties
|
||||
{
|
||||
HSAuint32 NumNodes; // the number of "H-NUMA" memory nodes.
|
||||
// each node represents a discoverable node of the system
|
||||
// All other enumeration is done on a per-node basis
|
||||
|
||||
HSAuint32 PlatformOem; // identifies HSA platform, reflects the OEMID in the CRAT
|
||||
HSAuint32 PlatformId; // HSA platform ID, reflects OEM TableID in the CRAT
|
||||
HSAuint32 PlatformRev; // HSA platform revision, reflects Platform Table Revision ID
|
||||
} HsaSystemProperties;
|
||||
|
||||
|
||||
typedef union
|
||||
{
|
||||
HSAuint32 Value;
|
||||
struct
|
||||
{
|
||||
unsigned int HotPluggable : 1; // the node may be removed by some system action
|
||||
// (event will be sent)
|
||||
unsigned int HSAMMUPresent : 1; // This node has an ATS/PRI 1.1 compatible
|
||||
// translation agent in the system (e.g. IOMMUv2)
|
||||
unsigned int SharedWithGraphics : 1; // this HSA nodes' GPU function is also used for OS primary
|
||||
// graphics render (= UI)
|
||||
unsigned int QueueSizePowerOfTwo : 1; // This node GPU requires the queue size to be a power of 2 value
|
||||
unsigned int QueueSize32bit : 1; // This node GPU requires the queue size to be less than 4GB
|
||||
unsigned int QueueIdleEvent : 1; // This node GPU supports notification on Queue Idle
|
||||
unsigned int VALimit : 1; // This node GPU has limited VA range for platform
|
||||
// (typical 40bit). Affects shared VM use for 64bit apps
|
||||
unsigned int WatchPointsSupported: 1; // Indicates if Watchpoints are available on the node.
|
||||
unsigned int WatchPointsTotalBits: 4; // ld(Watchpoints) available. To determine the number use 2^value
|
||||
|
||||
unsigned int Reserved : 20;
|
||||
} ui32;
|
||||
} HSA_CAPABILITY;
|
||||
|
||||
|
||||
//
|
||||
// HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties()
|
||||
// The application or runtime can use the information herein to size the topology management structures
|
||||
// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number
|
||||
// of throughput compute units (= SIMDs) associated with a H-NUMA node.
|
||||
//
|
||||
|
||||
#define HSA_PUBLIC_NAME_SIZE 128
|
||||
|
||||
typedef struct _HsaNodeProperties
|
||||
{
|
||||
HSAuint32 NumCPUCores; // # of latency (= CPU) cores present on this HSA node.
|
||||
// This value is 0 for a HSA node with no such cores,
|
||||
// e.g a "discrete HSA GPU"
|
||||
HSAuint32 NumFComputeCores; // # of HSA throughtput (= GPU) FCompute cores ("SIMD") present in a node.
|
||||
// This value is 0 if no FCompute cores are present (e.g. pure "CPU node").
|
||||
HSAuint32 NumMemoryBanks; // # of discoverable memory bank affinity properties on this "H-NUMA" node.
|
||||
HSAuint32 NumCaches; // # of discoverable cache affinity properties on this "H-NUMA" node.
|
||||
|
||||
HSAuint32 NumIOLinks; // # of discoverable IO link affinity properties of this node
|
||||
// connecting to other nodes.
|
||||
|
||||
HSAuint32 CComputeIdLo; // low value of the logical processor ID of the latency (= CPU)
|
||||
// cores available on this node
|
||||
HSAuint32 FComputeIdLo; // low value of the logical processor ID of the throughput (= GPU)
|
||||
// units available on this node
|
||||
|
||||
HSA_CAPABILITY Capability; // see above
|
||||
|
||||
HSAuint32 MaxWavesPerSIMD; // This identifies the max. number of launched waves per SIMD.
|
||||
// If NumFComputeCores is 0, this value is ignored.
|
||||
HSAuint32 LDSSizeInKB; // Size of Local Data Store in Kilobytes per SIMD Wavefront
|
||||
HSAuint32 GDSSizeInKB; // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts
|
||||
|
||||
HSAuint32 WaveFrontSize; // Number of SIMD cores per wavefront executed, typically 64,
|
||||
// may be 32 or a different value for some HSA based architectures
|
||||
|
||||
HSAuint32 NumShaderBanks; // Number of Shader Banks or Shader Engines, typical values are 1 or 2
|
||||
|
||||
|
||||
HSAuint32 NumArrays; // Number of SIMD arrays per engine
|
||||
HSAuint32 NumCUPerArray; // Number of Compute Units (CU) per SIMD array
|
||||
HSAuint32 NumSIMDPerCU; // Number of SIMD representing a Compute Unit (CU)
|
||||
|
||||
HSAuint32 MaxSlotsScratchCU; // Number of temp. memory ("scratch") wave slots available to access,
|
||||
// may be 0 if HW has no restrictions
|
||||
|
||||
HSAuint32 EngineId; // Identifier (rev) of teh GPU uEngine or Firmware, may be 0
|
||||
|
||||
HSAuint16 VendorId; // GPU vendor id; 0 on latency (= CPU)-only nodes
|
||||
HSAuint16 DeviceId; // GPU device id; 0 on latency (= CPU)-only nodes
|
||||
|
||||
HSAuint32 LocationId; // GPU BDF (Bus/Device/function number) - identifies the device
|
||||
// location in the overall system
|
||||
HSAuint64 LocalMemSize; // Local memory size
|
||||
HSAuint32 MaxEngineClockMhzFCompute; // maximum engine clocks for CPU and
|
||||
HSAuint32 MaxEngineClockMhzCCompute; // GPU function, including any boost caopabilities,
|
||||
|
||||
HSAuint16 MarketingName[HSA_PUBLIC_NAME_SIZE]; // Public name of the "device" on the node (board or APU name).
|
||||
// Unicode string
|
||||
} HsaNodeProperties;
|
||||
|
||||
|
||||
typedef enum _HSA_HEAPTYPE
|
||||
{
|
||||
HSA_HEAPTYPE_SYSTEM = 0,
|
||||
HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC = 1, // CPU "visible" part of GPU device local memory (for discrete GPU)
|
||||
HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU)
|
||||
// All HSA accessible memory is per definition "CPU visible"
|
||||
// "Private memory" is relevant for graphics interop only.
|
||||
HSA_HEAPTYPE_GPU_GDS = 3, // GPU internal memory (GDS)
|
||||
HSA_HEAPTYPE_GPU_LDS = 4, // GPU internal memory (LDS)
|
||||
HSA_HEAPTYPE_GPU_SCRATCH = 5, // GPU special memory (scratch)
|
||||
|
||||
HSA_HEAPTYPE_NUMHEAPTYPES,
|
||||
HSA_HEAPTYPE_SIZE = 0xFFFFFFFF
|
||||
} HSA_HEAPTYPE;
|
||||
|
||||
typedef union
|
||||
{
|
||||
HSAuint32 MemoryProperty;
|
||||
struct
|
||||
{
|
||||
unsigned int HotPluggable : 1; // the memory may be removed by some system action,
|
||||
// memory should be used for temporary data
|
||||
unsigned int NonVolatile : 1; // memory content is preserved across a power-off cycle.
|
||||
unsigned int Reserved :30;
|
||||
} ui32;
|
||||
} HSA_MEMORYPROPERTY;
|
||||
|
||||
|
||||
//
|
||||
// Discoverable HSA Memory properties.
|
||||
// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
|
||||
//
|
||||
|
||||
typedef struct _HsaMemoryProperties
|
||||
{
|
||||
HSA_HEAPTYPE HeapType; // system or frame buffer,
|
||||
union
|
||||
{
|
||||
HSAuint64 SizeInBytes; // physical memory size of the memory range in bytes
|
||||
struct
|
||||
{
|
||||
HSAuint32 SizeInBytesLow; // physical memory size of the memory range in bytes (lower 32bit)
|
||||
HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit)
|
||||
} ui32;
|
||||
};
|
||||
HSA_MEMORYPROPERTY Flags; // See definitions above
|
||||
|
||||
HSAuint32 Width; // memory width - the number of parallel bits of the memory interface
|
||||
HSAuint32 MemoryClockMax; // memory clock for the memory, this allows computing the available bandwidth
|
||||
// to the memory when needed
|
||||
HSAuint64 VirtualBaseAddress; // if set to value != 0, indicates the virtual base address of the memory
|
||||
// in process virtual space
|
||||
} HsaMemoryProperties;
|
||||
|
||||
//
|
||||
// Discoverable Cache Properties. (optional).
|
||||
// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
|
||||
// Any of the parameters may be 0 (= not defined)
|
||||
//
|
||||
|
||||
#define HSA_CPU_SIBLINGS 256
|
||||
#define HSA_PROCESSORID_ALL 0xFFFFFFFF
|
||||
|
||||
typedef union
|
||||
{
|
||||
HSAuint32 Value;
|
||||
struct
|
||||
{
|
||||
unsigned int Data : 1;
|
||||
unsigned int Instruction : 1;
|
||||
unsigned int CPU : 1;
|
||||
unsigned int HSACU : 1;
|
||||
unsigned int Reserved :28;
|
||||
} ui32;
|
||||
} HsaCacheType;
|
||||
|
||||
typedef struct _HaCacheProperties
|
||||
{
|
||||
HSAuint32 ProcessorIdLow; // Identifies the processor number
|
||||
|
||||
HSAuint32 CacheLevel; // Integer representing level: 1, 2, 3, 4, etc
|
||||
HSAuint32 CacheSize; // Size of the cache
|
||||
HSAuint32 CacheLineSize; // Cache line size in bytes
|
||||
HSAuint32 CacheLinesPerTag; // Cache lines per Cache Tag
|
||||
HSAuint32 CacheAssociativity; // Cache Associativity
|
||||
HSAuint32 CacheLatency; // Cache latency in ns
|
||||
HsaCacheType CacheType;
|
||||
HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
|
||||
} HsaCacheProperties;
|
||||
|
||||
|
||||
//
|
||||
// Discoverable CPU Compute Properties. (optional).
|
||||
// The structure is the output parameter of the hsaKmtGetCComputeProperties() function
|
||||
// Any of the parameters may be 0 (= not defined)
|
||||
//
|
||||
|
||||
typedef struct _HsaCComputeProperties
|
||||
{
|
||||
HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
|
||||
} HsaCComputeProperties;
|
||||
|
||||
//
|
||||
// Discoverable IoLink Properties (optional).
|
||||
// The structure is the output parameter of the hsaKmtGetIoLinkProperties() function.
|
||||
// Any of the parameters may be 0 (= not defined)
|
||||
//
|
||||
|
||||
typedef enum _HSA_IOLINKTYPE {
|
||||
HSA_IOLINKTYPE_UNDEFINED = 0,
|
||||
HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
|
||||
HSA_IOLINKTYPE_PCIEXPRESS = 2,
|
||||
HSA_IOLINKTYPE_AMBA = 3,
|
||||
HSA_IOLINKTYPE_MIPI = 4,
|
||||
HSA_IOLINKTYPE_OTHER = 5,
|
||||
HSA_IOLINKTYPE_NUMIOLINKTYPES,
|
||||
HSA_IOLINKTYPE_SIZE = 0xFFFFFFFF
|
||||
} HSA_IOLINKTYPE;
|
||||
|
||||
typedef union
|
||||
{
|
||||
HSAuint32 LinkProperty;
|
||||
struct
|
||||
{
|
||||
unsigned int Override : 1; // bus link properties are determined by this structure
|
||||
// not by the HSA_IOLINKTYPE. The other flags are valid
|
||||
// only if this bit is set to one
|
||||
unsigned int NonCoherent : 1; // The link doesn't support coherent transactions
|
||||
// memory accesses across must not be set to "host cacheable"!
|
||||
unsigned int NoAtomics32bit : 1; // The link doesn't support 32bit-wide atomic transactions
|
||||
unsigned int NoAtomics64bit : 1; // The link doesn't support 64bit-wide atomic transactions
|
||||
unsigned int Reserved :28;
|
||||
} ui32;
|
||||
} HSA_LINKPROPERTY;
|
||||
|
||||
|
||||
typedef struct _HsaIoLinkProperties
|
||||
{
|
||||
HSA_IOLINKTYPE IoLinkType; // see above
|
||||
HSAuint32 VersionMajor; // Bus interface version (optional)
|
||||
HSAuint32 VersionMinor; // Bus interface version (optional)
|
||||
|
||||
HSAuint32 NodeFrom; //
|
||||
HSAuint32 NodeTo; //
|
||||
|
||||
HSAuint32 Weight; // weight factor (derived from CDIT)
|
||||
|
||||
HSAuint32 MinimumLatency; // minimum cost of time to transfer (rounded to ns)
|
||||
HSAuint32 MaximumLatency; // maximum cost of time to transfer (rounded to ns)
|
||||
HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s
|
||||
HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s
|
||||
HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes
|
||||
HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms)
|
||||
} HsaIoLinkProperties;
|
||||
|
||||
//
|
||||
// Memory allocation definitions for the KFD HSA interface
|
||||
//
|
||||
|
||||
typedef struct _HsaMemFlags
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
unsigned int NonPaged : 1; // default = 0: pageable memory
|
||||
unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE
|
||||
unsigned int ReadOnly : 1; // default = 0: Read/Write memory
|
||||
unsigned int PageSize : 2; // see HSA_PAGE_SIZE
|
||||
unsigned int HostAccess : 1; // default = 0: GPU access only
|
||||
unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on
|
||||
// discrete GPU local), allocation may fall back to system memory node 0
|
||||
// memory (= always available). Otherwise no allocation is possible.
|
||||
unsigned int GDSMemory : 1; // default = 0: If set, the allocation will occur in GDS heap.
|
||||
// HostAccess must be 0, all other flags (except NoSubstitute) should
|
||||
// be 0 when setting this entry to 1. GDS allocation may fail due to
|
||||
// limited resources. Application code is required to work without
|
||||
// any allocated GDS memory using regular memory.
|
||||
// Allocation fails on any node without GPU function.
|
||||
unsigned int Scratch : 1; // default = 0: If set, the allocation will occur in GPU "scratch area".
|
||||
// HostAccess must be 0, all other flags (except NoSubstitute) should be 0
|
||||
// when setting this entry to 1. Scratch allocation may fail due to limited
|
||||
// resources. Application code is required to work without any allocation.
|
||||
// Allocation fails on any node without GPU function.
|
||||
unsigned int Reserved : 22;
|
||||
} ui32;
|
||||
HSAuint32 Value;
|
||||
};
|
||||
} HsaMemFlags;
|
||||
|
||||
typedef enum _HSA_CACHING_TYPE
|
||||
{
|
||||
HSA_CACHING_CACHED = 0,
|
||||
HSA_CACHING_NONCACHED = 1,
|
||||
HSA_CACHING_WRITECOMBINED = 2,
|
||||
HSA_CACHING_RESERVED = 3,
|
||||
HSA_CACHING_NUM_CACHING,
|
||||
HSA_CACHING_SIZE = 0xFFFFFFFF
|
||||
} HSA_CACHING_TYPE;
|
||||
|
||||
typedef enum _HSA_PAGE_SIZE
|
||||
{
|
||||
HSA_PAGE_SIZE_4KB = 0,
|
||||
HSA_PAGE_SIZE_64KB = 1, //64KB pages, not generally available in systems
|
||||
HSA_PAGE_SIZE_2MB = 2,
|
||||
HSA_PAGE_SIZE_1GB = 3, //1GB pages, not generally available in systems
|
||||
} HSA_PAGE_SIZE;
|
||||
|
||||
|
||||
typedef enum _HSA_DEVICE
|
||||
{
|
||||
HSA_DEVICE_CPU = 0,
|
||||
HSA_DEVICE_GPU = 1,
|
||||
MAX_HSA_DEVICE = 2
|
||||
} HSA_DEVICE;
|
||||
|
||||
|
||||
typedef enum _HSA_QUEUE_PRIORITY
|
||||
{
|
||||
HSA_QUEUE_PRIORITY_MINIMUM = -3,
|
||||
HSA_QUEUE_PRIORITY_LOW = -2,
|
||||
HSA_QUEUE_PRIORITY_BELOW_NORMAL = -1,
|
||||
HSA_QUEUE_PRIORITY_NORMAL = 0,
|
||||
HSA_QUEUE_PRIORITY_ABOVE_NORMAL = 1,
|
||||
HSA_QUEUE_PRIORITY_HIGH = 2,
|
||||
HSA_QUEUE_PRIORITY_MAXIMUM = 3,
|
||||
HSA_QUEUE_PRIORITY_NUM_PRIORITY,
|
||||
HSA_QUEUE_PRIORITY_SIZE = 0xFFFFFFFF
|
||||
} HSA_QUEUE_PRIORITY;
|
||||
|
||||
typedef enum _HSA_QUEUE_TYPE
|
||||
{
|
||||
HSA_QUEUE_COMPUTE = 1, // AMD PM4 compatible Compute Queue
|
||||
HSA_QUEUE_SDMA = 2, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
|
||||
HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue
|
||||
HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA multimedia encode queue
|
||||
|
||||
// the following values indicate a queue type permitted to reference OS graphics
|
||||
// resources through the interoperation API. See [5] "HSA Graphics Interoperation
|
||||
// specification" for more details on use of such resources.
|
||||
|
||||
HSA_QUEUE_COMPUTE_OS = 11, // AMD PM4 compatible Compute Queue
|
||||
HSA_QUEUE_SDMA_OS = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
|
||||
HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue
|
||||
HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14, // reserved, for HSA multimedia encode queue
|
||||
|
||||
HSA_QUEUE_COMPUTE_AQL = 21, // HSA AQL packet compatible Compute Queue
|
||||
HSA_QUEUE_DMA_AQL = 22, // HSA AQL packet compatible DMA Queue
|
||||
|
||||
// more types in the future
|
||||
|
||||
HSA_QUEUE_TYPE_SIZE = 0xFFFFFFFF //aligns to 32bit enum
|
||||
} HSA_QUEUE_TYPE;
|
||||
|
||||
typedef struct _HsaQueueResource
|
||||
{
|
||||
HSA_QUEUEID QueueId; /** queue ID */
|
||||
/** Doorbell address to notify HW of a new dispatch */
|
||||
union
|
||||
{
|
||||
HSAuint32* Queue_DoorBell;
|
||||
HSAuint64* Queue_DoorBell_aql;
|
||||
HSAuint64 QueueDoorBell;
|
||||
};
|
||||
|
||||
/** virtual address to notify HW of queue write ptr value */
|
||||
union
|
||||
{
|
||||
HSAuint32* Queue_write_ptr;
|
||||
HSAuint64* Queue_write_ptr_aql;
|
||||
HSAuint64 QueueWptrValue;
|
||||
};
|
||||
|
||||
/** virtual address updated by HW to indicate current read location */
|
||||
union
|
||||
{
|
||||
HSAuint32* Queue_read_ptr;
|
||||
HSAuint64* Queue_read_ptr_aql;
|
||||
HSAuint64 QueueRptrValue;
|
||||
};
|
||||
|
||||
} HsaQueueResource;
|
||||
|
||||
|
||||
//TEMPORARY structure definition - to be used only on "Triniti + Southern Islands" platform
|
||||
typedef struct _HsaQueueReport
|
||||
{
|
||||
HSAuint32 VMID; //Required on SI to dispatch IB in primary ring
|
||||
void* QueueAddress; //virtual address of UM mapped compute ring
|
||||
HSAuint64 QueueSize; //size of the UM mapped compute ring
|
||||
} HsaQueueReport;
|
||||
|
||||
|
||||
|
||||
typedef enum _HSA_DBG_WAVEOP
|
||||
{
|
||||
HSA_DBG_WAVEOP_HALT = 1, //Halts a wavefront
|
||||
HSA_DBG_WAVEOP_RESUME = 2, //Resumes a wavefront
|
||||
HSA_DBG_WAVEOP_KILL = 3, //Kills a wavefront
|
||||
HSA_DBG_WAVEOP_DEBUG = 4, //Causes wavefront to enter debug mode
|
||||
HSA_DBG_WAVEOP_TRAP = 5, //Causes wavefront to take a trap
|
||||
HSA_DBG_NUM_WAVEOP = 5,
|
||||
HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
|
||||
} HSA_DBG_WAVEOP;
|
||||
|
||||
typedef enum _HSA_DBG_WAVEMODE
|
||||
{
|
||||
HSA_DBG_WAVEMODE_SINGLE = 0, //send command to a single wave
|
||||
//Broadcast to all wavefronts of all processes is not supported for HSA user mode
|
||||
HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, //send to waves within current process
|
||||
HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, //send to waves within current process on CU
|
||||
HSA_DBG_NUM_WAVEMODE = 3,
|
||||
HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
|
||||
} HSA_DBG_WAVEMODE;
|
||||
|
||||
|
||||
typedef enum _HSA_DBG_WAVEMSG_TYPE
|
||||
{
|
||||
HSA_DBG_WAVEMSG_AUTO = 0,
|
||||
HSA_DBG_WAVEMSG_USER = 1,
|
||||
HSA_DBG_WAVEMSG_ERROR = 2,
|
||||
HSA_DBG_NUM_WAVEMSG,
|
||||
HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
|
||||
} HSA_DBG_WAVEMSG_TYPE;
|
||||
|
||||
typedef enum _HSA_DBG_WATCH_MODE
|
||||
{
|
||||
HSA_DBG_WATCH_READ = 0, //Read operations only
|
||||
HSA_DBG_WATCH_NONREAD = 1, //Write or Atomic operations only
|
||||
HSA_DBG_WATCH_ATOMIC = 2, //Atomic Operations only
|
||||
HSA_DBG_WATCH_ALL = 3, //Read, Write or Atomic operations
|
||||
HSA_DBG_WATCH_NUM,
|
||||
HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
|
||||
} HSA_DBG_WATCH_MODE;
|
||||
|
||||
|
||||
//This structure is hardware specific and may change in the future
|
||||
typedef struct _HsaDbgWaveMsgAMDGen2
|
||||
{
|
||||
HSAuint32 Value;
|
||||
HSAuint32 Reserved2;
|
||||
|
||||
} HsaDbgWaveMsgAMDGen2;
|
||||
|
||||
typedef union _HsaDbgWaveMessageAMD
|
||||
{
|
||||
HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
|
||||
//for future HsaDbgWaveMsgAMDGen3;
|
||||
} HsaDbgWaveMessageAMD;
|
||||
|
||||
typedef struct _HsaDbgWaveMessage
|
||||
{
|
||||
void* MemoryVA; // ptr to associated host-accessible data
|
||||
HsaDbgWaveMessageAMD DbgWaveMsg;
|
||||
} HsaDbgWaveMessage;
|
||||
|
||||
|
||||
//
|
||||
// HSA sync primitive, Event and HW Exception notification API definitions
|
||||
// The API functions allow the runtime to define a so-called sync-primitive, a SW object
|
||||
// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
|
||||
// through a defined GPU interrupt. A syncvar is a process virtual memory location of
|
||||
// a certain size that can be accessed by CPU and GPU shader code within the process to set
|
||||
// and query the content within that memory. The definition of the content is determined by
|
||||
// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
|
||||
// The syncvar values may be commonly written through an PM4 WRITE_DATA packet in the
|
||||
// user mode instruction stream.
|
||||
// The OS scheduler event is typically associated and signaled by an interrupt issued by
|
||||
// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
|
||||
// surfaced by the KFD by this mechanism, too.
|
||||
//
|
||||
|
||||
// these are the new definitions for events
|
||||
typedef enum _HSA_EVENTTYPE
|
||||
{
|
||||
HSA_EVENTTYPE_SIGNAL = 0, //user-mode generated GPU signal
|
||||
HSA_EVENTTYPE_NODECHANGE = 1, //HSA node change (attach/detach)
|
||||
HSA_EVENTTYPE_DEVICESTATECHANGE = 2, //HSA device state change( start/stop )
|
||||
HSA_EVENTTYPE_HW_EXCEPTION = 3, //GPU shader exception event
|
||||
HSA_EVENTTYPE_SYSTEM_EVENT = 4, //GPU SYSCALL with parameter info
|
||||
HSA_EVENTTYPE_DEBUG_EVENT = 5, //GPU signal for debugging
|
||||
HSA_EVENTTYPE_PROFILE_EVENT = 6, //GPU signal for profiling
|
||||
HSA_EVENTTYPE_QUEUE_EVENT = 7, //GPU signal queue idle state (EOP pm4)
|
||||
//...
|
||||
HSA_EVENTTYPE_MAXID,
|
||||
HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
|
||||
} HSA_EVENTTYPE;
|
||||
|
||||
typedef HSAuint32 HSA_EVENTID;
|
||||
|
||||
//
|
||||
// Subdefinitions for various event types: Syncvar
|
||||
//
|
||||
|
||||
typedef struct _HsaSyncVar
|
||||
{
|
||||
union
|
||||
{
|
||||
void* UserData; //pointer to user mode data
|
||||
HSAuint64 UserDataPtrValue; //64bit compatibility of value
|
||||
} SyncVar;
|
||||
HSAuint64 SyncVarSize;
|
||||
} HsaSyncVar;
|
||||
|
||||
//
|
||||
// Subdefinitions for various event types: NodeChange
|
||||
//
|
||||
|
||||
typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
|
||||
{
|
||||
HSA_EVENTTYPE_NODECHANGE_ADD = 0,
|
||||
HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
|
||||
HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
|
||||
} HSA_EVENTTYPE_NODECHANGE_FLAGS;
|
||||
|
||||
typedef struct _HsaNodeChange
|
||||
{
|
||||
HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; // HSA node added/removed on the platform
|
||||
} HsaNodeChange;
|
||||
|
||||
//
|
||||
// Sub-definitions for various event types: DeviceStateChange
|
||||
//
|
||||
|
||||
typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
|
||||
{
|
||||
HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, //device started (and available)
|
||||
HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, //device stopped (i.e. unavailable)
|
||||
HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
|
||||
} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
|
||||
|
||||
typedef struct _HsaDeviceStateChange
|
||||
{
|
||||
HSAuint32 NodeId; // F-NUMA node that contains the device
|
||||
HSA_DEVICE Device; // device type: GPU or CPU
|
||||
HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; // event flags
|
||||
} HsaDeviceStateChange;
|
||||
|
||||
|
||||
typedef struct _HsaEventData
|
||||
{
|
||||
HSA_EVENTTYPE EventType; //event type
|
||||
union
|
||||
{
|
||||
// return data associated with HSA_EVENTTYPE_SIGNAL and other events
|
||||
HsaSyncVar SyncVar;
|
||||
|
||||
// data associated with HSA_EVENTTYPE_NODE_CHANGE
|
||||
HsaNodeChange NodeChangeState;
|
||||
|
||||
// data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE
|
||||
HsaDeviceStateChange DeviceState;
|
||||
} EventData;
|
||||
|
||||
// the following data entries are internal to the KFD & thunk itself.
|
||||
|
||||
HSAuint64 HWData1; // internal thunk store for Event data (OsEventHandle)
|
||||
HSAuint64 HWData2; // internal thunk store for Event data (HWAddress)
|
||||
HSAuint32 HWData3; // internal thunk store for Event data (HWData)
|
||||
} HsaEventData;
|
||||
|
||||
|
||||
typedef struct _HsaEventDescriptor
|
||||
{
|
||||
HSA_EVENTTYPE EventType; // event type to allocate
|
||||
HSAuint32 NodeId; // H-NUMA node containing GPU device that is event source
|
||||
HsaSyncVar SyncVar; // pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL
|
||||
} HsaEventDescriptor;
|
||||
|
||||
|
||||
typedef struct _HsaEvent
|
||||
{
|
||||
HSA_EVENTID EventId;
|
||||
HsaEventData EventData;
|
||||
} HsaEvent;
|
||||
|
||||
typedef enum _HsaEventTimeout
|
||||
{
|
||||
HSA_EVENTTIMEOUT_IMMEDIATE = 0,
|
||||
HSA_EVENTTIMEOUT_INFINITE = 0xFFFFFFFF
|
||||
} HsaEventTimeOut;
|
||||
|
||||
typedef struct _HsaClockCounters
|
||||
{
|
||||
HSAuint64 GPUClockCounter;
|
||||
HSAuint64 CPUClockCounter;
|
||||
HSAuint64 SystemClockCounter;
|
||||
HSAuint64 SystemClockFrequencyHz;
|
||||
} HsaClockCounters;
|
||||
|
||||
#ifndef DEFINE_GUID
|
||||
typedef struct _HSA_UUID
|
||||
{
|
||||
HSAuint32 Data1;
|
||||
HSAuint16 Data2;
|
||||
HSAuint16 Data3;
|
||||
HSAuint8 Data4[8];
|
||||
} HSA_UUID;
|
||||
|
||||
#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
|
||||
static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
|
||||
#else
|
||||
#define HSA_UUID GUID
|
||||
#define HSA_DEFINE_UUID DEFINE_GUID
|
||||
#endif
|
||||
|
||||
|
||||
// GUID that identifies the GPU Shader Sequencer (SQ) block
|
||||
// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
|
||||
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
|
||||
0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);
|
||||
|
||||
// GUID that identifies the GPU Memory Controller (MC) block
|
||||
// {13900B57-4956-4D98-81D0-68521937F59C}
|
||||
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
|
||||
0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);
|
||||
|
||||
// GUID that identifies the IMOMMUv2 HW device
|
||||
// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
|
||||
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
|
||||
0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);
|
||||
|
||||
// GUID that identifies the KFD
|
||||
// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
|
||||
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
|
||||
0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);
|
||||
|
||||
typedef enum _HSA_PROFILE_TYPE
|
||||
{
|
||||
HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
|
||||
HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
|
||||
//writes to memory on updates (KFD access only)
|
||||
HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE = 2, //user-queue accessible counter
|
||||
HSA_PROFILE_TYPE_NONPRIV_STREAMING = 3, //user-queue accessible counter
|
||||
//...
|
||||
HSA_PROFILE_TYPE_NUM,
|
||||
|
||||
HSA_PROFILE_TYPE_SIZE = 0xFFFFFFFF // In order to align to 32-bit value
|
||||
} HSA_PROFILE_TYPE;
|
||||
|
||||
|
||||
typedef struct _HsaCounterFlags
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
unsigned int Global : 1; // counter is global
|
||||
// (not tied to VMID/WAVE/CU, ...)
|
||||
unsigned int Resettable : 1; // counter can be reset by SW
|
||||
// (always to 0?)
|
||||
unsigned int ReadOnly : 1; // counter is read-only
|
||||
// (but may be reset, if indicated)
|
||||
unsigned int Stream : 1; // counter has streaming capability
|
||||
// (after trigger, updates buffer)
|
||||
unsigned int Reserved : 28;
|
||||
} ui32;
|
||||
HSAuint32 Value;
|
||||
};
|
||||
} HsaCounterFlags;
|
||||
|
||||
|
||||
typedef struct _HsaCounter
|
||||
{
|
||||
HSA_PROFILE_TYPE Type; // specifies the counter type
|
||||
HSAuint64 CounterId; // indicates counter register offset
|
||||
HSAuint32 CounterSizeInBits; // indicates relevant counter bits
|
||||
HSAuint64 CounterMask; // bitmask for counter value (if applicable)
|
||||
HsaCounterFlags Flags; // Property flags (see above)
|
||||
HSAuint32 BlockIndex; // identifies block the counter belongs to,
|
||||
// value may be 0 to NumBlocks
|
||||
} HsaCounter;
|
||||
|
||||
|
||||
typedef struct _HsaCounterBlockProperties
|
||||
{
|
||||
HSA_UUID BlockId; // specifies the block location
|
||||
HSAuint32 NumCounters; // How many counters are available?
|
||||
// (sizes Counters[] array below)
|
||||
HSAuint32 NumConcurrent; // How many counter slots are available
|
||||
// in block?
|
||||
HsaCounter Counters[1]; // Start of counter array
|
||||
// (NumCounters elements total)
|
||||
} HsaCounterBlockProperties;
|
||||
|
||||
|
||||
typedef struct _HsaCounterProperties
|
||||
{
|
||||
HSAuint32 NumBlocks; // How many profilable block are available?
|
||||
// (sizes Blocks[] array below)
|
||||
HSAuint32 NumConcurrent; // How many blocks slots can be queried
|
||||
// concurrently by HW?
|
||||
HsaCounterBlockProperties Blocks[1]; // Start of block array
|
||||
// (NumBlocks elements total)
|
||||
} HsaCounterProperties;
|
||||
|
||||
typedef HSAuint64 HSATraceId;
|
||||
|
||||
typedef struct _HsaPmcTraceRoot
|
||||
{
|
||||
HSAuint64 TraceBufferMinSizeBytes;// (page aligned)
|
||||
HSAuint32 NumberOfPasses;
|
||||
HSATraceId TraceId;
|
||||
} HsaPmcTraceRoot;
|
||||
|
||||
#pragma pack(pop, hsakmttypes_h)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
#endif //_HSAKMTTYPES_H_
|
||||
Fai riferimento in un nuovo problema
Block a user