|
|
|
@@ -21,45 +21,43 @@ THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @file hcc_detail/hip_runtime_api.h
|
|
|
|
|
* @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
|
|
|
|
|
#include <hcc_detail/host_defines.h>
|
|
|
|
|
|
|
|
|
|
// Build-time guard: stop compilation when the HCC compiler is in use and its
// __hcc_workweek__ value is below 1602.
#if defined (__HCC__) && (__hcc_workweek__ < 1602)
#error("This version of HIP requires a newer version of HCC.");
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// hip_api_hcc.h
|
|
|
|
|
// Contains C function APIs for HIP runtime.
|
|
|
|
|
// This file does not use any HCC builtins or special language extensions (-hc mode) ; those functions in hip_hcc.h.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Structure definitions:
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @addtogroup GlobalDefs More
|
|
|
|
|
* @addtogroup GlobalDefs More
|
|
|
|
|
* @{
|
|
|
|
|
*/
|
|
|
|
|
//! Flags that can be used with hipStreamCreateWithFlags
|
|
|
|
|
#define hipStreamDefault 0x00 ///< Default stream creation flags. These are used with hipStreamCreate().
|
|
|
|
|
#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream
|
|
|
|
|
#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//! Flags that can be used with hipEventCreateWithFlags:
|
|
|
|
|
#define hipEventDefault 0x0 ///< Default flags
|
|
|
|
|
#define hipEventBlockingSync 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
|
|
|
|
|
#define hipEventDisableTiming 0x2 ///< Disable event's capability to record timing information. May improve performance.
|
|
|
|
|
#define hipEventDisableTiming 0x2 ///< Disable event's capability to record timing information. May improve performance.
|
|
|
|
|
#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @warning On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
|
|
|
|
|
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
|
|
|
|
|
*/
|
|
|
|
|
typedef enum hipFuncCache {
|
|
|
|
|
hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default)
|
|
|
|
@@ -70,7 +68,7 @@ typedef enum hipFuncCache {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @warning On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
|
|
|
|
|
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
|
|
|
|
|
*/
|
|
|
|
|
typedef enum hipSharedMemConfig {
|
|
|
|
|
hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking.
|
|
|
|
@@ -118,11 +116,11 @@ enum hipMemcpyKind {
|
|
|
|
|
typedef struct ihipStream_t * hipStream_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the surrounding hipEvent_t goes out-of-scope.
 * This is handy for cases where the hipEvent_t goes out-of-scope but the true event is being written by some async queue or device.
 */
typedef struct hipEvent_t {
    struct ihipEvent_t *_handle;  ///< Pointer to the true event; struct ihipEvent_t is only forward-declared here.
} hipEvent_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -158,8 +156,8 @@ extern "C" {
|
|
|
|
|
* @{
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Blocks until the default device has completed all preceding requested tasks.
|
|
|
|
|
/**
|
|
|
|
|
* @brief Blocks until the default device has completed all preceding requested tasks.
|
|
|
|
|
*
|
|
|
|
|
* This function waits for all streams on the default device to complete execution, and then returns.
|
|
|
|
|
*
|
|
|
|
@@ -169,10 +167,10 @@ hipError_t hipDeviceSynchronize(void);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
/**
|
|
|
|
|
* @brief Destroy all resources and reset all state on the default device in the current process.
|
|
|
|
|
*
|
|
|
|
|
* Explicitly destroy all memory allocations, events, and queues associated with the default device in the current process.
|
|
|
|
|
* Explicitly destroy all memory allocations, events, and queues associated with the default device in the current process.
|
|
|
|
|
*
|
|
|
|
|
* This function will reset the device immediately, and then return after all resources have been freed.
|
|
|
|
|
* The caller must ensure that the device is not being accessed by any other host threads from the active process when this function is called.
|
|
|
|
@@ -185,23 +183,23 @@ hipError_t hipDeviceReset(void) ;
|
|
|
|
|
/**
|
|
|
|
|
* @brief Set default device to be used for subsequent hip API calls from this thread.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] device Valid device in range 0...(hipGetDeviceCount()-1).
|
|
|
|
|
* @param[in] device Valid device in range 0...(hipGetDeviceCount()-1).
|
|
|
|
|
*
|
|
|
|
|
* Sets @p device as the default device for the calling host thread. Valid device id's are 0... (hipGetDeviceCount()-1).
|
|
|
|
|
*
|
|
|
|
|
* Many HIP APIs implicitly use the "default device" :
|
|
|
|
|
*
|
|
|
|
|
* - Any device memory subsequently allocated from this host thread (using hipMalloc) will be allocated on device.
|
|
|
|
|
* - Any device memory subsequently allocated from this host thread (using hipMalloc) will be allocated on device.
|
|
|
|
|
* - Any streams or events created from this host thread will be associated with device.
|
|
|
|
|
* - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device (unless a specific stream is specified,
|
|
|
|
|
* - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device (unless a specific stream is specified,
|
|
|
|
|
* in which case the device associated with that stream will be used).
|
|
|
|
|
*
|
|
|
|
|
* This function may be called from any host thread. Multiple host threads may use the same device.
|
|
|
|
|
* This function does no synchronization with the previous or new device, and has very little runtime overhead.
|
|
|
|
|
* This function may be called from any host thread. Multiple host threads may use the same device.
|
|
|
|
|
* This function does no synchronization with the previous or new device, and has very little runtime overhead.
|
|
|
|
|
* Applications can use hipSetDevice to quickly switch the default device before making a HIP runtime call which uses the default device.
|
|
|
|
|
*
|
|
|
|
|
* The default device is stored in thread-local-storage for each thread.
|
|
|
|
|
* Thread-pool implementations may inherit the default device of the previous thread. A good practice is to always call hipSetDevice
|
|
|
|
|
* The default device is stored in thread-local-storage for each thread.
|
|
|
|
|
* Thread-pool implementations may inherit the default device of the previous thread. A good practice is to always call hipSetDevice
|
|
|
|
|
* at the start of a HIP coding sequence to establish a known standard device.
|
|
|
|
|
*
|
|
|
|
|
* @see hipGetDevice, hipGetDeviceCount
|
|
|
|
@@ -216,7 +214,7 @@ hipError_t hipSetDevice(int device);
|
|
|
|
|
*
|
|
|
|
|
* HIP maintains a default device for each thread using thread-local-storage.
|
|
|
|
|
* This device is used implicitly for HIP runtime APIs called by this thread.
|
|
|
|
|
* hipGetDevice returns in * @p device the default device for the calling host thread.
|
|
|
|
|
* hipGetDevice returns in * @p device the default device for the calling host thread.
|
|
|
|
|
*
|
|
|
|
|
* @see hipSetDevice, hipGetDeviceCount
|
|
|
|
|
*/
|
|
|
|
@@ -224,10 +222,10 @@ hipError_t hipGetDevice(int *device);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Return number of compute-capable devices.
|
|
|
|
|
* @param [out] count Returns number of compute-capable devices.
|
|
|
|
|
* @brief Return number of compute-capable devices.
|
|
|
|
|
* @param [out] count Returns number of compute-capable devices.
|
|
|
|
|
*
|
|
|
|
|
* Returns in @p *count the number of devices that have ability to run compute commands. If there are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice.
|
|
|
|
|
* Returns in @p *count the number of devices that have ability to run compute commands. If there are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice.
|
|
|
|
|
* If 1 or more devices can be found, then hipGetDeviceCount returns #hipSuccess.
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipGetDeviceCount(int *count);
|
|
|
|
@@ -241,7 +239,7 @@ hipError_t hipGetDeviceCount(int *count);
|
|
|
|
|
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Returns device properties.
|
|
|
|
|
* @brief Returns device properties.
|
|
|
|
|
*
|
|
|
|
|
* @param [out] prop written with device properties
|
|
|
|
|
* @param [in] device which device to query for information
|
|
|
|
@@ -256,8 +254,8 @@ hipError_t hipDeviceGetProperties(hipDeviceProp_t* prop, int device);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Set L1/Shared cache partition.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
|
|
|
|
@@ -265,8 +263,8 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Get cache configuration for the device
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
|
|
|
|
@@ -274,31 +272,31 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Set Cache configuration for a specific function
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipFuncSetCacheConfig ( hipFuncCache config );
|
|
|
|
|
hipError_t hipFuncSetCacheConfig ( hipFuncCache config );
|
|
|
|
|
|
|
|
|
|
//---
|
|
|
|
|
//---
|
|
|
|
|
//Shared bank config functions:
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Get Shared memory bank configuration.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent NVIDIA GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
|
|
|
|
|
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Set Shared memory bank configuration.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent NVIDIA GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
|
|
|
|
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// end doxygen Device
|
|
|
|
@@ -320,21 +318,21 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
|
|
|
|
* Returns the last error that has been returned by any of the runtime calls in the same host thread,
|
|
|
|
|
* and then resets the saved error to #hipSuccess.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipGetLastError(void);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Return last error returned by any HIP runtime API call.
|
|
|
|
|
* @brief Return last error returned by any HIP runtime API call.
|
|
|
|
|
*
|
|
|
|
|
* @return #hipSuccess
|
|
|
|
|
*
|
|
|
|
|
* Returns the last error that has been returned by any of the runtime calls in the same host thread.
|
|
|
|
|
* Returns the last error that has been returned by any of the runtime calls in the same host thread.
|
|
|
|
|
* Unlike hipGetLastError, this function does not reset the saved error code.
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipPeekAtLastError(void);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -364,7 +362,7 @@ const char *hipGetErrorString(hipError_t hip_error);
|
|
|
|
|
|
|
|
|
|
// end doxygen Error
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -380,7 +378,7 @@ const char *hipGetErrorString(hipError_t hip_error);
|
|
|
|
|
* - cudaStreamGetPriority
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
/**
|
|
|
|
|
* @brief Create an asynchronous stream.
|
|
|
|
|
*
|
|
|
|
|
* @param[in, out] stream Pointer to new stream
|
|
|
|
@@ -396,7 +394,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
/**
|
|
|
|
|
* @brief Create an asynchronous stream.
|
|
|
|
|
*
|
|
|
|
|
* @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the newly created stream.
|
|
|
|
@@ -405,13 +403,13 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags);
|
|
|
|
|
* Create a new asynchronous stream.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
static inline hipError_t hipStreamCreate(hipStream_t *stream)
{
    // Convenience wrapper: forwards to hipStreamCreateWithFlags with the hipStreamDefault flags.
    return hipStreamCreateWithFlags(stream, hipStreamDefault);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
/**
|
|
|
|
|
* @brief Make the specified compute stream wait for an event
|
|
|
|
|
*
|
|
|
|
|
* @param[in] stream stream to make wait.
|
|
|
|
@@ -420,7 +418,7 @@ static inline hipError_t hipStreamCreate(hipStream_t *stream)
|
|
|
|
|
*
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidResourceHandle
|
|
|
|
|
*
|
|
|
|
|
* This function inserts a wait operation into the specified stream.
|
|
|
|
|
* This function inserts a wait operation into the specified stream.
|
|
|
|
|
* All future work submitted to @p stream will wait until @p event reports completion before beginning execution.
|
|
|
|
|
* This function is host-asynchronous and the function may return before the wait has completed.
|
|
|
|
|
*
|
|
|
|
@@ -429,10 +427,10 @@ static inline hipError_t hipStreamCreate(hipStream_t *stream)
|
|
|
|
|
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
/**
|
|
|
|
|
* @brief Wait for all commands in stream to complete.
|
|
|
|
|
*
|
|
|
|
|
* If the null stream is specified, this command blocks until all
|
|
|
|
|
* If the null stream is specified, this command blocks until all
|
|
|
|
|
*
|
|
|
|
|
* This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active or blocking.
|
|
|
|
|
*
|
|
|
|
@@ -443,17 +441,17 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
|
|
|
|
hipError_t hipStreamSynchronize(hipStream_t stream);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
/**
|
|
|
|
|
* @brief Destroys the specified stream.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] stream Stream to destroy.
|
|
|
|
|
* @return #hipSuccess
|
|
|
|
|
*
|
|
|
|
|
* Destroys the specified stream.
|
|
|
|
|
* Destroys the specified stream.
|
|
|
|
|
*
|
|
|
|
|
* If commands are still executing on the specified stream, some may complete execution before the queue is deleted.
|
|
|
|
|
*
|
|
|
|
|
* The queue may be destroyed while some commands are still inflight, or may wait for all commands queued to the stream
|
|
|
|
|
* The queue may be destroyed while some commands are still inflight, or may wait for all commands queued to the stream
|
|
|
|
|
* before destroying it.
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipStreamDestroy(hipStream_t stream);
|
|
|
|
@@ -462,11 +460,11 @@ hipError_t hipStreamDestroy(hipStream_t stream);
|
|
|
|
|
/**
|
|
|
|
|
* @brief Return flags associated with this stream.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] stream
|
|
|
|
|
* @param[in] stream
|
|
|
|
|
* @param[in,out] flags
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle
|
|
|
|
|
*
|
|
|
|
|
* Return flags associated with this stream in *@p flags.
|
|
|
|
|
* Return flags associated with this stream in *@p flags.
|
|
|
|
|
*
|
|
|
|
|
* @see hipStreamCreateWithFlags
|
|
|
|
|
*
|
|
|
|
@@ -477,7 +475,7 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags);
|
|
|
|
|
|
|
|
|
|
// end doxygen Stream
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -505,7 +503,7 @@ hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create an event
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* @param[in,out] event Pointer that receives the newly created event
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
@@ -522,22 +520,22 @@ static inline hipError_t hipEventCreate(hipEvent_t* event)
|
|
|
|
|
* @param[in] stream stream in which to record event.
|
|
|
|
|
* @returns #hipSuccess, #hipErrorInvalidResourceHandle
|
|
|
|
|
*
|
|
|
|
|
* hipEventQuery or hipEventSynchronize must be used to determine when the event
|
|
|
|
|
* hipEventQuery or hipEventSynchronize must be used to determine when the event
|
|
|
|
|
* transitions from "recording" (after eventRecord is called) to "recorded"
|
|
|
|
|
* (when timestamps are set, if requested).
|
|
|
|
|
*
|
|
|
|
|
* Events which are recorded in a non-NULL stream will transition
|
|
|
|
|
* from recording to "recorded" state when they reach the head of
|
|
|
|
|
* the specified stream, after all previous
|
|
|
|
|
* commands in that stream have completed executing.
|
|
|
|
|
* from recording to "recorded" state when they reach the head of
|
|
|
|
|
* the specified stream, after all previous
|
|
|
|
|
* commands in that stream have completed executing.
|
|
|
|
|
*
|
|
|
|
|
* If hipEventRecord has been previously called on event, then this call will overwrite any existing state in event.
|
|
|
|
|
*
|
|
|
|
|
* If this function is called on an event that is currently being recorded, results are undefined - either
|
|
|
|
|
* If this function is called on an event that is currently being recorded, results are undefined - either
|
|
|
|
|
* outstanding recording may save state into the event, and the order is not guaranteed. This should be avoided.
|
|
|
|
|
*
|
|
|
|
|
* @see hipEventElapsedTime
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
|
|
|
|
@@ -547,11 +545,11 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
|
|
|
|
|
* @brief Destroy the specified event.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] event Event to destroy.
|
|
|
|
|
* @return : #hipSuccess,
|
|
|
|
|
* @return : #hipSuccess,
|
|
|
|
|
*
|
|
|
|
|
* Releases memory associated with the event. If the event is recording but has not completed recording when hipEventDestroy is called,
|
|
|
|
|
* the function will return immediately and the completion_future resources will be released later, when the hipDevice is synchronized.
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipEventDestroy(hipEvent_t event);
|
|
|
|
|
|
|
|
|
@@ -566,7 +564,7 @@ hipError_t hipEventDestroy(hipEvent_t event);
|
|
|
|
|
* TODO-hcc - This function needs to support hipEventBlockingSync parameter.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] event Event on which to wait.
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidResourceHandle,
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidResourceHandle,
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipEventSynchronize(hipEvent_t event);
|
|
|
|
@@ -580,15 +578,15 @@ hipError_t hipEventSynchronize(hipEvent_t event);
|
|
|
|
|
* @param[in] stop : Stop event.
|
|
|
|
|
* @return : #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotReady,
|
|
|
|
|
*
|
|
|
|
|
* Computes the elapsed time between two events. Time is computed in ms, with
|
|
|
|
|
* a resolution of approximately 1 us.
|
|
|
|
|
* Computes the elapsed time between two events. Time is computed in ms, with
|
|
|
|
|
* a resolution of approximately 1 us.
|
|
|
|
|
*
|
|
|
|
|
* Events which are recorded in a NULL stream will block until all commands
|
|
|
|
|
* on all other streams complete execution, and then record the timestamp.
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* Events which are recorded in a non-NULL stream will record their timestamp
|
|
|
|
|
* when they reach the head of the specified stream, after all previous
|
|
|
|
|
* commands in that stream have completed executing. Thus the time that
|
|
|
|
|
* when they reach the head of the specified stream, after all previous
|
|
|
|
|
* commands in that stream have completed executing. Thus the time that
|
|
|
|
|
* the event recorded may be significantly after the host calls hipEventRecord.
|
|
|
|
|
*
|
|
|
|
|
* If hipEventRecord has not been called on either event, then #hipErrorInvalidResourceHandle is returned.
|
|
|
|
@@ -607,8 +605,8 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop);
|
|
|
|
|
*
|
|
|
|
|
* Query the status of the specified event. This function will return #hipSuccess if all commands
* in the appropriate stream (specified to hipEventRecord) have completed, or if hipEventRecord was
* not called on the event. If that work has not completed, then #hipErrorNotReady is returned.
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipEventQuery(hipEvent_t event) ;
|
|
|
|
@@ -616,7 +614,7 @@ hipError_t hipEventQuery(hipEvent_t event) ;
|
|
|
|
|
|
|
|
|
|
// end doxygen Events
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -638,7 +636,7 @@ hipError_t hipEventQuery(hipEvent_t event) ;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Allocate memory on the default accelerator
|
|
|
|
|
* @brief Allocate memory on the default accelerator
|
|
|
|
|
*
|
|
|
|
|
* @param[out] ptr Pointer to the allocated memory
|
|
|
|
|
* @param[in] size Requested memory size
|
|
|
|
@@ -648,89 +646,118 @@ hipError_t hipMalloc(void** ptr, size_t size) ;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Allocate pinned host memory
|
|
|
|
|
*
|
|
|
|
|
* @param[in] ptr Pointer to the allocated host pinned memory
|
|
|
|
|
* @param[out] size Requested memory size
|
|
|
|
|
* @brief Allocate pinned host memory
|
|
|
|
|
*
|
|
|
|
|
* @param[out] ptr Pointer to the allocated host pinned memory
|
|
|
|
|
* @param[in] size Requested memory size
|
|
|
|
|
* @return Error code
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMallocHost(void** ptr, size_t size) ;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO-doc (error codes)
|
|
|
|
|
/**
|
|
|
|
|
* Free memory allocated by the hcc hip memory allocation API.
|
|
|
|
|
* @brief Free memory allocated by the hcc hip memory allocation API.
|
|
|
|
|
* This API performs an implicit hipDeviceSynchronize() call.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] ptr Pointer to memory to be freed
|
|
|
|
|
* @return Error code
|
|
|
|
|
* @return #hipSuccess, #hipErrorMemoryFree
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipFree(void* ptr);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO-doc (error codes)
|
|
|
|
|
/**
|
|
|
|
|
* Free memory allocated by the hcc hip host memory allocation API
|
|
|
|
|
* @brief Free memory allocated by the hcc hip host memory allocation API
|
|
|
|
|
*
|
|
|
|
|
* @param[in] ptr Pointer to memory to be freed
|
|
|
|
|
* @return Error code
|
|
|
|
|
* @return #hipSuccess, #hipErrorMemoryFree
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipFreeHost(void* ptr);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO-doc (error codes)
|
|
|
|
|
/**
|
|
|
|
|
* Copy data from src to dst. It supports memory from host to device,
|
|
|
|
|
* @brief Copy data from src to dst.
|
|
|
|
|
*
|
|
|
|
|
* It supports memory from host to device,
|
|
|
|
|
* device to host, device to device and host to host
|
|
|
|
|
* The src and dst must not overlap.
|
|
|
|
|
* If the
|
|
|
|
|
* TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now, use hipErrorUnknown for now
|
|
|
|
|
*
|
|
|
|
|
* This function is host-synchronous for most inputs.
|
|
|
|
|
* It uses the default NULL stream and will synchronize with other blocking streams on the same device.
|
|
|
|
|
*
|
|
|
|
|
* @param[out] dst Data being copied to
|
|
|
|
|
* @param[in] src Data being copy from
|
|
|
|
|
* @param[in] sizeBytes Data size in bytes
|
|
|
|
|
* @param[in] kind Memory copy type
|
|
|
|
|
* @return Error code
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area pointed to by @p offset bytes from the start of symbol @p symbol.
|
|
|
|
|
*
|
|
|
|
|
* The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string,
|
|
|
|
|
* naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
|
|
|
|
|
* TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use hipErrorUnknown for now.
|
|
|
|
|
*
|
|
|
|
|
* @param[in] symbolName - Symbol destination on device
|
|
|
|
|
* @param[in] src - Data being copy from
|
|
|
|
|
* @param[in] sizeBytes - Data size in bytes
|
|
|
|
|
* @param[in] offset - Offset from start of symbol in bytes
|
|
|
|
|
* @param[in] kind - Type of transfer
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO-doc (error codes)
|
|
|
|
|
/**
|
|
|
|
|
* Copy data from src to dst asynchronously. It supports memory from host to device,
|
|
|
|
|
* @brief Copy data from src to dst asynchronously.
|
|
|
|
|
*
|
|
|
|
|
* TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now, use hipErrorUnknown for now
|
|
|
|
|
*
|
|
|
|
|
* @param[out] dst Data being copy to
|
|
|
|
|
* @param[in] src Data being copy from
|
|
|
|
|
* @param[in] sizeBytes Data size in bytes
|
|
|
|
|
* @param[in] kind Memory copy type
* @param[in] stream Stream on which the copy is being enqueued
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Copy data from src to dst asynchronously.
|
|
|
|
|
*
|
|
|
|
|
* It supports memory from host to device,
|
|
|
|
|
* device to host, device to device and host to host.
|
|
|
|
|
*
|
|
|
|
|
* @param[out] dst Data being copy to
|
|
|
|
|
* @param[in] src Data being copy from
|
|
|
|
|
* @param[in] sizeBytes Data size in bytes
|
|
|
|
|
* @param[in] kind Memory copy type
* @param[in] stream Stream on which the copy is being enqueued
|
|
|
|
|
* @return Error code
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO-doc
|
|
|
|
|
/*
|
|
|
|
|
* This function is host-asynchronous and may return before the memset operation completes.
|
|
|
|
|
* Same as hipMemsetAsync with null stream.
|
|
|
|
|
*
|
|
|
|
|
* */
|
|
|
|
|
hipError_t hipMemset(void* dst, int value, size_t sizeBytes );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Fills the first @p sizeBytes bytes of the memory area pointed to by @p dst with the constant byte value @p value.
|
|
|
|
|
*
|
|
|
|
|
* hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the memset is complete.
|
|
|
|
|
* The operation can optionally be associated to a stream by passing a non-zero stream argument.
|
|
|
|
|
* If stream is non-zero, the operation may overlap with operations in other streams.
|
|
|
|
|
*
|
|
|
|
|
* @param[out] dst Pointer to device memory
|
|
|
|
|
* @param[in] value - Value to set for each byte of specified memory
|
|
|
|
|
* @param[in] sizeBytes - Size in bytes to set
|
|
|
|
|
* @param[in] stream - Stream identifier
|
|
|
|
|
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t = 0 );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* @brief Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
|
|
|
|
|
* @brief Query memory info.
|
|
|
|
|
* Return snapshot of free memory, and total allocatable memory on the device.
|
|
|
|
|
*
|
|
|
|
|
* Returns in *free a snapshot of the current free memory o
|
|
|
|
|
**/
|
|
|
|
@@ -738,7 +765,7 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ;
|
|
|
|
|
|
|
|
|
|
// doxygen end Memory
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -759,24 +786,64 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ;
|
|
|
|
|
* @param [in] peerDevice
|
|
|
|
|
*
|
|
|
|
|
* Returns "1" in @p canAccessPeer if the specified @p device is capable
|
|
|
|
|
* of directly accessing memory phyically located on peerDevice , or "0" if not.
|
|
|
|
|
* of directly accessing memory physically located on peerDevice , or "0" if not.
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice );
|
|
|
|
|
|
|
|
|
|
// TODO-DOC
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Disables registering memory on peerDevice for direct access from the current device.
|
|
|
|
|
*
|
|
|
|
|
* If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered.
|
|
|
|
|
* Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device.
|
|
|
|
|
*
|
|
|
|
|
* @param [in] peerDevice
|
|
|
|
|
* TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown
|
|
|
|
|
* @return #hipSuccess, #hipErrorUnknown
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceDisablePeerAccess ( int peerDevice );
|
|
|
|
|
|
|
|
|
|
// TODO-DOC
|
|
|
|
|
/**
|
|
|
|
|
* @brief Enables registering memory on peerDevice for direct access from the current device.
|
|
|
|
|
*
|
|
|
|
|
* @param [in] peerDevice
|
|
|
|
|
* @param [in] flags
|
|
|
|
|
*
|
|
|
|
|
* TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown
|
|
|
|
|
* Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorUnknown
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags );
|
|
|
|
|
|
|
|
|
|
// TODO-DOC
|
|
|
|
|
/**
|
|
|
|
|
* @brief Copies memory from one device to memory on another device.
|
|
|
|
|
*
|
|
|
|
|
* @param [out] dst - Destination device pointer.
|
|
|
|
|
* @param [in] dstDevice - Destination device
|
|
|
|
|
* @param [in] src - Source device pointer
|
|
|
|
|
* @param [in] srcDevice - Source device
|
|
|
|
|
* @param [in] sizeBytes - Size of memory copy in bytes
|
|
|
|
|
*
|
|
|
|
|
* Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes );
|
|
|
|
|
|
|
|
|
|
// TODO-DOC
|
|
|
|
|
/**
|
|
|
|
|
* @brief Copies memory from one device to memory on another device.
|
|
|
|
|
*
|
|
|
|
|
* @param [out] dst - Destination device pointer.
|
|
|
|
|
* @param [in] dstDevice - Destination device
|
|
|
|
|
* @param [in] src - Source device pointer
|
|
|
|
|
* @param [in] srcDevice - Source device
|
|
|
|
|
* @param [in] sizeBytes - Size of memory copy in bytes
|
|
|
|
|
* @param [in] stream - Stream identifier
|
|
|
|
|
*
|
|
|
|
|
* Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 );
|
|
|
|
|
// doxygen end PeerToPeer
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -788,14 +855,14 @@ hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Returns the approximate HIP driver versin.
|
|
|
|
|
/**
|
|
|
|
|
* @brief Returns the approximate HIP driver version.
|
|
|
|
|
*
|
|
|
|
|
* @warning The HIP feature set does not correpond to an exact CUDA SDK driver revision.
|
|
|
|
|
* @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision.
|
|
|
|
|
* This function always sets *driverVersion to 4 as an approximation though HIP supports
|
|
|
|
|
* some features which were introduced in later CUDA SDK revisions.
|
|
|
|
|
* some features which were introduced in later CUDA SDK revisions.
|
|
|
|
|
* HIP apps code should not rely on the driver revision number here and should
|
|
|
|
|
* use arch feature flags to test device capabiliies or conditional compilation.
|
|
|
|
|
* use arch feature flags to test device capabilities or conditional compilation.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipDriverGetVersion(int *driverVersion) ;
|
|
|
|
@@ -804,7 +871,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
|
|
|
|
|
|
|
|
|
|
// doxygen end Version Management
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -813,9 +880,9 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
|
|
|
|
|
*-------------------------------------------------------------------------------------------------
|
|
|
|
|
* @defgroup Profiler Control
|
|
|
|
|
* @{
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* The cudaProfilerInitialize API format for "configFile" is not supported.
|
|
|
|
|
*
|
|
|
|
|
* @warning The cudaProfilerInitialize API format for "configFile" is not supported.
|
|
|
|
|
*
|
|
|
|
|
* On AMD platforms, hipProfilerStart and hipProfilerStop require installation of AMD's GPU
|
|
|
|
|
* perf counter API and defining GPU_PERF
|
|
|
|
@@ -823,7 +890,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -839,7 +906,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
|
|
|
|
|
*-------------------------------------------------------------------------------------------------
|
|
|
|
|
* @defgroup HCC_Specific HCC-Specific Accessors
|
|
|
|
|
* @{
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* The following calls are only supported when compiling HIP with HCC.
|
|
|
|
|
* To produce portable code, use of these calls must be guarded by #ifdef checks:
|
|
|
|
|
* @code
|
|
|
|
@@ -851,15 +918,15 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#ifdef __HCC__
|
|
|
|
|
#ifdef __HCC__
|
|
|
|
|
#include <hc.hpp>
|
|
|
|
|
/**
|
|
|
|
|
* @brief Return hc::acclerator associated with the specified deviceId
|
|
|
|
|
* @brief Return hc::accelerator associated with the specified deviceId
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Return hc::acclerator_view associated with the specified stream
|
|
|
|
|
* @brief Return hc::accelerator_view associated with the specified stream
|
|
|
|
|
*/
|
|
|
|
|
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
|
|
|
|
|
#endif
|
|
|
|
@@ -867,7 +934,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
|
|
|
|
|
|
|
|
|
|
// end-group HCC_Specific
|
|
|
|
|
/**
|
|
|
|
|
* @}
|
|
|
|
|
* @}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|