minor doc touchup

This commit is contained in:
Ben Sander
2016-02-08 22:11:11 -06:00
6 changed files with 269 additions and 190 deletions
+10 -10
View File
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/**
* @file hcc_detail/hip_runtime.h
*
* @brief Contains definitions of APIs for HIP runtime.
*/
#pragma once
@@ -285,42 +285,42 @@ __device__ inline unsigned long long int atomicXor(unsigned long long int* addre
#include <hc.hpp>
// integer intrinsic function __popc __clz __ffs __brev
__device__ inline unsigned int __popc( unsigned int input)
__device__ inline unsigned int __popc( unsigned int input)
{
return hc::__popcount_u32_b32( input);
}
__device__ inline unsigned int __popcll( unsigned long long int input)
__device__ inline unsigned int __popcll( unsigned long long int input)
{
return hc::__popcount_u32_b64(input);
}
__device__ inline unsigned int __clz(unsigned int input)
__device__ inline unsigned int __clz(unsigned int input)
{
return hc::__firstbit_u32_u32( input);
}
__device__ inline unsigned int __clzll(unsigned long long int input)
__device__ inline unsigned int __clzll(unsigned long long int input)
{
return hc::__firstbit_u32_u64( input);
}
__device__ inline unsigned int __clz(int input)
__device__ inline unsigned int __clz(int input)
{
return hc::__firstbit_u32_s32( input);
}
__device__ inline unsigned int __clzll(long long int input)
__device__ inline unsigned int __clzll(long long int input)
{
return hc::__firstbit_u32_s64( input);
}
__device__ inline unsigned int __ffs(unsigned int input)
__device__ inline unsigned int __ffs(unsigned int input)
{
return hc::__lastbit_u32_u32( input)+1;
}
__device__ inline unsigned int __ffsll(unsigned long long int input)
__device__ inline unsigned int __ffsll(unsigned long long int input)
{
return hc::__lastbit_u32_u64( input)+1;
}
@@ -340,7 +340,7 @@ __device__ inline unsigned int __brev( unsigned int input)
return hc::__bitrev_b32( input);
}
__device__ inline unsigned long long int __brevll( unsigned long long int input)
__device__ inline unsigned long long int __brevll( unsigned long long int input)
{
return hc::__bitrev_b64( input);
}
+203 -136
View File
@@ -21,45 +21,43 @@ THE SOFTWARE.
*/
#pragma once
/**
* @file hcc_detail/hip_runtime_api.h
* @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode); those functions are in hip_runtime.h.
*/
#include <stdint.h>
#include <stddef.h>
#include <hcc_detail/host_defines.h>
#if defined (__HCC__) && (__hcc_workweek__ < 1602)
#if defined (__HCC__) && (__hcc_workweek__ < 1602)
#error("This version of HIP requires a newer version of HCC.");
#endif
// hip_api_hcc.h
// Contains C function APIs for HIP runtime.
// This file does not use any HCC builtins or special language extensions (-hc mode) ; those functions in hip_hcc.h.
// Structure definitions:
#ifdef __cplusplus
extern "C" {
#endif
/**
* @addtogroup GlobalDefs More
* @addtogroup GlobalDefs More
* @{
*/
//! Flags that can be used with hipStreamCreateWithFlags
#define hipStreamDefault 0x00 ///< Default stream creation flags. These are used with hipStreamCreate().
#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream
#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream
//! Flags that can be used with hipEventCreateWithFlags:
#define hipEventDefault 0x0 ///< Default flags
#define hipEventBlockingSync 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
#define hipEventDisableTiming 0x2 ///< Disable event's capability to record timing information. May improve performance.
#define hipEventDisableTiming 0x2 ///< Disable event's capability to record timing information. May improve performance.
#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP.
/**
* @warning On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
*/
typedef enum hipFuncCache {
hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default)
@@ -70,7 +68,7 @@ typedef enum hipFuncCache {
/**
* @warning On AMD devices and recent NVIDIA devices, these hints and controls are ignored.
* @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored.
*/
typedef enum hipSharedMemConfig {
hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking.
@@ -118,11 +116,11 @@ enum hipMemcpyKind {
typedef struct ihipStream_t * hipStream_t;
/*
/*
* Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the surrounding hipEvent_t goes out-of-scope.
* This is handy for cases where the hipEvent_t goes out-of-scope but the true event is being written by some async queue or device */
typedef struct hipEvent_t {
struct ihipEvent_t *_handle;
struct ihipEvent_t *_handle;
} hipEvent_t;
@@ -158,8 +156,8 @@ extern "C" {
* @{
*/
/**
* @brief Blocks until the default device has completed all preceding requested tasks.
/**
* @brief Blocks until the default device has completed all preceding requested tasks.
*
* This function waits for all streams on the default device to complete execution, and then returns.
*
@@ -169,10 +167,10 @@ hipError_t hipDeviceSynchronize(void);
/**
/**
* @brief Destroy all resources and reset all state on the default device in the current process.
*
* Explicitly destroy all memory allocations, events, and queues associated with the default device in the current process.
* Explicitly destroy all memory allocations, events, and queues associated with the default device in the current process.
*
* This function will reset the device immediately, and then return after all resources have been freed.
* The caller must ensure that the device is not being accessed by any other host threads from the active process when this function is called.
@@ -185,23 +183,23 @@ hipError_t hipDeviceReset(void) ;
/**
* @brief Set default device to be used for subsequent hip API calls from this thread.
*
* @param[in] device Valid device in range 0...(hipGetDeviceCount()-1).
* @param[in] device Valid device in range 0...(hipGetDeviceCount()-1).
*
* Sets @p device as the default device for the calling host thread. Valid device id's are 0... (hipGetDeviceCount()-1).
*
* Many HIP APIs implicitly use the "default device" :
*
* - Any device memory subsequently allocated from this host thread (using hipMalloc) will be allocated on device.
* - Any device memory subsequently allocated from this host thread (using hipMalloc) will be allocated on device.
* - Any streams or events created from this host thread will be associated with device.
* - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device (unless a specific stream is specified,
* - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device (unless a specific stream is specified,
* in which case the device associated with that stream will be used).
*
* This function may be called from any host thread. Multiple host threads may use the same device.
* This function does no synchronization with the previous or new device, and has very little runtime overhead.
* This function may be called from any host thread. Multiple host threads may use the same device.
* This function does no synchronization with the previous or new device, and has very little runtime overhead.
* Applications can use hipSetDevice to quickly switch the default device before making a HIP runtime call which uses the default device.
*
* The default device is stored in thread-local-storage for each thread.
* Thread-pool implementations may inherit the default device of the previous thread. A good practice is to always call hipSetDevice
* The default device is stored in thread-local-storage for each thread.
* Thread-pool implementations may inherit the default device of the previous thread. A good practice is to always call hipSetDevice
* at the start of HIP coding sequence to establish a known standard device.
*
* @see hipGetDevice, hipGetDeviceCount
@@ -216,7 +214,7 @@ hipError_t hipSetDevice(int device);
*
* HIP maintains a default device for each thread using thread-local-storage.
* This device is used implicitly for HIP runtime APIs called by this thread.
* hipGetDevice returns in * @p device the default device for the calling host thread.
* hipGetDevice returns in * @p device the default device for the calling host thread.
*
* @see hipSetDevice, hipGetDevicesizeBytes
*/
@@ -224,10 +222,10 @@ hipError_t hipGetDevice(int *device);
/**
* @brief Return number of compute-capable devices.
* @param [out] count Returns number of compute-capable devices.
* @brief Return number of compute-capable devices.
* @param [out] count Returns number of compute-capable devices.
*
* Returns in @p *count the number of devices that have ability to run compute commands. If there are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice.
* Returns in @p *count the number of devices that have ability to run compute commands. If there are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice.
* If 1 or more devices can be found, then hipGetDeviceCount returns #hipSuccess.
*/
hipError_t hipGetDeviceCount(int *count);
@@ -241,7 +239,7 @@ hipError_t hipGetDeviceCount(int *count);
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device);
/**
* @brief Returns device properties.
* @brief Returns device properties.
*
* @param [out] prop written with device properties
* @param [in] device which device to query for information
@@ -256,8 +254,8 @@ hipError_t hipDeviceGetProperties(hipDeviceProp_t* prop, int device);
/**
* @brief Set L1/Shared cache partition.
*
* Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
*/
hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
@@ -265,8 +263,8 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
/**
* @brief Get Cache configuration for the default device
*
* Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
*/
hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
@@ -274,31 +272,31 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
/**
* @brief Set Cache configuration for a specific function
*
* Note: AMD devices and recent NVIDIA GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
*/
hipError_t hipFuncSetCacheConfig ( hipFuncCache config );
hipError_t hipFuncSetCacheConfig ( hipFuncCache config );
//---
//---
//Shared bank config functions:
/**
* @brief Get Shared memory bank configuration.
*
* Note: AMD devices and recent NVIDIA GPUS do not support shared cache banking, and the hint is ignored on those architectures.
*
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
*
*/
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
/**
* @brief Set Shared memory bank configuration.
*
* Note: AMD devices and recent NVIDIA GPUS do not support shared cache banking, and the hint is ignored on those architectures.
*
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
*
*/
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
// end doxygen Device
@@ -320,21 +318,21 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
* Returns the last error that has been returned by any of the runtime calls in the same host thread,
* and then resets the saved error to #hipSuccess.
*
*/
*/
hipError_t hipGetLastError(void);
/**
* @brief Return last error returned by any HIP runtime API call.
* @brief Return last error returned by any HIP runtime API call.
*
* @return #hipSuccess
*
* Returns the last error that has been returned by any of the runtime calls in the same host thread.
* Returns the last error that has been returned by any of the runtime calls in the same host thread.
* Unlike hipGetLastError, this function does not reset the saved error code.
*
*
*
*/
*/
hipError_t hipPeekAtLastError(void);
@@ -364,7 +362,7 @@ const char *hipGetErrorString(hipError_t hip_error);
// end doxygen Error
/**
* @}
* @}
*/
@@ -380,7 +378,7 @@ const char *hipGetErrorString(hipError_t hip_error);
* - cudaStreamGetPriority
*/
/**
/**
* @brief Create an asynchronous stream.
*
* @param[in, out] stream Pointer to new stream
@@ -396,7 +394,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags);
/**
/**
* @brief Create an asynchronous stream.
*
* @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the newly created stream.
@@ -405,13 +403,13 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags);
* Create a new asynchronous stream.
*
*/
static inline hipError_t hipStreamCreate(hipStream_t *stream)
static inline hipError_t hipStreamCreate(hipStream_t *stream)
{
return hipStreamCreateWithFlags(stream, hipStreamDefault);
}
/**
/**
* @brief Make the specified compute stream wait for an event
*
* @param[in] stream stream to make wait.
@@ -420,7 +418,7 @@ static inline hipError_t hipStreamCreate(hipStream_t *stream)
*
* @return #hipSuccess, #hipErrorInvalidResourceHandle
*
* This function inserts a wait operation into the specified stream.
* This function inserts a wait operation into the specified stream.
* All future work submitted to @p stream will wait until @p event reports completion before beginning execution.
* This function is host-asynchronous and the function may return before the wait has completed.
*
@@ -429,10 +427,10 @@ static inline hipError_t hipStreamCreate(hipStream_t *stream)
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
/**
/**
* @brief Wait for all commands in stream to complete.
*
* If the null stream is specified, this command blocks until all
* If the null stream is specified, this command blocks until all
*
* This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active or blocking.
*
@@ -443,17 +441,17 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
hipError_t hipStreamSynchronize(hipStream_t stream);
/**
/**
* @brief Destroys the specified stream.
*
* @param[in] stream Stream to destroy.
* @return #hipSuccess
*
* Destroys the specified stream.
* Destroys the specified stream.
*
* If commands are still executing on the specified stream, some may complete execution before the queue is deleted.
*
* The queue may be destroyed while some commands are still inflight, or may wait for all commands queued to the stream
* The queue may be destroyed while some commands are still inflight, or may wait for all commands queued to the stream
* before destroying it.
*/
hipError_t hipStreamDestroy(hipStream_t stream);
@@ -462,11 +460,11 @@ hipError_t hipStreamDestroy(hipStream_t stream);
/**
* @brief Return flags associated with this stream.
*
* @param[in] stream
* @param[in] stream
* @param[in,out] flags
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle
*
* Return flags associated with this stream in *@p flags.
* Return flags associated with this stream in *@p flags.
*
* @see hipStreamCreateWithFlags
*
@@ -477,7 +475,7 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags);
// end doxygen Stream
/**
* @}
* @}
*/
@@ -505,7 +503,7 @@ hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);
/**
* Create an event
*
*
* @param[in] event Creates an event
*
*/
@@ -522,22 +520,22 @@ static inline hipError_t hipEventCreate(hipEvent_t* event)
* @param[in] stream stream in which to record event.
* @returns #hipSuccess, #hipErrorInvalidResourceHandle
*
* hipEventQuery or hipEventSynchronize must be used to determine when the event
* hipEventQuery or hipEventSynchronize must be used to determine when the event
* transitions from "recording" (after eventRecord is called) to "recorded"
* (when timestamps are set, if requested).
*
* Events which are recorded in a non-NULL stream will transition
* from recording to "recorded" state when they reach the head of
* the specified stream, after all previous
* commands in that stream have completed executing.
* from recording to "recorded" state when they reach the head of
* the specified stream, after all previous
* commands in that stream have completed executing.
*
* If hipEventRecord has been previously called on event, then this call will overwrite any existing state in event.
*
* If this function is called on an event that is currently being recorded, results are undefined - either
* If this function is called on an event that is currently being recorded, results are undefined - either
* outstanding recording may save state into the event, and the order is not guaranteed. This should be avoided.
*
* @see hipEventElapsedTime
*
*
*/
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
@@ -547,11 +545,11 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL);
* @brief Destroy the specified event.
*
* @param[in] event Event to destroy.
* @return : #hipSuccess,
* @return : #hipSuccess,
*
* Releases memory associated with the event. If the event is recording but has not completed recording when hipEventDestroy is called,
* the function will return immediately and the completion_future resources will be released later, when the hipDevice is synchronized.
*
*
*/
hipError_t hipEventDestroy(hipEvent_t event);
@@ -566,7 +564,7 @@ hipError_t hipEventDestroy(hipEvent_t event);
* TODO-hcc - This function needs to support hipEventBlockingSync parameter.
*
* @param[in] event Event on which to wait.
* @return #hipSuccess, #hipErrorInvalidResourceHandle,
* @return #hipSuccess, #hipErrorInvalidResourceHandle,
*
*/
hipError_t hipEventSynchronize(hipEvent_t event);
@@ -580,15 +578,15 @@ hipError_t hipEventSynchronize(hipEvent_t event);
* @param[in] stop : Stop event.
* @return : #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotReady,
*
* Computes the elapsed time between two events. Time is computed in ms, with
* a resolution of approximately 1 us.
* Computes the elapsed time between two events. Time is computed in ms, with
* a resolution of approximately 1 us.
*
* Events which are recorded in a NULL stream will block until all commands
* on all other streams complete execution, and then record the timestamp.
*
*
* Events which are recorded in a non-NULL stream will record their timestamp
* when they reach the head of the specified stream, after all previous
* commands in that stream have completed executing. Thus the time that
* when they reach the head of the specified stream, after all previous
* commands in that stream have completed executing. Thus the time that
* the event recorded may be significantly after the host calls hipEventRecord.
*
* If hipEventRecord has not been called on either event, then #hipErrorInvalidResourceHandle is returned.
@@ -607,8 +605,8 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop);
*
* Query the status of the specified event. This function will return #hipErrorNotReady if any commands
* in the appropriate stream (specified to hipEventRecord) have not yet completed. If that work has completed,
* or if hipEventRecord was not called on the event, then cudaSuccess is returned.
*
* or if hipEventRecord was not called on the event, then hipSuccess is returned.
*
*
*/
hipError_t hipEventQuery(hipEvent_t event) ;
@@ -616,7 +614,7 @@ hipError_t hipEventQuery(hipEvent_t event) ;
// end doxygen Events
/**
* @}
* @}
*/
@@ -638,7 +636,7 @@ hipError_t hipEventQuery(hipEvent_t event) ;
/**
* Allocate memory on the default accelerator
* @brief Allocate memory on the default accelerator
*
* @param[out] ptr Pointer to the allocated memory
* @param[in] size Requested memory size
@@ -648,89 +646,118 @@ hipError_t hipMalloc(void** ptr, size_t size) ;
/**
* Allocate pinned host memory
*
* @param[in] ptr Pointer to the allocated host pinned memory
* @param[out] size Requested memory size
* @brief Allocate pinned host memory
*
* @param[out] ptr Pointer to the allocated host pinned memory
* @param[in] size Requested memory size
* @return Error code
*/
hipError_t hipMallocHost(void** ptr, size_t size) ;
// TODO-doc (error codes)
/**
* Free memory allocated by the hcc hip memory allocation API.
* @brief Free memory allocated by the hcc hip memory allocation API.
* This API performs an implicit hipDeviceSynchronize() call.
*
* @param[in] ptr Pointer to memory to be freed
* @return Error code
* @return #hipSuccess, #hipErrorMemoryFree
*/
hipError_t hipFree(void* ptr);
// TODO-doc (error codes)
/**
* Free memory allocated by the hcc hip host memory allocation API
* @brief Free memory allocated by the hcc hip host memory allocation API
*
* @param[in] ptr Pointer to memory to be freed
* @return Error code
* @return #hipSuccess, #hipErrorMemoryFree
*/
hipError_t hipFreeHost(void* ptr);
// TODO-doc (error codes)
/**
* Copy data from src to dst. It supports memory from host to device,
* @brief Copy data from src to dst.
*
* It supports memory from host to device,
* device to host, device to device and host to host
* The src and dst must not overlap.
* If the
* TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now, use hipErrorUnknown for now
*
* This function is host-synchronous for most inputs.
* It uses the default NULL stream and will synchronize with other blocking streams on the same device.
*
* @param[ being copy to
* @param[out] dst Data being copy to
* @param[in] src Data being copy from
* @param[in] sizeBytes Data size in bytes
* @param[in] kind Memory copy type
* @return Error code
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
*/
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
/**
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area located @p offset bytes from the start of symbol @p symbolName.
*
* The memory areas may not overlap. Symbol can either be a variable that resides in global or constant memory space, or it can be a character string,
* naming a variable that resides in global or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
* TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use hipErrorUnknown for now.
*
* @param[in] symbolName - Symbol destination on device
* @param[in] src - Data being copy from
* @param[in] sizeBytes - Data size in bytes
* @param[in] offset - Offset from start of symbol in bytes
* @param[in] kind - Type of transfer
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
*/
hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t sizeBytes, size_t offset, hipMemcpyKind kind);
// TODO-doc (error codes)
/**
* Copy data from src to dst asynchronously. It supports memory from host to device,
* @brief Copy data from src to dst asynchronously.
*
* TODO: cudaErrorInvalidMemcpyDirection error code is not supported right now, use hipErrorUnknown for now
*
* @param[out] dst Data being copy to
* @param[in] src Data being copy from
* @param[in] sizeBytes Data size in bytes
* @param[in] stream Stream in which the copy is being enqueued
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
*/
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0);
/**
* @brief Copy data from src to dst asynchronously.
*
* It supports memory from host to device,
* device to host, device to device and host to host.
*
* @param[out] dst Data being copy to
* @param[in] src Data being copy from
* @param[in] sizeBytes Data size in bytes
* @param[in] stream Stream in which the copy is being enqueued
* @return Error code
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
*/
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream=0);
// TODO-doc
/*
* This function is host-asynchronous and may return before the memset operation completes.
* Same as hipMemsetAsync with null stream.
*
* */
hipError_t hipMemset(void* dst, int value, size_t sizeBytes );
/**
* @brief Fills the first @p sizeBytes bytes of the memory area pointed to by @p dst with the constant byte value @p value.
*
* hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the memset is complete.
* The operation can optionally be associated to a stream by passing a non-zero stream argument.
* If stream is non-zero, the operation may overlap with operations in other streams.
*
* @param[out] dst Pointer to device memory
* @param[in] value - Value to set for each byte of specified memory
* @param[in] sizeBytes - Size in bytes to set
* @param[in] stream - Stream identifier
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
*/
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t = 0 );
/*
* @brief Query memory info. Return snapshot of free memory, and total allocatable memory on the device.
* @brief Query memory info.
* Return snapshot of free memory, and total allocatable memory on the device.
*
* Returns in *free a snapshot of the current free memory o
**/
@@ -738,7 +765,7 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ;
// doxygen end Memory
/**
* @}
* @}
*/
@@ -759,24 +786,64 @@ hipError_t hipMemGetInfo (size_t * free, size_t * total) ;
* @param [in] peerDevice
*
* Returns "1" in @p canAccessPeer if the specified @p device is capable
* of directly accessing memory phyically located on peerDevice , or "0" if not.
* of directly accessing memory physically located on peerDevice , or "0" if not.
*/
hipError_t hipDeviceCanAccessPeer ( int* canAccessPeer, int device, int peerDevice );
// TODO-DOC
/**
* @brief Disables registering memory on peerDevice for direct access from the current device.
*
* If there are any allocations on peerDevice which were registered in the current device using hipPeerRegister() then these allocations will be automatically unregistered.
* Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been enabled from the current device.
*
* @param [in] peerDevice
* TODO:cudaErrorPeerAccessNotEnabled and cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown
* Returns #hipSuccess, #hipErrorUnknown
*/
hipError_t hipDeviceDisablePeerAccess ( int peerDevice );
// TODO-DOC
/**
* @brief Enables registering memory on peerDevice for direct access from the current device.
*
* @param [in] peerDevice
* @param [in] flags
*
* TODO:cudaErrorInvalidDevice error not supported in HIP, return hipErrorUnknown
* Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorUnknown
*/
hipError_t hipDeviceEnablePeerAccess ( int peerDevice, unsigned int flags );
// TODO-DOC
/**
* @brief Copies memory from one device to memory on another device.
*
* @param [out] dst - Destination device pointer.
* @param [in] dstDevice - Destination device
* @param [in] src - Source device pointer
* @param [in] srcDevice - Source device
* @param [in] sizeBytes - Size of memory copy in bytes
*
* Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
*/
hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes );
// TODO-DOC
/**
* @brief Copies memory from one device to memory on another device.
*
* @param [out] dst - Destination device pointer.
* @param [in] dstDevice - Destination device
* @param [in] src - Source device pointer
* @param [in] srcDevice - Source device
* @param [in] sizeBytes - Size of memory copy in bytes
* @param [in] stream - Stream identifier
*
* Returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice
*/
hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t sizeBytes, hipStream_t stream=0 );
// doxygen end PeerToPeer
/**
* @}
* @}
*/
@@ -788,14 +855,14 @@ hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int
*
*/
/**
* @brief Returns the approximate HIP driver versin.
/**
* @brief Returns the approximate HIP driver version.
*
* @warning The HIP feature set does not correpond to an exact CUDA SDK driver revision.
* @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision.
* This function always sets *driverVersion to 4 as an approximation though HIP supports
* some features which were introduced in later CUDA SDK revisions.
* some features which were introduced in later CUDA SDK revisions.
* HIP apps code should not rely on the driver revision number here and should
* use arch feature flags to test device capabiliies or conditional compilation.
* use arch feature flags to test device capabilities or conditional compilation.
*
*/
hipError_t hipDriverGetVersion(int *driverVersion) ;
@@ -804,7 +871,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
// doxygen end Version Management
/**
* @}
* @}
*/
@@ -813,9 +880,9 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
*-------------------------------------------------------------------------------------------------
* @defgroup Profiler Control
* @{
*
*
* The cudaProfilerInitialize API format for "configFile" is not supported.
*
* @warning The cudaProfilerInitialize API format for "configFile" is not supported.
*
* On AMD platforms, hipProfilerStart and hipProfilerStop require installation of AMD's GPU
* perf counter API and defining GPU_PERF
@@ -823,7 +890,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
/**
* @}
* @}
*/
@@ -839,7 +906,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
*-------------------------------------------------------------------------------------------------
* @defgroup HCC_Specific HCC-Specific Accessors
* @{
*
*
* The following calls are only supported when compiling HIP with HCC.
* To produce portable code, use of these calls must be guarded by #ifdef checks:
* @code
@@ -851,15 +918,15 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
*
*/
#ifdef __HCC__
#ifdef __HCC__
#include <hc.hpp>
/**
* @brief Return hc::acclerator associated with the specified deviceId
* @brief Return hc::accelerator associated with the specified deviceId
*/
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc);
/**
* @brief Return hc::acclerator_view associated with the specified stream
* @brief Return hc::accelerator_view associated with the specified stream
*/
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av);
#endif
@@ -867,7 +934,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
// end-group HCC_Specific
/**
* @}
* @}
*/
+21 -21
View File
@@ -21,11 +21,11 @@ THE SOFTWARE.
*/
#pragma once
/**
* @file hip_kalmar_texture.h
* @file hcc_detail/hip_texture.h
* @brief HIP C++ Texture API for hcc compiler
*/
#include <limits.h>
#include <limits.h>
#include <hip_runtime.h>
@@ -38,13 +38,13 @@ typedef struct hipChannelFormatDesc {
int _dummy;
} hipChannelFormatDesc;
typedef enum hipTextureReadMode
typedef enum hipTextureReadMode
{
hipReadModeElementType, ///< Read texture as specified element type
//! @warning cudaReadModeNormalizedFloat is not supported.
} hipTextureReadMode;
typedef enum hipTextureFilterMode
typedef enum hipTextureFilterMode
{
hipFilterModePoint, ///< Point filter mode.
//! @warning cudaFilterModeLinear is not supported.
@@ -86,26 +86,26 @@ struct texture : public textureReference {
* @defgroup Texture Texture Reference Management
* @{
*
*
*
* @warning The HIP texture API implements a small subset of the full texture API. Known limitations include:
* - Only point sampling is supported.
* - Only C++ APIs are provided.
* - Many APIs and modes are not implemented.
* - Many APIs and modes are not implemented.
*
* The HIP texture support is intended to allow use of texture cache on hardware where this is beneficial.
*
* The following CUDA APIs are not currently supported:
* - cudaBindTexture2D
* - cudaBindTexture2D
* - cudaBindTextureToArray
* - cudaBindTextureToMipmappedArray
* - cudaGetChannelDesc
* - cudaBindTextureToMipmappedArray
* - cudaGetChannelDesc
* - cudaGetTextureReference
*
*/
// C API:
#if 0
hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX)
hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX)
{
tex->_dataPtr = devPtr;
}
@@ -116,7 +116,7 @@ hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *te
**/
// TODO
template <class T>
hipChannelFormatDesc hipCreateChannelDesc()
hipChannelFormatDesc hipCreateChannelDesc()
{
hipChannelFormatDesc desc;
return desc;
@@ -127,11 +127,11 @@ hipChannelFormatDesc hipCreateChannelDesc()
**/
// TODO-doc
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTexture(size_t *offset,
struct texture<T, dim, readMode> &tex,
const void *devPtr,
const struct hipChannelFormatDesc *desc,
size_t size=UINT_MAX)
hipError_t hipBindTexture(size_t *offset,
struct texture<T, dim, readMode> &tex,
const void *devPtr,
const struct hipChannelFormatDesc *desc,
size_t size=UINT_MAX)
{
tex._dataPtr = static_cast<const T*>(devPtr);
@@ -144,10 +144,10 @@ hipError_t hipBindTexture(size_t *offset,
**/
// TODO-doc
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTexture(size_t *offset,
struct texture<T, dim, readMode> &tex,
const void *devPtr,
size_t size=UINT_MAX)
hipError_t hipBindTexture(size_t *offset,
struct texture<T, dim, readMode> &tex,
const void *devPtr,
size_t size=UINT_MAX)
{
return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size);
}
@@ -169,7 +169,7 @@ hipError_t hipUnbindTexture(struct texture<T, dim, readMode> *tex)
// doxygen end Texture
/**
* @}
* @}
*/
+6
View File
@@ -19,6 +19,12 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @file hcc_detail/hip_vector_types.h
* @brief Defines the different vector types for the HIP runtime.
*/
#if defined (__HCC__) && (__hcc_workweek__ < 16032)
#error("This version of HIP requires a newer version of HCC.");
#endif
+15 -9
View File
@@ -19,6 +19,12 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @file hcc_detail/host_defines.h
* @brief TODO-doc
*/
#ifdef __HCC__
/**
* Function and kernel markers
@@ -29,7 +35,7 @@ THE SOFTWARE.
#ifndef DISABLE_GRID_LAUNCH
#define __global__ __attribute__((hc_grid_launch))
#else
#define __global__
#define __global__
#endif
#define __noinline__ __attribute__((noinline))
@@ -44,20 +50,20 @@ THE SOFTWARE.
#define __shared__ tile_static
#define __constant__ __attribute__((address_space(2)))
#else
#else
// Non-HCC compiler
/**
* Function and kernel markers
*/
#define __host__
#define __device__
#define __host__
#define __device__
#define __global__
#define __global__
#define __noinline__
#define __forceinline__
#define __noinline__
#define __forceinline__
#define __shared__
#define __constant__
#define __shared__
#define __constant__
#endif
+14 -14
View File
@@ -22,7 +22,7 @@ THE SOFTWARE.
/**
* @file hip_runtime_api.h
*
* Defines the API signatures for HIP runtime.
* @brief Defines the API signatures for HIP runtime.
* This file can be compiled with a standard compiler.
*/
@@ -48,8 +48,8 @@ typedef struct {
unsigned hasDoubles : 1; ///< double-precision floating point.
// Warp cross-lane operations:
unsigned hasWarpVote : 1; ///< warp vote instructions (__any, __all)
unsigned hasWarpBallot : 1; ///< warp ballot instructions (__ballot)
unsigned hasWarpVote : 1; ///< warp vote instructions (__any, __all)
unsigned hasWarpBallot : 1; ///< warp ballot instructions (__ballot)
unsigned hasWarpShuffle : 1; ///< warp shuffle operations. (__shfl_*)
unsigned hasFunnelShift : 1; ///< funnel two words into one, with shift&mask caps
@@ -60,7 +60,7 @@ typedef struct {
// Misc
unsigned hasSurfaceFuncs : 1; ///< Surface functions
unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D)
unsigned hasDynamicParallelism : 1; ///< Dynamic parallellism
unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism
} hipDeviceArch_t;
@@ -68,16 +68,16 @@ typedef struct {
// Common headers for both NVCC and HCC paths:
/**
* hipDeviceProp
* hipDeviceProp
*
*/
typedef struct hipDeviceProp_t {
char name[256]; ///< Device name
size_t totalGlobalMem; ///< Size of global memory region (in bytes)
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes)
int regsPerBlock ; ///< registers per block
int warpSize ; ///< warp size
int maxThreadsPerBlock; ///< max work items per work group or workgroup max size
int regsPerBlock ; ///< registers per block
int warpSize ; ///< warp size
int maxThreadsPerBlock; ///< max work items per work group or workgroup max size
int maxThreadsDim[3]; ///< max number of threads in each dimension (XYZ) of a block
int maxGridSize[3]; ///< max grid dimensions (XYZ)
int clockRate ; ///< max clock frequency of the multiProcessors, in kHz.
@@ -89,7 +89,7 @@ typedef struct hipDeviceProp_t {
int l2CacheSize; ///< L2 cache size
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor
int computeMode; ///< Compute mode
int clockInstructionRate ; ///< Frequency in kHz of the timer used by the device-side "clock*" instructions. New for HIP.
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
@@ -97,10 +97,10 @@ typedef struct hipDeviceProp_t {
// hack to get these to show up in Doxygen:
/**
/**
* @defgroup GlobalDefs Global enum and defines
* @{
*
*
*/
@@ -115,10 +115,10 @@ typedef enum hipError_t {
,hipErrorMemoryFree ///< Memory free error.
,hipErrorUnknownSymbol ///< Unknown symbol
,hipErrorOutOfResources ///< Out of resources error
,hipErrorInvalidValue ///< One or more of the paramters passed to the API call is NULL or not in an acceptable range.
,hipErrorInvalidValue ///< One or more of the parameters passed to the API call is NULL or not in an acceptable range.
,hipErrorInvalidResourceHandle ///< Resource handle (hipEvent_t or hipStream_t) invalid.
,hipErrorInvalidDevice ///< DeviceID must be in range 0...#compute-devices.
,hipErrorNoDevice ///< Call to cudaGetDeviceCount returned 0 devices
,hipErrorNoDevice ///< Call to hipGetDeviceCount returned 0 devices
,hipErrorNotReady ///< indicates that asynchronous operations enqueued earlier are not ready. This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion). APIs that return this error include hipEventQuery and hipStreamQuery.
,hipErrorUnknown ///< Unknown error
@@ -159,7 +159,7 @@ typedef enum hipDeviceAttribute_t {
#include "hcc_detail/hip_runtime_api.h"
#elif defined(__HIP_PLATFORM_NVCC__) and not defined (__HIP_PLATFORM_HCC__)
#include "nvcc_detail/hip_runtime_api.h"
#else
#else
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
#endif