diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index 186c357f03..33a05e51a4 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -621,7 +621,7 @@ enum hipLimit_t { /** * Host memory allocation will follow numa policy set by user. -* @note This numa allocation falg is applicable on Linux, under development on Windows. +* @note This numa allocation flag is applicable on Linux, under development on Windows. */ #define hipHostMallocNumaUser 0x20000000 @@ -2837,13 +2837,30 @@ hipError_t hipMallocHost(void** ptr, size_t size); DEPRECATED("use hipHostMalloc instead") hipError_t hipMemAllocHost(void** ptr, size_t size); /** - * @brief Allocate device accessible page locked host memory + * @brief Allocates device accessible page locked (pinned) host memory * + * This API allocates pinned host memory which is mapped into the address space of all GPUs + * in the system, the memory can be accessed directly by the GPU device, and can be read or + * written with much higher bandwidth than pageable memory obtained with functions such as + * malloc(). + * + * Using the pinned host memory, applications can implement faster data transfers for HostToDevice + * and DeviceToHost. The runtime tracks the hipHostMalloc allocations and can avoid some of the + * setup required for regular unpinned memory. + * + * When the memory accesses are infrequent, zero-copy memory can be a good choice, for coherent + * allocation. GPU can directly access the host memory over the CPU/GPU interconnect, without need + * to copy the data. + * + * Currently the allocation granularity is 4KB for the API. + * + * Developers need to choose proper allocation flag with consideration of synchronization. + * * @param[out] ptr Pointer to the allocated host pinned memory * @param[in] size Requested memory size in bytes + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. * @param[in] flags Type of host memory allocation * - * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. * If no input for flags, it will be the default pinned memory allocation on the host. * * @return #hipSuccess, #hipErrorOutOfMemory @@ -2858,16 +2875,25 @@ hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); * * @ingroup Memory * @{ - * This section describes the managed memory management functions of HIP runtime API. + * This section describes the managed memory management functions of HIP runtime API. * - * @note The managed memory management APIs are implemented on Linux, under developement on Windows. + * @note The managed memory management APIs are implemented on Linux, under developement + * on Windows. * */ /** * @brief Allocates memory that will be automatically managed by HIP. * + * This API is used for managed memory, allows data be shared and accessible to both the CPU and + * GPU using a single pointer. + * + * The API returns the allocation pointer, managed by HMM, can be used further to execute kernels + * on device and fetch data between the host and device as needed. + * + * @note It is recommend to do the capability check before call this API. + * * @param [out] dev_ptr - pointer to allocated device memory - * @param [in] size - requested allocation size in bytes + * @param [in] size - requested allocation size in bytes, it should be granularity of 4KB * @param [in] flags - must be either hipMemAttachGlobal or hipMemAttachHost * (defaults to hipMemAttachGlobal) * @@ -2897,7 +2923,7 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, * @brief Advise about the usage of a given memory range to HIP. * * @param [in] dev_ptr pointer to memory to set the advice for - * @param [in] count size in bytes of the memory range + * @param [in] count size in bytes of the memory range, it should be 4KB alligned. * @param [in] advice advice to be applied for the specified memory range * @param [in] device device to apply the advice for * @@ -3006,12 +3032,12 @@ hipError_t hipStreamAttachMemAsync(hipStream_t stream, * The allocation comes from the memory pool associated with the stream's device. * * @note The default memory pool of a device contains device memory from that device. - * @note Basic stream ordering allows future work submitted into the same stream to use the allocation. - * Stream query, stream synchronize, and HIP events can be used to guarantee that the allocation - * operation completes before work submitted in a separate stream runs. - * @note During stream capture, this function results in the creation of an allocation node. In this case, - * the allocation is owned by the graph instead of the memory pool. The memory pool's properties - * are used to set the node's creation parameters. + * @note Basic stream ordering allows future work submitted into the same stream to use the + * allocation. Stream query, stream synchronize, and HIP events can be used to guarantee that + * the allocation operation completes before work submitted in a separate stream runs. + * @note During stream capture, this function results in the creation of an allocation node. + * In this case, the allocation is owned by the graph instead of the memory pool. The memory + * pool's properties are used to set the node's creation parameters. * * @param [out] dev_ptr Returned device pointer of memory allocation * @param [in] size Number of bytes to allocate @@ -3410,7 +3436,7 @@ hipError_t hipMemPoolImportPointer( * * @return #hipSuccess, #hipErrorOutOfMemory * - * @deprecated use hipHostMalloc() instead + * @warning This API is deprecated, use hipHostMalloc() instead */ DEPRECATED("use hipHostMalloc instead") hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); @@ -4572,8 +4598,8 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int s /** * @brief Create a context and set it as current/default context * - * @param [out] ctx Context to create - * @param [in] flags Context creation flags + * @param [out] ctx Context to create + * @param [in] flags Context creation flags * @param [in] device device handle * * @return #hipSuccess