diff --git a/CHANGELOG.md b/CHANGELOG.md index 088571ad0c..c780e7c57d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,10 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs - `hipStreamSetAttribute` sets attributes such as synchronization policy for a given stream - `hipStreamGetAttribute` returns attributes such as priority for a given stream - `hipModuleLoadFatBinary` loads fatbin binary to a module + - `hipMemcpyBatchAsync` Performs a batch of 1D or 2D memory copied asynchronously + - `hipMemcpy3DBatchAsync` Performs a batch of 3D memory copied asynchronously + - `hipMemcpy3DPeer` Copies memory between devices + - `hipMemcpy3DPeerAsync`Copied memory between devices asynchronously ## HIP 7.0 for ROCm 7.0 diff --git a/hipamd/include/hip/amd_detail/hip_api_trace.hpp b/hipamd/include/hip/amd_detail/hip_api_trace.hpp index 2df94bf9f0..ca3eac3355 100644 --- a/hipamd/include/hip/amd_detail/hip_api_trace.hpp +++ b/hipamd/include/hip/amd_detail/hip_api_trace.hpp @@ -1064,6 +1064,14 @@ typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrI typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr, hipStreamAttrValue *value_out); typedef hipError_t (*t_hipModuleLoadFatBinary)(hipModule_t* module, const void* fatbin); +typedef hipError_t (*t_hipMemcpyBatchAsync) (void **dsts, void **srcs, size_t *sizes, size_t count, + hipMemcpyAttributes *attrs, size_t *attrsIdxs, + size_t numAttrs, size_t *failIdx, hipStream_t stream); +typedef hipError_t (*t_hipMemcpy3DBatchAsync) (size_t numOps, struct hipMemcpy3DBatchOp *opList, + size_t *failIdx, unsigned long long flags, + hipStream_t stream); +typedef hipError_t (*t_hipMemcpy3DPeer) (hipMemcpy3DPeerParms *p); +typedef hipError_t (*t_hipMemcpy3DPeerAsync) (hipMemcpy3DPeerParms *p, hipStream_t stream); // HIP Compiler dispatch table struct HipCompilerDispatchTable { @@ -1617,6 +1625,10 @@ struct HipDispatchTable { t_hipStreamGetAttribute hipStreamGetAttribute_fn; t_hipStreamSetAttribute hipStreamSetAttribute_fn; t_hipModuleLoadFatBinary hipModuleLoadFatBinary_fn; + t_hipMemcpyBatchAsync hipMemcpyBatchAsync_fn; + t_hipMemcpy3DBatchAsync hipMemcpy3DBatchAsync_fn; + t_hipMemcpy3DPeer hipMemcpy3DPeer_fn; + t_hipMemcpy3DPeerAsync hipMemcpy3DPeerAsync_fn; // HIP_RUNTIME_API_TABLE_STEP_VERSION = 14 // removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams @@ -1659,4 +1671,4 @@ struct HipToolsDispatchTable { // 4) GENERATE COMMENT FOR NEXT STEP VERSION // 5) ADD "DO NOT EDIT ABOVE!" COMMENT // ******************************************************************************************* // -}; +}; \ No newline at end of file diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index 38cd9657e4..102848d2e5 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -448,7 +448,11 @@ enum hip_api_id_t { HIP_API_ID_hipStreamGetAttribute = 428, HIP_API_ID_hipStreamSetAttribute = 429, HIP_API_ID_hipModuleLoadFatBinary = 430, - HIP_API_ID_LAST = 430, + HIP_API_ID_hipMemcpy3DBatchAsync = 431, + HIP_API_ID_hipMemcpy3DPeer = 432, + HIP_API_ID_hipMemcpy3DPeerAsync = 433, + HIP_API_ID_hipMemcpyBatchAsync = 434, + HIP_API_ID_LAST = 434, HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice), HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties), @@ -778,11 +782,15 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipMemcpy2DToArrayAsync: return "hipMemcpy2DToArrayAsync"; case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D"; case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync"; + case HIP_API_ID_hipMemcpy3DBatchAsync: return "hipMemcpy3DBatchAsync"; + case HIP_API_ID_hipMemcpy3DPeer: return "hipMemcpy3DPeer"; + case HIP_API_ID_hipMemcpy3DPeerAsync: return "hipMemcpy3DPeerAsync"; case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync"; case HIP_API_ID_hipMemcpyAtoA: return "hipMemcpyAtoA"; case HIP_API_ID_hipMemcpyAtoD: return "hipMemcpyAtoD"; case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH"; case HIP_API_ID_hipMemcpyAtoHAsync: return "hipMemcpyAtoHAsync"; + case HIP_API_ID_hipMemcpyBatchAsync: return "hipMemcpyBatchAsync"; case HIP_API_ID_hipMemcpyDtoA: return "hipMemcpyDtoA"; case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD"; case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync"; @@ -1203,11 +1211,15 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipMemcpy2DToArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DToArrayAsync; if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D; if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync; + if (strcmp("hipMemcpy3DBatchAsync", name) == 0) return HIP_API_ID_hipMemcpy3DBatchAsync; + if (strcmp("hipMemcpy3DPeer", name) == 0) return HIP_API_ID_hipMemcpy3DPeer; + if (strcmp("hipMemcpy3DPeerAsync", name) == 0) return HIP_API_ID_hipMemcpy3DPeerAsync; if (strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync; if (strcmp("hipMemcpyAtoA", name) == 0) return HIP_API_ID_hipMemcpyAtoA; if (strcmp("hipMemcpyAtoD", name) == 0) return HIP_API_ID_hipMemcpyAtoD; if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH; if (strcmp("hipMemcpyAtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyAtoHAsync; + if (strcmp("hipMemcpyBatchAsync", name) == 0) return HIP_API_ID_hipMemcpyBatchAsync; if (strcmp("hipMemcpyDtoA", name) == 0) return HIP_API_ID_hipMemcpyDtoA; if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD; if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync; @@ -3064,6 +3076,24 @@ typedef struct hip_api_data_s { hipMemcpy3DParms p__val; hipStream_t stream; } hipMemcpy3DAsync; + struct { + size_t numOps; + hipMemcpy3DBatchOp* opList; + hipMemcpy3DBatchOp opList__val; + size_t* failIdx; + size_t failIdx__val; + unsigned long long flags; + hipStream_t stream; + } hipMemcpy3DBatchAsync; + struct { + hipMemcpy3DPeerParms* p; + hipMemcpy3DPeerParms p__val; + } hipMemcpy3DPeer; + struct { + hipMemcpy3DPeerParms* p; + hipMemcpy3DPeerParms p__val; + hipStream_t stream; + } hipMemcpy3DPeerAsync; struct { void* dst; const void* src; @@ -3097,6 +3127,23 @@ typedef struct hip_api_data_s { size_t ByteCount; hipStream_t stream; } hipMemcpyAtoHAsync; + struct { + void** dsts; + void* dsts__val; + void** srcs; + void* srcs__val; + size_t* sizes; + size_t sizes__val; + size_t count; + hipMemcpyAttributes* attrs; + hipMemcpyAttributes attrs__val; + size_t* attrsIdxs; + size_t attrsIdxs__val; + size_t numAttrs; + size_t* failIdx; + size_t failIdx__val; + hipStream_t stream; + } hipMemcpyBatchAsync; struct { hipArray_t dstArray; size_t dstOffset; @@ -5586,6 +5633,23 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \ cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \ }; +// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy3DBatchAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DBatchAsync.numOps = (size_t)numOps; \ + cb_data.args.hipMemcpy3DBatchAsync.opList = (hipMemcpy3DBatchOp*)opList; \ + cb_data.args.hipMemcpy3DBatchAsync.failIdx = (size_t*)failIdx; \ + cb_data.args.hipMemcpy3DBatchAsync.flags = (unsigned long long)flags; \ + cb_data.args.hipMemcpy3DBatchAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')] +#define INIT_hipMemcpy3DPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DPeer.p = (hipMemcpy3DPeerParms*)p; \ +}; +// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy3DPeerAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DPeerAsync.p = (hipMemcpy3DPeerParms*)p; \ + cb_data.args.hipMemcpy3DPeerAsync.stream = (hipStream_t)stream; \ +}; // hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] #define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipMemcpyAsync.dst = (void*)dst; \ @@ -5624,6 +5688,18 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpyAtoHAsync.ByteCount = (size_t)ByteCount; \ cb_data.args.hipMemcpyAtoHAsync.stream = (hipStream_t)stream; \ }; +// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyBatchAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyBatchAsync.dsts = (void**)dsts; \ + cb_data.args.hipMemcpyBatchAsync.srcs = (void**)srcs; \ + cb_data.args.hipMemcpyBatchAsync.sizes = (size_t*)sizes; \ + cb_data.args.hipMemcpyBatchAsync.count = (size_t)count; \ + cb_data.args.hipMemcpyBatchAsync.attrs = (hipMemcpyAttributes*)attrs; \ + cb_data.args.hipMemcpyBatchAsync.attrsIdxs = (size_t*)attrsIdxs; \ + cb_data.args.hipMemcpyBatchAsync.numAttrs = (size_t)numAttrs; \ + cb_data.args.hipMemcpyBatchAsync.failIdx = (size_t*)failIdx; \ + cb_data.args.hipMemcpyBatchAsync.stream = (hipStream_t)stream; \ +}; // hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')] #define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipMemcpyDtoA.dstArray = (hipArray_t)dstArray; \ @@ -7627,6 +7703,19 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { case HIP_API_ID_hipMemcpy3DAsync: if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p); break; +// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy3DBatchAsync: + if (data->args.hipMemcpy3DBatchAsync.opList) data->args.hipMemcpy3DBatchAsync.opList__val = *(data->args.hipMemcpy3DBatchAsync.opList); + if (data->args.hipMemcpy3DBatchAsync.failIdx) data->args.hipMemcpy3DBatchAsync.failIdx__val = *(data->args.hipMemcpy3DBatchAsync.failIdx); + break; +// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')] + case HIP_API_ID_hipMemcpy3DPeer: + if (data->args.hipMemcpy3DPeer.p) data->args.hipMemcpy3DPeer.p__val = *(data->args.hipMemcpy3DPeer.p); + break; +// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy3DPeerAsync: + if (data->args.hipMemcpy3DPeerAsync.p) data->args.hipMemcpy3DPeerAsync.p__val = *(data->args.hipMemcpy3DPeerAsync.p); + break; // hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpyAsync: break; @@ -7642,6 +7731,15 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipMemcpyAtoHAsync[('void*', 'dstHost'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpyAtoHAsync: break; +// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyBatchAsync: + if (data->args.hipMemcpyBatchAsync.dsts) data->args.hipMemcpyBatchAsync.dsts__val = *(data->args.hipMemcpyBatchAsync.dsts); + if (data->args.hipMemcpyBatchAsync.srcs) data->args.hipMemcpyBatchAsync.srcs__val = *(data->args.hipMemcpyBatchAsync.srcs); + if (data->args.hipMemcpyBatchAsync.sizes) data->args.hipMemcpyBatchAsync.sizes__val = *(data->args.hipMemcpyBatchAsync.sizes); + if (data->args.hipMemcpyBatchAsync.attrs) data->args.hipMemcpyBatchAsync.attrs__val = *(data->args.hipMemcpyBatchAsync.attrs); + if (data->args.hipMemcpyBatchAsync.attrsIdxs) data->args.hipMemcpyBatchAsync.attrsIdxs__val = *(data->args.hipMemcpyBatchAsync.attrsIdxs); + if (data->args.hipMemcpyBatchAsync.failIdx) data->args.hipMemcpyBatchAsync.failIdx__val = *(data->args.hipMemcpyBatchAsync.failIdx); + break; // hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')] case HIP_API_ID_hipMemcpyDtoA: break; @@ -10436,6 +10534,30 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.stream); oss << ")"; break; + case HIP_API_ID_hipMemcpy3DBatchAsync: + oss << "hipMemcpy3DBatchAsync("; + oss << "numOps="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.numOps); + if (data->args.hipMemcpy3DBatchAsync.opList == NULL) oss << ", opList=NULL"; + else { oss << ", opList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.opList__val); } + if (data->args.hipMemcpy3DBatchAsync.failIdx == NULL) oss << ", failIdx=NULL"; + else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.failIdx__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.flags); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DPeer: + oss << "hipMemcpy3DPeer("; + if (data->args.hipMemcpy3DPeer.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeer.p__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DPeerAsync: + oss << "hipMemcpy3DPeerAsync("; + if (data->args.hipMemcpy3DPeerAsync.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.p__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.stream); + oss << ")"; + break; case HIP_API_ID_hipMemcpyAsync: oss << "hipMemcpyAsync("; oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.dst); @@ -10479,6 +10601,25 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.stream); oss << ")"; break; + case HIP_API_ID_hipMemcpyBatchAsync: + oss << "hipMemcpyBatchAsync("; + if (data->args.hipMemcpyBatchAsync.dsts == NULL) oss << "dsts=NULL"; + else { oss << "dsts="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.dsts__val); } + if (data->args.hipMemcpyBatchAsync.srcs == NULL) oss << ", srcs=NULL"; + else { oss << ", srcs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.srcs__val); } + if (data->args.hipMemcpyBatchAsync.sizes == NULL) oss << ", sizes=NULL"; + else { oss << ", sizes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.sizes__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.count); + if (data->args.hipMemcpyBatchAsync.attrs == NULL) oss << ", attrs=NULL"; + else { oss << ", attrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrs__val); } + if (data->args.hipMemcpyBatchAsync.attrsIdxs == NULL) oss << ", attrsIdxs=NULL"; + else { oss << ", attrsIdxs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrsIdxs__val); } + oss << ", numAttrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.numAttrs); + if (data->args.hipMemcpyBatchAsync.failIdx == NULL) oss << ", failIdx=NULL"; + else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.failIdx__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.stream); + oss << ")"; + break; case HIP_API_ID_hipMemcpyDtoA: oss << "hipMemcpyDtoA("; oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.dstArray); diff --git a/hipamd/src/amdhip.def b/hipamd/src/amdhip.def index 20bf383857..fde675a3da 100644 --- a/hipamd/src/amdhip.def +++ b/hipamd/src/amdhip.def @@ -502,3 +502,7 @@ hipMemsetD2D32Async hipStreamGetAttribute hipStreamSetAttribute hipModuleLoadFatBinary +hipMemcpyBatchAsync +hipMemcpy3DBatchAsync +hipMemcpy3DPeer +hipMemcpy3DPeerAsync \ No newline at end of file diff --git a/hipamd/src/hip_api_trace.cpp b/hipamd/src/hip_api_trace.cpp index 910d5b5dd9..08dd982f9a 100644 --- a/hipamd/src/hip_api_trace.cpp +++ b/hipamd/src/hip_api_trace.cpp @@ -848,6 +848,13 @@ hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, hipStreamAttrValue *value); hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, const hipStreamAttrValue *value); +hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count, + hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs, + size_t *failIdx, hipStream_t stream); +hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx, + unsigned long long flags, hipStream_t stream); +hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p); +hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream); } // namespace hip namespace hip { @@ -1373,6 +1380,10 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) { ptrDispatchTable->hipMemsetD2D32Async_fn = hip::hipMemsetD2D32Async; ptrDispatchTable->hipStreamGetAttribute_fn = hip::hipStreamGetAttribute; ptrDispatchTable->hipStreamSetAttribute_fn = hip::hipStreamSetAttribute; + ptrDispatchTable->hipMemcpyBatchAsync_fn = hip::hipMemcpyBatchAsync; + ptrDispatchTable->hipMemcpy3DBatchAsync_fn = hip::hipMemcpy3DBatchAsync; + ptrDispatchTable->hipMemcpy3DPeer_fn = hip::hipMemcpy3DPeer; + ptrDispatchTable->hipMemcpy3DPeerAsync_fn = hip::hipMemcpy3DPeerAsync; } #if HIP_ROCPROFILER_REGISTER > 0 @@ -2027,13 +2038,17 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32Async_fn, 483); HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetAttribute_fn, 484); HIP_ENFORCE_ABI(HipDispatchTable, hipStreamSetAttribute_fn, 485); HIP_ENFORCE_ABI(HipDispatchTable, hipModuleLoadFatBinary_fn, 486); +HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpyBatchAsync_fn, 487); +HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DBatchAsync_fn, 488); +HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DPeer_fn, 489); +HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DPeerAsync_fn, 490); // if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below // will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.: // // HIP_ENFORCE_ABI(, , 8) // // HIP_ENFORCE_ABI_VERSIONING(
, 9) <- 8 + 1 = 9 -HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 487) +HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 491) static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14, "If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function " diff --git a/hipamd/src/hip_conversions.hpp b/hipamd/src/hip_conversions.hpp index b985f08782..a0c5982f51 100644 --- a/hipamd/src/hip_conversions.hpp +++ b/hipamd/src/hip_conversions.hpp @@ -978,4 +978,77 @@ inline size_t getElementSize(const hipChannelFormatDesc &desc) { return (desc.x / 8) * getNumChannels(desc); } -}; + +inline +hipMemcpy3DParms getMemcpy3DParms(const hipMemcpy3DBatchOp& desc) { + hipMemcpy3DParms params; + params.extent = desc.extent; + params.kind = hipMemcpyDefault; + + // infer elementSize + size_t elementSize = 1; + if (desc.src.type == hipMemcpyOperandTypeArray) { + elementSize = getElementSize(desc.src.op.array.array); + } else if (desc.dst.type == hipMemcpyOperandTypeArray) { + elementSize = getElementSize(desc.dst.op.array.array); + } + + // source + if (desc.src.type == hipMemcpyOperandTypePointer) { + size_t row = desc.src.op.ptr.rowLength; + size_t height = desc.src.op.ptr.layerHeight; + size_t spitch = (row ? row : desc.extent.width) * elementSize; + size_t swidth = (row ? row : desc.extent.width); + size_t sheight = (height ? height : desc.extent.height); + params.srcPtr = make_hipPitchedPtr( + desc.src.op.ptr.ptr, + spitch, + swidth, + sheight + ); + } else if (desc.src.type == hipMemcpyOperandTypeArray) { + params.srcArray = desc.src.op.array.array; + params.srcPos = make_hipPos( + desc.src.op.array.offset.x, + desc.src.op.array.offset.y, + desc.src.op.array.offset.z + ); + } + // dest + if (desc.dst.type == hipMemcpyOperandTypePointer) { + size_t row = desc.dst.op.ptr.rowLength; + size_t height = desc.dst.op.ptr.layerHeight; + size_t spitch = (row ? row : desc.extent.width) * elementSize; + size_t swidth = (row ? row : desc.extent.width); + size_t sheight = (height ? height : desc.extent.height); + params.dstPtr = make_hipPitchedPtr( + desc.dst.op.ptr.ptr, + spitch, + swidth, + sheight + ); + } else if (desc.dst.type == hipMemcpyOperandTypeArray) { + params.dstArray = desc.dst.op.array.array; + params.dstPos = make_hipPos( + desc.dst.op.array.offset.x, + desc.dst.op.array.offset.y, + desc.dst.op.array.offset.z + ); + } + return params; +} + +inline +hipMemcpy3DParms getMemcpy3DParms(const hipMemcpy3DPeerParms& desc) { + hipMemcpy3DParms params; + params.srcArray = desc.srcArray; + params.srcPos = desc.srcPos; + params.srcPtr = desc.srcPtr; + params.dstArray = desc.dstArray; + params.dstPos = desc.dstPos; + params.dstPtr = desc.dstPtr; + params.extent = desc.extent; + params.kind = hipMemcpyDeviceToDevice; + return params; +} +}; \ No newline at end of file diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index 1bc44391c4..06ec368c6c 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -618,6 +618,10 @@ global: hipStreamGetAttribute; hipStreamSetAttribute; hipModuleLoadFatBinary; + hipMemcpyBatchAsync; + hipMemcpy3DBatchAsync; + hipMemcpy3DPeer; + hipMemcpy3DPeerAsync; local: *; } hip_6.5; \ No newline at end of file diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp index c218f27a9a..f21e761bed 100644 --- a/hipamd/src/hip_internal.hpp +++ b/hipamd/src/hip_internal.hpp @@ -680,6 +680,8 @@ public: size_t sizeBytes); hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hip::Stream& stream, bool isHostAsync = false, bool isGPUAsync = true); + hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream = nullptr, + bool isAsync = false); constexpr bool kOptionChangeable = true; constexpr bool kNewDevProg = false; diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index ad6f1c0821..d316082f5c 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -2759,7 +2759,7 @@ hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p return ihipGetMemcpyParam3DCommand(command, &desc, stream); } -hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { +hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) { hipError_t status = ihipMemcpy3D_validate(p); if (status != hipSuccess) { return status; @@ -2808,10 +2808,72 @@ hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) { hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { HIP_INIT_API(hipDrvMemcpy3DAsync, pCopy, stream); - HIP_RETURN_DURATION(ihipMemcpyParam3D(pCopy, stream, true)); } +hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count, + hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs, + size_t *failIdx, hipStream_t stream) { + HIP_INIT_API(hipMemcpyBatchAsync, dsts, srcs, sizes, count, attrs, attrsIdxs, numAttrs, failIdx, + stream); + // validate stream + if(!hip::isValid(stream)) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + // validate inputs + if (dsts == nullptr || srcs == nullptr || sizes == nullptr || failIdx == nullptr || + count == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + // no support for memcpy attributes + if (numAttrs > 0) { + HIP_RETURN(hipErrorNotSupported); + } + + hipError_t status = hipSuccess; + + *failIdx = SIZE_MAX; + for (int i = 0; i < count; ++i) { + if (sizes[i] == 0) { + *failIdx = i; + status = hipErrorInvalidValue; + break; + } + status = ihipMemcpy(dsts[i], srcs[i], sizes[i], hipMemcpyDefault, *hip::getStream(stream), + true, true); + if (status != hipSuccess) { + *failIdx = i; + break; + } + } + HIP_RETURN(status); +} + +hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx, + unsigned long long flags, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DBatchAsync, numOps, opList, failIdx, flags, stream); + if (flags != 0 || opList == nullptr || numOps == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + hipError_t status = hipSuccess; + + *failIdx = SIZE_MAX; + for (int i = 0; i < numOps; ++i) { + hipMemcpy3DParms parms = getMemcpy3DParms(opList[i]); + status = ihipMemcpy3D(&parms, stream, true); + if (status != hipSuccess) { + *failIdx = i; + break; + } + } + HIP_RETURN(status); +} + hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset, int64_t value, size_t valueSize, size_t sizeBytes, hip::Stream* stream) { diff --git a/hipamd/src/hip_peer.cpp b/hipamd/src/hip_peer.cpp index 6eb2ae0b59..b642977216 100644 --- a/hipamd/src/hip_peer.cpp +++ b/hipamd/src/hip_peer.cpp @@ -21,6 +21,8 @@ #include #include "hip_internal.hpp" +#include "hip_conversions.hpp" + namespace hip { @@ -217,6 +219,39 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice, *hip_stream, true, true)); } +hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p) { + HIP_INIT_API(hipMemcpy3DPeer, p); + if (p == NULL) { + HIP_RETURN(hipErrorInvalidValue); + } + if (p->srcDevice >= static_cast(g_devices.size()) || + p->dstDevice >= static_cast(g_devices.size()) || p->srcDevice < 0 || p->dstDevice < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + hipMemcpy3DParms copyParms = getMemcpy3DParms(*p); + HIP_RETURN(ihipMemcpy3D(©Parms, nullptr)); +} + +hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DPeerAsync, p, stream); + if (p == NULL) { + HIP_RETURN(hipErrorInvalidValue); + } + if (p->srcDevice >= static_cast(g_devices.size()) || + p->dstDevice >= static_cast(g_devices.size()) || p->srcDevice < 0 || p->dstDevice < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + + getStreamPerThread(stream); + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + + hipMemcpy3DParms copyParms = getMemcpy3DParms(*p); + HIP_RETURN(ihipMemcpy3D(©Parms), stream, true); +} + hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags); diff --git a/hipamd/src/hip_table_interface.cpp b/hipamd/src/hip_table_interface.cpp index 8ec4e2a382..f3616a18ee 100644 --- a/hipamd/src/hip_table_interface.cpp +++ b/hipamd/src/hip_table_interface.cpp @@ -1926,4 +1926,21 @@ hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, hipStreamAttrValue *value) { return hip::GetHipDispatchTable()->hipStreamGetAttribute_fn(stream, attr, value); +} +hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count, + hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs, + size_t *failIdx, hipStream_t stream) { + return hip::GetHipDispatchTable()->hipMemcpyBatchAsync_fn(dsts, srcs, sizes, count, attrs, + attrsIdxs, numAttrs, failIdx, stream); +} +hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx, + unsigned long long flags, hipStream_t stream) { + return hip::GetHipDispatchTable()->hipMemcpy3DBatchAsync_fn(numOps, opList, failIdx, flags, + stream); +} +hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p) { + return hip::GetHipDispatchTable()->hipMemcpy3DPeer_fn(p); +} +hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream) { + return hip::GetHipDispatchTable()->hipMemcpy3DPeerAsync_fn(p, stream); } \ No newline at end of file