SWDEV-546295 - Add new APIs hipMemcpy3DBatchAsync and hipMemcpy3DPeer (#818)

Co-authored-by: Rahul Manocha <rmanocha@amd.com>
Этот коммит содержится в:
Manocha, Rahul
2025-08-15 13:18:29 -07:00
коммит произвёл GitHub
родитель 1385d159ba
Коммит e3cf50c96d
11 изменённых файлов: 375 добавлений и 6 удалений
+4
Просмотреть файл
@@ -17,6 +17,10 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
- `hipStreamSetAttribute` sets attributes such as synchronization policy for a given stream
- `hipStreamGetAttribute` returns attributes such as priority for a given stream
- `hipModuleLoadFatBinary` loads fatbin binary to a module
- `hipMemcpyBatchAsync` Performs a batch of 1D or 2D memory copied asynchronously
- `hipMemcpy3DBatchAsync` Performs a batch of 3D memory copied asynchronously
- `hipMemcpy3DPeer` Copies memory between devices
- `hipMemcpy3DPeerAsync`Copied memory between devices asynchronously
## HIP 7.0 for ROCm 7.0
+13 -1
Просмотреть файл
@@ -1064,6 +1064,14 @@ typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrI
typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr,
hipStreamAttrValue *value_out);
typedef hipError_t (*t_hipModuleLoadFatBinary)(hipModule_t* module, const void* fatbin);
typedef hipError_t (*t_hipMemcpyBatchAsync) (void **dsts, void **srcs, size_t *sizes, size_t count,
hipMemcpyAttributes *attrs, size_t *attrsIdxs,
size_t numAttrs, size_t *failIdx, hipStream_t stream);
typedef hipError_t (*t_hipMemcpy3DBatchAsync) (size_t numOps, struct hipMemcpy3DBatchOp *opList,
size_t *failIdx, unsigned long long flags,
hipStream_t stream);
typedef hipError_t (*t_hipMemcpy3DPeer) (hipMemcpy3DPeerParms *p);
typedef hipError_t (*t_hipMemcpy3DPeerAsync) (hipMemcpy3DPeerParms *p, hipStream_t stream);
// HIP Compiler dispatch table
struct HipCompilerDispatchTable {
@@ -1617,6 +1625,10 @@ struct HipDispatchTable {
t_hipStreamGetAttribute hipStreamGetAttribute_fn;
t_hipStreamSetAttribute hipStreamSetAttribute_fn;
t_hipModuleLoadFatBinary hipModuleLoadFatBinary_fn;
t_hipMemcpyBatchAsync hipMemcpyBatchAsync_fn;
t_hipMemcpy3DBatchAsync hipMemcpy3DBatchAsync_fn;
t_hipMemcpy3DPeer hipMemcpy3DPeer_fn;
t_hipMemcpy3DPeerAsync hipMemcpy3DPeerAsync_fn;
// HIP_RUNTIME_API_TABLE_STEP_VERSION = 14
// removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams
@@ -1659,4 +1671,4 @@ struct HipToolsDispatchTable {
// 4) GENERATE COMMENT FOR NEXT STEP VERSION
// 5) ADD "DO NOT EDIT ABOVE!" COMMENT
// ******************************************************************************************* //
};
};
+142 -1
Просмотреть файл
@@ -448,7 +448,11 @@ enum hip_api_id_t {
HIP_API_ID_hipStreamGetAttribute = 428,
HIP_API_ID_hipStreamSetAttribute = 429,
HIP_API_ID_hipModuleLoadFatBinary = 430,
HIP_API_ID_LAST = 430,
HIP_API_ID_hipMemcpy3DBatchAsync = 431,
HIP_API_ID_hipMemcpy3DPeer = 432,
HIP_API_ID_hipMemcpy3DPeerAsync = 433,
HIP_API_ID_hipMemcpyBatchAsync = 434,
HIP_API_ID_LAST = 434,
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
@@ -778,11 +782,15 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipMemcpy2DToArrayAsync: return "hipMemcpy2DToArrayAsync";
case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D";
case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync";
case HIP_API_ID_hipMemcpy3DBatchAsync: return "hipMemcpy3DBatchAsync";
case HIP_API_ID_hipMemcpy3DPeer: return "hipMemcpy3DPeer";
case HIP_API_ID_hipMemcpy3DPeerAsync: return "hipMemcpy3DPeerAsync";
case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync";
case HIP_API_ID_hipMemcpyAtoA: return "hipMemcpyAtoA";
case HIP_API_ID_hipMemcpyAtoD: return "hipMemcpyAtoD";
case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH";
case HIP_API_ID_hipMemcpyAtoHAsync: return "hipMemcpyAtoHAsync";
case HIP_API_ID_hipMemcpyBatchAsync: return "hipMemcpyBatchAsync";
case HIP_API_ID_hipMemcpyDtoA: return "hipMemcpyDtoA";
case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD";
case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync";
@@ -1203,11 +1211,15 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipMemcpy2DToArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DToArrayAsync;
if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D;
if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync;
if (strcmp("hipMemcpy3DBatchAsync", name) == 0) return HIP_API_ID_hipMemcpy3DBatchAsync;
if (strcmp("hipMemcpy3DPeer", name) == 0) return HIP_API_ID_hipMemcpy3DPeer;
if (strcmp("hipMemcpy3DPeerAsync", name) == 0) return HIP_API_ID_hipMemcpy3DPeerAsync;
if (strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync;
if (strcmp("hipMemcpyAtoA", name) == 0) return HIP_API_ID_hipMemcpyAtoA;
if (strcmp("hipMemcpyAtoD", name) == 0) return HIP_API_ID_hipMemcpyAtoD;
if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH;
if (strcmp("hipMemcpyAtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyAtoHAsync;
if (strcmp("hipMemcpyBatchAsync", name) == 0) return HIP_API_ID_hipMemcpyBatchAsync;
if (strcmp("hipMemcpyDtoA", name) == 0) return HIP_API_ID_hipMemcpyDtoA;
if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD;
if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync;
@@ -3064,6 +3076,24 @@ typedef struct hip_api_data_s {
hipMemcpy3DParms p__val;
hipStream_t stream;
} hipMemcpy3DAsync;
struct {
size_t numOps;
hipMemcpy3DBatchOp* opList;
hipMemcpy3DBatchOp opList__val;
size_t* failIdx;
size_t failIdx__val;
unsigned long long flags;
hipStream_t stream;
} hipMemcpy3DBatchAsync;
struct {
hipMemcpy3DPeerParms* p;
hipMemcpy3DPeerParms p__val;
} hipMemcpy3DPeer;
struct {
hipMemcpy3DPeerParms* p;
hipMemcpy3DPeerParms p__val;
hipStream_t stream;
} hipMemcpy3DPeerAsync;
struct {
void* dst;
const void* src;
@@ -3097,6 +3127,23 @@ typedef struct hip_api_data_s {
size_t ByteCount;
hipStream_t stream;
} hipMemcpyAtoHAsync;
struct {
void** dsts;
void* dsts__val;
void** srcs;
void* srcs__val;
size_t* sizes;
size_t sizes__val;
size_t count;
hipMemcpyAttributes* attrs;
hipMemcpyAttributes attrs__val;
size_t* attrsIdxs;
size_t attrsIdxs__val;
size_t numAttrs;
size_t* failIdx;
size_t failIdx__val;
hipStream_t stream;
} hipMemcpyBatchAsync;
struct {
hipArray_t dstArray;
size_t dstOffset;
@@ -5586,6 +5633,23 @@ typedef struct hip_api_data_s {
cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \
cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \
};
// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')]
#define INIT_hipMemcpy3DBatchAsync_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemcpy3DBatchAsync.numOps = (size_t)numOps; \
cb_data.args.hipMemcpy3DBatchAsync.opList = (hipMemcpy3DBatchOp*)opList; \
cb_data.args.hipMemcpy3DBatchAsync.failIdx = (size_t*)failIdx; \
cb_data.args.hipMemcpy3DBatchAsync.flags = (unsigned long long)flags; \
cb_data.args.hipMemcpy3DBatchAsync.stream = (hipStream_t)stream; \
};
// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')]
#define INIT_hipMemcpy3DPeer_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemcpy3DPeer.p = (hipMemcpy3DPeerParms*)p; \
};
// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')]
#define INIT_hipMemcpy3DPeerAsync_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemcpy3DPeerAsync.p = (hipMemcpy3DPeerParms*)p; \
cb_data.args.hipMemcpy3DPeerAsync.stream = (hipStream_t)stream; \
};
// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemcpyAsync.dst = (void*)dst; \
@@ -5624,6 +5688,18 @@ typedef struct hip_api_data_s {
cb_data.args.hipMemcpyAtoHAsync.ByteCount = (size_t)ByteCount; \
cb_data.args.hipMemcpyAtoHAsync.stream = (hipStream_t)stream; \
};
// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')]
#define INIT_hipMemcpyBatchAsync_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemcpyBatchAsync.dsts = (void**)dsts; \
cb_data.args.hipMemcpyBatchAsync.srcs = (void**)srcs; \
cb_data.args.hipMemcpyBatchAsync.sizes = (size_t*)sizes; \
cb_data.args.hipMemcpyBatchAsync.count = (size_t)count; \
cb_data.args.hipMemcpyBatchAsync.attrs = (hipMemcpyAttributes*)attrs; \
cb_data.args.hipMemcpyBatchAsync.attrsIdxs = (size_t*)attrsIdxs; \
cb_data.args.hipMemcpyBatchAsync.numAttrs = (size_t)numAttrs; \
cb_data.args.hipMemcpyBatchAsync.failIdx = (size_t*)failIdx; \
cb_data.args.hipMemcpyBatchAsync.stream = (hipStream_t)stream; \
};
// hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')]
#define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipMemcpyDtoA.dstArray = (hipArray_t)dstArray; \
@@ -7627,6 +7703,19 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
case HIP_API_ID_hipMemcpy3DAsync:
if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p);
break;
// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemcpy3DBatchAsync:
if (data->args.hipMemcpy3DBatchAsync.opList) data->args.hipMemcpy3DBatchAsync.opList__val = *(data->args.hipMemcpy3DBatchAsync.opList);
if (data->args.hipMemcpy3DBatchAsync.failIdx) data->args.hipMemcpy3DBatchAsync.failIdx__val = *(data->args.hipMemcpy3DBatchAsync.failIdx);
break;
// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')]
case HIP_API_ID_hipMemcpy3DPeer:
if (data->args.hipMemcpy3DPeer.p) data->args.hipMemcpy3DPeer.p__val = *(data->args.hipMemcpy3DPeer.p);
break;
// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemcpy3DPeerAsync:
if (data->args.hipMemcpy3DPeerAsync.p) data->args.hipMemcpy3DPeerAsync.p__val = *(data->args.hipMemcpy3DPeerAsync.p);
break;
// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemcpyAsync:
break;
@@ -7642,6 +7731,15 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
// hipMemcpyAtoHAsync[('void*', 'dstHost'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemcpyAtoHAsync:
break;
// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')]
case HIP_API_ID_hipMemcpyBatchAsync:
if (data->args.hipMemcpyBatchAsync.dsts) data->args.hipMemcpyBatchAsync.dsts__val = *(data->args.hipMemcpyBatchAsync.dsts);
if (data->args.hipMemcpyBatchAsync.srcs) data->args.hipMemcpyBatchAsync.srcs__val = *(data->args.hipMemcpyBatchAsync.srcs);
if (data->args.hipMemcpyBatchAsync.sizes) data->args.hipMemcpyBatchAsync.sizes__val = *(data->args.hipMemcpyBatchAsync.sizes);
if (data->args.hipMemcpyBatchAsync.attrs) data->args.hipMemcpyBatchAsync.attrs__val = *(data->args.hipMemcpyBatchAsync.attrs);
if (data->args.hipMemcpyBatchAsync.attrsIdxs) data->args.hipMemcpyBatchAsync.attrsIdxs__val = *(data->args.hipMemcpyBatchAsync.attrsIdxs);
if (data->args.hipMemcpyBatchAsync.failIdx) data->args.hipMemcpyBatchAsync.failIdx__val = *(data->args.hipMemcpyBatchAsync.failIdx);
break;
// hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')]
case HIP_API_ID_hipMemcpyDtoA:
break;
@@ -10436,6 +10534,30 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.stream);
oss << ")";
break;
case HIP_API_ID_hipMemcpy3DBatchAsync:
oss << "hipMemcpy3DBatchAsync(";
oss << "numOps="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.numOps);
if (data->args.hipMemcpy3DBatchAsync.opList == NULL) oss << ", opList=NULL";
else { oss << ", opList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.opList__val); }
if (data->args.hipMemcpy3DBatchAsync.failIdx == NULL) oss << ", failIdx=NULL";
else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.failIdx__val); }
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.flags);
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.stream);
oss << ")";
break;
case HIP_API_ID_hipMemcpy3DPeer:
oss << "hipMemcpy3DPeer(";
if (data->args.hipMemcpy3DPeer.p == NULL) oss << "p=NULL";
else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeer.p__val); }
oss << ")";
break;
case HIP_API_ID_hipMemcpy3DPeerAsync:
oss << "hipMemcpy3DPeerAsync(";
if (data->args.hipMemcpy3DPeerAsync.p == NULL) oss << "p=NULL";
else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.p__val); }
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.stream);
oss << ")";
break;
case HIP_API_ID_hipMemcpyAsync:
oss << "hipMemcpyAsync(";
oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.dst);
@@ -10479,6 +10601,25 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.stream);
oss << ")";
break;
case HIP_API_ID_hipMemcpyBatchAsync:
oss << "hipMemcpyBatchAsync(";
if (data->args.hipMemcpyBatchAsync.dsts == NULL) oss << "dsts=NULL";
else { oss << "dsts="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.dsts__val); }
if (data->args.hipMemcpyBatchAsync.srcs == NULL) oss << ", srcs=NULL";
else { oss << ", srcs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.srcs__val); }
if (data->args.hipMemcpyBatchAsync.sizes == NULL) oss << ", sizes=NULL";
else { oss << ", sizes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.sizes__val); }
oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.count);
if (data->args.hipMemcpyBatchAsync.attrs == NULL) oss << ", attrs=NULL";
else { oss << ", attrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrs__val); }
if (data->args.hipMemcpyBatchAsync.attrsIdxs == NULL) oss << ", attrsIdxs=NULL";
else { oss << ", attrsIdxs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrsIdxs__val); }
oss << ", numAttrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.numAttrs);
if (data->args.hipMemcpyBatchAsync.failIdx == NULL) oss << ", failIdx=NULL";
else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.failIdx__val); }
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.stream);
oss << ")";
break;
case HIP_API_ID_hipMemcpyDtoA:
oss << "hipMemcpyDtoA(";
oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.dstArray);
+4
Просмотреть файл
@@ -502,3 +502,7 @@ hipMemsetD2D32Async
hipStreamGetAttribute
hipStreamSetAttribute
hipModuleLoadFatBinary
hipMemcpyBatchAsync
hipMemcpy3DBatchAsync
hipMemcpy3DPeer
hipMemcpy3DPeerAsync
+16 -1
Просмотреть файл
@@ -848,6 +848,13 @@ hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
hipStreamAttrValue *value);
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
const hipStreamAttrValue *value);
hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count,
hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
size_t *failIdx, hipStream_t stream);
hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx,
unsigned long long flags, hipStream_t stream);
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p);
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream);
} // namespace hip
namespace hip {
@@ -1373,6 +1380,10 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipMemsetD2D32Async_fn = hip::hipMemsetD2D32Async;
ptrDispatchTable->hipStreamGetAttribute_fn = hip::hipStreamGetAttribute;
ptrDispatchTable->hipStreamSetAttribute_fn = hip::hipStreamSetAttribute;
ptrDispatchTable->hipMemcpyBatchAsync_fn = hip::hipMemcpyBatchAsync;
ptrDispatchTable->hipMemcpy3DBatchAsync_fn = hip::hipMemcpy3DBatchAsync;
ptrDispatchTable->hipMemcpy3DPeer_fn = hip::hipMemcpy3DPeer;
ptrDispatchTable->hipMemcpy3DPeerAsync_fn = hip::hipMemcpy3DPeerAsync;
}
#if HIP_ROCPROFILER_REGISTER > 0
@@ -2027,13 +2038,17 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32Async_fn, 483);
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetAttribute_fn, 484);
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamSetAttribute_fn, 485);
HIP_ENFORCE_ABI(HipDispatchTable, hipModuleLoadFatBinary_fn, 486);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpyBatchAsync_fn, 487);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DBatchAsync_fn, 488);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DPeer_fn, 489);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DPeerAsync_fn, 490);
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
//
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
//
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 487)
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 491)
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14,
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
+74 -1
Просмотреть файл
@@ -978,4 +978,77 @@ inline
size_t getElementSize(const hipChannelFormatDesc &desc) {
return (desc.x / 8) * getNumChannels(desc);
}
};
inline
hipMemcpy3DParms getMemcpy3DParms(const hipMemcpy3DBatchOp& desc) {
hipMemcpy3DParms params;
params.extent = desc.extent;
params.kind = hipMemcpyDefault;
// infer elementSize
size_t elementSize = 1;
if (desc.src.type == hipMemcpyOperandTypeArray) {
elementSize = getElementSize(desc.src.op.array.array);
} else if (desc.dst.type == hipMemcpyOperandTypeArray) {
elementSize = getElementSize(desc.dst.op.array.array);
}
// source
if (desc.src.type == hipMemcpyOperandTypePointer) {
size_t row = desc.src.op.ptr.rowLength;
size_t height = desc.src.op.ptr.layerHeight;
size_t spitch = (row ? row : desc.extent.width) * elementSize;
size_t swidth = (row ? row : desc.extent.width);
size_t sheight = (height ? height : desc.extent.height);
params.srcPtr = make_hipPitchedPtr(
desc.src.op.ptr.ptr,
spitch,
swidth,
sheight
);
} else if (desc.src.type == hipMemcpyOperandTypeArray) {
params.srcArray = desc.src.op.array.array;
params.srcPos = make_hipPos(
desc.src.op.array.offset.x,
desc.src.op.array.offset.y,
desc.src.op.array.offset.z
);
}
// dest
if (desc.dst.type == hipMemcpyOperandTypePointer) {
size_t row = desc.dst.op.ptr.rowLength;
size_t height = desc.dst.op.ptr.layerHeight;
size_t spitch = (row ? row : desc.extent.width) * elementSize;
size_t swidth = (row ? row : desc.extent.width);
size_t sheight = (height ? height : desc.extent.height);
params.dstPtr = make_hipPitchedPtr(
desc.dst.op.ptr.ptr,
spitch,
swidth,
sheight
);
} else if (desc.dst.type == hipMemcpyOperandTypeArray) {
params.dstArray = desc.dst.op.array.array;
params.dstPos = make_hipPos(
desc.dst.op.array.offset.x,
desc.dst.op.array.offset.y,
desc.dst.op.array.offset.z
);
}
return params;
}
inline
hipMemcpy3DParms getMemcpy3DParms(const hipMemcpy3DPeerParms& desc) {
hipMemcpy3DParms params;
params.srcArray = desc.srcArray;
params.srcPos = desc.srcPos;
params.srcPtr = desc.srcPtr;
params.dstArray = desc.dstArray;
params.dstPos = desc.dstPos;
params.dstPtr = desc.dstPtr;
params.extent = desc.extent;
params.kind = hipMemcpyDeviceToDevice;
return params;
}
};
+4
Просмотреть файл
@@ -618,6 +618,10 @@ global:
hipStreamGetAttribute;
hipStreamSetAttribute;
hipModuleLoadFatBinary;
hipMemcpyBatchAsync;
hipMemcpy3DBatchAsync;
hipMemcpy3DPeer;
hipMemcpy3DPeerAsync;
local:
*;
} hip_6.5;
+2
Просмотреть файл
@@ -680,6 +680,8 @@ public:
size_t sizeBytes);
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hip::Stream& stream, bool isHostAsync = false, bool isGPUAsync = true);
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream = nullptr,
bool isAsync = false);
constexpr bool kOptionChangeable = true;
constexpr bool kNewDevProg = false;
+64 -2
Просмотреть файл
@@ -2759,7 +2759,7 @@ hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p
return ihipGetMemcpyParam3DCommand(command, &desc, stream);
}
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) {
hipError_t status = ihipMemcpy3D_validate(p);
if (status != hipSuccess) {
return status;
@@ -2808,10 +2808,72 @@ hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) {
hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
HIP_INIT_API(hipDrvMemcpy3DAsync, pCopy, stream);
HIP_RETURN_DURATION(ihipMemcpyParam3D(pCopy, stream, true));
}
hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count,
hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
size_t *failIdx, hipStream_t stream) {
HIP_INIT_API(hipMemcpyBatchAsync, dsts, srcs, sizes, count, attrs, attrsIdxs, numAttrs, failIdx,
stream);
// validate stream
if(!hip::isValid(stream)) {
HIP_RETURN(hipErrorInvalidResourceHandle);
}
// validate inputs
if (dsts == nullptr || srcs == nullptr || sizes == nullptr || failIdx == nullptr ||
count == 0) {
HIP_RETURN(hipErrorInvalidValue);
}
// no support for memcpy attributes
if (numAttrs > 0) {
HIP_RETURN(hipErrorNotSupported);
}
hipError_t status = hipSuccess;
*failIdx = SIZE_MAX;
for (int i = 0; i < count; ++i) {
if (sizes[i] == 0) {
*failIdx = i;
status = hipErrorInvalidValue;
break;
}
status = ihipMemcpy(dsts[i], srcs[i], sizes[i], hipMemcpyDefault, *hip::getStream(stream),
true, true);
if (status != hipSuccess) {
*failIdx = i;
break;
}
}
HIP_RETURN(status);
}
hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx,
unsigned long long flags, hipStream_t stream) {
HIP_INIT_API(hipMemcpy3DBatchAsync, numOps, opList, failIdx, flags, stream);
if (flags != 0 || opList == nullptr || numOps == 0) {
HIP_RETURN(hipErrorInvalidValue);
}
if (!hip::isValid(stream)) {
HIP_RETURN(hipErrorInvalidResourceHandle);
}
hipError_t status = hipSuccess;
*failIdx = SIZE_MAX;
for (int i = 0; i < numOps; ++i) {
hipMemcpy3DParms parms = getMemcpy3DParms(opList[i]);
status = ihipMemcpy3D(&parms, stream, true);
if (status != hipSuccess) {
*failIdx = i;
break;
}
}
HIP_RETURN(status);
}
hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset,
int64_t value, size_t valueSize, size_t sizeBytes,
hip::Stream* stream) {
+35
Просмотреть файл
@@ -21,6 +21,8 @@
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
#include "hip_conversions.hpp"
namespace hip {
@@ -217,6 +219,39 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src
HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice, *hip_stream, true, true));
}
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p) {
HIP_INIT_API(hipMemcpy3DPeer, p);
if (p == NULL) {
HIP_RETURN(hipErrorInvalidValue);
}
if (p->srcDevice >= static_cast<int>(g_devices.size()) ||
p->dstDevice >= static_cast<int>(g_devices.size()) || p->srcDevice < 0 || p->dstDevice < 0) {
HIP_RETURN(hipErrorInvalidDevice);
}
hipMemcpy3DParms copyParms = getMemcpy3DParms(*p);
HIP_RETURN(ihipMemcpy3D(&copyParms, nullptr));
}
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream) {
HIP_INIT_API(hipMemcpy3DPeerAsync, p, stream);
if (p == NULL) {
HIP_RETURN(hipErrorInvalidValue);
}
if (p->srcDevice >= static_cast<int>(g_devices.size()) ||
p->dstDevice >= static_cast<int>(g_devices.size()) || p->srcDevice < 0 || p->dstDevice < 0) {
HIP_RETURN(hipErrorInvalidDevice);
}
getStreamPerThread(stream);
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipMemcpy3DParms copyParms = getMemcpy3DParms(*p);
HIP_RETURN(ihipMemcpy3D(&copyParms), stream, true);
}
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
+17
Просмотреть файл
@@ -1926,4 +1926,21 @@ hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
hipStreamAttrValue *value) {
return hip::GetHipDispatchTable()->hipStreamGetAttribute_fn(stream, attr, value);
}
hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count,
hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
size_t *failIdx, hipStream_t stream) {
return hip::GetHipDispatchTable()->hipMemcpyBatchAsync_fn(dsts, srcs, sizes, count, attrs,
attrsIdxs, numAttrs, failIdx, stream);
}
hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx,
unsigned long long flags, hipStream_t stream) {
return hip::GetHipDispatchTable()->hipMemcpy3DBatchAsync_fn(numOps, opList, failIdx, flags,
stream);
}
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p) {
return hip::GetHipDispatchTable()->hipMemcpy3DPeer_fn(p);
}
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream) {
return hip::GetHipDispatchTable()->hipMemcpy3DPeerAsync_fn(p, stream);
}