SWDEV-546295 - Add new APIs hipMemcpy3DBatchAsync and hipMemcpy3DPeer (#818)
Co-authored-by: Rahul Manocha <rmanocha@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
1385d159ba
Коммит
e3cf50c96d
@@ -17,6 +17,10 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
|
||||
- `hipStreamSetAttribute` sets attributes such as synchronization policy for a given stream
|
||||
- `hipStreamGetAttribute` returns attributes such as priority for a given stream
|
||||
- `hipModuleLoadFatBinary` loads fatbin binary to a module
|
||||
- `hipMemcpyBatchAsync` Performs a batch of 1D or 2D memory copied asynchronously
|
||||
- `hipMemcpy3DBatchAsync` Performs a batch of 3D memory copied asynchronously
|
||||
- `hipMemcpy3DPeer` Copies memory between devices
|
||||
- `hipMemcpy3DPeerAsync`Copied memory between devices asynchronously
|
||||
|
||||
## HIP 7.0 for ROCm 7.0
|
||||
|
||||
|
||||
@@ -1064,6 +1064,14 @@ typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrI
|
||||
typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr,
|
||||
hipStreamAttrValue *value_out);
|
||||
typedef hipError_t (*t_hipModuleLoadFatBinary)(hipModule_t* module, const void* fatbin);
|
||||
typedef hipError_t (*t_hipMemcpyBatchAsync) (void **dsts, void **srcs, size_t *sizes, size_t count,
|
||||
hipMemcpyAttributes *attrs, size_t *attrsIdxs,
|
||||
size_t numAttrs, size_t *failIdx, hipStream_t stream);
|
||||
typedef hipError_t (*t_hipMemcpy3DBatchAsync) (size_t numOps, struct hipMemcpy3DBatchOp *opList,
|
||||
size_t *failIdx, unsigned long long flags,
|
||||
hipStream_t stream);
|
||||
typedef hipError_t (*t_hipMemcpy3DPeer) (hipMemcpy3DPeerParms *p);
|
||||
typedef hipError_t (*t_hipMemcpy3DPeerAsync) (hipMemcpy3DPeerParms *p, hipStream_t stream);
|
||||
|
||||
// HIP Compiler dispatch table
|
||||
struct HipCompilerDispatchTable {
|
||||
@@ -1617,6 +1625,10 @@ struct HipDispatchTable {
|
||||
t_hipStreamGetAttribute hipStreamGetAttribute_fn;
|
||||
t_hipStreamSetAttribute hipStreamSetAttribute_fn;
|
||||
t_hipModuleLoadFatBinary hipModuleLoadFatBinary_fn;
|
||||
t_hipMemcpyBatchAsync hipMemcpyBatchAsync_fn;
|
||||
t_hipMemcpy3DBatchAsync hipMemcpy3DBatchAsync_fn;
|
||||
t_hipMemcpy3DPeer hipMemcpy3DPeer_fn;
|
||||
t_hipMemcpy3DPeerAsync hipMemcpy3DPeerAsync_fn;
|
||||
|
||||
// HIP_RUNTIME_API_TABLE_STEP_VERSION = 14
|
||||
// removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams
|
||||
@@ -1659,4 +1671,4 @@ struct HipToolsDispatchTable {
|
||||
// 4) GENERATE COMMENT FOR NEXT STEP VERSION
|
||||
// 5) ADD "DO NOT EDIT ABOVE!" COMMENT
|
||||
// ******************************************************************************************* //
|
||||
};
|
||||
};
|
||||
@@ -448,7 +448,11 @@ enum hip_api_id_t {
|
||||
HIP_API_ID_hipStreamGetAttribute = 428,
|
||||
HIP_API_ID_hipStreamSetAttribute = 429,
|
||||
HIP_API_ID_hipModuleLoadFatBinary = 430,
|
||||
HIP_API_ID_LAST = 430,
|
||||
HIP_API_ID_hipMemcpy3DBatchAsync = 431,
|
||||
HIP_API_ID_hipMemcpy3DPeer = 432,
|
||||
HIP_API_ID_hipMemcpy3DPeerAsync = 433,
|
||||
HIP_API_ID_hipMemcpyBatchAsync = 434,
|
||||
HIP_API_ID_LAST = 434,
|
||||
|
||||
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
|
||||
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
|
||||
@@ -778,11 +782,15 @@ static inline const char* hip_api_name(const uint32_t id) {
|
||||
case HIP_API_ID_hipMemcpy2DToArrayAsync: return "hipMemcpy2DToArrayAsync";
|
||||
case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D";
|
||||
case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync";
|
||||
case HIP_API_ID_hipMemcpy3DBatchAsync: return "hipMemcpy3DBatchAsync";
|
||||
case HIP_API_ID_hipMemcpy3DPeer: return "hipMemcpy3DPeer";
|
||||
case HIP_API_ID_hipMemcpy3DPeerAsync: return "hipMemcpy3DPeerAsync";
|
||||
case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync";
|
||||
case HIP_API_ID_hipMemcpyAtoA: return "hipMemcpyAtoA";
|
||||
case HIP_API_ID_hipMemcpyAtoD: return "hipMemcpyAtoD";
|
||||
case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH";
|
||||
case HIP_API_ID_hipMemcpyAtoHAsync: return "hipMemcpyAtoHAsync";
|
||||
case HIP_API_ID_hipMemcpyBatchAsync: return "hipMemcpyBatchAsync";
|
||||
case HIP_API_ID_hipMemcpyDtoA: return "hipMemcpyDtoA";
|
||||
case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD";
|
||||
case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync";
|
||||
@@ -1203,11 +1211,15 @@ static inline uint32_t hipApiIdByName(const char* name) {
|
||||
if (strcmp("hipMemcpy2DToArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DToArrayAsync;
|
||||
if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D;
|
||||
if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync;
|
||||
if (strcmp("hipMemcpy3DBatchAsync", name) == 0) return HIP_API_ID_hipMemcpy3DBatchAsync;
|
||||
if (strcmp("hipMemcpy3DPeer", name) == 0) return HIP_API_ID_hipMemcpy3DPeer;
|
||||
if (strcmp("hipMemcpy3DPeerAsync", name) == 0) return HIP_API_ID_hipMemcpy3DPeerAsync;
|
||||
if (strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync;
|
||||
if (strcmp("hipMemcpyAtoA", name) == 0) return HIP_API_ID_hipMemcpyAtoA;
|
||||
if (strcmp("hipMemcpyAtoD", name) == 0) return HIP_API_ID_hipMemcpyAtoD;
|
||||
if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH;
|
||||
if (strcmp("hipMemcpyAtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyAtoHAsync;
|
||||
if (strcmp("hipMemcpyBatchAsync", name) == 0) return HIP_API_ID_hipMemcpyBatchAsync;
|
||||
if (strcmp("hipMemcpyDtoA", name) == 0) return HIP_API_ID_hipMemcpyDtoA;
|
||||
if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD;
|
||||
if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync;
|
||||
@@ -3064,6 +3076,24 @@ typedef struct hip_api_data_s {
|
||||
hipMemcpy3DParms p__val;
|
||||
hipStream_t stream;
|
||||
} hipMemcpy3DAsync;
|
||||
struct {
|
||||
size_t numOps;
|
||||
hipMemcpy3DBatchOp* opList;
|
||||
hipMemcpy3DBatchOp opList__val;
|
||||
size_t* failIdx;
|
||||
size_t failIdx__val;
|
||||
unsigned long long flags;
|
||||
hipStream_t stream;
|
||||
} hipMemcpy3DBatchAsync;
|
||||
struct {
|
||||
hipMemcpy3DPeerParms* p;
|
||||
hipMemcpy3DPeerParms p__val;
|
||||
} hipMemcpy3DPeer;
|
||||
struct {
|
||||
hipMemcpy3DPeerParms* p;
|
||||
hipMemcpy3DPeerParms p__val;
|
||||
hipStream_t stream;
|
||||
} hipMemcpy3DPeerAsync;
|
||||
struct {
|
||||
void* dst;
|
||||
const void* src;
|
||||
@@ -3097,6 +3127,23 @@ typedef struct hip_api_data_s {
|
||||
size_t ByteCount;
|
||||
hipStream_t stream;
|
||||
} hipMemcpyAtoHAsync;
|
||||
struct {
|
||||
void** dsts;
|
||||
void* dsts__val;
|
||||
void** srcs;
|
||||
void* srcs__val;
|
||||
size_t* sizes;
|
||||
size_t sizes__val;
|
||||
size_t count;
|
||||
hipMemcpyAttributes* attrs;
|
||||
hipMemcpyAttributes attrs__val;
|
||||
size_t* attrsIdxs;
|
||||
size_t attrsIdxs__val;
|
||||
size_t numAttrs;
|
||||
size_t* failIdx;
|
||||
size_t failIdx__val;
|
||||
hipStream_t stream;
|
||||
} hipMemcpyBatchAsync;
|
||||
struct {
|
||||
hipArray_t dstArray;
|
||||
size_t dstOffset;
|
||||
@@ -5586,6 +5633,23 @@ typedef struct hip_api_data_s {
|
||||
cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \
|
||||
cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \
|
||||
};
|
||||
// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')]
|
||||
#define INIT_hipMemcpy3DBatchAsync_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipMemcpy3DBatchAsync.numOps = (size_t)numOps; \
|
||||
cb_data.args.hipMemcpy3DBatchAsync.opList = (hipMemcpy3DBatchOp*)opList; \
|
||||
cb_data.args.hipMemcpy3DBatchAsync.failIdx = (size_t*)failIdx; \
|
||||
cb_data.args.hipMemcpy3DBatchAsync.flags = (unsigned long long)flags; \
|
||||
cb_data.args.hipMemcpy3DBatchAsync.stream = (hipStream_t)stream; \
|
||||
};
|
||||
// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')]
|
||||
#define INIT_hipMemcpy3DPeer_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipMemcpy3DPeer.p = (hipMemcpy3DPeerParms*)p; \
|
||||
};
|
||||
// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')]
|
||||
#define INIT_hipMemcpy3DPeerAsync_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipMemcpy3DPeerAsync.p = (hipMemcpy3DPeerParms*)p; \
|
||||
cb_data.args.hipMemcpy3DPeerAsync.stream = (hipStream_t)stream; \
|
||||
};
|
||||
// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
|
||||
#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipMemcpyAsync.dst = (void*)dst; \
|
||||
@@ -5624,6 +5688,18 @@ typedef struct hip_api_data_s {
|
||||
cb_data.args.hipMemcpyAtoHAsync.ByteCount = (size_t)ByteCount; \
|
||||
cb_data.args.hipMemcpyAtoHAsync.stream = (hipStream_t)stream; \
|
||||
};
|
||||
// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')]
|
||||
#define INIT_hipMemcpyBatchAsync_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipMemcpyBatchAsync.dsts = (void**)dsts; \
|
||||
cb_data.args.hipMemcpyBatchAsync.srcs = (void**)srcs; \
|
||||
cb_data.args.hipMemcpyBatchAsync.sizes = (size_t*)sizes; \
|
||||
cb_data.args.hipMemcpyBatchAsync.count = (size_t)count; \
|
||||
cb_data.args.hipMemcpyBatchAsync.attrs = (hipMemcpyAttributes*)attrs; \
|
||||
cb_data.args.hipMemcpyBatchAsync.attrsIdxs = (size_t*)attrsIdxs; \
|
||||
cb_data.args.hipMemcpyBatchAsync.numAttrs = (size_t)numAttrs; \
|
||||
cb_data.args.hipMemcpyBatchAsync.failIdx = (size_t*)failIdx; \
|
||||
cb_data.args.hipMemcpyBatchAsync.stream = (hipStream_t)stream; \
|
||||
};
|
||||
// hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')]
|
||||
#define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipMemcpyDtoA.dstArray = (hipArray_t)dstArray; \
|
||||
@@ -7627,6 +7703,19 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
case HIP_API_ID_hipMemcpy3DAsync:
|
||||
if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p);
|
||||
break;
|
||||
// hipMemcpy3DBatchAsync[('size_t', 'numOps'), ('hipMemcpy3DBatchOp*', 'opList'), ('size_t*', 'failIdx'), ('unsigned long long', 'flags'), ('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipMemcpy3DBatchAsync:
|
||||
if (data->args.hipMemcpy3DBatchAsync.opList) data->args.hipMemcpy3DBatchAsync.opList__val = *(data->args.hipMemcpy3DBatchAsync.opList);
|
||||
if (data->args.hipMemcpy3DBatchAsync.failIdx) data->args.hipMemcpy3DBatchAsync.failIdx__val = *(data->args.hipMemcpy3DBatchAsync.failIdx);
|
||||
break;
|
||||
// hipMemcpy3DPeer[('hipMemcpy3DPeerParms*', 'p')]
|
||||
case HIP_API_ID_hipMemcpy3DPeer:
|
||||
if (data->args.hipMemcpy3DPeer.p) data->args.hipMemcpy3DPeer.p__val = *(data->args.hipMemcpy3DPeer.p);
|
||||
break;
|
||||
// hipMemcpy3DPeerAsync[('hipMemcpy3DPeerParms*', 'p'), ('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipMemcpy3DPeerAsync:
|
||||
if (data->args.hipMemcpy3DPeerAsync.p) data->args.hipMemcpy3DPeerAsync.p__val = *(data->args.hipMemcpy3DPeerAsync.p);
|
||||
break;
|
||||
// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipMemcpyAsync:
|
||||
break;
|
||||
@@ -7642,6 +7731,15 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
// hipMemcpyAtoHAsync[('void*', 'dstHost'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'ByteCount'), ('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipMemcpyAtoHAsync:
|
||||
break;
|
||||
// hipMemcpyBatchAsync[('void**', 'dsts'), ('void**', 'srcs'), ('size_t*', 'sizes'), ('size_t', 'count'), ('hipMemcpyAttributes*', 'attrs'), ('size_t*', 'attrsIdxs'), ('size_t', 'numAttrs'), ('size_t*', 'failIdx'), ('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipMemcpyBatchAsync:
|
||||
if (data->args.hipMemcpyBatchAsync.dsts) data->args.hipMemcpyBatchAsync.dsts__val = *(data->args.hipMemcpyBatchAsync.dsts);
|
||||
if (data->args.hipMemcpyBatchAsync.srcs) data->args.hipMemcpyBatchAsync.srcs__val = *(data->args.hipMemcpyBatchAsync.srcs);
|
||||
if (data->args.hipMemcpyBatchAsync.sizes) data->args.hipMemcpyBatchAsync.sizes__val = *(data->args.hipMemcpyBatchAsync.sizes);
|
||||
if (data->args.hipMemcpyBatchAsync.attrs) data->args.hipMemcpyBatchAsync.attrs__val = *(data->args.hipMemcpyBatchAsync.attrs);
|
||||
if (data->args.hipMemcpyBatchAsync.attrsIdxs) data->args.hipMemcpyBatchAsync.attrsIdxs__val = *(data->args.hipMemcpyBatchAsync.attrsIdxs);
|
||||
if (data->args.hipMemcpyBatchAsync.failIdx) data->args.hipMemcpyBatchAsync.failIdx__val = *(data->args.hipMemcpyBatchAsync.failIdx);
|
||||
break;
|
||||
// hipMemcpyDtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('hipDeviceptr_t', 'srcDevice'), ('size_t', 'ByteCount')]
|
||||
case HIP_API_ID_hipMemcpyDtoA:
|
||||
break;
|
||||
@@ -10436,6 +10534,30 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
||||
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.stream);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpy3DBatchAsync:
|
||||
oss << "hipMemcpy3DBatchAsync(";
|
||||
oss << "numOps="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.numOps);
|
||||
if (data->args.hipMemcpy3DBatchAsync.opList == NULL) oss << ", opList=NULL";
|
||||
else { oss << ", opList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.opList__val); }
|
||||
if (data->args.hipMemcpy3DBatchAsync.failIdx == NULL) oss << ", failIdx=NULL";
|
||||
else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.failIdx__val); }
|
||||
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.flags);
|
||||
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DBatchAsync.stream);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpy3DPeer:
|
||||
oss << "hipMemcpy3DPeer(";
|
||||
if (data->args.hipMemcpy3DPeer.p == NULL) oss << "p=NULL";
|
||||
else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeer.p__val); }
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpy3DPeerAsync:
|
||||
oss << "hipMemcpy3DPeerAsync(";
|
||||
if (data->args.hipMemcpy3DPeerAsync.p == NULL) oss << "p=NULL";
|
||||
else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.p__val); }
|
||||
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DPeerAsync.stream);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpyAsync:
|
||||
oss << "hipMemcpyAsync(";
|
||||
oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.dst);
|
||||
@@ -10479,6 +10601,25 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
||||
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoHAsync.stream);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpyBatchAsync:
|
||||
oss << "hipMemcpyBatchAsync(";
|
||||
if (data->args.hipMemcpyBatchAsync.dsts == NULL) oss << "dsts=NULL";
|
||||
else { oss << "dsts="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.dsts__val); }
|
||||
if (data->args.hipMemcpyBatchAsync.srcs == NULL) oss << ", srcs=NULL";
|
||||
else { oss << ", srcs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.srcs__val); }
|
||||
if (data->args.hipMemcpyBatchAsync.sizes == NULL) oss << ", sizes=NULL";
|
||||
else { oss << ", sizes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.sizes__val); }
|
||||
oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.count);
|
||||
if (data->args.hipMemcpyBatchAsync.attrs == NULL) oss << ", attrs=NULL";
|
||||
else { oss << ", attrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrs__val); }
|
||||
if (data->args.hipMemcpyBatchAsync.attrsIdxs == NULL) oss << ", attrsIdxs=NULL";
|
||||
else { oss << ", attrsIdxs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.attrsIdxs__val); }
|
||||
oss << ", numAttrs="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.numAttrs);
|
||||
if (data->args.hipMemcpyBatchAsync.failIdx == NULL) oss << ", failIdx=NULL";
|
||||
else { oss << ", failIdx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.failIdx__val); }
|
||||
oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyBatchAsync.stream);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpyDtoA:
|
||||
oss << "hipMemcpyDtoA(";
|
||||
oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoA.dstArray);
|
||||
|
||||
@@ -502,3 +502,7 @@ hipMemsetD2D32Async
|
||||
hipStreamGetAttribute
|
||||
hipStreamSetAttribute
|
||||
hipModuleLoadFatBinary
|
||||
hipMemcpyBatchAsync
|
||||
hipMemcpy3DBatchAsync
|
||||
hipMemcpy3DPeer
|
||||
hipMemcpy3DPeerAsync
|
||||
@@ -848,6 +848,13 @@ hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
|
||||
hipStreamAttrValue *value);
|
||||
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
|
||||
const hipStreamAttrValue *value);
|
||||
hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count,
|
||||
hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
|
||||
size_t *failIdx, hipStream_t stream);
|
||||
hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx,
|
||||
unsigned long long flags, hipStream_t stream);
|
||||
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p);
|
||||
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream);
|
||||
} // namespace hip
|
||||
|
||||
namespace hip {
|
||||
@@ -1373,6 +1380,10 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
|
||||
ptrDispatchTable->hipMemsetD2D32Async_fn = hip::hipMemsetD2D32Async;
|
||||
ptrDispatchTable->hipStreamGetAttribute_fn = hip::hipStreamGetAttribute;
|
||||
ptrDispatchTable->hipStreamSetAttribute_fn = hip::hipStreamSetAttribute;
|
||||
ptrDispatchTable->hipMemcpyBatchAsync_fn = hip::hipMemcpyBatchAsync;
|
||||
ptrDispatchTable->hipMemcpy3DBatchAsync_fn = hip::hipMemcpy3DBatchAsync;
|
||||
ptrDispatchTable->hipMemcpy3DPeer_fn = hip::hipMemcpy3DPeer;
|
||||
ptrDispatchTable->hipMemcpy3DPeerAsync_fn = hip::hipMemcpy3DPeerAsync;
|
||||
}
|
||||
|
||||
#if HIP_ROCPROFILER_REGISTER > 0
|
||||
@@ -2027,13 +2038,17 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32Async_fn, 483);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetAttribute_fn, 484);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamSetAttribute_fn, 485);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipModuleLoadFatBinary_fn, 486);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpyBatchAsync_fn, 487);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DBatchAsync_fn, 488);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DPeer_fn, 489);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemcpy3DPeerAsync_fn, 490);
|
||||
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
|
||||
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
|
||||
//
|
||||
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
|
||||
//
|
||||
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
|
||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 487)
|
||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 491)
|
||||
|
||||
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14,
|
||||
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
|
||||
|
||||
@@ -978,4 +978,77 @@ inline
|
||||
size_t getElementSize(const hipChannelFormatDesc &desc) {
|
||||
return (desc.x / 8) * getNumChannels(desc);
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
hipMemcpy3DParms getMemcpy3DParms(const hipMemcpy3DBatchOp& desc) {
|
||||
hipMemcpy3DParms params;
|
||||
params.extent = desc.extent;
|
||||
params.kind = hipMemcpyDefault;
|
||||
|
||||
// infer elementSize
|
||||
size_t elementSize = 1;
|
||||
if (desc.src.type == hipMemcpyOperandTypeArray) {
|
||||
elementSize = getElementSize(desc.src.op.array.array);
|
||||
} else if (desc.dst.type == hipMemcpyOperandTypeArray) {
|
||||
elementSize = getElementSize(desc.dst.op.array.array);
|
||||
}
|
||||
|
||||
// source
|
||||
if (desc.src.type == hipMemcpyOperandTypePointer) {
|
||||
size_t row = desc.src.op.ptr.rowLength;
|
||||
size_t height = desc.src.op.ptr.layerHeight;
|
||||
size_t spitch = (row ? row : desc.extent.width) * elementSize;
|
||||
size_t swidth = (row ? row : desc.extent.width);
|
||||
size_t sheight = (height ? height : desc.extent.height);
|
||||
params.srcPtr = make_hipPitchedPtr(
|
||||
desc.src.op.ptr.ptr,
|
||||
spitch,
|
||||
swidth,
|
||||
sheight
|
||||
);
|
||||
} else if (desc.src.type == hipMemcpyOperandTypeArray) {
|
||||
params.srcArray = desc.src.op.array.array;
|
||||
params.srcPos = make_hipPos(
|
||||
desc.src.op.array.offset.x,
|
||||
desc.src.op.array.offset.y,
|
||||
desc.src.op.array.offset.z
|
||||
);
|
||||
}
|
||||
// dest
|
||||
if (desc.dst.type == hipMemcpyOperandTypePointer) {
|
||||
size_t row = desc.dst.op.ptr.rowLength;
|
||||
size_t height = desc.dst.op.ptr.layerHeight;
|
||||
size_t spitch = (row ? row : desc.extent.width) * elementSize;
|
||||
size_t swidth = (row ? row : desc.extent.width);
|
||||
size_t sheight = (height ? height : desc.extent.height);
|
||||
params.dstPtr = make_hipPitchedPtr(
|
||||
desc.dst.op.ptr.ptr,
|
||||
spitch,
|
||||
swidth,
|
||||
sheight
|
||||
);
|
||||
} else if (desc.dst.type == hipMemcpyOperandTypeArray) {
|
||||
params.dstArray = desc.dst.op.array.array;
|
||||
params.dstPos = make_hipPos(
|
||||
desc.dst.op.array.offset.x,
|
||||
desc.dst.op.array.offset.y,
|
||||
desc.dst.op.array.offset.z
|
||||
);
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
inline
|
||||
hipMemcpy3DParms getMemcpy3DParms(const hipMemcpy3DPeerParms& desc) {
|
||||
hipMemcpy3DParms params;
|
||||
params.srcArray = desc.srcArray;
|
||||
params.srcPos = desc.srcPos;
|
||||
params.srcPtr = desc.srcPtr;
|
||||
params.dstArray = desc.dstArray;
|
||||
params.dstPos = desc.dstPos;
|
||||
params.dstPtr = desc.dstPtr;
|
||||
params.extent = desc.extent;
|
||||
params.kind = hipMemcpyDeviceToDevice;
|
||||
return params;
|
||||
}
|
||||
};
|
||||
@@ -618,6 +618,10 @@ global:
|
||||
hipStreamGetAttribute;
|
||||
hipStreamSetAttribute;
|
||||
hipModuleLoadFatBinary;
|
||||
hipMemcpyBatchAsync;
|
||||
hipMemcpy3DBatchAsync;
|
||||
hipMemcpy3DPeer;
|
||||
hipMemcpy3DPeerAsync;
|
||||
local:
|
||||
*;
|
||||
} hip_6.5;
|
||||
@@ -680,6 +680,8 @@ public:
|
||||
size_t sizeBytes);
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hip::Stream& stream, bool isHostAsync = false, bool isGPUAsync = true);
|
||||
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream = nullptr,
|
||||
bool isAsync = false);
|
||||
constexpr bool kOptionChangeable = true;
|
||||
constexpr bool kNewDevProg = false;
|
||||
|
||||
|
||||
@@ -2759,7 +2759,7 @@ hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p
|
||||
return ihipGetMemcpyParam3DCommand(command, &desc, stream);
|
||||
}
|
||||
|
||||
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
|
||||
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) {
|
||||
hipError_t status = ihipMemcpy3D_validate(p);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
@@ -2808,10 +2808,72 @@ hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) {
|
||||
|
||||
hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
|
||||
HIP_INIT_API(hipDrvMemcpy3DAsync, pCopy, stream);
|
||||
|
||||
HIP_RETURN_DURATION(ihipMemcpyParam3D(pCopy, stream, true));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count,
|
||||
hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
|
||||
size_t *failIdx, hipStream_t stream) {
|
||||
HIP_INIT_API(hipMemcpyBatchAsync, dsts, srcs, sizes, count, attrs, attrsIdxs, numAttrs, failIdx,
|
||||
stream);
|
||||
// validate stream
|
||||
if(!hip::isValid(stream)) {
|
||||
HIP_RETURN(hipErrorInvalidResourceHandle);
|
||||
}
|
||||
// validate inputs
|
||||
if (dsts == nullptr || srcs == nullptr || sizes == nullptr || failIdx == nullptr ||
|
||||
count == 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// no support for memcpy attributes
|
||||
if (numAttrs > 0) {
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t status = hipSuccess;
|
||||
|
||||
*failIdx = SIZE_MAX;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (sizes[i] == 0) {
|
||||
*failIdx = i;
|
||||
status = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
status = ihipMemcpy(dsts[i], srcs[i], sizes[i], hipMemcpyDefault, *hip::getStream(stream),
|
||||
true, true);
|
||||
if (status != hipSuccess) {
|
||||
*failIdx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIP_RETURN(status);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx,
|
||||
unsigned long long flags, hipStream_t stream) {
|
||||
HIP_INIT_API(hipMemcpy3DBatchAsync, numOps, opList, failIdx, flags, stream);
|
||||
if (flags != 0 || opList == nullptr || numOps == 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
if (!hip::isValid(stream)) {
|
||||
HIP_RETURN(hipErrorInvalidResourceHandle);
|
||||
}
|
||||
|
||||
hipError_t status = hipSuccess;
|
||||
|
||||
*failIdx = SIZE_MAX;
|
||||
for (int i = 0; i < numOps; ++i) {
|
||||
hipMemcpy3DParms parms = getMemcpy3DParms(opList[i]);
|
||||
status = ihipMemcpy3D(&parms, stream, true);
|
||||
if (status != hipSuccess) {
|
||||
*failIdx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIP_RETURN(status);
|
||||
}
|
||||
|
||||
hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset,
|
||||
int64_t value, size_t valueSize, size_t sizeBytes,
|
||||
hip::Stream* stream) {
|
||||
|
||||
@@ -21,6 +21,8 @@
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "hip_conversions.hpp"
|
||||
|
||||
|
||||
namespace hip {
|
||||
|
||||
@@ -217,6 +219,39 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src
|
||||
HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice, *hip_stream, true, true));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p) {
|
||||
HIP_INIT_API(hipMemcpy3DPeer, p);
|
||||
if (p == NULL) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
if (p->srcDevice >= static_cast<int>(g_devices.size()) ||
|
||||
p->dstDevice >= static_cast<int>(g_devices.size()) || p->srcDevice < 0 || p->dstDevice < 0) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
hipMemcpy3DParms copyParms = getMemcpy3DParms(*p);
|
||||
HIP_RETURN(ihipMemcpy3D(©Parms, nullptr));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream) {
|
||||
HIP_INIT_API(hipMemcpy3DPeerAsync, p, stream);
|
||||
if (p == NULL) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
if (p->srcDevice >= static_cast<int>(g_devices.size()) ||
|
||||
p->dstDevice >= static_cast<int>(g_devices.size()) || p->srcDevice < 0 || p->dstDevice < 0) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
getStreamPerThread(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipMemcpy3DParms copyParms = getMemcpy3DParms(*p);
|
||||
HIP_RETURN(ihipMemcpy3D(©Parms), stream, true);
|
||||
}
|
||||
|
||||
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
|
||||
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
|
||||
|
||||
|
||||
@@ -1926,4 +1926,21 @@ hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
|
||||
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
|
||||
hipStreamAttrValue *value) {
|
||||
return hip::GetHipDispatchTable()->hipStreamGetAttribute_fn(stream, attr, value);
|
||||
}
|
||||
hipError_t hipMemcpyBatchAsync(void **dsts, void **srcs, size_t *sizes, size_t count,
|
||||
hipMemcpyAttributes *attrs, size_t *attrsIdxs, size_t numAttrs,
|
||||
size_t *failIdx, hipStream_t stream) {
|
||||
return hip::GetHipDispatchTable()->hipMemcpyBatchAsync_fn(dsts, srcs, sizes, count, attrs,
|
||||
attrsIdxs, numAttrs, failIdx, stream);
|
||||
}
|
||||
hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp *opList, size_t *failIdx,
|
||||
unsigned long long flags, hipStream_t stream) {
|
||||
return hip::GetHipDispatchTable()->hipMemcpy3DBatchAsync_fn(numOps, opList, failIdx, flags,
|
||||
stream);
|
||||
}
|
||||
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms *p) {
|
||||
return hip::GetHipDispatchTable()->hipMemcpy3DPeer_fn(p);
|
||||
}
|
||||
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms *p, hipStream_t stream) {
|
||||
return hip::GetHipDispatchTable()->hipMemcpy3DPeerAsync_fn(p, stream);
|
||||
}
|
||||
Ссылка в новой задаче
Block a user