SWDEV-484578 SWDEV-484575 SWDEV-484573 SWDEV-483324 SWDEV-483323 - Fixes issues in nvidia mappings for batch mem ops
Change-Id: I6202ea5691b8256e004650d2689c2826a53d8113
[ROCm/hipother commit: 1d96ab69ee]
This commit is contained in:
committed by
Sourabh Betigeri
parent
b7a1df8fb0
commit
a421a72d8f
@@ -519,69 +519,66 @@ static inline void hipResourceDesTocudaResourceDes(CUDA_RESOURCE_DESC* a, const
|
||||
a->flags = p->flags;
|
||||
}
|
||||
|
||||
|
||||
/** Operations for hipStreamBatchMemOp*/
|
||||
typedef enum hipStreamBatchMemOpType {
|
||||
hipStreamMemOpWaitValue32 = 0x1,
|
||||
hipStreamMemOpWriteValue32 = 0x2,
|
||||
hipStreamMemOpWaitValue64 = 0x4,
|
||||
hipStreamMemOpWriteValue64 = 0x5,
|
||||
hipStreamMemOpBarrier = 0x6, ///< Currently not supported
|
||||
hipStreamMemOpFlushRemoteWrites = 0x3 ///< Currently not supported
|
||||
hipStreamMemOpBarrier = 0x6, ///< Currently not supported
|
||||
hipStreamMemOpFlushRemoteWrites = 0x3 ///< Currently not supported
|
||||
} hipStreamBatchMemOpType;
|
||||
|
||||
|
||||
|
||||
inline static CUstreamBatchMemOpType hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
|
||||
hipStreamBatchMemOpType memOpType) {
|
||||
hipStreamBatchMemOpType memOpType) {
|
||||
switch (memOpType) {
|
||||
case hipStreamMemOpWaitValue32:
|
||||
return CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
case hipStreamMemOpWriteValue32:
|
||||
return CU_STREAM_MEM_OP_WRITE_VALUE_32;
|
||||
case hipStreamMemOpWaitValue64:
|
||||
return CU_STREAM_MEM_OP_WAIT_VALUE_64;
|
||||
case hipStreamMemOpWriteValue64:
|
||||
return CU_STREAM_MEM_OP_WRITE_VALUE_64;
|
||||
case hipStreamMemOpBarrier:
|
||||
return CU_STREAM_MEM_OP_BARRIER;
|
||||
case hipStreamMemOpFlushRemoteWrites:
|
||||
return CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
|
||||
default:
|
||||
return CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
case hipStreamMemOpWaitValue32:
|
||||
return CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
case hipStreamMemOpWriteValue32:
|
||||
return CU_STREAM_MEM_OP_WRITE_VALUE_32;
|
||||
case hipStreamMemOpWaitValue64:
|
||||
return CU_STREAM_MEM_OP_WAIT_VALUE_64;
|
||||
case hipStreamMemOpWriteValue64:
|
||||
return CU_STREAM_MEM_OP_WRITE_VALUE_64;
|
||||
case hipStreamMemOpBarrier:
|
||||
return CU_STREAM_MEM_OP_BARRIER;
|
||||
case hipStreamMemOpFlushRemoteWrites:
|
||||
return CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
|
||||
default:
|
||||
return CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
}
|
||||
}
|
||||
|
||||
typedef union hipStreamBatchMemOpParams_union {
|
||||
hipStreamBatchMemOpType operation;
|
||||
struct hipStreamMemOpWaitValueParams_t{
|
||||
struct hipStreamMemOpWaitValueParams_t {
|
||||
hipStreamBatchMemOpType operation;
|
||||
hipDeviceptr_t address;
|
||||
union {
|
||||
uint32_t value;
|
||||
uint64_t value64;
|
||||
};
|
||||
unsigned int flags;
|
||||
hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant
|
||||
} waitValue;
|
||||
struct hipStreamMemOpWriteValueParams_t{
|
||||
hipStreamBatchMemOpType operation;
|
||||
hipDeviceptr_t address;
|
||||
union {
|
||||
uint32_t value;
|
||||
uint64_t value64;
|
||||
};
|
||||
unsigned int flags;
|
||||
hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant
|
||||
union {
|
||||
uint32_t value;
|
||||
uint64_t value64;
|
||||
};
|
||||
unsigned int flags;
|
||||
hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant
|
||||
} waitValue;
|
||||
struct hipStreamMemOpWriteValueParams_t {
|
||||
hipStreamBatchMemOpType operation;
|
||||
hipDeviceptr_t address;
|
||||
union {
|
||||
uint32_t value;
|
||||
uint64_t value64;
|
||||
};
|
||||
unsigned int flags;
|
||||
hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant
|
||||
} writeValue;
|
||||
struct hipStreamMemOpFlushRemoteWritesParams_t{
|
||||
struct hipStreamMemOpFlushRemoteWritesParams_t {
|
||||
hipStreamBatchMemOpType operation;
|
||||
unsigned int flags;
|
||||
} flushRemoteWrites; ///< Currently not supported on AMD
|
||||
struct hipStreamMemOpMemoryBarrierParams_t{
|
||||
} flushRemoteWrites; ///< Currently not supported on AMD
|
||||
struct hipStreamMemOpMemoryBarrierParams_t {
|
||||
hipStreamBatchMemOpType operation;
|
||||
unsigned int flags;
|
||||
} memoryBarrier; ///< Currently not supported on AMD
|
||||
} memoryBarrier; ///< Currently not supported on AMD
|
||||
uint64_t pad[6];
|
||||
} hipStreamBatchMemOpParams;
|
||||
// hipStreamBatchMemOpType
|
||||
@@ -589,7 +586,7 @@ typedef union hipStreamBatchMemOpParams_union {
|
||||
typedef struct hipBatchMemOpNodeParams {
|
||||
hipCtx_t ctx;
|
||||
unsigned int count;
|
||||
hipStreamBatchMemOpParams* paramArray;
|
||||
hipStreamBatchMemOpParams *paramArray;
|
||||
unsigned int flags;
|
||||
} hipBatchMemOpNodeParams;
|
||||
|
||||
@@ -599,64 +596,43 @@ static inline void hipBatchMemOpParamsTocudaBatchMemOpParams(CUstreamBatchMemOpP
|
||||
const hipStreamBatchMemOpParams* p,
|
||||
unsigned int count) {
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
switch (p[i].operation) {
|
||||
case hipStreamMemOpWaitValue32:
|
||||
a[i].operation = CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
break;
|
||||
case hipStreamMemOpWriteValue32:
|
||||
a[i].operation = CU_STREAM_MEM_OP_WRITE_VALUE_32;
|
||||
break;
|
||||
case hipStreamMemOpWaitValue64:
|
||||
a[i].operation = CU_STREAM_MEM_OP_WAIT_VALUE_64;
|
||||
break;
|
||||
case hipStreamMemOpWriteValue64:
|
||||
a[i].operation = CU_STREAM_MEM_OP_WRITE_VALUE_64;
|
||||
break;
|
||||
case hipStreamMemOpBarrier:
|
||||
a[i].operation = CU_STREAM_MEM_OP_BARRIER;
|
||||
break;
|
||||
case hipStreamMemOpFlushRemoteWrites:
|
||||
a[i].operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
|
||||
break;
|
||||
default:
|
||||
a[i].operation = CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
break;
|
||||
if (p[i].waitValue.operation == hipStreamMemOpWaitValue32) {
|
||||
a[i].waitValue.operation = CU_STREAM_MEM_OP_WAIT_VALUE_32;
|
||||
a[i].waitValue.address = p[i].waitValue.address;
|
||||
a[i].waitValue.value = (cuuint32_t)(p[i].waitValue.value);
|
||||
a[i].waitValue.flags = p[i].waitValue.flags;
|
||||
a[i].waitValue.alias = (CUdeviceptr)(p[i].waitValue.alias);
|
||||
}
|
||||
a[i].waitValue.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
|
||||
p[i].waitValue.operation);
|
||||
a[i].waitValue.address = p[i].waitValue.address;
|
||||
a[i].waitValue.value = static_cast<cuuint32_t>(p[i].waitValue.value);
|
||||
a[i].waitValue.value64 = static_cast<cuuint64_t>(p[i].waitValue.value64);
|
||||
a[i].waitValue.flags = p[i].waitValue.flags;
|
||||
a[i].waitValue.alias = (CUdeviceptr)p[i].waitValue.alias;
|
||||
|
||||
a[i].writeValue.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
|
||||
p[i].waitValue.operation);
|
||||
a[i].writeValue.address = p[i].writeValue.address;
|
||||
a[i].writeValue.value = static_cast<cuuint32_t>(p[i].writeValue.value);
|
||||
a[i].writeValue.value64 = static_cast<cuuint64_t>(p[i].writeValue.value64);
|
||||
a[i].writeValue.flags = p[i].writeValue.flags;
|
||||
a[i].writeValue.alias = (CUdeviceptr)p[i].writeValue.alias;
|
||||
|
||||
a[i].flushRemoteWrites.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
|
||||
p[i].flushRemoteWrites.operation);
|
||||
a[i].flushRemoteWrites.flags = p[i].flushRemoteWrites.flags;
|
||||
|
||||
a[i].memoryBarrier.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
|
||||
p[i].memoryBarrier.operation);
|
||||
a[i].memoryBarrier.flags = p[i].memoryBarrier.flags;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void hipBatchMemOpNodeParamsTocudaBatchMemOpNodeParams(
|
||||
CUDA_BATCH_MEM_OP_NODE_PARAMS* a,
|
||||
const hipBatchMemOpNodeParams* p) {
|
||||
CUstreamBatchMemOpParams cuParamArray[p->count];
|
||||
hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, p->paramArray, p->count);
|
||||
a->ctx = (CUcontext)p->ctx;
|
||||
a->count = p->count;
|
||||
a->paramArray = cuParamArray;
|
||||
a->flags = p->flags;
|
||||
else if (p[i].writeValue.operation == hipStreamMemOpWriteValue32) {
|
||||
a[i].writeValue.operation = CU_STREAM_MEM_OP_WRITE_VALUE_32;
|
||||
a[i].writeValue.address = p[i].writeValue.address;
|
||||
a[i].writeValue.value = (cuuint32_t)(p[i].writeValue.value);
|
||||
a[i].writeValue.flags = p[i].writeValue.flags;
|
||||
a[i].writeValue.alias = (CUdeviceptr)(p[i].writeValue.alias);
|
||||
}
|
||||
else if (p[i].waitValue.operation == hipStreamMemOpWaitValue64) {
|
||||
a[i].waitValue.operation = CU_STREAM_MEM_OP_WAIT_VALUE_64;
|
||||
a[i].waitValue.address = p[i].waitValue.address;
|
||||
a[i].waitValue.value64 = (cuuint64_t)(p[i].waitValue.value64);
|
||||
a[i].waitValue.flags = p[i].waitValue.flags;
|
||||
a[i].waitValue.alias = (CUdeviceptr)(p[i].waitValue.alias);
|
||||
}
|
||||
else if (p[i].writeValue.operation == hipStreamMemOpWriteValue64) {
|
||||
a[i].writeValue.operation = CU_STREAM_MEM_OP_WRITE_VALUE_64;
|
||||
a[i].writeValue.address = p[i].writeValue.address;
|
||||
a[i].writeValue.value64 = (cuuint64_t)(p[i].writeValue.value64);
|
||||
a[i].writeValue.flags = p[i].writeValue.flags;
|
||||
a[i].writeValue.alias = (CUdeviceptr)(p[i].writeValue.alias);
|
||||
}
|
||||
else if (p[i].memoryBarrier.operation == hipStreamMemOpBarrier) {
|
||||
a[i].memoryBarrier.operation == CU_STREAM_MEM_OP_BARRIER;
|
||||
a[i].memoryBarrier.flags = p[i].memoryBarrier.flags;
|
||||
}
|
||||
else if (p[i].flushRemoteWrites.operation == hipStreamMemOpFlushRemoteWrites) {
|
||||
a[i].flushRemoteWrites.operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
|
||||
a[i].flushRemoteWrites.flags = p[i].flushRemoteWrites.flags;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct hip_Memcpy2D {
|
||||
@@ -4159,41 +4135,64 @@ inline static hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int
|
||||
inline static hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
|
||||
hipStreamBatchMemOpParams* paramArray,
|
||||
unsigned int flags) {
|
||||
CUstreamBatchMemOpParams cuParamArray[count];
|
||||
CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[count];
|
||||
hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, paramArray, count);
|
||||
return hipCUResultTohipError(cuStreamBatchMemOp(stream, count, cuParamArray, flags));
|
||||
}
|
||||
|
||||
inline static hipError_t hipGraphAddBatchMemOpNode(hipGraphNode_t *phGraphNode, hipGraph_t hGraph,
|
||||
const hipGraphNode_t *dependencies,
|
||||
inline static hipError_t hipGraphAddBatchMemOpNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph,
|
||||
const hipGraphNode_t* dependencies,
|
||||
size_t numDependencies,
|
||||
const hipBatchMemOpNodeParams* nodeParams) {
|
||||
CUDA_BATCH_MEM_OP_NODE_PARAMS cuBatchMemOpNodeParams;
|
||||
hipBatchMemOpNodeParamsTocudaBatchMemOpNodeParams(&cuBatchMemOpNodeParams, nodeParams);
|
||||
return hipCUDAErrorTohipError(cuGraphAddBatchMemOpNode(phGraphNode, hGraph, dependencies,
|
||||
numDependencies,
|
||||
(const CUDA_BATCH_MEM_OP_NODE_PARAMS*)&cuBatchMemOpNodeParams));
|
||||
CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[nodeParams->count];
|
||||
hipBatchMemOpParamsTocudaBatchMemOpParams(
|
||||
cuParamArray, nodeParams->paramArray, nodeParams->count);
|
||||
cuBatchMemOpNodeParams.ctx = (CUcontext)nodeParams->ctx;
|
||||
cuBatchMemOpNodeParams.count = nodeParams->count;
|
||||
cuBatchMemOpNodeParams.paramArray = cuParamArray;
|
||||
cuBatchMemOpNodeParams.flags = nodeParams->flags;
|
||||
return hipCUResultTohipError(cuGraphAddBatchMemOpNode(phGraphNode, hGraph, dependencies,
|
||||
numDependencies,
|
||||
&cuBatchMemOpNodeParams));
|
||||
delete[] cuParamArray;
|
||||
}
|
||||
|
||||
inline static hipError_t hipGraphBatchMemOpNodeGetParams(hipGraphNode_t hNode,
|
||||
hipBatchMemOpNodeParams* nodeParams_out) {
|
||||
return hipCUDAErrorTohipError(cuGraphBatchMemOpNodeGetParams(hNode, nodeParams_out));
|
||||
return hipCUResultTohipError(cuGraphBatchMemOpNodeGetParams(
|
||||
hNode, (CUDA_BATCH_MEM_OP_NODE_PARAMS *)nodeParams_out));
|
||||
}
|
||||
|
||||
inline static hipError_t hipGraphBatchMemOpNodeSetParams(hipGraphNode_t hNode,
|
||||
hipBatchMemOpNodeParams* nodeParams) {
|
||||
return hipCUDAErrorTohipError(cuGraphBatchMemOpNodeSetParams (hNode,
|
||||
(const CUDA_BATCH_MEM_OP_NODE_PARAMS*)nodeParams));
|
||||
CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[nodeParams->count];
|
||||
hipBatchMemOpParamsTocudaBatchMemOpParams(
|
||||
cuParamArray, nodeParams->paramArray, nodeParams->count);
|
||||
CUDA_BATCH_MEM_OP_NODE_PARAMS cuBatchMemOpNodeParams;
|
||||
cuBatchMemOpNodeParams.ctx = (CUcontext)nodeParams->ctx;
|
||||
cuBatchMemOpNodeParams.count = nodeParams->count;
|
||||
cuBatchMemOpNodeParams.paramArray = cuParamArray;
|
||||
cuBatchMemOpNodeParams.flags = nodeParams->flags;
|
||||
return hipCUResultTohipError(cuGraphBatchMemOpNodeSetParams(hNode, &cuBatchMemOpNodeParams));
|
||||
delete[] cuParamArray;
|
||||
}
|
||||
|
||||
inline static hipError_t hipGraphExecBatchMemOpNodeSetParams(hipGraphExec_t hGraphExec,
|
||||
hipGraphNode_t hNode,
|
||||
const hipBatchMemOpNodeParams* nodeParams) {
|
||||
return hipCUDAErrorTohipError(cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode,
|
||||
(const CUDA_BATCH_MEM_OP_NODE_PARAMS*)nodeParams));
|
||||
inline static hipError_t hipGraphExecBatchMemOpNodeSetParams(
|
||||
hipGraphExec_t hGraphExec, hipGraphNode_t hNode, const hipBatchMemOpNodeParams* nodeParams) {
|
||||
CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[nodeParams->count];
|
||||
hipBatchMemOpParamsTocudaBatchMemOpParams(
|
||||
cuParamArray, nodeParams->paramArray, nodeParams->count);
|
||||
CUDA_BATCH_MEM_OP_NODE_PARAMS cuBatchMemOpNodeParams;
|
||||
cuBatchMemOpNodeParams.ctx = (CUcontext)nodeParams->ctx;
|
||||
cuBatchMemOpNodeParams.count = nodeParams->count;
|
||||
cuBatchMemOpNodeParams.paramArray = cuParamArray;
|
||||
cuBatchMemOpNodeParams.flags = nodeParams->flags;
|
||||
return hipCUResultTohipError(cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode,
|
||||
&cuBatchMemOpNodeParams));
|
||||
delete[] cuParamArray;
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from,
|
||||
const hipGraphNode_t* to,
|
||||
size_t numDependencies) {
|
||||
|
||||
Reference in New Issue
Block a user