SWDEV-484578 SWDEV-484575 SWDEV-484573 SWDEV-483324 SWDEV-483323 - Fixes issues in nvidia mappings for batch mem ops

Change-Id: I6202ea5691b8256e004650d2689c2826a53d8113


[ROCm/hipother commit: 1d96ab69ee]
This commit is contained in:
Sourabh Betigeri
2024-12-11 13:28:59 +00:00
committed by Sourabh Betigeri
parent b7a1df8fb0
commit a421a72d8f
@@ -519,69 +519,66 @@ static inline void hipResourceDesTocudaResourceDes(CUDA_RESOURCE_DESC* a, const
a->flags = p->flags;
}
/**
 * Operations for hipStreamBatchMemOp.
 *
 * Every enumerator carries an explicit value; each one is listed exactly once.
 */
typedef enum hipStreamBatchMemOpType {
  hipStreamMemOpWaitValue32 = 0x1,
  hipStreamMemOpWriteValue32 = 0x2,
  hipStreamMemOpWaitValue64 = 0x4,
  hipStreamMemOpWriteValue64 = 0x5,
  hipStreamMemOpBarrier = 0x6,           ///< Currently not supported
  hipStreamMemOpFlushRemoteWrites = 0x3  ///< Currently not supported
} hipStreamBatchMemOpType;
/**
 * Maps a HIP batch memory operation type onto the matching CUDA driver enumerator.
 *
 * @param memOpType HIP operation type to translate.
 * @return The corresponding CU_STREAM_MEM_OP_* value. Any value that is not one of the six
 *         known enumerators falls back to CU_STREAM_MEM_OP_WAIT_VALUE_32.
 */
inline static CUstreamBatchMemOpType hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
    hipStreamBatchMemOpType memOpType) {
  switch (memOpType) {
    case hipStreamMemOpWaitValue32:
      return CU_STREAM_MEM_OP_WAIT_VALUE_32;
    case hipStreamMemOpWriteValue32:
      return CU_STREAM_MEM_OP_WRITE_VALUE_32;
    case hipStreamMemOpWaitValue64:
      return CU_STREAM_MEM_OP_WAIT_VALUE_64;
    case hipStreamMemOpWriteValue64:
      return CU_STREAM_MEM_OP_WRITE_VALUE_64;
    case hipStreamMemOpBarrier:
      return CU_STREAM_MEM_OP_BARRIER;
    case hipStreamMemOpFlushRemoteWrites:
      return CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
    default:
      // Preserved fallback: unknown values are reported as a 32-bit wait.
      return CU_STREAM_MEM_OP_WAIT_VALUE_32;
  }
}
/**
 * Parameters for one entry of a batch memory operation.
 *
 * Every variant begins with an `operation` field, so the operation can be read through the
 * top-level `operation` member or through any of the per-operation structs.
 */
typedef union hipStreamBatchMemOpParams_union {
  hipStreamBatchMemOpType operation;
  /** Parameters for the 32-bit and 64-bit wait operations. */
  struct hipStreamMemOpWaitValueParams_t {
    hipStreamBatchMemOpType operation;
    hipDeviceptr_t address;
    union {
      uint32_t value;
      uint64_t value64;
    };
    unsigned int flags;
    hipDeviceptr_t alias;  ///< Not valid for AMD backend. Initial value is unimportant
  } waitValue;
  /** Parameters for the 32-bit and 64-bit write operations. */
  struct hipStreamMemOpWriteValueParams_t {
    hipStreamBatchMemOpType operation;
    hipDeviceptr_t address;
    union {
      uint32_t value;
      uint64_t value64;
    };
    unsigned int flags;
    hipDeviceptr_t alias;  ///< Not valid for AMD backend. Initial value is unimportant
  } writeValue;
  struct hipStreamMemOpFlushRemoteWritesParams_t {
    hipStreamBatchMemOpType operation;
    unsigned int flags;
  } flushRemoteWrites;  ///< Currently not supported on AMD
  struct hipStreamMemOpMemoryBarrierParams_t {
    hipStreamBatchMemOpType operation;
    unsigned int flags;
  } memoryBarrier;  ///< Currently not supported on AMD
  uint64_t pad[6];  // Explicit padding member; also sets a lower bound on the union's size.
} hipStreamBatchMemOpParams;
// hipStreamBatchMemOpType
@@ -589,7 +586,7 @@ typedef union hipStreamBatchMemOpParams_union {
/**
 * Parameters describing a batch memory operation graph node.
 */
typedef struct hipBatchMemOpNodeParams {
  hipCtx_t ctx;
  unsigned int count;                     ///< Number of entries in paramArray
  hipStreamBatchMemOpParams* paramArray;  ///< Array of `count` operation descriptors
  unsigned int flags;
} hipBatchMemOpNodeParams;
@@ -599,64 +596,43 @@ static inline void hipBatchMemOpParamsTocudaBatchMemOpParams(CUstreamBatchMemOpP
const hipStreamBatchMemOpParams* p,
unsigned int count) {
for (unsigned int i = 0; i < count; i++) {
switch (p[i].operation) {
case hipStreamMemOpWaitValue32:
a[i].operation = CU_STREAM_MEM_OP_WAIT_VALUE_32;
break;
case hipStreamMemOpWriteValue32:
a[i].operation = CU_STREAM_MEM_OP_WRITE_VALUE_32;
break;
case hipStreamMemOpWaitValue64:
a[i].operation = CU_STREAM_MEM_OP_WAIT_VALUE_64;
break;
case hipStreamMemOpWriteValue64:
a[i].operation = CU_STREAM_MEM_OP_WRITE_VALUE_64;
break;
case hipStreamMemOpBarrier:
a[i].operation = CU_STREAM_MEM_OP_BARRIER;
break;
case hipStreamMemOpFlushRemoteWrites:
a[i].operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
break;
default:
a[i].operation = CU_STREAM_MEM_OP_WAIT_VALUE_32;
break;
if (p[i].waitValue.operation == hipStreamMemOpWaitValue32) {
a[i].waitValue.operation = CU_STREAM_MEM_OP_WAIT_VALUE_32;
a[i].waitValue.address = p[i].waitValue.address;
a[i].waitValue.value = (cuuint32_t)(p[i].waitValue.value);
a[i].waitValue.flags = p[i].waitValue.flags;
a[i].waitValue.alias = (CUdeviceptr)(p[i].waitValue.alias);
}
a[i].waitValue.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
p[i].waitValue.operation);
a[i].waitValue.address = p[i].waitValue.address;
a[i].waitValue.value = static_cast<cuuint32_t>(p[i].waitValue.value);
a[i].waitValue.value64 = static_cast<cuuint64_t>(p[i].waitValue.value64);
a[i].waitValue.flags = p[i].waitValue.flags;
a[i].waitValue.alias = (CUdeviceptr)p[i].waitValue.alias;
a[i].writeValue.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
p[i].waitValue.operation);
a[i].writeValue.address = p[i].writeValue.address;
a[i].writeValue.value = static_cast<cuuint32_t>(p[i].writeValue.value);
a[i].writeValue.value64 = static_cast<cuuint64_t>(p[i].writeValue.value64);
a[i].writeValue.flags = p[i].writeValue.flags;
a[i].writeValue.alias = (CUdeviceptr)p[i].writeValue.alias;
a[i].flushRemoteWrites.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
p[i].flushRemoteWrites.operation);
a[i].flushRemoteWrites.flags = p[i].flushRemoteWrites.flags;
a[i].memoryBarrier.operation = hipStreamBatchMemOpType_enumToCUstreamBatchMemOpType(
p[i].memoryBarrier.operation);
a[i].memoryBarrier.flags = p[i].memoryBarrier.flags;
}
}
static inline void hipBatchMemOpNodeParamsTocudaBatchMemOpNodeParams(
CUDA_BATCH_MEM_OP_NODE_PARAMS* a,
const hipBatchMemOpNodeParams* p) {
CUstreamBatchMemOpParams cuParamArray[p->count];
hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, p->paramArray, p->count);
a->ctx = (CUcontext)p->ctx;
a->count = p->count;
a->paramArray = cuParamArray;
a->flags = p->flags;
else if (p[i].writeValue.operation == hipStreamMemOpWriteValue32) {
a[i].writeValue.operation = CU_STREAM_MEM_OP_WRITE_VALUE_32;
a[i].writeValue.address = p[i].writeValue.address;
a[i].writeValue.value = (cuuint32_t)(p[i].writeValue.value);
a[i].writeValue.flags = p[i].writeValue.flags;
a[i].writeValue.alias = (CUdeviceptr)(p[i].writeValue.alias);
}
else if (p[i].waitValue.operation == hipStreamMemOpWaitValue64) {
a[i].waitValue.operation = CU_STREAM_MEM_OP_WAIT_VALUE_64;
a[i].waitValue.address = p[i].waitValue.address;
a[i].waitValue.value64 = (cuuint64_t)(p[i].waitValue.value64);
a[i].waitValue.flags = p[i].waitValue.flags;
a[i].waitValue.alias = (CUdeviceptr)(p[i].waitValue.alias);
}
else if (p[i].writeValue.operation == hipStreamMemOpWriteValue64) {
a[i].writeValue.operation = CU_STREAM_MEM_OP_WRITE_VALUE_64;
a[i].writeValue.address = p[i].writeValue.address;
a[i].writeValue.value64 = (cuuint64_t)(p[i].writeValue.value64);
a[i].writeValue.flags = p[i].writeValue.flags;
a[i].writeValue.alias = (CUdeviceptr)(p[i].writeValue.alias);
}
else if (p[i].memoryBarrier.operation == hipStreamMemOpBarrier) {
a[i].memoryBarrier.operation == CU_STREAM_MEM_OP_BARRIER;
a[i].memoryBarrier.flags = p[i].memoryBarrier.flags;
}
else if (p[i].flushRemoteWrites.operation == hipStreamMemOpFlushRemoteWrites) {
a[i].flushRemoteWrites.operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
a[i].flushRemoteWrites.flags = p[i].flushRemoteWrites.flags;
}
}
}
typedef struct hip_Memcpy2D {
@@ -4159,41 +4135,64 @@ inline static hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int
/**
 * Enqueues a batch of memory operations on a stream through cuStreamBatchMemOp.
 *
 * @param stream     Stream to enqueue the operations on.
 * @param count      Number of entries in paramArray.
 * @param paramArray HIP descriptors of the operations; converted to CUDA descriptors first.
 * @param flags      Passed through to the driver unchanged.
 * @return The driver result translated by hipCUResultTohipError.
 */
inline static hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
                                             hipStreamBatchMemOpParams* paramArray,
                                             unsigned int flags) {
  // Value-initialized so entries the converter does not recognize are zeroed, not indeterminate.
  CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[count]();
  hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, paramArray, count);
  const hipError_t status =
      hipCUResultTohipError(cuStreamBatchMemOp(stream, count, cuParamArray, flags));
  // NOTE(review): assumes the driver consumes the array during the call, as the earlier
  // stack-array implementation also did — confirm against the driver documentation.
  delete[] cuParamArray;
  return status;
}
/**
 * Adds a batch memory operation node to a graph through cuGraphAddBatchMemOpNode.
 *
 * @param phGraphNode     Receives the created node.
 * @param hGraph          Graph to add the node to.
 * @param dependencies    Nodes the new node depends on.
 * @param numDependencies Number of entries in dependencies.
 * @param nodeParams      HIP node parameters; dereferenced unconditionally, so must be non-null.
 * @return The driver result translated by hipCUResultTohipError.
 */
inline static hipError_t hipGraphAddBatchMemOpNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph,
                                                   const hipGraphNode_t* dependencies,
                                                   size_t numDependencies,
                                                   const hipBatchMemOpNodeParams* nodeParams) {
  // Value-initialized so entries the converter does not recognize are zeroed, not indeterminate.
  CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[nodeParams->count]();
  hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, nodeParams->paramArray,
                                            nodeParams->count);
  CUDA_BATCH_MEM_OP_NODE_PARAMS cuBatchMemOpNodeParams;
  cuBatchMemOpNodeParams.ctx = (CUcontext)nodeParams->ctx;
  cuBatchMemOpNodeParams.count = nodeParams->count;
  cuBatchMemOpNodeParams.paramArray = cuParamArray;
  cuBatchMemOpNodeParams.flags = nodeParams->flags;
  const hipError_t status = hipCUResultTohipError(cuGraphAddBatchMemOpNode(
      phGraphNode, hGraph, dependencies, numDependencies, &cuBatchMemOpNodeParams));
  // Release the temporary array before returning; a delete placed after the return never runs.
  // NOTE(review): assumes the driver copies the parameter array into the node — confirm.
  delete[] cuParamArray;
  return status;
}
/**
 * Retrieves the parameters of a batch memory operation node through
 * cuGraphBatchMemOpNodeGetParams.
 *
 * @param hNode          Node to query.
 * @param nodeParams_out Receives the node parameters.
 * @return The driver result translated by hipCUResultTohipError.
 */
inline static hipError_t hipGraphBatchMemOpNodeGetParams(hipGraphNode_t hNode,
                                                         hipBatchMemOpNodeParams* nodeParams_out) {
  // NOTE(review): the cast presumes hipBatchMemOpNodeParams and CUDA_BATCH_MEM_OP_NODE_PARAMS
  // (including the element type of paramArray) share one layout — confirm.
  return hipCUResultTohipError(cuGraphBatchMemOpNodeGetParams(
      hNode, (CUDA_BATCH_MEM_OP_NODE_PARAMS*)nodeParams_out));
}
/**
 * Replaces the parameters of a batch memory operation node through
 * cuGraphBatchMemOpNodeSetParams.
 *
 * @param hNode      Node to update.
 * @param nodeParams New HIP node parameters; dereferenced unconditionally, so must be non-null.
 * @return The driver result translated by hipCUResultTohipError.
 */
inline static hipError_t hipGraphBatchMemOpNodeSetParams(hipGraphNode_t hNode,
                                                         hipBatchMemOpNodeParams* nodeParams) {
  // Value-initialized so entries the converter does not recognize are zeroed, not indeterminate.
  CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[nodeParams->count]();
  hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, nodeParams->paramArray,
                                            nodeParams->count);
  CUDA_BATCH_MEM_OP_NODE_PARAMS cuBatchMemOpNodeParams;
  cuBatchMemOpNodeParams.ctx = (CUcontext)nodeParams->ctx;
  cuBatchMemOpNodeParams.count = nodeParams->count;
  cuBatchMemOpNodeParams.paramArray = cuParamArray;
  cuBatchMemOpNodeParams.flags = nodeParams->flags;
  const hipError_t status =
      hipCUResultTohipError(cuGraphBatchMemOpNodeSetParams(hNode, &cuBatchMemOpNodeParams));
  // Release the temporary array before returning; a delete placed after the return never runs.
  // NOTE(review): assumes the driver copies the parameter array into the node — confirm.
  delete[] cuParamArray;
  return status;
}
/**
 * Updates the parameters of a batch memory operation node inside an instantiated graph through
 * cuGraphExecBatchMemOpNodeSetParams.
 *
 * @param hGraphExec Executable graph containing the node.
 * @param hNode      Node to update.
 * @param nodeParams New HIP node parameters; dereferenced unconditionally, so must be non-null.
 * @return The driver result translated by hipCUResultTohipError.
 */
inline static hipError_t hipGraphExecBatchMemOpNodeSetParams(
    hipGraphExec_t hGraphExec, hipGraphNode_t hNode, const hipBatchMemOpNodeParams* nodeParams) {
  // Value-initialized so entries the converter does not recognize are zeroed, not indeterminate.
  CUstreamBatchMemOpParams* cuParamArray = new CUstreamBatchMemOpParams[nodeParams->count]();
  hipBatchMemOpParamsTocudaBatchMemOpParams(cuParamArray, nodeParams->paramArray,
                                            nodeParams->count);
  CUDA_BATCH_MEM_OP_NODE_PARAMS cuBatchMemOpNodeParams;
  cuBatchMemOpNodeParams.ctx = (CUcontext)nodeParams->ctx;
  cuBatchMemOpNodeParams.count = nodeParams->count;
  cuBatchMemOpNodeParams.paramArray = cuParamArray;
  cuBatchMemOpNodeParams.flags = nodeParams->flags;
  const hipError_t status = hipCUResultTohipError(
      cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode, &cuBatchMemOpNodeParams));
  // Release the temporary array before returning; a delete placed after the return never runs.
  // NOTE(review): assumes the driver copies the parameter array during the call — confirm.
  delete[] cuParamArray;
  return status;
}
inline static hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from,
const hipGraphNode_t* to,
size_t numDependencies) {