From e4e9475327e1ee958a41f2ca274b13608e07b537 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 7 Jan 2020 15:59:02 -0500
Subject: [PATCH] P4 to Git Change 2053320 by vsytchen@vsytchen-hip-win10 on
2020/01/07 15:54:34
SWDEV-215533 - [HIP][Windows]Output mismatch with tex3D()
1. Implement hipMemcpy3DAsync().
2. Add logic in hipMemcpy3D() to determine based on src/dst parameters if the user intended on calling cudaMemcpy3D() or cuMemcpy3D().
ReviewBoardURL = http://ocltc.amd.com/reviews/r/18444/diff/
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#38 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#36 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#95 edit
[ROCm/hip commit: 3cbd083c8dcb3582cbb4d9b0082130b87df9694d]
---
projects/hip/api/hip/hip_hcc.def.in | 1 +
projects/hip/api/hip/hip_hcc.map.in | 1 +
projects/hip/api/hip/hip_memory.cpp | 344 +++++++++++++++++++++++-----
3 files changed, 284 insertions(+), 62 deletions(-)
diff --git a/projects/hip/api/hip/hip_hcc.def.in b/projects/hip/api/hip/hip_hcc.def.in
index 6aba1f5a25..fd51a5701a 100644
--- a/projects/hip/api/hip/hip_hcc.def.in
+++ b/projects/hip/api/hip/hip_hcc.def.in
@@ -87,6 +87,7 @@ hipMemcpy2D
hipMemcpy2DAsync
hipMemcpy2DToArray
hipMemcpy3D
+hipMemcpy3DAsync
hipMemcpyAsync
hipMemcpyDtoD
hipMemcpyDtoDAsync
diff --git a/projects/hip/api/hip/hip_hcc.map.in b/projects/hip/api/hip/hip_hcc.map.in
index f80d7db23a..c0e2bf9458 100644
--- a/projects/hip/api/hip/hip_hcc.map.in
+++ b/projects/hip/api/hip/hip_hcc.map.in
@@ -88,6 +88,7 @@ global:
hipMemcpy2DAsync;
hipMemcpy2DToArray;
hipMemcpy3D;
+ hipMemcpy3DAsync;
hipMemcpyAsync;
hipMemcpyDtoD;
hipMemcpyDtoDAsync;
diff --git a/projects/hip/api/hip/hip_memory.cpp b/projects/hip/api/hip/hip_memory.cpp
index 11e08384fc..6b23913386 100644
--- a/projects/hip/api/hip/hip_memory.cpp
+++ b/projects/hip/api/hip/hip_memory.cpp
@@ -1107,95 +1107,315 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t
HIP_RETURN(hipSuccess);
}
-hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
- HIP_INIT_API(hipMemcpy3D, p);
-
- hip::syncStreams();
- amd::HostQueue* queue = hip::getNullStream();
-
- size_t byteSize;
- size_t srcPitchInBytes;
- size_t dstPitchInbytes;
- void* srcPtr;
- void* dstPtr;
- size_t srcOrigin[3];
- size_t dstOrigin[3];
- size_t region[3];
-
- region[2] = p->extent.depth;
- region[1] = p->extent.height;
- region[0] = p->extent.width;
- srcOrigin[0] = p->srcPos.x;
- srcOrigin[1] = p->srcPos.y;
- srcOrigin[2] = p->srcPos.z;
- dstOrigin[0] = p->dstPos.x;
- dstOrigin[1] = p->dstPos.y;
- dstOrigin[2] = p->dstPos.z;
-
- if (p->dstArray != nullptr) {
- getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &byteSize);
- dstPitchInbytes = p->dstArray->width * byteSize;
- srcPitchInBytes = p->srcPtr.pitch;
- srcPtr = (void*)p->srcPtr.ptr;
- dstPtr = p->dstArray->data;
+hipError_t ihipMemcpy3D_V1(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
+ const void* srcPtr = nullptr;
+ size_t srcElementSizeInBytes = sizeof(unsigned char);
+ size_t srcRowPitchInBytes = 0;
+ size_t srcSlicePitchInBytes = 0;
+ if (p->srcMemoryType == hipMemoryTypeHost) {
+ srcPtr = p->srcHost;
+ srcRowPitchInBytes = p->srcPitch;
+ srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight;
+ } else if ((p->srcMemoryType == hipMemoryTypeDevice) ||
+ (p->srcMemoryType == hipMemoryTypeUnified)) {
+ srcPtr = p->srcDevice;
+ srcRowPitchInBytes = p->srcPitch;
+ srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight;
+ } else if (p->srcMemoryType == hipMemoryTypeArray) {
+ srcPtr = p->srcArray->data;
+ getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes);
+ srcElementSizeInBytes *= p->srcArray->NumChannels;
+ srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width;
+ srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height;
} else {
- srcPitchInBytes = p->srcPtr.pitch;
- dstPitchInbytes = p->dstPtr.pitch;
- srcPtr = p->srcPtr.ptr;
- dstPtr = p->dstPtr.ptr;
+ ShouldNotReachHere();
}
- // Create buffer rectangle info structure
+ void* dstPtr = nullptr;
+ size_t dstElementSizeInBytes = sizeof(unsigned char);
+ size_t dstRowPitchInBytes = 0;
+ size_t dstSlicePitchInBytes = 0;
+ if (p->dstMemoryType == hipMemoryTypeHost) {
+ dstPtr = p->dstHost;
+ dstRowPitchInBytes = p->dstPitch;
+ dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight;
+ } else if ((p->dstMemoryType == hipMemoryTypeDevice) ||
+ (p->dstMemoryType == hipMemoryTypeUnified)) {
+ dstPtr = p->dstDevice;
+ dstRowPitchInBytes = p->dstPitch;
+ dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight;
+ } else if (p->dstMemoryType == hipMemoryTypeArray) {
+ dstPtr = p->dstArray->data;
+ getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes);
+ dstElementSizeInBytes *= p->dstArray->NumChannels;
+ dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width;
+ dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height;
+ } else {
+ ShouldNotReachHere();
+ }
+
+ // For HIP arrays, srcXInBytes must be evenly divisible by the array element size.
+ if ((p->srcMemoryType == hipMemoryTypeArray) &&
+ ((p->srcXInBytes % srcElementSizeInBytes) != 0)) {
+ return hipErrorInvalidValue;
+ }
+
+ // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes
+ if ((p->srcMemoryType != hipMemoryTypeArray) &&
+ (p->srcPitch < (p->WidthInBytes + p->srcXInBytes))) {
+ return hipErrorInvalidValue;
+ }
+
+ // If specified, srcHeight must be greater than or equal to Height + srcY
+ if ((p->srcMemoryType != hipMemoryTypeArray) &&
+ (p->srcHeight < (p->Height + p->srcY))) {
+ return hipErrorInvalidValue;
+ }
+
+ // For HIP arrays, dstXInBytes must be evenly divisible by the array element size.
+ if ((p->dstMemoryType == hipMemoryTypeArray) &&
+ ((p->dstXInBytes % dstElementSizeInBytes) != 0)) {
+ return hipErrorInvalidValue;
+ }
+
+ // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes
+ if ((p->dstMemoryType != hipMemoryTypeArray) &&
+ (p->dstPitch < (p->WidthInBytes + p->dstXInBytes))) {
+ return hipErrorInvalidValue;
+ }
+
+ // If specified, srcHeight must be greater than or equal to Height + srcY
+ if ((p->dstMemoryType != hipMemoryTypeArray) &&
+ (p->dstHeight < (p->Height + p->dstY))) {
+ return hipErrorInvalidValue;
+ }
+
+ // The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be set to 0.
+ if ((p->srcLOD != 0) || (p->dstLOD != 0)) {
+ return hipErrorInvalidValue;
+ }
+
+ size_t region[3];
+ region[0] = p->WidthInBytes;
+ region[1] = p->Height;
+ region[2] = p->Depth;
+
+ size_t srcOrigin[3];
+ srcOrigin[0] = p->srcXInBytes;
+ srcOrigin[1] = p->srcY;
+ srcOrigin[2] = p->srcZ;
+
+ size_t dstOrigin[3];
+ dstOrigin[0] = p->dstXInBytes;
+ dstOrigin[1] = p->dstY;
+ dstOrigin[2] = p->dstZ;
+
amd::BufferRect srcRect;
- amd::BufferRect dstRect;
- size_t offset = 0;
- amd::Memory* srcMemory = getMemoryObject(srcPtr, offset);
- assert(offset == 0);
- amd::Memory* dstMemory = getMemoryObject(dstPtr, offset);
- assert(offset == 0);
-
- size_t src_slice_pitch = srcPitchInBytes * p->extent.height;
- size_t dst_slice_pitch = dstPitchInbytes * p->extent.height;
-
- if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) ||
- !dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) {
- HIP_RETURN(hipErrorInvalidValue);
+ if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) {
+ return hipErrorInvalidValue;
}
- hipMemcpyKind kind = p->kind;
+ size_t srcMemoryOffset = 0;
+ amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset);
+ amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0);
+
+ amd::BufferRect dstRect;
+ if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) {
+ return hipErrorInvalidValue;
+ }
+
+ size_t dstMemoryOffset = 0;
+ amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset);
+ amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0);
amd::Command* command = nullptr;
amd::Command::EventWaitList waitList;
-
- amd::Coord3D srcStart(srcRect.start_, 0, 0);
- amd::Coord3D dstStart(dstRect.start_, 0, 0);
- amd::Coord3D size(region[0], region[1], region[2]);
+ amd::HostQueue* queue = hip::getQueue(stream);
+ amd::Coord3D regionSize(region[0], region[1], region[2]);
if (((srcMemory == nullptr) && (dstMemory == nullptr)) ||
- (kind == hipMemcpyHostToHost)) {
+ (p->kind == hipMemcpyHostToHost)) {
memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]);
- HIP_RETURN(hipSuccess);
+ return hipSuccess;
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList,
- *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect);
+ *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect);
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList,
- *srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect);
+ *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect);
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList,
- *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size,
+ *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize,
srcRect, dstRect);
}
if (command == nullptr) {
- HIP_RETURN(hipErrorOutOfMemory);
+ return hipErrorOutOfMemory;
}
command->enqueue();
- command->awaitCompletion();
+ if (!isAsync) {
+ command->awaitCompletion();
+ }
command->release();
- HIP_RETURN(hipSuccess);
+ return hipSuccess;
+}
+
+hipError_t ihipMemcpy3D_V2(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
+ void* srcPtr = nullptr;
+ size_t srcElementSizeInBytes = sizeof(unsigned char);
+ size_t srcRowPitchInBytes = 0;
+ size_t srcSlicePitchInBytes = 0;
+ if ((p->srcArray != nullptr) && (p->srcPtr.ptr == nullptr)) {
+ srcPtr = p->srcArray->data;
+ getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes);
+ srcElementSizeInBytes *= p->srcArray->NumChannels;
+ srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width;
+ srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height;
+ } else if ((p->srcArray == nullptr) && (p->srcPtr.ptr != nullptr)) {
+ srcPtr = p->srcPtr.ptr;
+ srcRowPitchInBytes = p->srcPtr.pitch;
+ srcSlicePitchInBytes = srcRowPitchInBytes * p->srcPtr.ysize;
+ } else {
+ ShouldNotReachHere();
+ }
+
+ void* dstPtr = nullptr;
+ size_t dstElementSizeInBytes = sizeof(unsigned char);
+ size_t dstRowPitchInBytes = 0;
+ size_t dstSlicePitchInBytes = 0;
+ if ((p->dstArray != nullptr) && (p->dstPtr.ptr == nullptr)) {
+ dstPtr = p->dstArray->data;
+ getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes);
+ dstElementSizeInBytes *= p->dstArray->NumChannels;
+ dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width;
+ dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height;
+ } else if ((p->dstArray == nullptr) && (p->dstPtr.ptr != nullptr)) {
+ dstPtr = p->dstPtr.ptr;
+ dstRowPitchInBytes = p->srcPtr.pitch;
+ dstSlicePitchInBytes = dstRowPitchInBytes * p->dstPtr.ysize;
+ } else {
+ ShouldNotReachHere();
+ }
+
+ // If the source and destination are both arrays, they must have the same element size.
+ if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) &&
+ (srcElementSizeInBytes != dstElementSizeInBytes)) {
+ return hipErrorInvalidValue;
+ }
+
+ // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements.
+ // If no HIP array is participating in the copy, the extent is defined in elements of unsigned char.
+ size_t region[3];
+ if (p->srcArray != nullptr) {
+ region[0] = srcRowPitchInBytes;
+ } else if (p->dstArray != nullptr) {
+ region[0] = dstRowPitchInBytes;
+ } else {
+ region[0] = sizeof(unsigned char) * p->extent.width;
+ }
+ region[1] = p->extent.height;
+ region[2] = p->extent.depth;
+
+ // The offset into the object is defined in units of the object's elements.
+ size_t srcOrigin[3];
+ srcOrigin[0] = srcElementSizeInBytes * p->srcPos.x;
+ srcOrigin[1] = p->srcPos.y;
+ srcOrigin[2] = p->srcPos.z;
+
+ amd::BufferRect srcRect;
+ if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) {
+ return hipErrorInvalidValue;
+ }
+
+ size_t srcMemoryOffset = 0;
+ amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset);
+ amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0);
+
+ size_t dstOrigin[3];
+ dstOrigin[0] = dstElementSizeInBytes * p->dstPos.x;
+ dstOrigin[1] = p->dstPos.y;
+ dstOrigin[2] = p->dstPos.z;
+
+ amd::BufferRect dstRect;
+ if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) {
+ return hipErrorInvalidValue;
+ }
+
+ size_t dstMemoryOffset = 0;
+ amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset);
+ amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0);
+
+ amd::Command* command = nullptr;
+ amd::Command::EventWaitList waitList;
+ amd::HostQueue* queue = hip::getQueue(stream);
+ amd::Coord3D regionSize(region[0], region[1], region[2]);
+
+ if (((srcMemory == nullptr) && (dstMemory == nullptr)) ||
+ (p->kind == hipMemcpyHostToHost)) {
+ memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]);
+ return hipSuccess;
+ } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
+ command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList,
+ *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect);
+ } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
+ command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList,
+ *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect);
+ } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
+ command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList,
+ *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize,
+ srcRect, dstRect);
+ }
+
+ if (command == nullptr) {
+ return hipErrorOutOfMemory;
+ }
+
+ command->enqueue();
+ if (!isAsync) {
+ command->awaitCompletion();
+ }
+ command->release();
+
+ return hipSuccess;
+}
+
+hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
+ // Having src and dst be an array is ambigous, since we can't tell if the user intended to call hipMemcpy3D_V1() or hipMemcpy3D_V2().
+ // For now hope that we never encounter this case.
+ assert((p->srcArray == nullptr) || (p->dstArray == nullptr));
+
+ // When calling hipMemcpy3D_V1(), the user must specify
+ // one of srcHost, srcDevice or srcArray and
+ // one of dstHost, dstDevice or dstArray.
+ if (((p->srcHost != nullptr) || (p->srcDevice != nullptr) || (p->srcArray != nullptr)) &&
+ ((p->dstHost != nullptr) || (p->dstDevice != nullptr) || (p->dstArray != nullptr))) {
+ return ihipMemcpy3D_V1(p, stream, isAsync);
+ }
+
+ // When calling hipMemcpy3D_V2(), the user must specify
+ // one of srcArray or srcPtr and
+ // one of dstArray or dstPtr.
+ if (((p->srcArray != nullptr) || (p->srcPtr.ptr != nullptr)) &&
+ ((p->dstArray != nullptr) || (p->dstPtr.ptr != nullptr))) {
+ return ihipMemcpy3D_V2(p, stream, isAsync);
+ }
+
+ // If we got here, then the user specified an invalid combination of src/dst parameters.
+ return hipErrorInvalidValue;
+}
+
+hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
+ HIP_INIT_API(hipMemcpy3D, p);
+
+ HIP_RETURN(ihipMemcpy3D(p, nullptr));
+}
+
+hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) {
+ HIP_INIT_API(hipMemcpy3DAsync, p, stream);
+
+ HIP_RETURN(ihipMemcpy3D(p, stream, true));
}
hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes,