P4 to Git Change 2053320 by vsytchen@vsytchen-hip-win10 on 2020/01/07 15:54:34

SWDEV-215533 - [HIP][Windows]Output mismatch with tex3D()

	1. Implement hipMemcpy3DAsync().
	2. Add logic in hipMemcpy3D() to determine based on src/dst parameters if the user intended on calling cudaMemcpy3D() or cuMemcpy3D().

	ReviewBoardURL = http://ocltc.amd.com/reviews/r/18444/diff/

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#38 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#36 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#95 edit


[ROCm/hip commit: 3cbd083c8d]
This commit is contained in:
foreman
2020-01-07 15:59:02 -05:00
rodzic 31e4b7e893
commit e4e9475327
3 zmienionych plików z 284 dodań i 62 usunięć
+1
Wyświetl plik
@@ -87,6 +87,7 @@ hipMemcpy2D
hipMemcpy2DAsync
hipMemcpy2DToArray
hipMemcpy3D
hipMemcpy3DAsync
hipMemcpyAsync
hipMemcpyDtoD
hipMemcpyDtoDAsync
+1
Wyświetl plik
@@ -88,6 +88,7 @@ global:
hipMemcpy2DAsync;
hipMemcpy2DToArray;
hipMemcpy3D;
hipMemcpy3DAsync;
hipMemcpyAsync;
hipMemcpyDtoD;
hipMemcpyDtoDAsync;
+282 -62
Wyświetl plik
@@ -1107,95 +1107,315 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t
HIP_RETURN(hipSuccess);
}
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
HIP_INIT_API(hipMemcpy3D, p);
hip::syncStreams();
amd::HostQueue* queue = hip::getNullStream();
size_t byteSize;
size_t srcPitchInBytes;
size_t dstPitchInbytes;
void* srcPtr;
void* dstPtr;
size_t srcOrigin[3];
size_t dstOrigin[3];
size_t region[3];
region[2] = p->extent.depth;
region[1] = p->extent.height;
region[0] = p->extent.width;
srcOrigin[0] = p->srcPos.x;
srcOrigin[1] = p->srcPos.y;
srcOrigin[2] = p->srcPos.z;
dstOrigin[0] = p->dstPos.x;
dstOrigin[1] = p->dstPos.y;
dstOrigin[2] = p->dstPos.z;
if (p->dstArray != nullptr) {
getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &byteSize);
dstPitchInbytes = p->dstArray->width * byteSize;
srcPitchInBytes = p->srcPtr.pitch;
srcPtr = (void*)p->srcPtr.ptr;
dstPtr = p->dstArray->data;
hipError_t ihipMemcpy3D_V1(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
const void* srcPtr = nullptr;
size_t srcElementSizeInBytes = sizeof(unsigned char);
size_t srcRowPitchInBytes = 0;
size_t srcSlicePitchInBytes = 0;
if (p->srcMemoryType == hipMemoryTypeHost) {
srcPtr = p->srcHost;
srcRowPitchInBytes = p->srcPitch;
srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight;
} else if ((p->srcMemoryType == hipMemoryTypeDevice) ||
(p->srcMemoryType == hipMemoryTypeUnified)) {
srcPtr = p->srcDevice;
srcRowPitchInBytes = p->srcPitch;
srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight;
} else if (p->srcMemoryType == hipMemoryTypeArray) {
srcPtr = p->srcArray->data;
getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes);
srcElementSizeInBytes *= p->srcArray->NumChannels;
srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width;
srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height;
} else {
srcPitchInBytes = p->srcPtr.pitch;
dstPitchInbytes = p->dstPtr.pitch;
srcPtr = p->srcPtr.ptr;
dstPtr = p->dstPtr.ptr;
ShouldNotReachHere();
}
// Create buffer rectangle info structure
void* dstPtr = nullptr;
size_t dstElementSizeInBytes = sizeof(unsigned char);
size_t dstRowPitchInBytes = 0;
size_t dstSlicePitchInBytes = 0;
if (p->dstMemoryType == hipMemoryTypeHost) {
dstPtr = p->dstHost;
dstRowPitchInBytes = p->dstPitch;
dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight;
} else if ((p->dstMemoryType == hipMemoryTypeDevice) ||
(p->dstMemoryType == hipMemoryTypeUnified)) {
dstPtr = p->dstDevice;
dstRowPitchInBytes = p->dstPitch;
dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight;
} else if (p->dstMemoryType == hipMemoryTypeArray) {
dstPtr = p->dstArray->data;
getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes);
dstElementSizeInBytes *= p->dstArray->NumChannels;
dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width;
dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height;
} else {
ShouldNotReachHere();
}
// For HIP arrays, srcXInBytes must be evenly divisible by the array element size.
if ((p->srcMemoryType == hipMemoryTypeArray) &&
((p->srcXInBytes % srcElementSizeInBytes) != 0)) {
return hipErrorInvalidValue;
}
// If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes
if ((p->srcMemoryType != hipMemoryTypeArray) &&
(p->srcPitch < (p->WidthInBytes + p->srcXInBytes))) {
return hipErrorInvalidValue;
}
// If specified, srcHeight must be greater than or equal to Height + srcY
if ((p->srcMemoryType != hipMemoryTypeArray) &&
(p->srcHeight < (p->Height + p->srcY))) {
return hipErrorInvalidValue;
}
// For HIP arrays, dstXInBytes must be evenly divisible by the array element size.
if ((p->dstMemoryType == hipMemoryTypeArray) &&
((p->dstXInBytes % dstElementSizeInBytes) != 0)) {
return hipErrorInvalidValue;
}
// If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes
if ((p->dstMemoryType != hipMemoryTypeArray) &&
(p->dstPitch < (p->WidthInBytes + p->dstXInBytes))) {
return hipErrorInvalidValue;
}
// If specified, srcHeight must be greater than or equal to Height + srcY
if ((p->dstMemoryType != hipMemoryTypeArray) &&
(p->dstHeight < (p->Height + p->dstY))) {
return hipErrorInvalidValue;
}
// The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be set to 0.
if ((p->srcLOD != 0) || (p->dstLOD != 0)) {
return hipErrorInvalidValue;
}
size_t region[3];
region[0] = p->WidthInBytes;
region[1] = p->Height;
region[2] = p->Depth;
size_t srcOrigin[3];
srcOrigin[0] = p->srcXInBytes;
srcOrigin[1] = p->srcY;
srcOrigin[2] = p->srcZ;
size_t dstOrigin[3];
dstOrigin[0] = p->dstXInBytes;
dstOrigin[1] = p->dstY;
dstOrigin[2] = p->dstZ;
amd::BufferRect srcRect;
amd::BufferRect dstRect;
size_t offset = 0;
amd::Memory* srcMemory = getMemoryObject(srcPtr, offset);
assert(offset == 0);
amd::Memory* dstMemory = getMemoryObject(dstPtr, offset);
assert(offset == 0);
size_t src_slice_pitch = srcPitchInBytes * p->extent.height;
size_t dst_slice_pitch = dstPitchInbytes * p->extent.height;
if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) ||
!dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) {
HIP_RETURN(hipErrorInvalidValue);
if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) {
return hipErrorInvalidValue;
}
hipMemcpyKind kind = p->kind;
size_t srcMemoryOffset = 0;
amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset);
amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0);
amd::BufferRect dstRect;
if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) {
return hipErrorInvalidValue;
}
size_t dstMemoryOffset = 0;
amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset);
amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0);
amd::Command* command = nullptr;
amd::Command::EventWaitList waitList;
amd::Coord3D srcStart(srcRect.start_, 0, 0);
amd::Coord3D dstStart(dstRect.start_, 0, 0);
amd::Coord3D size(region[0], region[1], region[2]);
amd::HostQueue* queue = hip::getQueue(stream);
amd::Coord3D regionSize(region[0], region[1], region[2]);
if (((srcMemory == nullptr) && (dstMemory == nullptr)) ||
(kind == hipMemcpyHostToHost)) {
(p->kind == hipMemcpyHostToHost)) {
memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]);
HIP_RETURN(hipSuccess);
return hipSuccess;
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList,
*dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect);
*dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect);
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList,
*srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect);
*srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect);
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize,
srcRect, dstRect);
}
if (command == nullptr) {
HIP_RETURN(hipErrorOutOfMemory);
return hipErrorOutOfMemory;
}
command->enqueue();
command->awaitCompletion();
if (!isAsync) {
command->awaitCompletion();
}
command->release();
HIP_RETURN(hipSuccess);
return hipSuccess;
}
hipError_t ihipMemcpy3D_V2(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
void* srcPtr = nullptr;
size_t srcElementSizeInBytes = sizeof(unsigned char);
size_t srcRowPitchInBytes = 0;
size_t srcSlicePitchInBytes = 0;
if ((p->srcArray != nullptr) && (p->srcPtr.ptr == nullptr)) {
srcPtr = p->srcArray->data;
getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes);
srcElementSizeInBytes *= p->srcArray->NumChannels;
srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width;
srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height;
} else if ((p->srcArray == nullptr) && (p->srcPtr.ptr != nullptr)) {
srcPtr = p->srcPtr.ptr;
srcRowPitchInBytes = p->srcPtr.pitch;
srcSlicePitchInBytes = srcRowPitchInBytes * p->srcPtr.ysize;
} else {
ShouldNotReachHere();
}
void* dstPtr = nullptr;
size_t dstElementSizeInBytes = sizeof(unsigned char);
size_t dstRowPitchInBytes = 0;
size_t dstSlicePitchInBytes = 0;
if ((p->dstArray != nullptr) && (p->dstPtr.ptr == nullptr)) {
dstPtr = p->dstArray->data;
getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes);
dstElementSizeInBytes *= p->dstArray->NumChannels;
dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width;
dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height;
} else if ((p->dstArray == nullptr) && (p->dstPtr.ptr != nullptr)) {
dstPtr = p->dstPtr.ptr;
dstRowPitchInBytes = p->srcPtr.pitch;
dstSlicePitchInBytes = dstRowPitchInBytes * p->dstPtr.ysize;
} else {
ShouldNotReachHere();
}
// If the source and destination are both arrays, they must have the same element size.
if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) &&
(srcElementSizeInBytes != dstElementSizeInBytes)) {
return hipErrorInvalidValue;
}
// If a HIP array is participating in the copy, the extent is defined in terms of that array's elements.
// If no HIP array is participating in the copy, the extent is defined in elements of unsigned char.
size_t region[3];
if (p->srcArray != nullptr) {
region[0] = srcRowPitchInBytes;
} else if (p->dstArray != nullptr) {
region[0] = dstRowPitchInBytes;
} else {
region[0] = sizeof(unsigned char) * p->extent.width;
}
region[1] = p->extent.height;
region[2] = p->extent.depth;
// The offset into the object is defined in units of the object's elements.
size_t srcOrigin[3];
srcOrigin[0] = srcElementSizeInBytes * p->srcPos.x;
srcOrigin[1] = p->srcPos.y;
srcOrigin[2] = p->srcPos.z;
amd::BufferRect srcRect;
if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) {
return hipErrorInvalidValue;
}
size_t srcMemoryOffset = 0;
amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset);
amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0);
size_t dstOrigin[3];
dstOrigin[0] = dstElementSizeInBytes * p->dstPos.x;
dstOrigin[1] = p->dstPos.y;
dstOrigin[2] = p->dstPos.z;
amd::BufferRect dstRect;
if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) {
return hipErrorInvalidValue;
}
size_t dstMemoryOffset = 0;
amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset);
amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0);
amd::Command* command = nullptr;
amd::Command::EventWaitList waitList;
amd::HostQueue* queue = hip::getQueue(stream);
amd::Coord3D regionSize(region[0], region[1], region[2]);
if (((srcMemory == nullptr) && (dstMemory == nullptr)) ||
(p->kind == hipMemcpyHostToHost)) {
memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]);
return hipSuccess;
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList,
*dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect);
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList,
*srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect);
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList,
*srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize,
srcRect, dstRect);
}
if (command == nullptr) {
return hipErrorOutOfMemory;
}
command->enqueue();
if (!isAsync) {
command->awaitCompletion();
}
command->release();
return hipSuccess;
}
hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
// Having src and dst be an array is ambigous, since we can't tell if the user intended to call hipMemcpy3D_V1() or hipMemcpy3D_V2().
// For now hope that we never encounter this case.
assert((p->srcArray == nullptr) || (p->dstArray == nullptr));
// When calling hipMemcpy3D_V1(), the user must specify
// one of srcHost, srcDevice or srcArray and
// one of dstHost, dstDevice or dstArray.
if (((p->srcHost != nullptr) || (p->srcDevice != nullptr) || (p->srcArray != nullptr)) &&
((p->dstHost != nullptr) || (p->dstDevice != nullptr) || (p->dstArray != nullptr))) {
return ihipMemcpy3D_V1(p, stream, isAsync);
}
// When calling hipMemcpy3D_V2(), the user must specify
// one of srcArray or srcPtr and
// one of dstArray or dstPtr.
if (((p->srcArray != nullptr) || (p->srcPtr.ptr != nullptr)) &&
((p->dstArray != nullptr) || (p->dstPtr.ptr != nullptr))) {
return ihipMemcpy3D_V2(p, stream, isAsync);
}
// If we got here, then the user specified an invalid combination of src/dst parameters.
return hipErrorInvalidValue;
}
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
HIP_INIT_API(hipMemcpy3D, p);
HIP_RETURN(ihipMemcpy3D(p, nullptr));
}
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) {
HIP_INIT_API(hipMemcpy3DAsync, p, stream);
HIP_RETURN(ihipMemcpy3D(p, stream, true));
}
hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes,