From 7e152bb0f3b8303713bd116263fb3cd4601df4af Mon Sep 17 00:00:00 2001 From: Marko Arandjelovic Date: Sun, 22 Dec 2024 22:05:17 +0000 Subject: [PATCH] SWDEV-506234 - Refactor validation in hip_memory Change-Id: I9d69695e4b6668e6de00f1f6b060862872358340 --- hipamd/src/hip_memory.cpp | 1065 ++++++++++--------------------------- 1 file changed, 283 insertions(+), 782 deletions(-) diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 09a435e149..4aa766aec1 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -19,6 +19,7 @@ THE SOFTWARE. */ #include +#include "hip/driver_types.h" #include "hip_internal.hpp" #include "hip_platform.hpp" #include "hip_conversions.hpp" @@ -1696,67 +1697,12 @@ hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t By ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *hip_stream, true)); } -hipError_t ihipMemcpyAtoDValidate(hipArray_t srcArray, void* dstDevice, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - size_t dstRowPitch, size_t dstSlicePitch, - amd::Memory*& dstMemory, amd::Image*& srcImage, - amd::BufferRect& srcRect, amd::BufferRect& dstRect) { - size_t dstOffset = 0; - dstMemory = getMemoryObject(dstDevice, dstOffset); - if (srcArray == nullptr || (dstMemory == nullptr)) { - return hipErrorInvalidValue; - } - cl_mem srcMemObj = reinterpret_cast(srcArray->data); - if (!is_valid(srcMemObj)) { - return hipErrorInvalidValue; - } - - srcImage = as_amd(srcMemObj)->asImage(); - // HIP assumes the width is in bytes, but OCL assumes it's in pixels. - const size_t elementSize = srcImage->getImageFormat().getElementSize(); - static_cast(srcOrigin)[0] /= elementSize; - static_cast(copyRegion)[0] /= elementSize; - - amd::Coord3D copyRegionReal = copyRegion; - if (hip::isLayered1D(srcArray)) copyRegionReal.c[1] = 1; - - if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegionReal), - srcImage->getRowPitch(), srcImage->getSlicePitch())) { - return hipErrorInvalidValue; - } - - if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegionReal), - dstRowPitch, dstSlicePitch)) { - return hipErrorInvalidValue; - } - dstRect.start_ += dstOffset; - - const size_t copySizeInBytes = - copyRegion[0] * copyRegion[1] * copyRegion[2] * srcImage->getImageFormat().getElementSize(); - if (!srcImage->validateRegion(srcOrigin, copyRegion) || - !dstMemory->validateRegion(dstOrigin, {copySizeInBytes, 0, 0})) { - return hipErrorInvalidValue; - } - - dstOrigin.c[0] = dstRect.offset(0, 0, 0); // Get the physical offset of the logic origin - dstOrigin.c[1] = dstOrigin.c[2] = 0; - return hipSuccess; -} - -hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray_t srcArray, void* dstDevice, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, - hip::Stream* stream) { - amd::BufferRect srcRect; - amd::BufferRect dstRect; - amd::Memory* dstMemory; - amd::Image* srcImage; - hipError_t status = - ihipMemcpyAtoDValidate(srcArray, dstDevice, srcOrigin, dstOrigin, copyRegion, dstRowPitch, - dstSlicePitch, dstMemory, srcImage, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } +hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, void* dstDevice, amd::Coord3D dstOrigin, + amd::Image* srcImage, amd::Coord3D srcOrigin, + amd::Coord3D copyRegion, amd::BufferRect srcRect, + amd::BufferRect dstRect, hip::Stream* stream) { + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstDevice, dOffset); amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE_TO_BUFFER, amd::Command::EventWaitList{}, *srcImage, *dstMemory, @@ -1774,69 +1720,16 @@ hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray_t srcArray, vo return hipSuccess; } -hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray_t dstArray, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, - amd::Memory*& srcMemory, amd::BufferRect& dstRect, - amd::BufferRect& srcRect) { - size_t srcOffset = 0; - srcMemory = getMemoryObject(srcDevice, srcOffset); - if ((srcMemory == nullptr) || dstArray == nullptr) { - return hipErrorInvalidValue; - } - cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (!is_valid(dstMemObj)) { - return hipErrorInvalidValue; - } +hipError_t ihipMemcpyDtoACommand(amd::Command*& command, amd::Image* dstImage, + amd::Coord3D dstOrigin, void* srcDevice, amd::Coord3D srcOrigin, + amd::Coord3D copyRegion, amd::BufferRect srcRect, + amd::BufferRect dstRect, hip::Stream* stream) { + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcDevice, sOffset); - dstImage = as_amd(dstMemObj)->asImage(); - // HIP assumes the width is in bytes, but OCL assumes it's in pixels. - const size_t elementSize = dstImage->getImageFormat().getElementSize(); - static_cast(dstOrigin)[0] /= elementSize; - static_cast(copyRegion)[0] /= elementSize; - - amd::Coord3D copyRegionReal = copyRegion; - if (hip::isLayered1D(dstArray)) copyRegionReal.c[1] = 1; - - if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegionReal), - srcRowPitch, srcSlicePitch)) { - return hipErrorInvalidValue; - } - srcRect.start_ += srcOffset; - - if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegionReal), - dstImage->getRowPitch(), dstImage->getSlicePitch())) { - return hipErrorInvalidValue; - } - - const size_t copySizeInBytes = - copyRegion[0] * copyRegion[1] * copyRegion[2] * dstImage->getImageFormat().getElementSize(); - if (!srcMemory->validateRegion(srcOrigin, {copySizeInBytes, 0, 0}) || - !dstImage->validateRegion(dstOrigin, copyRegion)) { - return hipErrorInvalidValue; - } - srcOrigin.c[0] = srcRect.offset(0, 0, 0); // Get the physical offset of the logic origin - srcOrigin.c[1] = srcOrigin.c[2] = 0; - return hipSuccess; -} - -hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray_t dstArray, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - hip::Stream* stream) { - amd::Image* dstImage; - amd::Memory* srcMemory; - amd::BufferRect dstRect; - amd::BufferRect srcRect; - hipError_t status = - ihipMemcpyDtoAValidate(srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion, srcRowPitch, - srcSlicePitch, dstImage, srcMemory, dstRect, srcRect); - if (status != hipSuccess) { - return status; - } - amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_BUFFER_TO_IMAGE, - amd::Command::EventWaitList{}, *srcMemory, *dstImage, - srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); + amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand( + *stream, CL_COMMAND_COPY_BUFFER_TO_IMAGE, amd::Command::EventWaitList{}, *srcMemory, + *dstImage, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); if (cpyMemCmd == nullptr) { return hipErrorOutOfMemory; @@ -1850,62 +1743,14 @@ hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArr return hipSuccess; } -hipError_t ihipMemcpyDtoDValidate(void* srcDevice, void* dstDevice, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, - size_t dstSlicePitch, amd::Memory*& srcMemory, - amd::Memory*& dstMemory, amd::BufferRect& srcRect, - amd::BufferRect& dstRect) { +hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* dstDevice, void* srcDevice, + amd::Coord3D copyRegion, amd::BufferRect srcRect, + amd::BufferRect dstRect, hip::Stream* stream) { size_t srcOffset = 0; - srcMemory = getMemoryObject(srcDevice, srcOffset); + amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); size_t dstOffset = 0; - dstMemory = getMemoryObject(dstDevice, dstOffset); + amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); - if ((srcMemory == nullptr) || (dstMemory == nullptr)) { - return hipErrorInvalidValue; - } - - if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), - srcRowPitch, srcSlicePitch)) { - return hipErrorInvalidValue; - } - srcRect.start_ += srcOffset; - - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D srcSize(srcRect.end_, 1, 1); - if (!srcMemory->validateRegion(srcStart, srcSize)) { - return hipErrorInvalidValue; - } - - if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), - dstRowPitch, dstSlicePitch)) { - return hipErrorInvalidValue; - } - dstRect.start_ += dstOffset; - - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D dstSize(dstRect.end_, 1, 1); - if (!dstMemory->validateRegion(dstStart, dstSize)) { - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* dstDevice, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream) { - amd::Memory* srcMemory; - amd::Memory* dstMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - hipError_t status = ihipMemcpyDtoDValidate(srcDevice, dstDevice, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - srcMemory, dstMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } amd::Command::EventWaitList waitList; amd::CopyMemoryCommand* copyCommand; amd::Device* queueDevice = &stream->device(); @@ -1971,55 +1816,15 @@ hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* return hipSuccess; } -hipError_t ihipMemcpyDtoHValidate(void* srcDevice, void* dstHost, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, - size_t dstSlicePitch, amd::Memory*& srcMemory, - amd::BufferRect& srcRect, amd::BufferRect& dstRect) { - size_t srcOffset = 0; - srcMemory = getMemoryObject(srcDevice, srcOffset); - - if ((srcMemory == nullptr) || (dstHost == nullptr)) { - return hipErrorInvalidValue; - } - - if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), - srcRowPitch, srcSlicePitch)) { - return hipErrorInvalidValue; - } - srcRect.start_ += srcOffset; - - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D srcSize(srcRect.end_, 1, 1); - if (!srcMemory->validateRegion(srcStart, srcSize)) { - return hipErrorInvalidValue; - } - - if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), - dstRowPitch, dstSlicePitch)) { - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* dstHost, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, - bool isAsync = false) { - amd::Memory* srcMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; +hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* dstHost, amd::Coord3D dstOrigin, + void* srcDevice, amd::Coord3D srcOrigin, amd::Coord3D copyRegion, + amd::BufferRect srcRect, amd::BufferRect dstRect, + hip::Stream* stream, bool isAsync = false) { + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcDevice, sOffset); size_t dOffset = 0; amd::Memory* dstMemory = getMemoryObject(dstHost, dOffset); - hipError_t status = ihipMemcpyDtoHValidate(srcDevice, dstHost, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - srcMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - amd::Coord3D srcStart(srcRect.start_, 0, 0); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); if (dstMemory) { @@ -2058,55 +1863,16 @@ hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* return hipSuccess; } -hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, - size_t dstSlicePitch, amd::Memory*& dstMemory, - amd::BufferRect& srcRect, amd::BufferRect& dstRect) { - size_t dstOffset = 0; - dstMemory = getMemoryObject(dstDevice, dstOffset); - - if ((srcHost == nullptr) || (dstMemory == nullptr)) { - return hipErrorInvalidValue; - } - - if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), - srcRowPitch, srcSlicePitch)) { - return hipErrorInvalidValue; - } - - if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), - dstRowPitch, dstSlicePitch)) { - return hipErrorInvalidValue; - } - dstRect.start_ += dstOffset; - - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D dstSize(dstRect.end_, 1, 1); - if (!dstMemory->validateRegion(dstStart, dstSize)) { - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, void* dstDevice, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, +hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, void* dstDevice, amd::Coord3D dstOrigin, + const void* srcHost, amd::Coord3D srcOrigin, + amd::Coord3D copyRegion, amd::BufferRect srcRect, + amd::BufferRect dstRect, hip::Stream* stream, bool isAsync = false) { - amd::Memory* dstMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstDevice, dOffset); size_t sOffset = 0; amd::Memory* srcMemory = getMemoryObject(srcHost, sOffset); - hipError_t status = ihipMemcpyHtoDValidate(srcHost, dstDevice, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - dstMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - amd::Coord3D dstStart(dstRect.start_, 0, 0); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); if (srcMemory) { @@ -2136,44 +1902,8 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo return hipSuccess; } -hipError_t ihipMemcpyHtoHValidate(const void* srcHost, void* dstHost, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - amd::BufferRect& srcRect, amd::BufferRect& dstRect) { - - if ((srcHost == nullptr) || (dstHost == nullptr)) { - return hipErrorInvalidValue; - } - - if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), - srcRowPitch, srcSlicePitch)) { - return hipErrorInvalidValue; - } - - if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), - dstRowPitch, dstSlicePitch)) { - return hipErrorInvalidValue; - } - - return hipSuccess; -} - -hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - hip::Stream* stream) { - - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - hipError_t status = ihipMemcpyHtoHValidate(srcHost, dstHost, srcOrigin, - dstOrigin, copyRegion, srcRowPitch, srcSlicePitch, - dstRowPitch, dstSlicePitch, srcRect, dstRect); - - if (status != hipSuccess) { - return status; - } - +hipError_t ihipMemcpyHtoH(void* dstHost, const void* srcHost, amd::Coord3D copyRegion, + amd::BufferRect srcRect, amd::BufferRect dstRect, hip::Stream* stream) { if (stream) { stream->finish(); } @@ -2191,53 +1921,13 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOr return hipSuccess; } -hipError_t ihipMemcpyAtoAValidate(hipArray_t srcArray, hipArray_t dstArray, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - amd::Image*& srcImage, amd::Image*& dstImage) { - if (dstArray == nullptr || srcArray == nullptr) { - return hipErrorInvalidValue; - } - cl_mem srcMemObj = reinterpret_cast(srcArray->data); - cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (!is_valid(srcMemObj) || !is_valid(dstMemObj)) { - return hipErrorInvalidValue; - } - - srcImage = as_amd(srcMemObj)->asImage(); - dstImage = as_amd(dstMemObj)->asImage(); - - // HIP assumes the width is in bytes, but OCL assumes it's in pixels. - // Note that src and dst should have the same element size. - const size_t elementSize = srcImage->getImageFormat().getElementSize(); - if (elementSize != dstImage->getImageFormat().getElementSize()) { - return hipErrorInvalidValue; - } - static_cast(srcOrigin)[0] /= elementSize; - static_cast(dstOrigin)[0] /= elementSize; - static_cast(copyRegion)[0] /= elementSize; - - if (!srcImage->validateRegion(srcOrigin, copyRegion) || - !dstImage->validateRegion(dstOrigin, copyRegion)) { - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray_t srcArray, hipArray_t dstArray, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, hip::Stream* stream) { - amd::Image* srcImage; - amd::Image* dstImage; - - hipError_t status = ihipMemcpyAtoAValidate(srcArray, dstArray, srcOrigin, dstOrigin, copyRegion, - srcImage, dstImage); - if (status != hipSuccess) { - return status; - } - - amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE, - amd::Command::EventWaitList{}, *srcImage, *dstImage, - srcOrigin, dstOrigin, copyRegion); +hipError_t ihipMemcpyAtoACommand(amd::Command*& command, amd::Image* dstImage, + amd::Coord3D dstOrigin, amd::Image* srcImage, + amd::Coord3D srcOrigin, amd::Coord3D copyRegion, + hip::Stream* stream) { + amd::CopyMemoryCommand* cpyMemCmd = + new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE, amd::Command::EventWaitList{}, + *srcImage, *dstImage, srcOrigin, dstOrigin, copyRegion); if (cpyMemCmd == nullptr) { return hipErrorOutOfMemory; @@ -2262,48 +1952,15 @@ size_t ihipGetbufferStart(const size_t* bufferOrigin, const size_t* region, size return bufferOrigin[2] * slicePitch_ + bufferOrigin[1] * rowPitch_ + bufferOrigin[0]; } -hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray_t dstArray, - amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, - amd::Coord3D& copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, amd::Image*& dstImage, - size_t &start) { - if ((srcHost == nullptr) || dstArray == nullptr) { - return hipErrorInvalidValue; - } - cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (!is_valid(dstMemObj)) { - return hipErrorInvalidValue; - } - - start = ihipGetbufferStart(static_cast(srcOrigin), static_cast(copyRegion), - srcRowPitch, srcSlicePitch); - - dstImage = as_amd(dstMemObj)->asImage(); - // HIP assumes the width is in bytes, but OCL assumes it's in pixels. - const size_t elementSize = dstImage->getImageFormat().getElementSize(); - static_cast(dstOrigin)[0] /= elementSize; - static_cast(copyRegion)[0] /= elementSize; - - if (!dstImage->validateRegion(dstOrigin, copyRegion)) { - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray_t dstArray, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - hip::Stream* stream, bool isAsync = false) { - amd::Image* dstImage; - size_t start = 0; //!< Start offset for the copy region +hipError_t ihipMemcpyHtoACommand(amd::Command*& command, amd::Image* dstImage, + amd::Coord3D dstOrigin, const void* srcHost, + amd::Coord3D srcOrigin, amd::Coord3D copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, hip::Stream* stream, + bool isAsync = false) { size_t sOffset = 0; amd::Memory* srcMemory = getMemoryObject(srcHost, sOffset); - - hipError_t status = ihipMemcpyHtoAValidate(srcHost, dstArray, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, dstImage, start); - if (status != hipSuccess) { - return status; - } + size_t start = ihipGetbufferStart(static_cast(srcOrigin), + static_cast(copyRegion), srcRowPitch, srcSlicePitch); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); if (srcMemory) { @@ -2334,49 +1991,14 @@ hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hi return hipSuccess; } -hipError_t ihipMemcpyAtoHValidate(hipArray_t srcArray, void* dstHost, amd::Coord3D& srcOrigin, - amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, - size_t dstRowPitch, size_t dstSlicePitch, amd::Image*& srcImage, - size_t &start) { - if (srcArray == nullptr || (dstHost == nullptr)) { - return hipErrorInvalidValue; - } - cl_mem srcMemObj = reinterpret_cast(srcArray->data); - if (!is_valid(srcMemObj)) { - return hipErrorInvalidValue; - } - - start = ihipGetbufferStart(static_cast(dstOrigin), static_cast(copyRegion), - dstRowPitch, dstSlicePitch); - - srcImage = as_amd(srcMemObj)->asImage(); - // HIP assumes the width is in bytes, but OCL assumes it's in pixels. - const size_t elementSize = srcImage->getImageFormat().getElementSize(); - static_cast(srcOrigin)[0] /= elementSize; - static_cast(copyRegion)[0] /= elementSize; - - if (!srcImage->validateRegion(srcOrigin, copyRegion) || - !srcImage->isRowSliceValid(dstRowPitch, dstSlicePitch, copyRegion[0], copyRegion[1])) { - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray_t srcArray, void* dstHost, - amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, +hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, void* dstHost, amd::Coord3D dstOrigin, + amd::Image* srcImage, amd::Coord3D srcOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, bool isAsync = false) { - amd::Image* srcImage; - amd::BufferRect dstRect; - size_t start = 0; //!< Start offset for the copy region size_t dOffset = 0; amd::Memory* dstMemory = getMemoryObject(dstHost, dOffset); - - hipError_t status = ihipMemcpyAtoHValidate(srcArray, dstHost, srcOrigin, dstOrigin, copyRegion, - dstRowPitch, dstSlicePitch, srcImage, start); - if (status != hipSuccess) { - return status; - } + size_t start = ihipGetbufferStart(static_cast(dstOrigin), + static_cast(copyRegion), dstRowPitch, dstSlicePitch); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); if (dstMemory) { @@ -2425,9 +2047,9 @@ void ihipCopyMemParamSet(const HIP_MEMCPY3D* pCopy, hipMemoryType& srcMemType, // {src/dst}Host may be unitialized. Copy over {src/dst}Device into it if we // detect system memory. const_cast(pCopy)->srcHost = pCopy->srcDevice; - // We don't need detect memory type again for hipMemoryTypeUnified - const_cast(pCopy)->srcMemoryType = srcMemoryType; } + // We don't need detect memory type again for hipMemoryTypeUnified + const_cast(pCopy)->srcMemoryType = srcMemoryType; } offset = 0; hipMemoryType dstMemoryType = pCopy->dstMemoryType; @@ -2440,9 +2062,9 @@ void ihipCopyMemParamSet(const HIP_MEMCPY3D* pCopy, hipMemoryType& srcMemType, if (dstMemoryType == hipMemoryTypeHost) { const_cast(pCopy)->dstHost = pCopy->dstDevice; - // We don't need detect memory type again for hipMemoryTypeUnified - const_cast(pCopy)->dstMemoryType = dstMemoryType; } + // We don't need detect memory type again for hipMemoryTypeUnified + const_cast(pCopy)->dstMemoryType = dstMemoryType; } offset = 0; // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned. @@ -2468,8 +2090,155 @@ void ihipCopyMemParamSet(const HIP_MEMCPY3D* pCopy, hipMemoryType& srcMemType, dstMemType = dstMemoryType; } +hipError_t validateImageObject(hipArray_t array, amd::Coord3D& origin, amd::Coord3D& copyRegion, + amd::BufferRect* rect, amd::Image*& image) { + if (array == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem memObj = reinterpret_cast(array->data); + if (!is_valid(memObj)) { + return hipErrorInvalidValue; + } + + image = as_amd(memObj)->asImage(); + if (!image->validateRegion(origin, copyRegion)) { + return hipErrorInvalidValue; + } + + if (!rect->create(static_cast(origin), static_cast(copyRegion), + image->getRowPitch(), image->getSlicePitch())) { + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t validateMemoryObject(const void* ptr, bool isDeviceMemory, amd::Coord3D& origin, + amd::Coord3D& copyRegion, size_t rowPitch, size_t slicePitch, + amd::BufferRect& rect) { + if (ptr == nullptr) { + return hipErrorInvalidValue; + } + + if (!rect.create(static_cast(origin), static_cast(copyRegion), rowPitch, + slicePitch)) { + return hipErrorInvalidValue; + } + + size_t offset = 0; + amd::Memory* memory = nullptr; + memory = getMemoryObject(ptr, offset); + if (memory == nullptr && isDeviceMemory) { + return hipErrorInvalidValue; + } + + amd::Coord3D start(rect.start_, 0, 0); + amd::Coord3D size(rect.end_, 1, 1); + if (memory && !memory->validateRegion(start, size)) { + return hipErrorInvalidValue; + } + + rect.start_ += offset; + + origin.c[0] = rect.offset(0, 0, 0); // Get the physical offset of the logic origin + origin.c[1] = origin.c[2] = 0; + + return hipSuccess; +} + +hipError_t ihipDrvMemcpy3D_validate(const HIP_MEMCPY3D* pCopy, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + amd::BufferRect* outSrcRect, amd::BufferRect* outDstRect, + amd::Image** outSrcImage, amd::Image** outDstImage) { + hipError_t status = hipSuccess; + + if (copyRegion.c[0] * copyRegion.c[1] * copyRegion.c[2] <= 0) { + return hipErrorInvalidValue; + } + + amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Image* srcImage = nullptr; + amd::Image* dstImage = nullptr; + amd::Coord3D tempCopyRegion = copyRegion; + + if (pCopy->srcMemoryType == hipMemoryTypeArray || pCopy->dstMemoryType == hipMemoryTypeArray) { + void* arr = pCopy->srcMemoryType == hipMemoryTypeArray ? pCopy->srcArray : pCopy->dstArray; + if (arr == nullptr) { + return hipErrorInvalidValue; + } + auto element_size = hip::getElementSize((hipArray_const_t)arr); + copyRegion.c[0] /= element_size; + tempCopyRegion.c[0] /= element_size; + + if (pCopy->srcMemoryType == hipMemoryTypeArray) { + static_cast(srcOrigin)[0] /= element_size; + if (hip::isLayered1D(pCopy->srcArray)) { + tempCopyRegion.c[1] = 1; + } + } + + if (pCopy->dstMemoryType == hipMemoryTypeArray) { + static_cast(dstOrigin)[0] /= element_size; + if (hip::isLayered1D(pCopy->dstArray)) { + tempCopyRegion.c[1] = 1; + } + } + } + + if (pCopy->srcMemoryType == hipMemoryTypeArray) { + status = validateImageObject(pCopy->srcArray, srcOrigin, tempCopyRegion, &srcRect, srcImage); + } else if (pCopy->srcMemoryType == hipMemoryTypeDevice) { + status = validateMemoryObject(pCopy->srcDevice, true, srcOrigin, tempCopyRegion, + pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, srcRect); + } else if (pCopy->srcMemoryType == hipMemoryTypeHost) { + status = validateMemoryObject(pCopy->srcHost, false, srcOrigin, tempCopyRegion, pCopy->srcPitch, + pCopy->srcPitch * pCopy->srcHeight, srcRect); + } + + if (status != hipSuccess) { + return status; + } + + if (pCopy->dstMemoryType == hipMemoryTypeArray) { + status = validateImageObject(pCopy->dstArray, dstOrigin, tempCopyRegion, &dstRect, dstImage); + } else if (pCopy->dstMemoryType == hipMemoryTypeDevice) { + status = validateMemoryObject(pCopy->dstDevice, true, dstOrigin, tempCopyRegion, + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, dstRect); + } else if (pCopy->dstMemoryType == hipMemoryTypeHost) { + status = validateMemoryObject(pCopy->dstHost, false, dstOrigin, tempCopyRegion, pCopy->dstPitch, + pCopy->dstPitch * pCopy->dstHeight, dstRect); + } + + if (status != hipSuccess) { + return status; + } + + *outSrcRect = srcRect; + *outDstRect = dstRect; + *outSrcImage = srcImage; + *outDstImage = dstImage; + + return hipSuccess; +} + +hipError_t ihipDrvMemcpy3D_validate(const HIP_MEMCPY3D* pCopy) { + amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; + amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; + amd::Coord3D copyRegion = {pCopy->WidthInBytes, pCopy->Height, pCopy->Depth}; + amd::BufferRect srcRect, dstRect; + amd::Image *srcImage, *dstImage; + return ihipDrvMemcpy3D_validate(pCopy, srcOrigin, dstOrigin, copyRegion, &srcRect, &dstRect, + &srcImage, &dstImage); +} + hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3D* pCopy, hip::Stream* stream) { + if (pCopy->WidthInBytes == 0 || pCopy->Height == 0 || pCopy->Depth == 0) { + return hipSuccess; + } + hipMemoryType srcMemoryType; hipMemoryType dstMemoryType; ihipCopyMemParamSet(pCopy, srcMemoryType, dstMemoryType); @@ -2477,46 +2246,55 @@ hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3 amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; amd::Coord3D copyRegion = {pCopy->WidthInBytes, pCopy->Height, pCopy->Depth}; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Image* srcImage = nullptr; + amd::Image* dstImage = nullptr; + + auto status = ihipDrvMemcpy3D_validate(pCopy, srcOrigin, dstOrigin, copyRegion, &srcRect, + &dstRect, &srcImage, &dstImage); + + if (status != hipSuccess) { + return status; + } if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { // Host to Device. - return ihipMemcpyHtoDCommand(command, pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); + return ihipMemcpyHtoDCommand(command, pCopy->dstDevice, dstOrigin, pCopy->srcHost, srcOrigin, + copyRegion, srcRect, dstRect, stream); } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { // Device to Host. - return ihipMemcpyDtoHCommand(command, pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin, - copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); + return ihipMemcpyDtoHCommand(command, pCopy->dstHost, dstOrigin, pCopy->srcDevice, srcOrigin, + copyRegion, srcRect, dstRect, stream); } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { // Device to Device. - return ihipMemcpyDtoDCommand(command, pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); + return ihipMemcpyDtoDCommand(command, pCopy->dstDevice, pCopy->srcDevice, copyRegion, srcRect, + dstRect, stream); } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { // Host to Image. - return ihipMemcpyHtoACommand(command, pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin, + return ihipMemcpyHtoACommand(command, dstImage, dstOrigin, pCopy->srcHost, srcOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, stream); } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { // Image to Host. - return ihipMemcpyAtoHCommand(command, pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin, + return ihipMemcpyAtoHCommand(command, pCopy->dstHost, dstOrigin, srcImage, srcOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { // Device to Image. - return ihipMemcpyDtoACommand(command, pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin, - copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - stream); + return ihipMemcpyDtoACommand(command, dstImage, dstOrigin, pCopy->srcDevice, + {srcRect.start_, 0, 0}, copyRegion, srcRect, dstRect, stream); } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { // Image to Device. - return ihipMemcpyAtoDCommand(command, pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, - stream); + return ihipMemcpyAtoDCommand(command, pCopy->dstDevice, {dstRect.start_, 0, 0}, srcImage, + srcOrigin, copyRegion, srcRect, dstRect, stream); } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { // Image to Image. - return ihipMemcpyAtoACommand(command, pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin, - copyRegion, stream); + return ihipMemcpyAtoACommand(command, dstImage, dstOrigin, srcImage, srcOrigin, copyRegion, + stream); + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeHost)) { + // Host to Host + return ihipMemcpyHtoH(pCopy->dstHost, pCopy->srcHost, copyRegion, srcRect, dstRect, stream); } else { ShouldNotReachHere(); } @@ -2561,34 +2339,21 @@ hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, hipStream_t stream, bool if (!hip::isValid(stream)) { return hipErrorContextIsDestroyed; } - if (pCopy->WidthInBytes == 0 || pCopy->Height == 0 || pCopy->Depth == 0) { - LogPrintfInfo("Either Width :%d or Height: %d and Depth: %d is zero", pCopy->WidthInBytes, - pCopy->Height, pCopy->Depth); - return hipSuccess; - } hipMemoryType srcMemoryType; hipMemoryType dstMemoryType; ihipCopyMemParamSet(pCopy, srcMemoryType, dstMemoryType); - if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeHost)) { - amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; - amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; - amd::Coord3D copyRegion = {pCopy->WidthInBytes, (pCopy->Height != 0) ? pCopy->Height : 1, - (pCopy->Depth != 0) ? pCopy->Depth : 1}; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } - // Host to Host. - return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, - pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, - pCopy->dstPitch * pCopy->dstHeight, hip::getStream(stream)); - } else { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; + amd::Command* command = nullptr; + status = ihipGetMemcpyParam3DCommand(command, pCopy, hip_stream); + if (command != nullptr) { + if (status != hipSuccess) { + return status; } - status = ihipGetMemcpyParam3DCommand(command, pCopy, hip_stream); - if (status != hipSuccess) return status; - // Transfers from device memory to pageable host memory and transfers from any // host memory to any host memory are synchronous with respect to the host. // Device to Device copies do not need to host side synchronization. @@ -2603,6 +2368,8 @@ hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, hipStream_t stream, bool } return ihipMemcpyCmdEnqueue(command, isAsync, hip_stream); } + + return status; } hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, @@ -2741,10 +2508,6 @@ hipError_t hipMemcpy2DAsync_spt(void* dst, size_t dpitch, const void* src, size_ } hipError_t ihipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { - if (dst == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); - } - hip_Memcpy2D desc = {}; desc.srcXInBytes = 0; @@ -2816,10 +2579,6 @@ hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, size_t hOffset, cons } hipError_t ihipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { - if (src == nullptr) { - HIP_RETURN(hipErrorInvalidResourceHandle); - } - hip_Memcpy2D desc = {}; desc.srcXInBytes = wOffsetSrc; @@ -2864,143 +2623,11 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t src, size_t wOffsetSrc HIP_RETURN_DURATION(hipMemcpyFromArray_common(dst, src, wOffsetSrc, hOffset, count, kind, nullptr)); } -hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t count, hipMemcpyKind kind) { +hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffsetSrc, + size_t hOffset, size_t count, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpyFromArray, dst, src, wOffsetSrc, hOffset, count, kind); HIP_RETURN_DURATION(hipMemcpyFromArray_common(dst, src, wOffsetSrc, hOffset, count, kind, - getPerThreadDefaultStream())); -} - -hipError_t ihipMemcpyAtoD(hipArray_t srcArray, void* dstDevice, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, - size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = - ihipMemcpyAtoDCommand(command, srcArray, dstDevice, srcOrigin, dstOrigin, copyRegion, - dstRowPitch, dstSlicePitch, hip_stream); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray_t dstArray, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = - ihipMemcpyDtoACommand(command, srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, hip_stream); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyDtoD(void* srcDevice, void* dstDevice, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = ihipMemcpyDtoDCommand(command, srcDevice, dstDevice, srcOrigin, dstOrigin, - copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, - dstSlicePitch, hip_stream); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyDtoH(void* srcDevice, void* dstHost, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = ihipMemcpyDtoHCommand(command, srcDevice, dstHost, srcOrigin, dstOrigin, - copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, - dstSlicePitch, hip_stream, isAsync); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyHtoD(const void* srcHost, void* dstDevice, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = ihipMemcpyHtoDCommand(command, srcHost, dstDevice, srcOrigin, dstOrigin, - copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, - dstSlicePitch, hip_stream, isAsync); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyAtoA(hipArray_t srcArray, hipArray_t dstArray, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hipStream_t stream, - bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = ihipMemcpyAtoACommand(command, srcArray, dstArray, srcOrigin, dstOrigin, - copyRegion, hip_stream); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray_t dstArray, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, - size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = - ihipMemcpyHtoACommand(command, srcHost, dstArray, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, hip_stream, isAsync); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} -hipError_t ihipMemcpyAtoH(hipArray_t srcArray, void* dstHost, amd::Coord3D srcOrigin, - amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, - size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { - amd::Command* command; - hip::Stream* hip_stream = hip::getStream(stream); - if (hip_stream == nullptr) { - return hipErrorInvalidValue; - } - hipError_t status = - ihipMemcpyAtoHCommand(command, srcArray, dstHost, srcOrigin, dstOrigin, copyRegion, - dstRowPitch, dstSlicePitch, hip_stream, isAsync); - if (status != hipSuccess) return status; - return ihipMemcpyCmdEnqueue(command, isAsync); -} - -hipError_t hipMemcpyHtoA(hipArray_t dstArray, - size_t dstOffset, - const void* srcHost, - size_t ByteCount) { - HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, ByteCount); - CHECK_STREAM_CAPTURING(); - HIP_RETURN_DURATION(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); -} - -hipError_t hipMemcpyAtoH(void* dstHost, - hipArray_t srcArray, - size_t srcOffset, - size_t ByteCount) { - HIP_INIT_API(hipMemcpyAtoH, dstHost, srcArray, srcOffset, ByteCount); - CHECK_STREAM_CAPTURING(); - HIP_RETURN_DURATION(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); + getPerThreadDefaultStream())); } hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) { @@ -3094,133 +2721,6 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) { return hipSuccess; } -hipError_t ihipDrvMemcpy3D_validate(const HIP_MEMCPY3D* pCopy) { - hipError_t status; - if (pCopy->WidthInBytes == 0 || pCopy->Height == 0 || pCopy->Depth == 0) { - LogPrintfInfo("Either Width :%d or Height: %d and Depth: %d is zero", pCopy->WidthInBytes, - pCopy->Height, pCopy->Depth); - return hipSuccess; - } - hipMemoryType srcMemoryType; - hipMemoryType dstMemoryType; - ihipCopyMemParamSet(pCopy, srcMemoryType, dstMemoryType); - - amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; - amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; - amd::Coord3D copyRegion = {pCopy->WidthInBytes, pCopy->Height, pCopy->Depth}; - - if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { - // Host to Device. - - amd::Memory* dstMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - status = - ihipMemcpyHtoDValidate(pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, - pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, - pCopy->dstPitch * pCopy->dstHeight, dstMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { - // Device to Host. - amd::Memory* srcMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - status = - ihipMemcpyDtoHValidate(pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, - pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, - pCopy->dstPitch * pCopy->dstHeight, srcMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { - // Device to Device. - amd::Memory* srcMemory; - amd::Memory* dstMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - status = ihipMemcpyDtoDValidate(pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, srcMemory, - dstMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { - amd::Image* dstImage; - size_t start = 0; - - status = - ihipMemcpyHtoAValidate(pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, - pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, dstImage, - start); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { - // Image to Host. - amd::Image* srcImage; - size_t start = 0; - - status = - ihipMemcpyAtoHValidate(pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, srcImage, - start); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { - // Device to Image. - amd::Image* dstImage; - amd::Memory* srcMemory; - amd::BufferRect dstRect; - amd::BufferRect srcRect; - status = ihipMemcpyDtoAValidate(pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin, - copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - dstImage, srcMemory, dstRect, srcRect); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { - // Image to Device. - amd::BufferRect srcRect; - amd::BufferRect dstRect; - amd::Memory* dstMemory; - amd::Image* srcImage; - status = ihipMemcpyAtoDValidate(pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, - dstMemory, srcImage, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { - amd::Image* srcImage; - amd::Image* dstImage; - - status = ihipMemcpyAtoAValidate(pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin, - copyRegion, srcImage, dstImage); - if (status != hipSuccess) { - return status; - } - } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeHost)) { - amd::BufferRect srcRect; - amd::BufferRect dstRect; - status = ihipMemcpyHtoHValidate(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, - pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, - srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - } else { - return hipErrorInvalidValue; - } - return hipSuccess; -} - hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p, hip::Stream* stream) { const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p); @@ -4262,8 +3762,8 @@ hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, // ================================================================================================ hipError_t ihipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, - size_t width, size_t height, hipMemcpyKind kind, - hipStream_t stream, bool isAsync = false) { + size_t width, size_t height, hipStream_t stream, + bool isAsync = false) { hip_Memcpy2D desc = {}; desc.srcXInBytes = wOffsetSrc; @@ -4308,7 +3808,7 @@ hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOf HIP_RETURN(validateDst); } HIP_RETURN_DURATION(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, - hOffsetSrc, width, height, kind, nullptr)); + hOffsetSrc, width, height, nullptr)); } // ================================================================================================ @@ -4404,61 +3904,62 @@ hipError_t hipMemcpy2DToArrayAsync_spt(hipArray_t dst, size_t wOffset, size_t hO } // ================================================================================================ -hipError_t hipMemcpyAtoA(hipArray_t dstArray, - size_t dstOffset, - hipArray_t srcArray, - size_t srcOffset, - size_t ByteCount) { +hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, hipArray_t srcArray, + size_t srcOffset, size_t ByteCount) { HIP_INIT_API(hipMemcpyAtoA, dstArray, dstOffset, srcArray, srcOffset, ByteCount); CHECK_STREAM_CAPTURING(); - HIP_RETURN_DURATION(ihipMemcpyAtoA(srcArray, dstArray, {srcOffset, 0, 0}, - {dstOffset, 0, 0}, {ByteCount, 1, 1}, nullptr)); + HIP_RETURN_DURATION(ihipMemcpy2DArrayToArray(dstArray, dstOffset, 0, srcArray, srcOffset, 0, + ByteCount, 1, nullptr)); } // ================================================================================================ -hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, - hipArray_t srcArray, - size_t srcOffset, +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, hipArray_t srcArray, size_t srcOffset, size_t ByteCount) { HIP_INIT_API(hipMemcpyAtoD, dstDevice, srcArray, srcOffset, ByteCount); - - HIP_RETURN_DURATION(ihipMemcpyAtoD(srcArray, dstDevice, {srcOffset, 0, 0}, - {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); + HIP_RETURN_DURATION(ihipMemcpy2DFromArray(dstDevice, 0, srcArray, srcOffset, 0, ByteCount, 1, + hipMemcpyDeviceToDevice, nullptr)); } // ================================================================================================ -hipError_t hipMemcpyAtoHAsync(void* dstHost, - hipArray_t srcArray, - size_t srcOffset, - size_t ByteCount, - hipStream_t stream) { +hipError_t hipMemcpyAtoHAsync(void* dstHost, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount, hipStream_t stream) { HIP_INIT_API(hipMemcpyAtoHAsync, dstHost, srcArray, srcOffset, ByteCount, stream); STREAM_CAPTURE(hipMemcpyAtoHAsync, stream, dstHost, srcArray, srcOffset, ByteCount); - HIP_RETURN_DURATION(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, - {ByteCount, 1, 1}, 0, 0, stream, true)); + HIP_RETURN_DURATION(ihipMemcpy2DFromArray(dstHost, 0, srcArray, srcOffset, 0, ByteCount, 1, + hipMemcpyDeviceToHost, stream, true)); } // ================================================================================================ -hipError_t hipMemcpyDtoA(hipArray_t dstArray, - size_t dstOffset, - hipDeviceptr_t srcDevice, - size_t ByteCount) { +hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice, + size_t ByteCount) { HIP_INIT_API(hipMemcpyDtoA, dstArray, dstOffset, srcDevice, ByteCount); CHECK_STREAM_CAPTURING(); - HIP_RETURN_DURATION(ihipMemcpyDtoA(srcDevice, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, - {ByteCount, 1, 1}, 0, 0, nullptr)); + HIP_RETURN_DURATION(ihipMemcpy2DToArray(dstArray, dstOffset, 0, srcDevice, 0, ByteCount, 1, + hipMemcpyDeviceToDevice, nullptr)); } // ================================================================================================ -hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, - size_t dstOffset, - const void* srcHost, - size_t ByteCount, - hipStream_t stream) { +hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost, + size_t ByteCount, hipStream_t stream) { HIP_INIT_API(hipMemcpyHtoAAsync, dstArray, dstOffset, srcHost, ByteCount, stream); STREAM_CAPTURE(hipMemcpyHtoAAsync, stream, dstArray, dstOffset, srcHost, ByteCount); - HIP_RETURN_DURATION(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, - {ByteCount, 1, 1}, 0, 0, stream, true)); + HIP_RETURN_DURATION(ihipMemcpy2DToArray(dstArray, dstOffset, 0, srcHost, 0, ByteCount, 1, + hipMemcpyHostToDevice, stream, true)); +} + +hipError_t hipMemcpyHtoA(hipArray_t dstArray, size_t dstOffset, const void* srcHost, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, ByteCount); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpy2DToArray(dstArray, dstOffset, 0, srcHost, 0, ByteCount, 1, + hipMemcpyHostToDevice, nullptr)); +} + +hipError_t hipMemcpyAtoH(void* dstHost, hipArray_t srcArray, size_t srcOffset, size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoH, dstHost, srcArray, srcOffset, ByteCount); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpy2DFromArray(dstHost, 0, srcArray, srcOffset, 0, ByteCount, 1, + hipMemcpyDeviceToHost, nullptr)); } // ================================================================================================