Fix for memcpy2DAsync for pinned host memory case
Tá an tiomantas seo le fáil i:
@@ -1303,27 +1303,6 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind, hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
|
||||
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
|
||||
hipError_t e = hipSuccess;
|
||||
if((width == dpitch) && (width == spitch)) {
|
||||
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
|
||||
} else {
|
||||
try {
|
||||
for (int i = 0; i < height; ++i) {
|
||||
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind, stream);
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
|
||||
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind);
|
||||
@@ -1554,6 +1533,19 @@ inline const T& clamp_integer(const T& x, const T& lower, const T& upper) {
|
||||
|
||||
return std::min(upper, std::max(x, lower));
|
||||
}
|
||||
|
||||
template <uint32_t block_dim, typename T>
|
||||
__global__ void hip_copy_n(T* dst, const T* src, size_t n) {
|
||||
const uint32_t grid_dim = gridDim.x * blockDim.x;
|
||||
|
||||
size_t idx = blockIdx.x * block_dim + threadIdx.x;
|
||||
while (idx < n) {
|
||||
// __builtin_memcpy(reinterpret_cast<void*>(dst+idx), reinterpret_cast<const void*>(src+idx),
|
||||
// sizeof(T));
|
||||
dst[idx] = src[idx];
|
||||
idx += grid_dim;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
@@ -1566,6 +1558,16 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) {
|
||||
sizeBytes, std::move(val));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ihipMemcpyKernel(hipStream_t stream, T* dst, const T* src, size_t sizeBytes) {
|
||||
static constexpr uint32_t block_dim_ = 256;
|
||||
|
||||
const uint32_t grid_dim = clamp_integer<size_t>(sizeBytes / block_dim_, 1, UINT32_MAX);
|
||||
|
||||
hipLaunchKernelGGL(hip_copy_n<block_dim_>, dim3(grid_dim), dim3{block_dim_}, 0u, stream, dst, src,
|
||||
sizeBytes);
|
||||
}
|
||||
|
||||
typedef enum ihipMemsetDataType {
|
||||
ihipMemsetDataTypeChar = 0,
|
||||
ihipMemsetDataTypeShort = 1,
|
||||
@@ -1623,6 +1625,55 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea
|
||||
return e;
|
||||
};
|
||||
|
||||
int isLockedPointer(const void *ptr)
|
||||
{
|
||||
hsa_amd_pointer_info_t info;
|
||||
int isLocked = 0;
|
||||
|
||||
info.size = sizeof(info);
|
||||
hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast<void*>(ptr), &info, nullptr, nullptr, nullptr);
|
||||
if(hsa_status != HSA_STATUS_SUCCESS) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) {
|
||||
isLocked = 1;
|
||||
}
|
||||
|
||||
return isLocked;
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind, hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
|
||||
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
|
||||
hipError_t e = hipSuccess;
|
||||
int isLocked = 0;
|
||||
if(kind == hipMemcpyHostToDevice) {
|
||||
isLocked = isLockedPointer(src);
|
||||
} else if(kind == hipMemcpyDeviceToHost) {
|
||||
isLocked = isLockedPointer(dst);
|
||||
}
|
||||
if((width == dpitch) && (width == spitch)) {
|
||||
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
|
||||
} else {
|
||||
try {
|
||||
for (int i = 0; i < height; ++i) {
|
||||
if(!isLocked) {
|
||||
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind, stream);
|
||||
} else{
|
||||
size_t sizeBytes = width*height;
|
||||
ihipMemcpyKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), sizeBytes/sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
|
||||
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir