Fix for memcpy2DAsync for pinned host memory case

Tá an tiomantas seo le fáil i:
Rahul Garg
2018-05-18 21:09:50 +05:30
tuismitheoir 1c93e11cdf
tiomantas 4f5bdb071c
+72 -21
Féach ar an gComhad
@@ -1303,27 +1303,6 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
return ihipLogStatus(e);
}
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
hipError_t e = hipSuccess;
if((width == dpitch) && (width == spitch)) {
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
} else {
try {
for (int i = 0; i < height; ++i) {
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
}
} catch (ihipException& ex) {
e = ex._code;
}
}
return ihipLogStatus(e);
}
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind);
@@ -1554,6 +1533,19 @@ inline const T& clamp_integer(const T& x, const T& lower, const T& upper) {
return std::min(upper, std::max(x, lower));
}
template <uint32_t block_dim, typename T>
__global__ void hip_copy_n(T* dst, const T* src, size_t n) {
const uint32_t grid_dim = gridDim.x * blockDim.x;
size_t idx = blockIdx.x * block_dim + threadIdx.x;
while (idx < n) {
// __builtin_memcpy(reinterpret_cast<void*>(dst+idx), reinterpret_cast<const void*>(src+idx),
// sizeof(T));
dst[idx] = src[idx];
idx += grid_dim;
}
}
} // namespace
template <typename T>
@@ -1566,6 +1558,16 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) {
sizeBytes, std::move(val));
}
template <typename T>
void ihipMemcpyKernel(hipStream_t stream, T* dst, const T* src, size_t sizeBytes) {
static constexpr uint32_t block_dim_ = 256;
const uint32_t grid_dim = clamp_integer<size_t>(sizeBytes / block_dim_, 1, UINT32_MAX);
hipLaunchKernelGGL(hip_copy_n<block_dim_>, dim3(grid_dim), dim3{block_dim_}, 0u, stream, dst, src,
sizeBytes);
}
typedef enum ihipMemsetDataType {
ihipMemsetDataTypeChar = 0,
ihipMemsetDataTypeShort = 1,
@@ -1623,6 +1625,55 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea
return e;
};
int isLockedPointer(const void *ptr)
{
hsa_amd_pointer_info_t info;
int isLocked = 0;
info.size = sizeof(info);
hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast<void*>(ptr), &info, nullptr, nullptr, nullptr);
if(hsa_status != HSA_STATUS_SUCCESS) {
return -1;
}
if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) {
isLocked = 1;
}
return isLocked;
}
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
hipError_t e = hipSuccess;
int isLocked = 0;
if(kind == hipMemcpyHostToDevice) {
isLocked = isLockedPointer(src);
} else if(kind == hipMemcpyDeviceToHost) {
isLocked = isLockedPointer(dst);
}
if((width == dpitch) && (width == spitch)) {
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
} else {
try {
for (int i = 0; i < height; ++i) {
if(!isLocked) {
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
} else{
size_t sizeBytes = width*height;
ihipMemcpyKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), sizeBytes/sizeof(uint32_t));
}
}
} catch (ihipException& ex) {
e = ex._code;
}
}
return ihipLogStatus(e);
}
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {