From a3bc29976b97598fb0347551ad28e645602b323b Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 11 Apr 2018 15:58:48 +0530 Subject: [PATCH] hipMemset refactoring [ROCm/clr commit: 40846f6f8ed5cc0030edf14887e4efe5ad4d8caa] --- projects/clr/hipamd/src/hip_memory.cpp | 181 +++++++++---------------- 1 file changed, 62 insertions(+), 119 deletions(-) diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index ffa9f0cfdd..0644f8de99 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -1542,6 +1542,63 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { sizeBytes, std::move(val)); } +typedef enum ihipMemsetCopyDataType { + ihipMemsetCopyDataTypeChar = 0, + ihipMemsetCopyDataTypeShort = 1, + ihipMemsetCopyDataTypeInt = 2 +}ihipMemsetCopyDataType; + +hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t stream, enum ihipMemsetCopyDataType copyDataType ) +{ + hipError_t e = hipSuccess; + + if (stream) { + if(copyDataType == ihipMemsetCopyDataTypeChar){ + if ((sizeBytes & 0x3) == 0) { + // use a faster dword-per-workitem copy: + try { + value = value & 0xff; + uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ; + ihipMemsetKernel (stream, static_cast (dst), value32, sizeBytes/sizeof(uint32_t)); + } + catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + } else { + // use a slow byte-per-workitem copy: + try { + ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + } + catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + if (HIP_API_BLOCKING) { + tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", ToString(stream).c_str()); + stream->locked_wait(); + } + } + } else { + if(copyDataType == ihipMemsetCopyDataTypeInt) { // 4 Bytes value + try { + ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + } catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + } else if(copyDataType == ihipMemsetCopyDataTypeShort) { + try { + value = value & 0xffff; + ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + } catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + } + } + } else { + e = hipErrorInvalidValue; + } + return e; +}; + // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { @@ -1551,35 +1608,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st stream = ihipSyncAndResolveStream(stream); - if (stream) { - if ((sizeBytes & 0x3) == 0) { - // use a faster dword-per-workitem copy: - try { - value = value & 0xff; - uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value); - ihipMemsetKernel(stream, static_cast(dst), value32, - sizeBytes / sizeof(uint32_t)); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } else { - // use a slow byte-per-workitem copy: - try { - ihipMemsetKernel(stream, static_cast(dst), value, sizeBytes); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } - - if (HIP_API_BLOCKING) { - tprintf(DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", - ToString(stream).c_str()); - stream->locked_wait(); - } - } else { - e = hipErrorInvalidValue; - } - + e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar); return ihipLogStatus(e); }; @@ -1590,42 +1619,12 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { hipError_t e = hipSuccess; hipStream_t stream = hipStreamNull; - // TODO - call an ihip memset so HIP_TRACE is correct. - stream = ihipSyncAndResolveStream(stream); - if (stream) { - if ((sizeBytes & 0x3) == 0) { - // use a faster dword-per-workitem copy: - try { - value = value & 0xff; - uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value); - ihipMemsetKernel(stream, static_cast(dst), value32, - sizeBytes / sizeof(uint32_t)); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } else { - // use a slow byte-per-workitem copy: - try { - ihipMemsetKernel(stream, static_cast(dst), value, sizeBytes); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } - // TODO - is hipMemset supposed to be async? + e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar); stream->locked_wait(); - - if (HIP_LAUNCH_BLOCKING) { - tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__, - ToString(stream).c_str()); - stream->locked_wait(); - tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__, - ToString(stream).c_str()); - } } else { e = hipErrorInvalidValue; - } - + } return ihipLogStatus(e); } @@ -1635,39 +1634,11 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t hipError_t e = hipSuccess; hipStream_t stream = hipStreamNull; - // TODO - call an ihip memset so HIP_TRACE is correct. stream = ihipSyncAndResolveStream(stream); - if (stream) { size_t sizeBytes = pitch * height; - if ((sizeBytes & 0x3) == 0) { - // use a faster dword-per-workitem copy: - try { - value = value & 0xff; - uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value); - ihipMemsetKernel(stream, static_cast(dst), value32, - sizeBytes / sizeof(uint32_t)); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } else { - // use a slow byte-per-workitem copy: - try { - ihipMemsetKernel(stream, static_cast(dst), value, sizeBytes); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } - // TODO - is hipMemset supposed to be async? + e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar); stream->locked_wait(); - - if (HIP_LAUNCH_BLOCKING) { - tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__, - ToString(stream).c_str()); - stream->locked_wait(); - tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__, - ToString(stream).c_str()); - } } else { e = hipErrorInvalidValue; } @@ -1681,41 +1652,13 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes hipError_t e = hipSuccess; hipStream_t stream = hipStreamNull; - // TODO - call an ihip memset so HIP_TRACE is correct. stream = ihipSyncAndResolveStream(stream); - if (stream) { - if ((sizeBytes & 0x3) == 0) { - // use a faster dword-per-workitem copy: - try { - uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value); - ihipMemsetKernel(stream, static_cast(dst), value32, - sizeBytes / sizeof(uint32_t)); - } catch (std::exception& ex) { - std::cout << ex.what() << std::endl; - e = hipErrorInvalidValue; - } - } else { - // use a slow byte-per-workitem copy: - try { - ihipMemsetKernel(stream, static_cast(dst), value, sizeBytes); - } catch (std::exception& ex) { - e = hipErrorInvalidValue; - } - } + e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar); stream->locked_wait(); - - if (HIP_LAUNCH_BLOCKING) { - tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__, - ToString(stream).c_str()); - stream->locked_wait(); - tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__, - ToString(stream).c_str()); - } } else { e = hipErrorInvalidValue; } - return ihipLogStatus(e); }