From a730d60930e236bca0ec06818fd16d67462caed5 Mon Sep 17 00:00:00 2001
From: Aditya Atluri
Date: Mon, 25 Apr 2016 11:05:30 -0500
Subject: [PATCH] fixed hipmemset to use native ihip api

---
 src/hip_memory.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp
index 01b24a9e13..4107be44ab 100644
--- a/src/hip_memory.cpp
+++ b/src/hip_memory.cpp
@@ -419,10 +419,62 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s
 
+/**
+ * Synchronously fill sizeBytes bytes at dst with the low byte of value.
+ *
+ * Launches the ihip memset kernel on the stream that hipStreamNull resolves
+ * to and waits for it, so the call is traced as hipMemset (HIP_INIT_API)
+ * instead of being forwarded to hipMemsetAsync.
+ *
+ * @return hipSuccess, or hipErrorInvalidValue when the stream cannot be
+ *         resolved or the kernel launch throws.
+ */
 hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
 {
-    HIP_INIT_API(dst, value, sizeBytes);
-
+    hipStream_t stream = hipStreamNull;
-    // TODO - call an ihip memset so HIP_TRACE is correct.
-    return hipMemsetAsync(dst, value, sizeBytes, hipStreamNull);
+    HIP_INIT_API(dst, value, sizeBytes, stream);
+
+    hipError_t e = hipSuccess;
+
+    stream = ihipSyncAndResolveStream(stream);
+
+    if (stream) {
+        stream->lockopen_preKernelCommand();
+
+        hc::completion_future cf;
+
+        // Like memset(), only the low byte of value is used. Keep it unsigned so
+        // replicating it into a dword never shifts into the sign bit of an int.
+        const unsigned byteValue = static_cast<unsigned>(value) & 0xffu;
+
+        // Dword stores need both a size and a destination that are 4-byte aligned.
+        const bool useDwords = ((sizeBytes | reinterpret_cast<size_t>(dst)) & 0x3) == 0;
+
+        try {
+            if (useDwords) {
+                // use a faster dword-per-workitem copy:
+                const unsigned value32 = byteValue * 0x01010101u;
+                cf = ihipMemsetKernel<unsigned> (stream, static_cast<unsigned*> (dst), value32, sizeBytes/sizeof(unsigned));
+            } else {
+                // use a slow byte-per-workitem copy:
+                cf = ihipMemsetKernel<char> (stream, static_cast<char*> (dst), static_cast<char>(byteValue), sizeBytes);
+            }
+        }
+        catch (std::exception &ex) {
+            e = hipErrorInvalidValue;
+        }
+
+        // hipMemset is synchronous: block until the kernel has finished. This also
+        // covers HIP_LAUNCH_BLOCKING, so no separate wait is needed for it. Skip
+        // the wait when the launch threw, because cf was never assigned.
+        if (e == hipSuccess) {
+            cf.wait();
+        }
+
+        stream->lockclose_postKernelCommand(cf);
+    } else {
+        e = hipErrorInvalidValue;
+    }
+
+    return ihipLogStatus(e);
 }
 
 