hipMemset refactoring

[ROCm/clr commit: 40846f6f8e]
This commit is contained in:
Rahul Garg
2018-04-11 15:58:48 +05:30
rodzic d087d3adc3
commit a3bc29976b
+62 -119
Wyświetl plik
@@ -1542,6 +1542,63 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) {
sizeBytes, std::move(val));
}
// Selects the element width that ihipMemset uses when filling memory.
enum ihipMemsetCopyDataType {
    ihipMemsetCopyDataTypeChar  = 0,  // fill with an 8-bit pattern
    ihipMemsetCopyDataTypeShort = 1,  // fill with a 16-bit pattern
    ihipMemsetCopyDataTypeInt   = 2   // fill with a 32-bit pattern
};
/**
 * Shared implementation behind the hipMemset* entry points: launches
 * ihipMemsetKernel on `stream` with an element type chosen by `copyDataType`.
 *
 * @param dst           Destination pointer handed to the fill kernel.
 * @param value         Fill pattern. Masked to 8 bits for Char and to 16 bits for
 *                      Short; forwarded unchanged for Int.
 * @param sizeBytes     For Char: size of the region in bytes (a multiple of 4 takes
 *                      the dword fast path). For Short/Int the value is forwarded to
 *                      the kernel unchanged.
 *                      NOTE(review): presumably an element count for Short/Int -
 *                      confirm against the callers.
 * @param stream        Stream to launch on; a null stream is rejected.
 * @param copyDataType  Element width selector.
 * @return hipSuccess on success; hipErrorInvalidValue if `stream` is null, if
 *         `copyDataType` is not a known enumerator, or if launching the kernel
 *         throws a std::exception.
 */
hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t stream, enum ihipMemsetCopyDataType copyDataType )
{
    if (!stream) {
        return hipErrorInvalidValue;
    }

    hipError_t e = hipSuccess;
    try {
        switch (copyDataType) {
            case ihipMemsetCopyDataTypeChar:
                if ((sizeBytes & 0x3) == 0) {
                    // use a faster dword-per-workitem copy:
                    // replicate the byte into all four lanes using unsigned arithmetic;
                    // shifting a signed int holding 0xff left by 24 overflows int.
                    value = value & 0xff;
                    const uint32_t value32 = static_cast<uint32_t>(value) * 0x01010101u;
                    ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
                                               sizeBytes / sizeof(uint32_t));
                } else {
                    // use a slow byte-per-workitem copy:
                    ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
                }
                break;

            case ihipMemsetCopyDataTypeInt:  // 4 Bytes value
                ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value, sizeBytes);
                break;

            case ihipMemsetCopyDataTypeShort:  // 2 Bytes value
                value = value & 0xffff;
                ihipMemsetKernel<uint16_t>(stream, static_cast<uint16_t*>(dst), value, sizeBytes);
                break;

            default:
                // Nothing was launched; report it instead of claiming success.
                return hipErrorInvalidValue;
        }
    } catch (const std::exception&) {
        e = hipErrorInvalidValue;
    }

    // Honor blocking mode for every fill path, not only the byte-per-workitem one.
    if (HIP_API_BLOCKING) {
        tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", ToString(stream).c_str());
        stream->locked_wait();
    }

    return e;
}
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
@@ -1551,35 +1608,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st
stream = ihipSyncAndResolveStream(stream);
if (stream) {
if ((sizeBytes & 0x3) == 0) {
// use a faster dword-per-workitem copy:
try {
value = value & 0xff;
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
sizeBytes / sizeof(uint32_t));
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
}
if (HIP_API_BLOCKING) {
tprintf(DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n",
ToString(stream).c_str());
stream->locked_wait();
}
} else {
e = hipErrorInvalidValue;
}
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
return ihipLogStatus(e);
};
@@ -1590,42 +1619,12 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) {
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
// TODO - call an ihip memset so HIP_TRACE is correct.
stream = ihipSyncAndResolveStream(stream);
if (stream) {
if ((sizeBytes & 0x3) == 0) {
// use a faster dword-per-workitem copy:
try {
value = value & 0xff;
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
sizeBytes / sizeof(uint32_t));
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
}
// TODO - is hipMemset supposed to be async?
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
stream->locked_wait();
if (HIP_LAUNCH_BLOCKING) {
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__,
ToString(stream).c_str());
stream->locked_wait();
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__,
ToString(stream).c_str());
}
} else {
e = hipErrorInvalidValue;
}
}
return ihipLogStatus(e);
}
@@ -1635,39 +1634,11 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
// TODO - call an ihip memset so HIP_TRACE is correct.
stream = ihipSyncAndResolveStream(stream);
if (stream) {
size_t sizeBytes = pitch * height;
if ((sizeBytes & 0x3) == 0) {
// use a faster dword-per-workitem copy:
try {
value = value & 0xff;
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
sizeBytes / sizeof(uint32_t));
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
}
// TODO - is hipMemset supposed to be async?
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
stream->locked_wait();
if (HIP_LAUNCH_BLOCKING) {
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__,
ToString(stream).c_str());
stream->locked_wait();
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__,
ToString(stream).c_str());
}
} else {
e = hipErrorInvalidValue;
}
@@ -1681,41 +1652,13 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
// TODO - call an ihip memset so HIP_TRACE is correct.
stream = ihipSyncAndResolveStream(stream);
if (stream) {
if ((sizeBytes & 0x3) == 0) {
// use a faster dword-per-workitem copy:
try {
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
sizeBytes / sizeof(uint32_t));
} catch (std::exception& ex) {
std::cout << ex.what() << std::endl;
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
} catch (std::exception& ex) {
e = hipErrorInvalidValue;
}
}
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
stream->locked_wait();
if (HIP_LAUNCH_BLOCKING) {
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__,
ToString(stream).c_str());
stream->locked_wait();
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__,
ToString(stream).c_str());
}
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}