@@ -1542,6 +1542,63 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) {
|
||||
sizeBytes, std::move(val));
|
||||
}
|
||||
|
||||
// Selects the element width that ihipMemset() writes for each fill element:
// a byte, a 16-bit value, or a 32-bit value.
enum ihipMemsetCopyDataType {
    ihipMemsetCopyDataTypeChar = 0,   // fill with an 8-bit value
    ihipMemsetCopyDataTypeShort = 1,  // fill with a 16-bit value
    ihipMemsetCopyDataTypeInt = 2     // fill with a 32-bit value
};
|
||||
|
||||
// Shared implementation behind the hipMemset* entry points: enqueues a fill
// kernel on `stream` that writes `value` into the memory at `dst`.
//
// Parameters:
//   dst          - destination pointer handed to the fill kernel.
//   value        - fill value; masked to 8 bits for the Char type and to
//                  16 bits for the Short type, passed through for Int.
//   sizeBytes    - for the Char type this is a byte count. For the Short and
//                  Int types it is forwarded to the kernel unchanged.
//                  NOTE(review): presumably callers pass an element count for
//                  Short/Int - confirm against the hipMemsetD16/D32 callers.
//   stream       - stream to launch on; a null stream is rejected.
//   copyDataType - element width to write (see ihipMemsetCopyDataType).
//
// Returns hipSuccess when the kernel was enqueued, hipErrorInvalidValue when
// `stream` is null, `copyDataType` is not a known value, or the launch threw
// a std::exception.
//
// When HIP_API_BLOCKING is set the function waits on the stream before
// returning, whichever fill path was taken.
hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t stream, enum ihipMemsetCopyDataType copyDataType )
{
    if (!stream) {
        return hipErrorInvalidValue;
    }

    hipError_t e = hipSuccess;

    try {
        switch (copyDataType) {
            case ihipMemsetCopyDataTypeChar: {
                if ((sizeBytes & 0x3) == 0) {
                    // Size is a multiple of 4: use the faster dword-per-workitem fill.
                    // Replicate the low byte into all four byte lanes using unsigned
                    // arithmetic; shifting a signed int into the sign bit is not portable.
                    const uint32_t byteValue = static_cast<uint32_t>(value) & 0xffu;
                    const uint32_t value32 = byteValue * 0x01010101u;
                    ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
                                               sizeBytes / sizeof(uint32_t));
                } else {
                    // Otherwise fall back to the slow byte-per-workitem fill.
                    ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
                }
                break;
            }
            case ihipMemsetCopyDataTypeInt:
                // 4-byte value, written as-is.
                ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value, sizeBytes);
                break;
            case ihipMemsetCopyDataTypeShort:
                // 2-byte value: keep only the low 16 bits.
                value = value & 0xffff;
                ihipMemsetKernel<uint16_t>(stream, static_cast<uint16_t*>(dst), value, sizeBytes);
                break;
            default:
                // Unknown element type: nothing was enqueued, so report it rather
                // than claiming success.
                return hipErrorInvalidValue;
        }
    } catch (const std::exception&) {
        e = hipErrorInvalidValue;
    }

    // Honour the blocking debug mode for every fill path, not only the
    // byte-per-workitem one.
    if (HIP_API_BLOCKING) {
        tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", ToString(stream).c_str());
        stream->locked_wait();
    }

    return e;
}
|
||||
|
||||
|
||||
// TODO-sync: these functions are async unless the target is pinned host memory, in which case they are fully synchronous.
|
||||
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
|
||||
@@ -1551,35 +1608,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
if (stream) {
|
||||
if ((sizeBytes & 0x3) == 0) {
|
||||
// use a faster dword-per-workitem copy:
|
||||
try {
|
||||
value = value & 0xff;
|
||||
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
|
||||
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
|
||||
sizeBytes / sizeof(uint32_t));
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
// use a slow byte-per-workitem copy:
|
||||
try {
|
||||
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (HIP_API_BLOCKING) {
|
||||
tprintf(DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n",
|
||||
ToString(stream).c_str());
|
||||
stream->locked_wait();
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
|
||||
|
||||
return ihipLogStatus(e);
|
||||
};
|
||||
@@ -1590,42 +1619,12 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
hipStream_t stream = hipStreamNull;
|
||||
// TODO - call an ihip memset so HIP_TRACE is correct.
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
if (stream) {
|
||||
if ((sizeBytes & 0x3) == 0) {
|
||||
// use a faster dword-per-workitem copy:
|
||||
try {
|
||||
value = value & 0xff;
|
||||
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
|
||||
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
|
||||
sizeBytes / sizeof(uint32_t));
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
// use a slow byte-per-workitem copy:
|
||||
try {
|
||||
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
// TODO - is hipMemset supposed to be async?
|
||||
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
|
||||
stream->locked_wait();
|
||||
|
||||
if (HIP_LAUNCH_BLOCKING) {
|
||||
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__,
|
||||
ToString(stream).c_str());
|
||||
stream->locked_wait();
|
||||
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__,
|
||||
ToString(stream).c_str());
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
@@ -1635,39 +1634,11 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
hipStream_t stream = hipStreamNull;
|
||||
// TODO - call an ihip memset so HIP_TRACE is correct.
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
if (stream) {
|
||||
size_t sizeBytes = pitch * height;
|
||||
if ((sizeBytes & 0x3) == 0) {
|
||||
// use a faster dword-per-workitem copy:
|
||||
try {
|
||||
value = value & 0xff;
|
||||
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
|
||||
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
|
||||
sizeBytes / sizeof(uint32_t));
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
// use a slow byte-per-workitem copy:
|
||||
try {
|
||||
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
// TODO - is hipMemset supposed to be async?
|
||||
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
|
||||
stream->locked_wait();
|
||||
|
||||
if (HIP_LAUNCH_BLOCKING) {
|
||||
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__,
|
||||
ToString(stream).c_str());
|
||||
stream->locked_wait();
|
||||
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__,
|
||||
ToString(stream).c_str());
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
@@ -1681,41 +1652,13 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
hipStream_t stream = hipStreamNull;
|
||||
// TODO - call an ihip memset so HIP_TRACE is correct.
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
if (stream) {
|
||||
if ((sizeBytes & 0x3) == 0) {
|
||||
// use a faster dword-per-workitem copy:
|
||||
try {
|
||||
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value);
|
||||
ihipMemsetKernel<uint32_t>(stream, static_cast<uint32_t*>(dst), value32,
|
||||
sizeBytes / sizeof(uint32_t));
|
||||
} catch (std::exception& ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
// use a slow byte-per-workitem copy:
|
||||
try {
|
||||
ihipMemsetKernel<char>(stream, static_cast<char*>(dst), value, sizeBytes);
|
||||
} catch (std::exception& ex) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetCopyDataTypeChar);
|
||||
stream->locked_wait();
|
||||
|
||||
if (HIP_LAUNCH_BLOCKING) {
|
||||
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__,
|
||||
ToString(stream).c_str());
|
||||
stream->locked_wait();
|
||||
tprintf(DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__,
|
||||
ToString(stream).c_str());
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user