SWDEV-231874 - Make hipMemset pattern size 8-byte aligned when possible
Change-Id: Ida98bd89212af9b00f3c9c7c5d22ae81f3b5396a
[ROCm/hip commit: 1ed3af2789]
Tento commit je obsažen v:
odevzdal
Karthik Jayaprakash
rodič
376810dfcc
revize
c312f36eda
@@ -1654,7 +1654,7 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
|
||||
HIP_RETURN(ihipMemcpyParam3D(pCopy, stream, true));
|
||||
}
|
||||
|
||||
hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int value, size_t valueSize,
|
||||
hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int64_t value, size_t valueSize,
|
||||
size_t sizeBytes, amd::HostQueue* queue, bool isAsync = false) {
|
||||
|
||||
if ((memory == nullptr) || (queue == nullptr)) {
|
||||
@@ -1680,7 +1680,7 @@ hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int value,
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes,
|
||||
hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeBytes,
|
||||
hipStream_t stream, bool isAsync = false) {
|
||||
if (sizeBytes == 0) {
|
||||
// Skip if nothing needs filling.
|
||||
@@ -1702,38 +1702,42 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes,
|
||||
hipError_t hip_error = hipSuccess;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
|
||||
int32_t value32 = 0;
|
||||
const size_t dwordModSize = (sizeBytes % sizeof(int32_t));
|
||||
int64_t value64 = 0;
|
||||
const size_t uint64ModSize = (sizeBytes % sizeof(int64_t));
|
||||
|
||||
if (sizeBytes/sizeof(int32_t) > 0) {
|
||||
if (sizeBytes/sizeof(int64_t) > 0) {
|
||||
if (valueSize == sizeof(int8_t)) {
|
||||
value = value & 0xff;
|
||||
value32 = ((value << 24) | (value << 16) | (value << 8) | (value));
|
||||
value64 = ((value << 56) | (value << 48) | (value << 40) | (value << 32)
|
||||
| (value << 24) | (value << 16) | (value << 8) | (value));
|
||||
} else if (valueSize == sizeof(int16_t)) {
|
||||
value = value & 0xffff;
|
||||
value32 = ((value<<16) | (value));
|
||||
value64 = ((value << 48) | (value << 32) | (value<<16) | (value));
|
||||
} else if(valueSize == sizeof(int32_t)) {
|
||||
value32 = value;
|
||||
value = value & 0xffffffff;
|
||||
value64 = ((value<<32) | (value));
|
||||
} else if (valueSize == sizeof(int64_t)) {
|
||||
value64 = value;
|
||||
} else {
|
||||
LogPrintfError("Unsupported Pattern size: %u \n", valueSize);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
// If dwordModSize is != 0 then we will do a second fillBuffer Command
|
||||
// If uint64ModSize is != 0 then we will do a second fillBuffer Command
|
||||
// on the same stream below, dont wait, do the first call async.
|
||||
hip_error = packFillMemoryCommand(memory, offset, value32, sizeof(int32_t),
|
||||
sizeBytes - dwordModSize, queue,
|
||||
((dwordModSize != 0) || isAsync));
|
||||
hip_error = packFillMemoryCommand(memory, offset, value64, sizeof(int64_t),
|
||||
sizeBytes - uint64ModSize, queue,
|
||||
((uint64ModSize != 0) || isAsync));
|
||||
if(hip_error != hipSuccess) {
|
||||
return hip_error;
|
||||
}
|
||||
}
|
||||
|
||||
if (dwordModSize != 0) {
|
||||
if (uint64ModSize != 0) {
|
||||
void* new_dst = reinterpret_cast<void*>((reinterpret_cast<address>(dst)
|
||||
+ sizeBytes) - dwordModSize);
|
||||
+ sizeBytes) - uint64ModSize);
|
||||
memory = getMemoryObject(new_dst, offset);
|
||||
hip_error = packFillMemoryCommand(memory, offset, value, valueSize,
|
||||
dwordModSize, queue, isAsync);
|
||||
uint64ModSize, queue, isAsync);
|
||||
}
|
||||
|
||||
return hip_error;
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele