From b4719ee8aeeeecaad04bf8e8a5d83caf950ed0ce Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 8 May 2018 15:43:13 -0400
Subject: [PATCH] P4 to Git Change 1552011 by
skudchad@skudchad_test2_win_opencl on 2018/05/08 14:48:45
SWDEV-145570 - [HIP] Fix offset calculation when getting a memory object. Also include case when destination VA may just be a CPU host VA and not nessarily device alloced.
- Fix hipMemset* to write each byte and now a dword as per the spec
ReviewBoardURL = http://ocltc.amd.com/reviews/r/14787/diff/
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#24 edit
[ROCm/hip commit: 2df122be02c7baefcc74d6ad32a0e48f7ff7bf1b]
---
projects/hip/api/hip/hip_memory.cpp | 262 +++++++++++++++++++---------
1 file changed, 178 insertions(+), 84 deletions(-)
diff --git a/projects/hip/api/hip/hip_memory.cpp b/projects/hip/api/hip/hip_memory.cpp
index 09fefe4c8b..568ea557ec 100644
--- a/projects/hip/api/hip/hip_memory.cpp
+++ b/projects/hip/api/hip/hip_memory.cpp
@@ -36,6 +36,14 @@ extern void getDrvChannelOrderAndType(const enum hipArray_Format Format,
cl_channel_order* channelOrder,
cl_channel_type* channelType);
+inline amd::Memory* getMemoryObject(const void* ptr, size_t& offset) {
+ amd::Memory *memObj = amd::SvmManager::FindSvmBuffer(ptr);
+ if (memObj != nullptr) {
+ offset = reinterpret_cast(ptr) - reinterpret_cast(memObj->getSvmPtr());
+ }
+ return memObj;
+}
+
hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
{
if (sizeBytes == 0) {
@@ -63,8 +71,13 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
amd::Command* command = nullptr;
amd::Command::EventWaitList waitList;
- amd::Memory *srcMemory = amd::SvmManager::FindSvmBuffer(src);;
- amd::Memory *dstMemory = amd::SvmManager::FindSvmBuffer(dst);
+ size_t sOffset = 0;
+ amd::Memory *srcMemory = getMemoryObject(src, sOffset);
+ size_t dOffset = 0;
+ amd::Memory *dstMemory = getMemoryObject(dst, dOffset);
+
+ amd::Coord3D srcOffset(sOffset, 0, 0);
+ amd::Coord3D dstOffset(dOffset, 0, 0);
if (kind == hipMemcpyDefault) {
// Determine kind on VA
@@ -82,15 +95,15 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
switch (kind) {
case hipMemcpyDeviceToHost:
command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER, waitList,
- *srcMemory->asBuffer(), 0, sizeBytes, dst);
+ *srcMemory->asBuffer(), srcOffset, sizeBytes, dst);
break;
case hipMemcpyHostToDevice:
command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER, waitList,
- *dstMemory->asBuffer(), 0, sizeBytes, src);
+ *dstMemory->asBuffer(), dstOffset, sizeBytes, src);
break;
case hipMemcpyDeviceToDevice:
command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
- *srcMemory->asBuffer(),*dstMemory->asBuffer(), 0, 0, sizeBytes);
+ *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcOffset, dstOffset, sizeBytes);
break;
case hipMemcpyHostToHost:
memcpy(dst, src, sizeBytes);
@@ -124,24 +137,35 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, amd::HostQueue& queue,
bool isAsync = false) {
- amd::Command::EventWaitList waitList;
- amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst);
-
- amd::Coord3D fillOffset(0, 0, 0);
- amd::Coord3D fillSize(sizeBytes, 1, 1);
- amd::FillMemoryCommand* command =
- new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
- &value, sizeof(int), fillOffset, fillSize);
-
- if (command == nullptr) {
- return hipErrorOutOfMemory;
+ if (dst == nullptr) {
+ return hipErrorInvalidValue;
}
- command->enqueue();
- if (!isAsync) {
- command->awaitCompletion();
+ size_t offset = 0;
+ amd::Memory* memory = getMemoryObject(dst, offset);
+
+ if (memory != nullptr) {
+ // Device memory
+ amd::Command::EventWaitList waitList;
+ amd::Coord3D fillOffset(offset, 0, 0);
+ amd::Coord3D fillSize(sizeBytes, 1, 1);
+ amd::FillMemoryCommand* command =
+ new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
+ &value, sizeof(char), fillOffset, fillSize);
+
+ if (command == nullptr) {
+ return hipErrorOutOfMemory;
+ }
+
+ command->enqueue();
+ if (!isAsync) {
+ command->awaitCompletion();
+ }
+ command->release();
+ } else {
+ // Host alloced memory
+ memset(dst, value, sizeBytes);
}
- command->release();
return hipSuccess;
}
@@ -198,7 +222,8 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) {
hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) {
HIP_INIT_API(ptr, size);
- amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(ptr);
+ size_t offset = 0;
+ amd::Memory* svmMem = getMemoryObject(ptr, offset);
if (svmMem == nullptr) {
return hipErrorInvalidValue;
@@ -234,13 +259,14 @@ hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDevice
// Since we are using SVM buffer DevicePtr and HostPtr is the same
void* ptr = dptr;
- amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(ptr);
+ size_t offset = 0;
+ amd::Memory* svmMem = getMemoryObject(ptr, offset);
if (svmMem == nullptr) {
return hipErrorInvalidDevicePointer;
}
- *pbase = ptr;
+ *pbase = svmMem->getSvmPtr();
*psize = svmMem->getSize();
return hipSuccess;
@@ -509,7 +535,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) {
amd::HostQueue* queue = hip::getNullStream();
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue);
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice, *queue);
}
hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) {
@@ -517,7 +543,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) {
amd::HostQueue* queue = hip::getNullStream();
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue);
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost, *queue);
}
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) {
@@ -525,7 +551,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte
amd::HostQueue* queue = hip::getNullStream();
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue);
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice, *queue);
}
hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) {
@@ -533,7 +559,7 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) {
amd::HostQueue* queue = hip::getNullStream();
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue);
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToHost, *queue);
}
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
@@ -564,7 +590,7 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes,
queue = as_amd(reinterpret_cast(stream))->asHostQueue();
}
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyHostToDevice,
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyHostToDevice,
*queue, true);
}
@@ -580,7 +606,7 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t siz
queue = as_amd(reinterpret_cast(stream))->asHostQueue();
}
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToDevice,
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToDevice,
*queue, true);
}
@@ -596,7 +622,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes,
queue = as_amd(reinterpret_cast(stream))->asHostQueue();
}
- return ihipMemcpy((void*) dst, (const void*) src, sizeBytes, hipMemcpyDeviceToHost,
+ return ihipMemcpy(reinterpret_cast(dst), (const void*) src, sizeBytes, hipMemcpyDeviceToHost,
*queue, true);
}
@@ -614,34 +640,70 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
// Create buffer rectangle info structure
amd::BufferRect srcRect;
amd::BufferRect dstRect;
- amd::Memory* srcPtr = amd::SvmManager::FindSvmBuffer(src);
- amd::Memory* dstPtr = amd::SvmManager::FindSvmBuffer(dst);
+
size_t region[3] = {width, height, 1};
size_t src_slice_pitch = spitch * height;
size_t dst_slice_pitch = dpitch * height;
- size_t origin[3] = { };
+ size_t sOrigin[3] = { };
+ size_t dOrigin[3] = { };
+ amd::Memory* srcPtr = getMemoryObject(src, sOrigin[0]);
+ amd::Memory* dstPtr = getMemoryObject(dst, dOrigin[0]);
- if (!srcRect.create(origin, region, spitch, src_slice_pitch) ||
- !dstRect.create(origin, region, dpitch, dst_slice_pitch)) {
- return hipErrorInvalidValue;
+ if (kind == hipMemcpyDefault) {
+ // Determine kind on VA
+ if (srcPtr == nullptr && dstPtr != nullptr) {
+ kind = hipMemcpyHostToDevice;
+ } else if (srcPtr != nullptr && dstPtr == nullptr) {
+ kind = hipMemcpyDeviceToHost;
+ } else if (srcPtr != nullptr && dstPtr != nullptr) {
+ kind = hipMemcpyDeviceToDevice;
+ } else {
+ kind = hipMemcpyHostToHost;
+ }
}
amd::Coord3D srcStart(srcRect.start_, 0, 0);
amd::Coord3D dstStart(dstRect.start_, 0, 0);
amd::Coord3D srcEnd(srcRect.end_, 1, 1);
amd::Coord3D dstEnd(dstRect.end_, 1, 1);
-
- if (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd) ||
- !dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)) {
- return hipErrorInvalidValue;
- }
-
- amd::Command::EventWaitList waitList;
amd::Coord3D size(region[0], region[1], region[2]);
- amd::CopyMemoryCommand* command =
- new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(),
- *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect);
+ if (!srcRect.create(sOrigin, region, spitch, src_slice_pitch) ||
+ !dstRect.create(dOrigin, region, dpitch, dst_slice_pitch)) {
+ return hipErrorInvalidValue;
+ }
+/*
+ if (((srcPtr != nullptr) && (!srcPtr->asBuffer()->validateRegion(srcStart, srcEnd))) ||
+ ((srcPtr != nullptr) && (!dstPtr->asBuffer()->validateRegion(dstStart, dstEnd)))) {
+ return hipErrorInvalidValue;
+ }
+*/
+ amd::Command* command = nullptr;
+ amd::Command::EventWaitList waitList;
+ switch (kind) {
+ case hipMemcpyDeviceToHost:
+ command = new amd::ReadMemoryCommand(queue, CL_COMMAND_READ_BUFFER_RECT, waitList,
+ *srcPtr->asBuffer(), srcStart, size, dst, srcRect, dstRect);
+ break;
+ case hipMemcpyHostToDevice:
+ command = new amd::WriteMemoryCommand(queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList,
+ *dstPtr->asBuffer(), dstStart, size, src, dstRect, srcRect);
+ break;
+ case hipMemcpyDeviceToDevice:
+ command = new amd::CopyMemoryCommand(queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, *srcPtr->asBuffer(),
+ *dstPtr->asBuffer(), srcStart, dstStart, size, srcRect, dstRect);
+ break;
+ case hipMemcpyHostToHost:
+ for(unsigned int y = 0; y < height; y++) {
+ void* pDst = reinterpret_cast(reinterpret_cast(dst) + y * dpitch);
+ void* pSrc = reinterpret_cast(reinterpret_cast(src) + y * spitch);
+ memcpy(pDst, pSrc, width);
+ }
+ return hipSuccess;
+ default:
+ assert(!"Shouldn't reach here");
+ break;
+ }
if (command == nullptr) {
return hipErrorOutOfMemory;
@@ -700,14 +762,15 @@ hipError_t hipMemcpyToArray(hipArray* dstArray, size_t wOffset, size_t hOffset,
amd::Command* command = nullptr;
amd::Command::EventWaitList waitList;
amd::Memory* memory;
-
+ size_t offset = 0;
amd::Coord3D dstOffset(wOffset, hOffset, 0);
switch (kind) {
case hipMemcpyDeviceToHost:
assert(!"Invalid case");
case hipMemcpyHostToDevice:
- memory = amd::SvmManager::FindSvmBuffer(dstArray->data);
+ memory = getMemoryObject(dstArray->data, offset);
+ assert(offset == 0);
command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList,
*memory->asBuffer(), dstOffset, count, src);
break;
@@ -738,13 +801,15 @@ hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffs
amd::Command::EventWaitList waitList;
amd::Memory* memory;
+ size_t offset = 0;
amd::Coord3D srcOffset(wOffset, hOffset, 0);
switch (kind) {
case hipMemcpyHostToDevice:
assert(!"Invalid case");
case hipMemcpyDeviceToHost:
- memory = amd::SvmManager::FindSvmBuffer(srcArray->data);
+ memory = getMemoryObject(srcArray->data, offset);
+ assert(offset == 0);
command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList,
*memory->asBuffer(), srcOffset, count, dst);
break;
@@ -771,7 +836,9 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo
amd::HostQueue* queue = hip::getNullStream();
amd::Command::EventWaitList waitList;
- amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dstArray->data);
+ size_t offset = 0;
+ amd::Memory* memory = getMemoryObject(dstArray->data, offset);
+ assert(offset == 0);
amd::Command* command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER, waitList,
*memory->asBuffer(), dstOffset, count, srcHost);
@@ -792,7 +859,9 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t
amd::HostQueue* queue = hip::getNullStream();
amd::Command::EventWaitList waitList;
- amd::Memory* memory = amd::SvmManager::FindSvmBuffer(srcArray->data);
+ size_t offset = 0;
+ amd::Memory* memory = getMemoryObject(srcArray->data, offset);
+ assert(offset == 0);
amd::Command* command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER, waitList,
*memory->asBuffer(), srcOffset, count, dst);
@@ -870,8 +939,11 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
// Create buffer rectangle info structure
amd::BufferRect srcRect;
amd::BufferRect dstRect;
- amd::Memory* src = amd::SvmManager::FindSvmBuffer(srcPtr);
- amd::Memory* dst = amd::SvmManager::FindSvmBuffer(dstPtr);
+ size_t offset = 0;
+ amd::Memory* src = getMemoryObject(srcPtr, offset);
+ assert(offset == 0);
+ amd::Memory* dst = getMemoryObject(dstPtr, offset);
+ assert(offset == 0);
size_t src_slice_pitch = srcPitchInBytes * p->srcHeight;
size_t dst_slice_pitch = dstPitchInbytes * p->dstHeight;
@@ -923,26 +995,39 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
hipError_t ihipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height,
amd::HostQueue& queue, bool isAsync = false) {
- amd::Command::EventWaitList waitList;
- amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst);
-
- amd::Coord3D fillOffset(0, 0, 0);
+ if (dst == nullptr) {
+ return hipErrorInvalidValue;
+ }
size_t sizeBytes = pitch * height;
- amd::Coord3D fillSize(sizeBytes, 1, 1);
- amd::FillMemoryCommand* command =
- new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
- &value, sizeof(int), fillOffset, fillSize);
+ size_t offset = 0;
- if (command == nullptr) {
- return hipErrorOutOfMemory;
- }
+ amd::Memory* memory = getMemoryObject(dst, offset);
- command->enqueue();
- if(!isAsync) {
- command->awaitCompletion();
+ if (memory != nullptr) {
+ // Device memory
+ amd::Command::EventWaitList waitList;
+ amd::Coord3D fillOffset(offset, 0, 0);
+ amd::Coord3D fillSize(sizeBytes, 1, 1);
+
+ // TODO: Byte copies are inefficient. Combine multiple writes inside runtime
+ amd::FillMemoryCommand* command =
+ new amd::FillMemoryCommand(queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
+ &value, sizeof(char), fillOffset, fillSize);
+
+ if (command == nullptr) {
+ return hipErrorOutOfMemory;
+ }
+
+ command->enqueue();
+ if(!isAsync) {
+ command->awaitCompletion();
+ }
+ command->release();
+ } else {
+ // Host alloced memory
+ memset(dst, value, sizeBytes);
}
- command->release();
return hipSuccess;
}
@@ -971,24 +1056,33 @@ hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value,
hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes) {
HIP_INIT_API(dst, value, sizeBytes);
- amd::HostQueue* queue = hip::getNullStream();
-
- amd::Command::EventWaitList waitList;
- amd::Memory* memory = amd::SvmManager::FindSvmBuffer(dst);
-
- amd::Coord3D fillOffset(0, 0, 0);
- amd::Coord3D fillSize(sizeBytes, 1, 1);
- amd::FillMemoryCommand* command =
- new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
- &value, sizeof(char), fillOffset, fillSize);
-
- if (command == nullptr) {
- return hipErrorOutOfMemory;
+ if (dst == nullptr) {
+ return hipErrorInvalidValue;
}
- command->enqueue();
- command->awaitCompletion();
- command->release();
+ amd::HostQueue* queue = hip::getNullStream();
+ size_t offset = 0;
+ amd::Command::EventWaitList waitList;
+ amd::Memory* memory = getMemoryObject(dst, offset);
+ if (memory != nullptr) {
+ // Device memory
+ amd::Coord3D fillOffset(offset, 0, 0);
+ amd::Coord3D fillSize(sizeBytes, 1, 1);
+ amd::FillMemoryCommand* command =
+ new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
+ &value, sizeof(char), fillOffset, fillSize);
+
+ if (command == nullptr) {
+ return hipErrorOutOfMemory;
+ }
+
+ command->enqueue();
+ command->awaitCompletion();
+ command->release();
+ } else {
+ // Host alloced memory
+ memset(dst, value, sizeBytes);
+ }
return hipSuccess;
}