Added new memory API's hipMemAllocPitch, hipMemAllocHost, hipMemsetD16, hipMemsetD16Async, hipMemsetD8Async
Modified to support all scenarios hipMemcpyParam2DAsync, hipMemcpyParam2D.


[ROCm/clr commit: 96890792b0]
Этот коммит содержится в:
ansurya
2019-10-04 13:36:31 +05:30
коммит произвёл Maneesh Gupta
родитель e5a2ba9602
Коммит efc64a4f8a
12 изменённых файлов: 404 добавлений и 48 удалений
+8
Просмотреть файл
@@ -198,6 +198,8 @@ sub simpleSubstitutions {
$ft{'memory'} += s/\bcuIpcOpenMemHandle\b/hipIpcOpenMemHandle/g;
$ft{'memory'} += s/\bcuMemAlloc\b/hipMalloc/g;
$ft{'memory'} += s/\bcuMemAllocManaged\b/hipMemAllocManaged/g;
$ft{'memory'} += s/\bcuMemAllocPitch\b/hipMemAllocPitch/g;
$ft{'memory'} += s/\bcuMemAllocPitch_v2\b/hipMemAllocPitch/g;
$ft{'memory'} += s/\bcuMemAlloc_v2\b/hipMalloc/g;
$ft{'memory'} += s/\bcuMemFree\b/hipFree/g;
$ft{'memory'} += s/\bcuMemFreeHost\b/hipHostFree/g;
@@ -207,6 +209,8 @@ sub simpleSubstitutions {
$ft{'memory'} += s/\bcuMemGetInfo\b/hipMemGetInfo/g;
$ft{'memory'} += s/\bcuMemGetInfo_v2\b/hipMemGetInfo/g;
$ft{'memory'} += s/\bcuMemHostAlloc\b/hipHostMalloc/g;
$ft{'memory'} += s/\bcuMemAllocHost\b/hipMemAllocHost/g;
$ft{'memory'} += s/\bcuMemAllocHost_v2\b/hipMemAllocHost/g;
$ft{'memory'} += s/\bcuMemHostGetDevicePointer\b/hipHostGetDevicePointer/g;
$ft{'memory'} += s/\bcuMemHostGetDevicePointer_v2\b/hipHostGetDevicePointer/g;
$ft{'memory'} += s/\bcuMemHostGetFlags\b/hipMemHostGetFlags/g;
@@ -238,6 +242,10 @@ sub simpleSubstitutions {
$ft{'memory'} += s/\bcuMemsetD32_v2\b/hipMemsetD32/g;
$ft{'memory'} += s/\bcuMemsetD8\b/hipMemsetD8/g;
$ft{'memory'} += s/\bcuMemsetD8_v2\b/hipMemsetD8/g;
$ft{'memory'} += s/\bcuMemsetD8Async\b/hipMemsetD8Async/g;
$ft{'memory'} += s/\bcuMemsetD16\b/hipMemsetD16/g;
$ft{'memory'} += s/\bcuMemsetD16_v2\b/hipMemsetD16/g;
$ft{'memory'} += s/\bcuMemsetD16Async\b/hipMemsetD16Async/g;
$ft{'memory'} += s/\bcudaFree\b/hipFree/g;
$ft{'memory'} += s/\bcudaFreeArray\b/hipFreeArray/g;
$ft{'memory'} += s/\bcudaFreeHost\b/hipHostFree/g;
+5 -5
Просмотреть файл
@@ -857,9 +857,9 @@
| `cuIpcOpenEventHandle` | |
| `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` |
| `cuMemAlloc` | `hipMalloc` |
| `cuMemAllocHost` | |
| `cuMemAllocHost` | `hipMemAllocHost` |
| `cuMemAllocManaged` | `hipMemAllocManaged` |
| `cuMemAllocPitch` | |
| `cuMemAllocPitch` | `hipMemAllocPitch` |
| `cuMemcpy` | |
| `cuMemcpy2D` | `hipMemcpyParam2D` |
| `cuMemcpy2DAsync` | `hipMemcpyParam2DAsync` |
@@ -893,8 +893,8 @@
| `cuMemHostGetFlags` | `hipHostGetFlags` |
| `cuMemHostRegister` | `hipHostRegister` |
| `cuMemHostUnregister` | `hipHostUnregister` |
| `cuMemsetD16` | |
| `cuMemsetD16Async` | |
| `cuMemsetD16` | `hipMemsetD16` |
| `cuMemsetD16Async` | `hipMemsetD16Async` |
| `cuMemsetD2D16` | |
| `cuMemsetD2D16Async` | |
| `cuMemsetD2D32` | |
@@ -904,7 +904,7 @@
| `cuMemsetD32` | `hipMemsetD32` |
| `cuMemsetD32Async` | `hipMemsetD32Async` |
| `cuMemsetD8` | `hipMemsetD8` |
| `cuMemsetD8Async` | |
| `cuMemsetD8Async` | `hipMemsetD8Async` |
| `cuMipmappedArrayCreate` | |
| `cuMipmappedArrayDestroy` | |
| `cuMipmappedArrayGetLevel` | |
+8 -8
Просмотреть файл
@@ -162,14 +162,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
{"cuMemAlloc", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}},
{"cuMemAlloc_v2", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}},
// cudaHostAlloc
{"cuMemAllocHost", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemAllocHost_v2", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemAllocHost", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER}},
{"cuMemAllocHost_v2", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER}},
// cudaMallocManaged
{"cuMemAllocManaged", {"hipMemAllocManaged", "", CONV_MEMORY, API_DRIVER}},
// no analogue
// NOTE: Not equal to cudaMallocPitch due to different signatures
{"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}},
{"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}},
// no analogue
// NOTE: Not equal to cudaMemcpy due to different signatures
{"cuMemcpy", {"hipMemcpy_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
@@ -271,10 +271,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
// cudaHostUnregister
{"cuMemHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_DRIVER}},
// no analogue
{"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}},
{"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}},
// no analogue
{"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER}},
// no analogue
{"cuMemsetD2D16", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemsetD2D16_v2", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
@@ -299,7 +299,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
{"cuMemsetD8", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}},
{"cuMemsetD8_v2", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}},
// no analogue
{"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER}},
// no analogue
// NOTE: Not equal to cudaMallocMipmappedArray due to different signatures
{"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
+84
Просмотреть файл
@@ -1115,6 +1115,21 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
DEPRECATED("use hipHostMalloc instead")
hipError_t hipMallocHost(void** ptr, size_t size);
/**
* @brief Allocate pinned host memory [Deprecated]
*
* @param[out] ptr Pointer to the allocated host pinned memory
* @param[in] size Requested memory size
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return #hipSuccess, #hipErrorMemoryAllocation
*
* @deprecated use hipHostMalloc() instead
*/
DEPRECATED("use hipHostMalloc instead")
hipError_t hipMemAllocHost(void** ptr, size_t size);
/**
* @brief Allocate device accessible page locked host memory
*
@@ -1250,6 +1265,30 @@ hipError_t hipHostUnregister(void* hostPtr);
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height);
/**
* Allocates at least width (in bytes) * height bytes of linear memory
* Padding may occur to ensure alighnment requirements are met for the given row
* The change in width size due to padding will be returned in *pitch.
* Currently the alignment is set to 128 bytes
*
* @param[out] dptr Pointer to the allocated device memory
* @param[out] pitch Pitch for allocation (in bytes)
* @param[in] width Requested pitched allocation width (in bytes)
* @param[in] height Requested pitched allocation height
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
* The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array.
* Given the row and column of an array element of type T, the address is computed as:
* T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
*
* @return Error code
*
* @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
* hipMalloc3DArray, hipHostMalloc
*/
hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes);
/**
* @brief Free memory allocated by the hcc hip memory allocation API.
* This API performs an implicit hipDeviceSynchronize() call.
@@ -1708,6 +1747,51 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes);
*/
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes);
/**
* @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant
* byte value value.
*
* hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the
* memset is complete. The operation can optionally be associated to a stream by passing a non-zero
* stream argument. If stream is non-zero, the operation may overlap with operations in other
* streams.
*
* @param[out] dst Data ptr to be filled
* @param[in] constant value to be set
* @param[in] sizeBytes Data size in bytes
* @param[in] stream - Stream identifier
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
*/
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes, hipStream_t stream __dparm(0));
/**
* @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant
* short value value.
*
* @param[out] dst Data ptr to be filled
* @param[in] constant value to be set
* @param[in] sizeBytes Data size in bytes
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
*/
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes);
/**
* @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant
* short value value.
*
* hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the
* memset is complete. The operation can optionally be associated to a stream by passing a non-zero
* stream argument. If stream is non-zero, the operation may overlap with operations in other
* streams.
*
* @param[out] dst Data ptr to be filled
* @param[in] constant value to be set
* @param[in] sizeBytes Data size in bytes
* @param[in] stream - Stream identifier
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
*/
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes, hipStream_t stream __dparm(0));
/**
* @brief Fills the memory area pointed to by dest with the constant integer
* value for specified number of times.
+24
Просмотреть файл
@@ -459,6 +459,10 @@ inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width,
return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
}
inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
}
inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
}
@@ -471,6 +475,12 @@ inline static hipError_t hipMallocHost(void** ptr, size_t size) {
return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
}
inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
__attribute__((deprecated("use hipHostMalloc instead")));
inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
return hipCUResultTohipError(cuMemAllocHost(ptr, size));
}
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
__attribute__((deprecated("use hipHostMalloc instead")));
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
@@ -761,6 +771,20 @@ inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, s
return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
}
inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
hipStream_t stream __dparm(0)) {
return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
}
inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
}
inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
hipStream_t stream __dparm(0)) {
return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
}
inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
}
+100 -16
Просмотреть файл
@@ -391,6 +391,8 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
// Deprecated function:
hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
// Deprecated function:
hipError_t hipMemAllocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
// Deprecated function:
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
@@ -465,6 +467,15 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
return ihipLogStatus(hip_status);
}
hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes){
HIP_INIT_SPECIAL_API(hipMemAllocPitch, (TRACE_MEM), dptr, pitch, widthInBytes, height,elementSizeBytes);
HIP_SET_DEVICE();
if (widthInBytes == 0 || height == 0) return ihipLogStatus(hipErrorInvalidValue);
return ihipLogStatus(ihipMallocPitch(tls, dptr, pitch, widthInBytes, height, 0));
}
hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent);
HIP_SET_DEVICE();
@@ -1786,28 +1797,65 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
return ihipLogStatus(e);
}
hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) {
if (pCopy == nullptr) {
return hipErrorInvalidValue;
}
void* dst; const void* src;
size_t spitch = pCopy->srcPitch;
size_t dpitch = pCopy->dstPitch;
switch(pCopy->srcMemoryType){
case hipMemoryTypeHost:
src = pCopy->srcHost;
break;
case hipMemoryTypeArray:
src = pCopy->srcArray->data;
spitch = pCopy->WidthInBytes;
break;
case hipMemoryTypeUnified:
case hipMemoryTypeDevice:
src = pCopy->srcDevice;
break;
default:
return hipErrorInvalidValue;
}
switch(pCopy->dstMemoryType){
case hipMemoryTypeHost:
dst = pCopy->dstHost;
break;
case hipMemoryTypeArray:
dst = pCopy->dstArray->data;
dpitch = pCopy->WidthInBytes;
break;
case hipMemoryTypeUnified:
case hipMemoryTypeDevice:
dst = pCopy->dstDevice;
break;
default:
return hipErrorInvalidValue;
}
if(pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes || pCopy->srcY >= pCopy->Height){
return hipErrorInvalidValue;
} else if(pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes || pCopy->dstY >= pCopy->Height){
return hipErrorInvalidValue;
}
src = (void*)((char*)src+pCopy->srcY*pCopy->srcPitch + pCopy->srcXInBytes);
dst = (void*)((char*)dst+pCopy->dstY*pCopy->dstPitch + pCopy->dstXInBytes);
if(isAsync){
return ihipMemcpy2DAsync(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream);
} else{
return ihipMemcpy2D(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault);
}
}
hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), pCopy);
hipError_t e = hipSuccess;
if (pCopy == nullptr) {
e = hipErrorInvalidValue;
} else {
e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch,
pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault);
}
return ihipLogStatus(e);
return ihipLogStatus(ihipMemcpyParam2D(pCopy, hipStreamNull, false));
}
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipMemcpyParam2DAsync, (TRACE_MCMD), pCopy, stream);
hipError_t e = hipSuccess;
if (pCopy == nullptr) {
e = hipErrorInvalidValue;
} else {
e = ihipMemcpy2DAsync(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch,
pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream);
}
return ihipLogStatus(e);
return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true));
}
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
@@ -1903,6 +1951,42 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes
return ihipLogStatus(e);
}
hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes , hipStream_t stream ) {
HIP_INIT_SPECIAL_API(hipMemsetD8Async, (TRACE_MCMD), dst, value, sizeBytes, stream);
stream = ihipSyncAndResolveStream(stream);
if (stream) {
return ihipLogStatus(ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar));
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
}
hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t sizeBytes){
HIP_INIT_SPECIAL_API(hipMemsetD16, (TRACE_MCMD), dst, value, sizeBytes);
hipError_t e = hipSuccess;
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
if (stream) {
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeShort);
if(hipSuccess == e)
stream->locked_wait();
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t sizeBytes, hipStream_t stream ){
HIP_INIT_SPECIAL_API(hipMemsetD16Async, (TRACE_MCMD), dst, value, sizeBytes, stream);
stream = ihipSyncAndResolveStream(stream);
if (stream) {
return ihipLogStatus(ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeShort));
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
}
hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) {
HIP_INIT_SPECIAL_API(hipMemsetD32, (TRACE_MCMD), dst, value, count);
+1 -1
Просмотреть файл
@@ -234,7 +234,7 @@ void checkFunctional() {
int main() {
bool* result{nullptr};
hipHostMalloc(&result, sizeof(result));
hipMemAllocHost((void**)&result, sizeof(result));
result[0] = false;
hipLaunchKernelGGL(
+37 -10
Просмотреть файл
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
* TEST: %t
* HIT_END
*/
@@ -34,6 +34,21 @@ void printSep() {
"======================================================================================\n");
}
inline void initMemCpyParam2D(hip_Memcpy2D &ins, const size_t dpitch,
const size_t spitch, const size_t width,
const size_t height, hipMemoryType dstType,
enum hipMemoryType srcType) {
ins.srcXInBytes=0;
ins.srcY=0;
ins.srcPitch=spitch;
ins.dstXInBytes=0;
ins.dstY=0;
ins.dstPitch=dpitch;
ins.WidthInBytes=width;
ins.Height=height;
ins.dstMemoryType= dstType;
ins.srcMemoryType= srcType;
}
//---
// Test copies of a matrix numW by numH
@@ -65,7 +80,11 @@ void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost) {
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH);
HIPCHECK(hipMemcpy2D(A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy2D(B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice));
hip_Memcpy2D ins;
initMemCpyParam2D(ins,pitch_B,width,width,numH,hipMemoryTypeDevice,hipMemoryTypeHost);
ins.dstDevice = (hipDeviceptr_t)B_d;
ins.srcHost = B_h;
HIPCHECK(hipMemcpyParam2D(&ins));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d,
(pitch_C / sizeof(T)) * numH);
@@ -115,7 +134,11 @@ void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH);
HIPCHECK(hipMemcpyToArray(A_d, 0, 0, (void*)A_h, width, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpyToArray(B_d, 0, 0, (void*)B_h, width, hipMemcpyHostToDevice));
hip_Memcpy2D ins;
initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost);
ins.dstArray = B_d;
ins.srcHost = B_h;
HIPCHECK(hipMemcpyParam2D(&ins));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
(T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW);
@@ -152,15 +175,21 @@ void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch
} else {
HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, (void*)A_h, width, width, numH,
hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy2DToArray(B_d, 0, 0, (void*)B_h, width, width, numH,
hipMemcpyHostToDevice));
hip_Memcpy2D ins;
initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost);
ins.dstArray = B_d;
ins.srcHost = B_h;
HIPCHECK(hipMemcpyParam2D(&ins));
}
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
(T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW * numH);
HIPCHECK(hipMemcpy2D((void*)C_h, width, (void*)C_d->data, width, width, numH,
hipMemcpyDeviceToHost));
printf("memcpy srcArray to dstHost\n");
hip_Memcpy2D ins;
initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeHost,hipMemoryTypeArray);
ins.srcArray = C_d;
ins.dstHost = C_h;
HIPCHECK(hipMemcpyParam2D(&ins));
HIPCHECK(hipDeviceSynchronize());
HipTest::checkVectorADD(A_h, B_h, C_h, numW * numH);
@@ -227,8 +256,6 @@ int main(int argc, char* argv[]) {
printf("\n\n=== tests&1 (types)\n");
printSep();
HIPCHECK(hipDeviceReset());
size_t width = N / 6;
size_t height = N / 6;
memcpy2Dtest<float>(321, 211, 0);
memcpy2Dtest<double>(322, 211, 0);
memcpy2Dtest<char>(320, 211, 0);
+113 -3
Просмотреть файл
@@ -26,11 +26,11 @@ THE SOFTWARE.
* BUILD: %t %s ../../test_common.cpp
* TEST: %t
* //Small copy
* TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101
* TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101 --memsetD16val 0x10 --memsetD8val 0x1
* // Oddball size
* TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF
* TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF --memsetD16val 0xDEAD --memsetD8val 0xDE
* // Big copy
* TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE
* TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE --memsetD16val 0xCAFE --memsetD8val 0xCA
* HIT_END
*/
@@ -86,6 +86,54 @@ bool testhipMemsetD32(int memsetD32val,int p_gpuDevice)
return testResult;
}
bool testhipMemsetD16(short memsetD16val,int p_gpuDevice)
{
size_t Nbytes = N*sizeof(int);
printf ("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", N, memsetD16val, p_gpuDevice);
short *A_d;
short *A_h;
bool testResult = true;
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
A_h = (short*)malloc(Nbytes);
HIPCHECK ( hipMemsetD16((hipDeviceptr_t)A_d, memsetD16val, N) );
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
for (int i=0; i<N; i++) {
if (A_h[i] != memsetD16val) {
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD16val:%08x\n", i, A_h[i], memsetD32val);
break;
}
}
HIPCHECK(hipFree(A_d));
free(A_h);
return testResult;
}
bool testhipMemsetD8(char memsetD8val,int p_gpuDevice)
{
size_t Nbytes = N*sizeof(int);
printf ("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n", N, memsetD8val, p_gpuDevice);
char *A_d;
char *A_h;
bool testResult = true;
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
A_h = (char*)malloc(Nbytes);
HIPCHECK ( hipMemsetD8((hipDeviceptr_t)A_d, memsetD8val, N) );
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
for (int i=0; i<N; i++) {
if (A_h[i] != memsetD8val) {
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD8val:%08x\n", i, A_h[i], memsetD8val);
break;
}
}
HIPCHECK(hipFree(A_d));
free(A_h);
return testResult;
}
bool testhipMemsetAsync(int memsetval,int p_gpuDevice)
{
size_t Nbytes = N*sizeof(int);
@@ -144,6 +192,64 @@ bool testhipMemsetD32Async(int memsetD32val,int p_gpuDevice)
return testResult;
}
bool testhipMemsetD16Async(short memsetD16val,int p_gpuDevice)
{
size_t Nbytes = N*sizeof(int);
printf ("testhipMemsetD16Async N=%zu memsetval=%8x device=%d\n", N, memsetD16val, p_gpuDevice);
short *A_d;
short *A_h;
bool testResult = true;
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
A_h = (short*)malloc(Nbytes);
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
HIPCHECK ( hipMemsetD16Async((hipDeviceptr_t)A_d, memsetD16val, N, stream ));
HIPCHECK ( hipStreamSynchronize(stream));
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
for (int i=0; i<N; i++) {
if (A_h[i] != memsetD16val) {
testResult = false;
printf("mismatch at index:%d computed:%02x, memsetD16val:%02x\n", i, A_h[i], memsetD16val);
break;
}
}
HIPCHECK(hipFree((void*)A_d));
HIPCHECK(hipStreamDestroy(stream));
free(A_h);
return testResult;
}
bool testhipMemsetD8Async(char memsetD8val,int p_gpuDevice)
{
size_t Nbytes = N*sizeof(int);
printf ("testhipMemsetD8Async N=%zu memsetD8val=%2x device=%d\n", N, memsetD8val, p_gpuDevice);
char *A_d;
char *A_h;
bool testResult = true;
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
A_h = (char*)malloc(Nbytes);
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
HIPCHECK ( hipMemsetD8Async((hipDeviceptr_t)A_d, memsetD8val, N, stream ));
HIPCHECK ( hipStreamSynchronize(stream));
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
for (int i=0; i<N; i++) {
if (A_h[i] != memsetD8val) {
testResult = false;
printf("mismatch at index:%d computed:%02x, memsetD8val:%02x\n", i, A_h[i], memsetD8val);
break;
}
}
HIPCHECK(hipFree((void*)A_d));
HIPCHECK(hipStreamDestroy(stream));
free(A_h);
return testResult;
}
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
@@ -153,6 +259,10 @@ int main(int argc, char *argv[])
testResult &= testhipMemsetAsync(memsetval, p_gpuDevice);
testResult &= testhipMemsetD32(memsetD32val, p_gpuDevice);
testResult &= testhipMemsetD32Async(memsetD32val, p_gpuDevice);
testResult &= testhipMemsetD16(memsetD16val, p_gpuDevice);
testResult &= testhipMemsetD16Async(memsetD16val, p_gpuDevice);
testResult &= testhipMemsetD8(memsetD8val, p_gpuDevice);
testResult &= testhipMemsetD8Async(memsetD8val, p_gpuDevice);
if (testResult) passed();
failed("Output Mismatch\n");
}
+8 -5
Просмотреть файл
@@ -45,8 +45,7 @@ bool testhipMemset2D(int memsetval,int p_gpuDevice)
char *A_d;
char *A_h;
bool testResult = true;
HIPCHECK ( hipMallocPitch((void**)&A_d, &pitch_A, width , numH) );
HIPCHECK ( hipMemAllocPitch((hipDeviceptr_t*)&A_d, &pitch_A, width , numH,16) );
A_h = (char*)malloc(sizeElements);
HIPASSERT(A_h != NULL);
for (size_t i=0; i<elements; i++) {
@@ -109,11 +108,15 @@ bool testhipMemset2DAsync(int memsetval,int p_gpuDevice)
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
bool testResult = false;
HIPCHECK(hipSetDevice(p_gpuDevice));
hipCtx_t context;
hipCtxCreate(&context, 0, p_gpuDevice);
bool testResult = false;
testResult = testhipMemset2D(memsetval, p_gpuDevice);
testResult = testhipMemset2DAsync(memsetval, p_gpuDevice);
passed();
hipCtxDestroy(context);
if(testResult){
passed();
}
}
+14
Просмотреть файл
@@ -25,6 +25,8 @@ THE SOFTWARE.
size_t N = 4 * 1024 * 1024;
char memsetval = 0x42;
int memsetD32val = 0xDEADBEEF;
short memsetD16val = 0xDEAD;
char memsetD8val = 0xDE;
int iterations = 1;
unsigned blocksPerCU = 6; // to hide latency
unsigned threadsPerBlock = 256;
@@ -106,6 +108,18 @@ int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg) {
failed("Bad memsetD32val argument");
}
memsetD32val = ex;
} else if (!strcmp(arg, "--memsetD16val")) {
int ex;
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
failed("Bad memsetD16val argument");
}
memsetD16val = ex;
} else if (!strcmp(arg, "--memsetD8val")) {
int ex;
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
failed("Bad memsetD8val argument");
}
memsetD8val = ex;
} else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) {
if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) {
failed("Bad iterations argument");
+2
Просмотреть файл
@@ -109,6 +109,8 @@ THE SOFTWARE.
extern size_t N;
extern char memsetval;
extern int memsetD32val;
extern short memsetD16val;
extern char memsetD8val;
extern int iterations;
extern unsigned blocksPerCU;
extern unsigned threadsPerBlock;