Added new Memory API's (#1399)
Added new memory API's hipMemAllocPitch, hipMemAllocHost, hipMemsetD16, hipMemsetD16Async, hipMemsetD8Async
Modified to support all scenarios hipMemcpyParam2DAsync, hipMemcpyParam2D.
[ROCm/clr commit: 96890792b0]
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
e5a2ba9602
Коммит
efc64a4f8a
@@ -198,6 +198,8 @@ sub simpleSubstitutions {
|
||||
$ft{'memory'} += s/\bcuIpcOpenMemHandle\b/hipIpcOpenMemHandle/g;
|
||||
$ft{'memory'} += s/\bcuMemAlloc\b/hipMalloc/g;
|
||||
$ft{'memory'} += s/\bcuMemAllocManaged\b/hipMemAllocManaged/g;
|
||||
$ft{'memory'} += s/\bcuMemAllocPitch\b/hipMemAllocPitch/g;
|
||||
$ft{'memory'} += s/\bcuMemAllocPitch_v2\b/hipMemAllocPitch/g;
|
||||
$ft{'memory'} += s/\bcuMemAlloc_v2\b/hipMalloc/g;
|
||||
$ft{'memory'} += s/\bcuMemFree\b/hipFree/g;
|
||||
$ft{'memory'} += s/\bcuMemFreeHost\b/hipHostFree/g;
|
||||
@@ -207,6 +209,8 @@ sub simpleSubstitutions {
|
||||
$ft{'memory'} += s/\bcuMemGetInfo\b/hipMemGetInfo/g;
|
||||
$ft{'memory'} += s/\bcuMemGetInfo_v2\b/hipMemGetInfo/g;
|
||||
$ft{'memory'} += s/\bcuMemHostAlloc\b/hipHostMalloc/g;
|
||||
$ft{'memory'} += s/\bcuMemAllocHost\b/hipMemAllocHost/g;
|
||||
$ft{'memory'} += s/\bcuMemAllocHost_v2\b/hipMemAllocHost/g;
|
||||
$ft{'memory'} += s/\bcuMemHostGetDevicePointer\b/hipHostGetDevicePointer/g;
|
||||
$ft{'memory'} += s/\bcuMemHostGetDevicePointer_v2\b/hipHostGetDevicePointer/g;
|
||||
$ft{'memory'} += s/\bcuMemHostGetFlags\b/hipMemHostGetFlags/g;
|
||||
@@ -238,6 +242,10 @@ sub simpleSubstitutions {
|
||||
$ft{'memory'} += s/\bcuMemsetD32_v2\b/hipMemsetD32/g;
|
||||
$ft{'memory'} += s/\bcuMemsetD8\b/hipMemsetD8/g;
|
||||
$ft{'memory'} += s/\bcuMemsetD8_v2\b/hipMemsetD8/g;
|
||||
$ft{'memory'} += s/\bcuMemsetD8Async\b/hipMemsetD8Async/g;
|
||||
$ft{'memory'} += s/\bcuMemsetD16\b/hipMemsetD16/g;
|
||||
$ft{'memory'} += s/\bcuMemsetD16_v2\b/hipMemsetD16/g;
|
||||
$ft{'memory'} += s/\bcuMemsetD16Async\b/hipMemsetD16Async/g;
|
||||
$ft{'memory'} += s/\bcudaFree\b/hipFree/g;
|
||||
$ft{'memory'} += s/\bcudaFreeArray\b/hipFreeArray/g;
|
||||
$ft{'memory'} += s/\bcudaFreeHost\b/hipHostFree/g;
|
||||
|
||||
+5
-5
@@ -857,9 +857,9 @@
|
||||
| `cuIpcOpenEventHandle` | |
|
||||
| `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` |
|
||||
| `cuMemAlloc` | `hipMalloc` |
|
||||
| `cuMemAllocHost` | |
|
||||
| `cuMemAllocHost` | `hipMemAllocHost` |
|
||||
| `cuMemAllocManaged` | `hipMemAllocManaged` |
|
||||
| `cuMemAllocPitch` | |
|
||||
| `cuMemAllocPitch` | `hipMemAllocPitch` |
|
||||
| `cuMemcpy` | |
|
||||
| `cuMemcpy2D` | `hipMemcpyParam2D` |
|
||||
| `cuMemcpy2DAsync` | `hipMemcpyParam2DAsync` |
|
||||
@@ -893,8 +893,8 @@
|
||||
| `cuMemHostGetFlags` | `hipHostGetFlags` |
|
||||
| `cuMemHostRegister` | `hipHostRegister` |
|
||||
| `cuMemHostUnregister` | `hipHostUnregister` |
|
||||
| `cuMemsetD16` | |
|
||||
| `cuMemsetD16Async` | |
|
||||
| `cuMemsetD16` | `hipMemsetD16` |
|
||||
| `cuMemsetD16Async` | `hipMemsetD16Async` |
|
||||
| `cuMemsetD2D16` | |
|
||||
| `cuMemsetD2D16Async` | |
|
||||
| `cuMemsetD2D32` | |
|
||||
@@ -904,7 +904,7 @@
|
||||
| `cuMemsetD32` | `hipMemsetD32` |
|
||||
| `cuMemsetD32Async` | `hipMemsetD32Async` |
|
||||
| `cuMemsetD8` | `hipMemsetD8` |
|
||||
| `cuMemsetD8Async` | |
|
||||
| `cuMemsetD8Async` | `hipMemsetD8Async` |
|
||||
| `cuMipmappedArrayCreate` | |
|
||||
| `cuMipmappedArrayDestroy` | |
|
||||
| `cuMipmappedArrayGetLevel` | |
|
||||
|
||||
@@ -162,14 +162,14 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
|
||||
{"cuMemAlloc", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}},
|
||||
{"cuMemAlloc_v2", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}},
|
||||
// cudaHostAlloc
|
||||
{"cuMemAllocHost", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemAllocHost_v2", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemAllocHost", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER}},
|
||||
{"cuMemAllocHost_v2", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER}},
|
||||
// cudaMallocManaged
|
||||
{"cuMemAllocManaged", {"hipMemAllocManaged", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cudaMallocPitch due to different signatures
|
||||
{"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}},
|
||||
{"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cudaMemcpy due to different signatures
|
||||
{"cuMemcpy", {"hipMemcpy_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
@@ -271,10 +271,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
|
||||
// cudaHostUnregister
|
||||
{"cuMemHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
{"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}},
|
||||
{"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
{"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
{"cuMemsetD2D16", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemsetD2D16_v2", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
@@ -299,7 +299,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
|
||||
{"cuMemsetD8", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}},
|
||||
{"cuMemsetD8_v2", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
{"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cudaMallocMipmappedArray due to different signatures
|
||||
{"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
|
||||
@@ -1115,6 +1115,21 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
|
||||
DEPRECATED("use hipHostMalloc instead")
|
||||
hipError_t hipMallocHost(void** ptr, size_t size);
|
||||
|
||||
/**
|
||||
* @brief Allocate pinned host memory [Deprecated]
|
||||
*
|
||||
* @param[out] ptr Pointer to the allocated host pinned memory
|
||||
* @param[in] size Requested memory size
|
||||
*
|
||||
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorMemoryAllocation
|
||||
*
|
||||
* @deprecated use hipHostMalloc() instead
|
||||
*/
|
||||
DEPRECATED("use hipHostMalloc instead")
|
||||
hipError_t hipMemAllocHost(void** ptr, size_t size);
|
||||
|
||||
/**
|
||||
* @brief Allocate device accessible page locked host memory
|
||||
*
|
||||
@@ -1250,6 +1265,30 @@ hipError_t hipHostUnregister(void* hostPtr);
|
||||
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* Allocates at least width (in bytes) * height bytes of linear memory
|
||||
* Padding may occur to ensure alighnment requirements are met for the given row
|
||||
* The change in width size due to padding will be returned in *pitch.
|
||||
* Currently the alignment is set to 128 bytes
|
||||
*
|
||||
* @param[out] dptr Pointer to the allocated device memory
|
||||
* @param[out] pitch Pitch for allocation (in bytes)
|
||||
* @param[in] width Requested pitched allocation width (in bytes)
|
||||
* @param[in] height Requested pitched allocation height
|
||||
*
|
||||
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
|
||||
* The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array.
|
||||
* Given the row and column of an array element of type T, the address is computed as:
|
||||
* T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column;
|
||||
*
|
||||
* @return Error code
|
||||
*
|
||||
* @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D,
|
||||
* hipMalloc3DArray, hipHostMalloc
|
||||
*/
|
||||
|
||||
hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes);
|
||||
|
||||
/**
|
||||
* @brief Free memory allocated by the hcc hip memory allocation API.
|
||||
* This API performs an implicit hipDeviceSynchronize() call.
|
||||
@@ -1708,6 +1747,51 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes);
|
||||
*/
|
||||
hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes);
|
||||
|
||||
/**
|
||||
* @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant
|
||||
* byte value value.
|
||||
*
|
||||
* hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the
|
||||
* memset is complete. The operation can optionally be associated to a stream by passing a non-zero
|
||||
* stream argument. If stream is non-zero, the operation may overlap with operations in other
|
||||
* streams.
|
||||
*
|
||||
* @param[out] dst Data ptr to be filled
|
||||
* @param[in] constant value to be set
|
||||
* @param[in] sizeBytes Data size in bytes
|
||||
* @param[in] stream - Stream identifier
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
|
||||
*/
|
||||
hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes, hipStream_t stream __dparm(0));
|
||||
|
||||
/**
|
||||
* @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant
|
||||
* short value value.
|
||||
*
|
||||
* @param[out] dst Data ptr to be filled
|
||||
* @param[in] constant value to be set
|
||||
* @param[in] sizeBytes Data size in bytes
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
|
||||
*/
|
||||
hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes);
|
||||
|
||||
/**
|
||||
* @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant
|
||||
* short value value.
|
||||
*
|
||||
* hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the
|
||||
* memset is complete. The operation can optionally be associated to a stream by passing a non-zero
|
||||
* stream argument. If stream is non-zero, the operation may overlap with operations in other
|
||||
* streams.
|
||||
*
|
||||
* @param[out] dst Data ptr to be filled
|
||||
* @param[in] constant value to be set
|
||||
* @param[in] sizeBytes Data size in bytes
|
||||
* @param[in] stream - Stream identifier
|
||||
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized
|
||||
*/
|
||||
hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes, hipStream_t stream __dparm(0));
|
||||
|
||||
/**
|
||||
* @brief Fills the memory area pointed to by dest with the constant integer
|
||||
* value for specified number of times.
|
||||
|
||||
@@ -459,6 +459,10 @@ inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width,
|
||||
return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){
|
||||
return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
|
||||
return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
|
||||
}
|
||||
@@ -471,6 +475,12 @@ inline static hipError_t hipMallocHost(void** ptr, size_t size) {
|
||||
return hipCUDAErrorTohipError(cudaMallocHost(ptr, size));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemAllocHost(void** ptr, size_t size)
|
||||
__attribute__((deprecated("use hipHostMalloc instead")));
|
||||
inline static hipError_t hipMemAllocHost(void** ptr, size_t size) {
|
||||
return hipCUResultTohipError(cuMemAllocHost(ptr, size));
|
||||
}
|
||||
|
||||
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags)
|
||||
__attribute__((deprecated("use hipHostMalloc instead")));
|
||||
inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) {
|
||||
@@ -761,6 +771,20 @@ inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, s
|
||||
return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes,
|
||||
hipStream_t stream __dparm(0)) {
|
||||
return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) {
|
||||
return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes,
|
||||
hipStream_t stream __dparm(0)) {
|
||||
return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
|
||||
return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height));
|
||||
}
|
||||
|
||||
@@ -391,6 +391,8 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
|
||||
// Deprecated function:
|
||||
hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
|
||||
|
||||
// Deprecated function:
|
||||
hipError_t hipMemAllocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
|
||||
|
||||
// Deprecated function:
|
||||
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
@@ -465,6 +467,15 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes){
|
||||
HIP_INIT_SPECIAL_API(hipMemAllocPitch, (TRACE_MEM), dptr, pitch, widthInBytes, height,elementSizeBytes);
|
||||
HIP_SET_DEVICE();
|
||||
|
||||
if (widthInBytes == 0 || height == 0) return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
return ihipLogStatus(ihipMallocPitch(tls, dptr, pitch, widthInBytes, height, 0));
|
||||
}
|
||||
|
||||
hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
|
||||
HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent);
|
||||
HIP_SET_DEVICE();
|
||||
@@ -1786,28 +1797,65 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) {
|
||||
if (pCopy == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
void* dst; const void* src;
|
||||
size_t spitch = pCopy->srcPitch;
|
||||
size_t dpitch = pCopy->dstPitch;
|
||||
switch(pCopy->srcMemoryType){
|
||||
case hipMemoryTypeHost:
|
||||
src = pCopy->srcHost;
|
||||
break;
|
||||
case hipMemoryTypeArray:
|
||||
src = pCopy->srcArray->data;
|
||||
spitch = pCopy->WidthInBytes;
|
||||
break;
|
||||
case hipMemoryTypeUnified:
|
||||
case hipMemoryTypeDevice:
|
||||
src = pCopy->srcDevice;
|
||||
break;
|
||||
default:
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
switch(pCopy->dstMemoryType){
|
||||
case hipMemoryTypeHost:
|
||||
dst = pCopy->dstHost;
|
||||
break;
|
||||
case hipMemoryTypeArray:
|
||||
dst = pCopy->dstArray->data;
|
||||
dpitch = pCopy->WidthInBytes;
|
||||
break;
|
||||
case hipMemoryTypeUnified:
|
||||
case hipMemoryTypeDevice:
|
||||
dst = pCopy->dstDevice;
|
||||
break;
|
||||
default:
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
if(pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes || pCopy->srcY >= pCopy->Height){
|
||||
return hipErrorInvalidValue;
|
||||
} else if(pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes || pCopy->dstY >= pCopy->Height){
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
src = (void*)((char*)src+pCopy->srcY*pCopy->srcPitch + pCopy->srcXInBytes);
|
||||
dst = (void*)((char*)dst+pCopy->dstY*pCopy->dstPitch + pCopy->dstXInBytes);
|
||||
if(isAsync){
|
||||
return ihipMemcpy2DAsync(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream);
|
||||
} else{
|
||||
return ihipMemcpy2D(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
|
||||
HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), pCopy);
|
||||
hipError_t e = hipSuccess;
|
||||
if (pCopy == nullptr) {
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch,
|
||||
pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault);
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
return ihipLogStatus(ihipMemcpyParam2D(pCopy, hipStreamNull, false));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API(hipMemcpyParam2DAsync, (TRACE_MCMD), pCopy, stream);
|
||||
hipError_t e = hipSuccess;
|
||||
if (pCopy == nullptr) {
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
e = ihipMemcpy2DAsync(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch,
|
||||
pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream);
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true));
|
||||
}
|
||||
|
||||
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
|
||||
@@ -1903,6 +1951,42 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes , hipStream_t stream ) {
|
||||
HIP_INIT_SPECIAL_API(hipMemsetD8Async, (TRACE_MCMD), dst, value, sizeBytes, stream);
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
if (stream) {
|
||||
return ihipLogStatus(ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar));
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t sizeBytes){
|
||||
HIP_INIT_SPECIAL_API(hipMemsetD16, (TRACE_MCMD), dst, value, sizeBytes);
|
||||
hipError_t e = hipSuccess;
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
if (stream) {
|
||||
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeShort);
|
||||
if(hipSuccess == e)
|
||||
stream->locked_wait();
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t sizeBytes, hipStream_t stream ){
|
||||
HIP_INIT_SPECIAL_API(hipMemsetD16Async, (TRACE_MCMD), dst, value, sizeBytes, stream);
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
if (stream) {
|
||||
return ihipLogStatus(ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeShort));
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) {
|
||||
HIP_INIT_SPECIAL_API(hipMemsetD32, (TRACE_MCMD), dst, value, count);
|
||||
|
||||
|
||||
@@ -234,7 +234,7 @@ void checkFunctional() {
|
||||
|
||||
int main() {
|
||||
bool* result{nullptr};
|
||||
hipHostMalloc(&result, sizeof(result));
|
||||
hipMemAllocHost((void**)&result, sizeof(result));
|
||||
|
||||
result[0] = false;
|
||||
hipLaunchKernelGGL(
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -34,6 +34,21 @@ void printSep() {
|
||||
"======================================================================================\n");
|
||||
}
|
||||
|
||||
inline void initMemCpyParam2D(hip_Memcpy2D &ins, const size_t dpitch,
|
||||
const size_t spitch, const size_t width,
|
||||
const size_t height, hipMemoryType dstType,
|
||||
enum hipMemoryType srcType) {
|
||||
ins.srcXInBytes=0;
|
||||
ins.srcY=0;
|
||||
ins.srcPitch=spitch;
|
||||
ins.dstXInBytes=0;
|
||||
ins.dstY=0;
|
||||
ins.dstPitch=dpitch;
|
||||
ins.WidthInBytes=width;
|
||||
ins.Height=height;
|
||||
ins.dstMemoryType= dstType;
|
||||
ins.srcMemoryType= srcType;
|
||||
}
|
||||
|
||||
//---
|
||||
// Test copies of a matrix numW by numH
|
||||
@@ -65,7 +80,11 @@ void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost) {
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH);
|
||||
|
||||
HIPCHECK(hipMemcpy2D(A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy2D(B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice));
|
||||
hip_Memcpy2D ins;
|
||||
initMemCpyParam2D(ins,pitch_B,width,width,numH,hipMemoryTypeDevice,hipMemoryTypeHost);
|
||||
ins.dstDevice = (hipDeviceptr_t)B_d;
|
||||
ins.srcHost = B_h;
|
||||
HIPCHECK(hipMemcpyParam2D(&ins));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d,
|
||||
(pitch_C / sizeof(T)) * numH);
|
||||
@@ -115,7 +134,11 @@ void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH);
|
||||
|
||||
HIPCHECK(hipMemcpyToArray(A_d, 0, 0, (void*)A_h, width, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpyToArray(B_d, 0, 0, (void*)B_h, width, hipMemcpyHostToDevice));
|
||||
hip_Memcpy2D ins;
|
||||
initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost);
|
||||
ins.dstArray = B_d;
|
||||
ins.srcHost = B_h;
|
||||
HIPCHECK(hipMemcpyParam2D(&ins));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
(T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW);
|
||||
@@ -152,15 +175,21 @@ void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch
|
||||
} else {
|
||||
HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, (void*)A_h, width, width, numH,
|
||||
hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy2DToArray(B_d, 0, 0, (void*)B_h, width, width, numH,
|
||||
hipMemcpyHostToDevice));
|
||||
hip_Memcpy2D ins;
|
||||
initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost);
|
||||
ins.dstArray = B_d;
|
||||
ins.srcHost = B_h;
|
||||
HIPCHECK(hipMemcpyParam2D(&ins));
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
(T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW * numH);
|
||||
|
||||
HIPCHECK(hipMemcpy2D((void*)C_h, width, (void*)C_d->data, width, width, numH,
|
||||
hipMemcpyDeviceToHost));
|
||||
printf("memcpy srcArray to dstHost\n");
|
||||
hip_Memcpy2D ins;
|
||||
initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeHost,hipMemoryTypeArray);
|
||||
ins.srcArray = C_d;
|
||||
ins.dstHost = C_h;
|
||||
HIPCHECK(hipMemcpyParam2D(&ins));
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, numW * numH);
|
||||
@@ -227,8 +256,6 @@ int main(int argc, char* argv[]) {
|
||||
printf("\n\n=== tests&1 (types)\n");
|
||||
printSep();
|
||||
HIPCHECK(hipDeviceReset());
|
||||
size_t width = N / 6;
|
||||
size_t height = N / 6;
|
||||
memcpy2Dtest<float>(321, 211, 0);
|
||||
memcpy2Dtest<double>(322, 211, 0);
|
||||
memcpy2Dtest<char>(320, 211, 0);
|
||||
|
||||
@@ -26,11 +26,11 @@ THE SOFTWARE.
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* TEST: %t
|
||||
* //Small copy
|
||||
* TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101
|
||||
* TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101 --memsetD16val 0x10 --memsetD8val 0x1
|
||||
* // Oddball size
|
||||
* TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF
|
||||
* TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF --memsetD16val 0xDEAD --memsetD8val 0xDE
|
||||
* // Big copy
|
||||
* TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE
|
||||
* TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE --memsetD16val 0xCAFE --memsetD8val 0xCA
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -86,6 +86,54 @@ bool testhipMemsetD32(int memsetD32val,int p_gpuDevice)
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD16(short memsetD16val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", N, memsetD16val, p_gpuDevice);
|
||||
short *A_d;
|
||||
short *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
|
||||
A_h = (short*)malloc(Nbytes);
|
||||
HIPCHECK ( hipMemsetD16((hipDeviceptr_t)A_d, memsetD16val, N) );
|
||||
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD16val) {
|
||||
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD16val:%08x\n", i, A_h[i], memsetD32val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD8(char memsetD8val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n", N, memsetD8val, p_gpuDevice);
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc(&A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
HIPCHECK ( hipMemsetD8((hipDeviceptr_t)A_d, memsetD8val, N) );
|
||||
HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD8val) {
|
||||
testResult = false; printf("mismatch at index:%d computed:%08x, memsetD8val:%08x\n", i, A_h[i], memsetD8val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetAsync(int memsetval,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
@@ -144,6 +192,64 @@ bool testhipMemsetD32Async(int memsetD32val,int p_gpuDevice)
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD16Async(short memsetD16val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD16Async N=%zu memsetval=%8x device=%d\n", N, memsetD16val, p_gpuDevice);
|
||||
short *A_d;
|
||||
short *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
|
||||
A_h = (short*)malloc(Nbytes);
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
HIPCHECK ( hipMemsetD16Async((hipDeviceptr_t)A_d, memsetD16val, N, stream ));
|
||||
HIPCHECK ( hipStreamSynchronize(stream));
|
||||
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD16val) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetD16val:%02x\n", i, A_h[i], memsetD16val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree((void*)A_d));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
bool testhipMemsetD8Async(char memsetD8val,int p_gpuDevice)
|
||||
{
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
printf ("testhipMemsetD8Async N=%zu memsetD8val=%2x device=%d\n", N, memsetD8val, p_gpuDevice);
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
HIPCHECK ( hipMemsetD8Async((hipDeviceptr_t)A_d, memsetD8val, N, stream ));
|
||||
HIPCHECK ( hipStreamSynchronize(stream));
|
||||
HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i<N; i++) {
|
||||
if (A_h[i] != memsetD8val) {
|
||||
testResult = false;
|
||||
printf("mismatch at index:%d computed:%02x, memsetD8val:%02x\n", i, A_h[i], memsetD8val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
HIPCHECK(hipFree((void*)A_d));
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
return testResult;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
@@ -153,6 +259,10 @@ int main(int argc, char *argv[])
|
||||
testResult &= testhipMemsetAsync(memsetval, p_gpuDevice);
|
||||
testResult &= testhipMemsetD32(memsetD32val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD32Async(memsetD32val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD16(memsetD16val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD16Async(memsetD16val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD8(memsetD8val, p_gpuDevice);
|
||||
testResult &= testhipMemsetD8Async(memsetD8val, p_gpuDevice);
|
||||
if (testResult) passed();
|
||||
failed("Output Mismatch\n");
|
||||
}
|
||||
|
||||
@@ -45,8 +45,7 @@ bool testhipMemset2D(int memsetval,int p_gpuDevice)
|
||||
char *A_d;
|
||||
char *A_h;
|
||||
bool testResult = true;
|
||||
|
||||
HIPCHECK ( hipMallocPitch((void**)&A_d, &pitch_A, width , numH) );
|
||||
HIPCHECK ( hipMemAllocPitch((hipDeviceptr_t*)&A_d, &pitch_A, width , numH,16) );
|
||||
A_h = (char*)malloc(sizeElements);
|
||||
HIPASSERT(A_h != NULL);
|
||||
for (size_t i=0; i<elements; i++) {
|
||||
@@ -109,11 +108,15 @@ bool testhipMemset2DAsync(int memsetval,int p_gpuDevice)
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
bool testResult = false;
|
||||
HIPCHECK(hipSetDevice(p_gpuDevice));
|
||||
hipCtx_t context;
|
||||
hipCtxCreate(&context, 0, p_gpuDevice);
|
||||
|
||||
bool testResult = false;
|
||||
testResult = testhipMemset2D(memsetval, p_gpuDevice);
|
||||
testResult = testhipMemset2DAsync(memsetval, p_gpuDevice);
|
||||
passed();
|
||||
|
||||
hipCtxDestroy(context);
|
||||
if(testResult){
|
||||
passed();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,8 @@ THE SOFTWARE.
|
||||
size_t N = 4 * 1024 * 1024;
|
||||
char memsetval = 0x42;
|
||||
int memsetD32val = 0xDEADBEEF;
|
||||
short memsetD16val = 0xDEAD;
|
||||
char memsetD8val = 0xDE;
|
||||
int iterations = 1;
|
||||
unsigned blocksPerCU = 6; // to hide latency
|
||||
unsigned threadsPerBlock = 256;
|
||||
@@ -106,6 +108,18 @@ int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg) {
|
||||
failed("Bad memsetD32val argument");
|
||||
}
|
||||
memsetD32val = ex;
|
||||
} else if (!strcmp(arg, "--memsetD16val")) {
|
||||
int ex;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
|
||||
failed("Bad memsetD16val argument");
|
||||
}
|
||||
memsetD16val = ex;
|
||||
} else if (!strcmp(arg, "--memsetD8val")) {
|
||||
int ex;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
|
||||
failed("Bad memsetD8val argument");
|
||||
}
|
||||
memsetD8val = ex;
|
||||
} else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) {
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) {
|
||||
failed("Bad iterations argument");
|
||||
|
||||
@@ -109,6 +109,8 @@ THE SOFTWARE.
|
||||
extern size_t N;
|
||||
extern char memsetval;
|
||||
extern int memsetD32val;
|
||||
extern short memsetD16val;
|
||||
extern char memsetD8val;
|
||||
extern int iterations;
|
||||
extern unsigned blocksPerCU;
|
||||
extern unsigned threadsPerBlock;
|
||||
|
||||
Ссылка в новой задаче
Block a user