EXSWCPHIPT-118 - Added testing for hipMemset Synchronous behavoiour. (#2750)
[ROCm/hip-tests commit: 871c75e8f0]
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -287,19 +287,19 @@ struct Pinned {
|
||||
|
||||
//---
|
||||
struct Unpinned {
|
||||
static const bool isPinned = false;
|
||||
static const char* str() { return "Unpinned"; };
|
||||
static const bool isPinned = false;
|
||||
static const char* str() { return "Unpinned"; };
|
||||
|
||||
static void* Alloc(size_t sizeBytes) {
|
||||
void* p = malloc(sizeBytes);
|
||||
HIPASSERT(p);
|
||||
return p;
|
||||
};
|
||||
static void* Alloc(size_t sizeBytes) {
|
||||
void* p = malloc(sizeBytes);
|
||||
HIPASSERT(p);
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct Memcpy {
|
||||
static const char* str() { return "Memcpy"; };
|
||||
static const char* str() { return "Memcpy"; };
|
||||
};
|
||||
|
||||
struct MemcpyAsync {
|
||||
@@ -307,33 +307,104 @@ struct MemcpyAsync {
|
||||
};
|
||||
|
||||
|
||||
template <typename C>
|
||||
struct MemTraits;
|
||||
template <typename C> struct MemTraits;
|
||||
|
||||
|
||||
template <>
|
||||
struct MemTraits<Memcpy> {
|
||||
template <> struct MemTraits<Memcpy> {
|
||||
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hipStream_t stream) {
|
||||
hipStream_t stream) {
|
||||
(void)stream;
|
||||
HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
struct MemTraits<MemcpyAsync> {
|
||||
template <> struct MemTraits<MemcpyAsync> {
|
||||
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hipStream_t stream) {
|
||||
hipStream_t stream) {
|
||||
HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace HipTest
|
||||
|
||||
namespace {
|
||||
static __global__ void waitKernel(clock_t offset) {
|
||||
auto start = clock();
|
||||
while ((clock() - start) < offset) {
|
||||
}
|
||||
}
|
||||
|
||||
// helper function used to set the device frequency variable
|
||||
// estimates the number of clock ticks in 1 second
|
||||
static size_t findTicksPerSecond() {
|
||||
// first read the reported clockRate as a starting point
|
||||
hipDeviceProp_t prop;
|
||||
int device;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop, device));
|
||||
clock_t devFreq = static_cast<clock_t>(prop.clockRate); // in kHz
|
||||
clock_t clockTicksPerSecond = devFreq * 1000;
|
||||
|
||||
// init
|
||||
hipEvent_t start, stop;
|
||||
HIP_CHECK(hipEventCreate(&start));
|
||||
HIP_CHECK(hipEventCreate(&stop));
|
||||
|
||||
// Warmup
|
||||
hipLaunchKernelGGL(waitKernel, dim3(1), dim3(1), 0, 0, clockTicksPerSecond);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// try 10 times to find device frequency
|
||||
// after 10 attempts the result is likely good enough so just accept it
|
||||
for (int attempts = 10; attempts > 0; --attempts) {
|
||||
HIP_CHECK(hipEventRecord(start));
|
||||
hipLaunchKernelGGL(waitKernel, dim3(1), dim3(1), 0, 0, clockTicksPerSecond);
|
||||
HIP_CHECK(hipEventRecord(stop));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
|
||||
float executionTimeMs = 0;
|
||||
HIP_CHECK(hipEventElapsedTime(&executionTimeMs, start, stop));
|
||||
|
||||
constexpr float tolerance = 20;
|
||||
if (fabs(executionTimeMs - 1000) <= tolerance) {
|
||||
// Timing is within accepted tolerance, break here
|
||||
break;
|
||||
} else {
|
||||
clockTicksPerSecond = (clockTicksPerSecond * 1000) / executionTimeMs;
|
||||
--attempts;
|
||||
}
|
||||
}
|
||||
|
||||
// deinit
|
||||
HIP_CHECK(hipEventDestroy(start));
|
||||
HIP_CHECK(hipEventDestroy(stop));
|
||||
return clockTicksPerSecond;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Launches a kernel which runs for specified amount of time
|
||||
// Note: The current implementation uses HIP_CHECK which is not thread safe!
|
||||
// Note: the function assumes execution on a single device and caches the number of clock ticks per
|
||||
// second
|
||||
static inline void runKernelForDuration(std::chrono::milliseconds duration,
|
||||
hipStream_t stream = nullptr) {
|
||||
// number of clocks the device is running at (device frequency)
|
||||
// each translation unit will have a copy of ticksPerSecond but this function isn't designed for
|
||||
// precision so that's acceptable.
|
||||
static size_t ticksPerSecond = findTicksPerSecond();
|
||||
const auto millis = duration.count();
|
||||
hipLaunchKernelGGL(waitKernel, dim3(1), dim3(1), 0, stream, ticksPerSecond * millis / 1000);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
} // namespace HipTest
|
||||
|
||||
// This must be called in the beginning of image test app's main() to indicate whether image
|
||||
// is supported.
|
||||
#define CHECK_IMAGE_SUPPORT \
|
||||
if (!HipTest::isImageSupported()) \
|
||||
{ INFO("Texture is not support on the device. Skipped."); return; }
|
||||
#define CHECK_IMAGE_SUPPORT \
|
||||
if (!HipTest::isImageSupported()) { \
|
||||
INFO("Texture is not support on the device. Skipped."); \
|
||||
return; \
|
||||
}
|
||||
|
||||
@@ -92,6 +92,10 @@ set(TEST_SRC
|
||||
hipArray.cc
|
||||
hipMemVmm.cc
|
||||
hipMemGetInfo.cc
|
||||
hipFree.cc
|
||||
hipMemcpySync.cc
|
||||
hipMemsetSync.cc
|
||||
hipMemsetAsync.cc
|
||||
)
|
||||
else()
|
||||
set(TEST_SRC
|
||||
@@ -159,6 +163,10 @@ set(TEST_SRC
|
||||
hipDrvPtrGetAttributes.cc
|
||||
hipMemPrefetchAsync.cc
|
||||
hipMemGetInfo.cc
|
||||
hipFree.cc
|
||||
hipMemcpySync.cc
|
||||
hipMemsetSync.cc
|
||||
hipMemsetAsync.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -21,6 +21,8 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include <hip_test_context.hh>
|
||||
|
||||
|
||||
@@ -0,0 +1,407 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
namespace mem_utils {
|
||||
|
||||
enum class allocType { deviceMalloc, hostMalloc, hostRegisted, devRegistered };
|
||||
enum class memType { hipMem, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipMem2D, hipMem3D };
|
||||
|
||||
// helper struct containing vars needed for 2D and 3D mem Testing
|
||||
struct MultiDData {
|
||||
size_t width{}; // in elements not bytes
|
||||
// set to 0 for 1D
|
||||
size_t height{}; // in elements not bytes
|
||||
size_t getH() { return height == 0 ? 1 : height; }; // return 1 if height == 0 || height
|
||||
// set to 0 for 2D
|
||||
size_t depth{}; // in elements not bytes
|
||||
size_t getD() { return depth == 0 ? 1 : depth; }; // return 1 if depth == 0 || depth
|
||||
size_t pitch{}; // pitch = (width * sizeofData) + alignment
|
||||
size_t offset{}; // for simplicity use same offset for x,y and z dimentions of memory
|
||||
size_t getCount() { return width * getH() * getD(); }
|
||||
};
|
||||
|
||||
// set of helper functions to tidy the nested switch statements
|
||||
template <typename T>
|
||||
static inline std::pair<T*, T*> deviceMallocHelper(memType memType, size_t dataW, size_t dataH,
|
||||
size_t dataD, size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr{};
|
||||
switch (memType) {
|
||||
case memType::hipMem3D: {
|
||||
hipPitchedPtr pitchedAPtr;
|
||||
hipExtent extent = make_hipExtent(dataW * elementSize, dataH, dataD);
|
||||
|
||||
HIP_CHECK(hipMalloc3D(&pitchedAPtr, extent));
|
||||
aPtr = reinterpret_cast<T*>(pitchedAPtr.ptr);
|
||||
dataPitch = pitchedAPtr.pitch;
|
||||
break;
|
||||
}
|
||||
|
||||
case memType::hipMem2D:
|
||||
HIP_CHECK(
|
||||
hipMallocPitch(reinterpret_cast<void**>(&aPtr), &dataPitch, dataW * elementSize, dataH));
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
HIP_CHECK(hipMalloc(&aPtr, sizeInBytes));
|
||||
dataPitch = dataW * elementSize;
|
||||
break;
|
||||
}
|
||||
return {aPtr, nullptr};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline std::pair<T*, T*> hostMallocHelper(size_t dataW, size_t dataH, size_t dataD,
|
||||
size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr;
|
||||
|
||||
HIP_CHECK(hipHostMalloc(&aPtr, sizeInBytes));
|
||||
dataPitch = dataW * elementSize;
|
||||
|
||||
return {aPtr, nullptr};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline std::pair<T*, T*> hostRegisteredHelper(size_t dataW, size_t dataH, size_t dataD,
|
||||
size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr = new T[dataW * dataH * dataD];
|
||||
|
||||
HIP_CHECK(hipHostRegister(aPtr, sizeInBytes, hipHostRegisterDefault));
|
||||
|
||||
dataPitch = dataW * elementSize;
|
||||
return {aPtr, nullptr};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline std::pair<T*, T*> devRegisteredHelper(size_t dataW, size_t dataH, size_t dataD,
|
||||
size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr = new T[dataW * dataH * dataD];
|
||||
T* retPtr{};
|
||||
|
||||
HIP_CHECK(hipHostRegister(aPtr, sizeInBytes, hipHostRegisterDefault));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&retPtr), aPtr, 0));
|
||||
|
||||
dataPitch = dataW * elementSize;
|
||||
// keep the address of the host memory
|
||||
return {retPtr, aPtr};
|
||||
}
|
||||
|
||||
/*
|
||||
* helper function to allocate memory and set it to a value.
|
||||
* return a pair of pointers due to the device registered allocation case, we need to keep track of
|
||||
* the pointer to host memory to be able to unregister and free it
|
||||
*/
|
||||
template <typename T>
|
||||
static inline std::pair<T*, T*> initMemory(allocType type, memType memType, MultiDData& data) {
|
||||
std::pair<T*, T*> retPtr{};
|
||||
// check different types of allocation
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
retPtr = deviceMallocHelper<T>(memType, data.width, data.getH(), data.getD(), data.pitch);
|
||||
break;
|
||||
|
||||
case allocType::hostMalloc:
|
||||
retPtr = hostMallocHelper<T>(data.width, data.getH(), data.getD(), data.pitch);
|
||||
break;
|
||||
|
||||
case allocType::hostRegisted:
|
||||
retPtr = hostRegisteredHelper<T>(data.width, data.getH(), data.getD(), data.pitch);
|
||||
break;
|
||||
|
||||
case allocType::devRegistered:
|
||||
retPtr = devRegisteredHelper<T>(data.width, data.getH(), data.getD(), data.pitch);
|
||||
break;
|
||||
|
||||
default:
|
||||
REQUIRE(false);
|
||||
break;
|
||||
}
|
||||
return retPtr;
|
||||
}
|
||||
// create a hipMemcpy3DParams struct for the 3d version of memcpy to verify the memset operation
|
||||
template <typename T>
|
||||
hipMemcpy3DParms createParams(hipMemcpyKind kind, T* src, T* host_dst, size_t srcPitch,
|
||||
size_t dataW, size_t dataH, size_t dataD) {
|
||||
hipMemcpy3DParms p = {};
|
||||
p.kind = kind;
|
||||
|
||||
p.srcPtr.ptr = src;
|
||||
p.srcPtr.pitch = srcPitch;
|
||||
p.srcPtr.xsize = dataW;
|
||||
p.srcPtr.ysize = dataH;
|
||||
|
||||
p.dstPtr.ptr = host_dst;
|
||||
p.dstPtr.pitch = dataW * sizeof(T);
|
||||
p.dstPtr.xsize = dataW;
|
||||
p.dstPtr.ysize = dataH;
|
||||
|
||||
hipExtent extent = make_hipExtent(dataW * sizeof(T), dataH, dataD);
|
||||
p.extent = extent;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
// set of helper functions to tidy the nested switch statements
|
||||
template <typename T>
|
||||
static inline void deviceMallocCopy(memType memType, T* aPtr, T* hostMem, size_t dataW,
|
||||
size_t dataH, size_t dataD, size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
switch (memType) {
|
||||
case memType::hipMem3D: {
|
||||
hipMemcpy3DParms params =
|
||||
createParams(hipMemcpyDeviceToHost, aPtr, hostMem, dataPitch, dataW, dataH, dataD);
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
break;
|
||||
}
|
||||
|
||||
case memType::hipMem2D:
|
||||
HIP_CHECK(hipMemcpy2D(hostMem, dataW * elementSize, aPtr, dataPitch, dataW, dataH,
|
||||
hipMemcpyDeviceToHost));
|
||||
break;
|
||||
|
||||
default:
|
||||
HIP_CHECK(hipMemcpy(hostMem, aPtr, sizeInBytes, hipMemcpyDeviceToHost));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline void hostCopy(memType memType, T* aPtr, T* hostMem, size_t dataW, size_t dataH,
|
||||
size_t dataD, size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
switch (memType) {
|
||||
case memType::hipMem3D: {
|
||||
hipMemcpy3DParms params =
|
||||
createParams(hipMemcpyHostToHost, aPtr, hostMem, dataPitch, dataW, dataH, dataD);
|
||||
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
break;
|
||||
}
|
||||
|
||||
case memType::hipMem2D:
|
||||
HIP_CHECK(hipMemcpy2D(hostMem, dataW * elementSize, aPtr, dataPitch, dataW, dataH,
|
||||
hipMemcpyHostToHost));
|
||||
break;
|
||||
|
||||
default:
|
||||
HIP_CHECK(hipMemcpy(hostMem, aPtr, sizeInBytes, hipMemcpyHostToHost));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline void devRegisteredCopy(memType memType, T* aPtr, T* hostMem, size_t dataW,
|
||||
size_t dataH, size_t dataD, size_t& dataPitch) {
|
||||
constexpr size_t elementSize = sizeof(T);
|
||||
|
||||
switch (memType) {
|
||||
case memType::hipMem3D: {
|
||||
hipMemcpy3DParms params =
|
||||
createParams(hipMemcpyDeviceToHost, aPtr, hostMem, dataPitch, dataW, dataH, dataD);
|
||||
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
break;
|
||||
}
|
||||
|
||||
case memType::hipMem2D:
|
||||
HIP_CHECK(hipMemcpy2D(hostMem, dataW * elementSize, aPtr, dataPitch, dataW, dataH,
|
||||
hipMemcpyDeviceToHost));
|
||||
break;
|
||||
|
||||
default: {
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
HIP_CHECK(hipMemcpy(hostMem, aPtr, sizeInBytes, hipMemcpyDeviceToHost));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* function returns an offset location in memory based on the provided data, taking pitch into
|
||||
* account
|
||||
* (for 1D requires data.depth & data.height = 0, for 2D data.depth = 0)
|
||||
*/
|
||||
static inline size_t getPtrOffset(MultiDData data) {
|
||||
if (data.height == 0) { // 1D
|
||||
return data.offset;
|
||||
} else if (data.depth == 0) {
|
||||
return (data.offset + (data.pitch * data.offset));
|
||||
} else { // 2D or 3D
|
||||
return (data.offset + (data.pitch * data.offset) + (data.pitch * data.offset * data.height));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Function to allow reuse of functions for testing versions of the memset API, at a specified
|
||||
* offset
|
||||
*/
|
||||
template <typename T>
|
||||
static inline void memsetCheck(T* aPtr, size_t value, memType memType, MultiDData& data,
|
||||
hipStream_t stream = nullptr, bool async = true) {
|
||||
size_t count = data.getCount();
|
||||
size_t ptrOffset{};
|
||||
switch (memType) {
|
||||
case memType::hipMem:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetAsync(aPtr + data.offset, value, count * sizeof(T), stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemset(aPtr + data.offset, value, count * sizeof(T)));
|
||||
}
|
||||
break;
|
||||
|
||||
case memType::hipMemsetD8:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetD8Async(reinterpret_cast<hipDeviceptr_t>(aPtr + data.offset), value,
|
||||
count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemsetD8(reinterpret_cast<hipDeviceptr_t>(aPtr + data.offset), value, count));
|
||||
}
|
||||
break;
|
||||
|
||||
case memType::hipMemsetD16:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetD16Async(reinterpret_cast<hipDeviceptr_t>(aPtr + data.offset), value,
|
||||
count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemsetD16(reinterpret_cast<hipDeviceptr_t>(aPtr + data.offset), value, count));
|
||||
}
|
||||
break;
|
||||
|
||||
case memType::hipMemsetD32:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetD32Async(reinterpret_cast<hipDeviceptr_t>(aPtr + data.offset), value,
|
||||
count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemsetD32(reinterpret_cast<hipDeviceptr_t>(aPtr + data.offset), value, count));
|
||||
}
|
||||
break;
|
||||
|
||||
case memType::hipMem2D:
|
||||
ptrOffset = getPtrOffset(data);
|
||||
if (async) {
|
||||
HIP_CHECK(
|
||||
hipMemset2DAsync(aPtr + ptrOffset, data.pitch, value, data.width, data.height, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemset2D(aPtr + ptrOffset, data.pitch, value, data.width, data.height));
|
||||
}
|
||||
break;
|
||||
|
||||
case memType::hipMem3D: {
|
||||
ptrOffset = getPtrOffset(data);
|
||||
hipExtent extent = make_hipExtent(data.width * sizeof(T), data.height, data.depth);
|
||||
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemset3DAsync(
|
||||
make_hipPitchedPtr(aPtr + ptrOffset, data.pitch, data.width, data.height), value,
|
||||
extent, stream));
|
||||
} else {
|
||||
HIP_CHECK(
|
||||
hipMemset3D(make_hipPitchedPtr(aPtr + ptrOffset, data.pitch, data.width, data.height),
|
||||
value, extent));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
REQUIRE(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> static inline void freeStuff(T* aPtr, allocType type) {
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
hipFree(aPtr);
|
||||
break;
|
||||
case allocType::hostMalloc:
|
||||
hipHostFree(aPtr);
|
||||
break;
|
||||
default: // for host and device registered
|
||||
HIP_CHECK(hipHostUnregister(aPtr));
|
||||
delete[] aPtr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copies device data to host and checks that each element is equal to the
|
||||
* specified value
|
||||
*/
|
||||
template <typename T>
|
||||
static inline void verifyData(T* aPtr, size_t value, MultiDData& data, allocType type,
|
||||
memType memType) {
|
||||
std::unique_ptr<T[]> hostPtr = std::make_unique<T[]>(data.getCount());
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
deviceMallocCopy(memType, aPtr + getPtrOffset(data), hostPtr.get(), data.width, data.getH(),
|
||||
data.getD(), data.pitch);
|
||||
break;
|
||||
case allocType::devRegistered:
|
||||
devRegisteredCopy(memType, aPtr + getPtrOffset(data), hostPtr.get(), data.width, data.getH(),
|
||||
data.getD(), data.pitch);
|
||||
break;
|
||||
default: // host malloc and host registered
|
||||
hostCopy(memType, aPtr + getPtrOffset(data), hostPtr.get(), data.width, data.getH(),
|
||||
data.getD(), data.pitch);
|
||||
break;
|
||||
}
|
||||
|
||||
size_t idx;
|
||||
bool allMatch{true};
|
||||
for (size_t k = 0; k < data.getD(); k++) {
|
||||
for (size_t j = 0; j < data.getH(); j++) {
|
||||
for (size_t i = 0; i < data.width; i++) {
|
||||
idx = data.width * data.getH() * k + data.width * j + i;
|
||||
allMatch = allMatch && static_cast<size_t>(hostPtr.get()[idx]) == value;
|
||||
if (!allMatch) REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// function used to abstract the test
|
||||
template <typename T, typename F, typename... fArgs>
|
||||
static inline void doMemTest(F func, fArgs... funcArgs) {
|
||||
SECTION("Synchronous") { func(nullptr, false, funcArgs...); }
|
||||
SECTION("Asynchronous - null stream") { func(nullptr, true, funcArgs...); }
|
||||
SECTION("Asynchronous - created stream") {
|
||||
hipStream_t stream{};
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
func(stream, true, funcArgs...);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
}
|
||||
} // namespace mem_utils
|
||||
@@ -0,0 +1,421 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include "hipArrayCommon.hh"
|
||||
#include "DriverContext.hh"
|
||||
|
||||
/*
|
||||
* This testcase verifies [ hipFree || hipFreeArray || hipFreeType::ArrayDestroy ||
|
||||
* hipFreeType::HostFree with hipHostMalloc ]
|
||||
* 1. Check that hipFree implicitly synchronises the device.
|
||||
* 2. Perform multiple allocations and then call hipFree on each pointer concurrently (from unique
|
||||
* threads) for different memory types and different allocation sizes.
|
||||
* 3. Pass nullptr as argument and check that no operation is performed and hipSuccess is returned.
|
||||
* 4. Pass an invalid ptr and check that hipErrorInvalidValue is returned.
|
||||
* 5. Call hipFree twice on the same pointer and check that the implementation handles the second
|
||||
* call correctly.
|
||||
* 6. HipFreeType::HostFree only:
|
||||
* Try to free memory that has been registered with hipHostRegister and check that
|
||||
* hipErrorInvalidValue is returned.
|
||||
*/
|
||||
|
||||
|
||||
enum class FreeType { DevFree, ArrayFree, ArrayDestroy, HostFree };
|
||||
|
||||
// Amount of time kernel should wait
|
||||
using namespace std::chrono_literals;
|
||||
const std::chrono::duration<uint64_t, std::milli> delay = 50ms;
|
||||
constexpr size_t numAllocs = 10;
|
||||
|
||||
#if HT_AMD /* Disabled because frequency based wait is timing out on nvidia platforms */
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeImplicitSyncDev", "", char, float, float2, float4) {
|
||||
TestType* devPtr{};
|
||||
size_t size_mult = GENERATE(1, 32, 64, 128, 256);
|
||||
HIP_CHECK(hipMalloc(&devPtr, sizeof(TestType) * size_mult));
|
||||
|
||||
HipTest::runKernelForDuration(delay);
|
||||
// make sure device is busy
|
||||
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
|
||||
HIP_CHECK(hipFree(devPtr));
|
||||
HIP_CHECK(hipStreamQuery(nullptr));
|
||||
}
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeImplicitSyncHost", "", char, float, float2, float4) {
|
||||
TestType* hostPtr{};
|
||||
size_t size_mult = GENERATE(1, 32, 64, 128, 256);
|
||||
|
||||
HIP_CHECK(hipHostMalloc(&hostPtr, sizeof(TestType) * size_mult));
|
||||
|
||||
HipTest::runKernelForDuration(delay);
|
||||
// make sure device is busy
|
||||
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
|
||||
HIP_CHECK(hipHostFree(hostPtr));
|
||||
HIP_CHECK(hipStreamQuery(nullptr));
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Meaningless at the moment, since we are not running wait kernel on nvidia.
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeImplicitSyncArray", "", char, float, float2, float4) {
|
||||
using vec_info = vector_info<TestType>;
|
||||
DriverContext ctx;
|
||||
|
||||
|
||||
size_t width = GENERATE(32, 512, 1024);
|
||||
size_t height = GENERATE(32, 512, 1024);
|
||||
|
||||
SECTION("ArrayFree") {
|
||||
hipArray_t arrayPtr{};
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<TestType>();
|
||||
|
||||
HIP_CHECK(hipMallocArray(&arrayPtr, &desc, width, height, hipArrayDefault));
|
||||
HipTest::runKernelForDuration(delay);
|
||||
// make sure device is busy
|
||||
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
|
||||
HIP_CHECK(hipFreeArray(arrayPtr));
|
||||
HIP_CHECK(hipStreamQuery(nullptr));
|
||||
}
|
||||
SECTION("ArrayDestroy") {
|
||||
hiparray cuArrayPtr{};
|
||||
|
||||
HIP_ARRAY_DESCRIPTOR cuDesc;
|
||||
cuDesc.Width = width;
|
||||
cuDesc.Height = height;
|
||||
cuDesc.Format = vec_info::format;
|
||||
cuDesc.NumChannels = vec_info::size;
|
||||
HIP_CHECK(hipArrayCreate(&cuArrayPtr, &cuDesc));
|
||||
HipTest::runKernelForDuration(delay);
|
||||
// make sure device is busy
|
||||
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
|
||||
HIP_CHECK(hipArrayDestroy(cuArrayPtr));
|
||||
HIP_CHECK(hipStreamQuery(nullptr));
|
||||
}
|
||||
}
|
||||
#else // AMD
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeImplicitSyncArray", "", char, float, float2, float4) {
|
||||
hipArray_t arrayPtr{};
|
||||
hipExtent extent{};
|
||||
extent.width = GENERATE(32, 128, 256, 512, 1024);
|
||||
extent.height = GENERATE(0, 32, 128, 256, 512, 1024);
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<TestType>();
|
||||
|
||||
HIP_CHECK(hipMallocArray(&arrayPtr, &desc, extent.width, extent.height, hipArrayDefault));
|
||||
HipTest::runKernelForDuration(delay);
|
||||
// make sure device is busy
|
||||
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
|
||||
// Second free segfaults
|
||||
SECTION("ArrayDestroy") {
|
||||
HIP_CHECK(hipArrayDestroy(arrayPtr));
|
||||
HIP_CHECK(hipStreamQuery(nullptr));
|
||||
}
|
||||
SECTION("ArrayFree") {
|
||||
HIP_CHECK(hipFreeArray(arrayPtr));
|
||||
HIP_CHECK(hipStreamQuery(nullptr));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Freeing a invalid pointer with on device
|
||||
TEST_CASE("Unit_hipFreeNegativeDev") {
|
||||
SECTION("InvalidPtr") {
|
||||
char value;
|
||||
HIP_CHECK_ERROR(hipFree(&value), hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("NullPtr") { HIP_CHECK(hipFree(nullptr)); }
|
||||
}
|
||||
|
||||
// Freeing a invalid pointer with on host
|
||||
TEST_CASE("Unit_hipFreeNegativeHost") {
|
||||
SECTION("NullPtr") { HIP_CHECK(hipHostFree(nullptr)); }
|
||||
SECTION("InvalidPtr") {
|
||||
char hostPtr;
|
||||
HIP_CHECK_ERROR(hipHostFree(&hostPtr), hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("hipHostRegister") {
|
||||
char* hostPtr = new char;
|
||||
auto flag = GENERATE(hipHostRegisterDefault, hipHostRegisterPortable, hipHostRegisterMapped);
|
||||
HIP_CHECK(hipHostRegister((void*)hostPtr, sizeof(char), flag));
|
||||
HIP_CHECK_ERROR(hipHostFree(hostPtr), hipErrorInvalidValue);
|
||||
delete hostPtr;
|
||||
}
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
TEST_CASE("Unit_hipFreeNegativeArray") {
|
||||
DriverContext ctx;
|
||||
hipArray_t arrayPtr{};
|
||||
hiparray cuArrayPtr{};
|
||||
|
||||
SECTION("ArrayFree") { HIP_CHECK(hipFreeArray(nullptr)); }
|
||||
SECTION("ArrayDestroy") {
|
||||
HIP_CHECK_ERROR(hipArrayDestroy(nullptr), hipErrorInvalidResourceHandle);
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
// Freeing a invalid pointer with array
|
||||
TEST_CASE("Unit_hipFreeNegativeArray") {
|
||||
SECTION("ArrayFree") { HIP_CHECK_ERROR(hipFreeArray(nullptr), hipErrorInvalidValue); }
|
||||
SECTION("ArrayDestroy") { HIP_CHECK_ERROR(hipArrayDestroy(nullptr), hipErrorInvalidValue); }
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
TEST_CASE("Unit_hipFreeDoubleDevice") {
|
||||
size_t width = GENERATE(32, 512, 1024);
|
||||
char* ptr{};
|
||||
size_t size_mult = width;
|
||||
HIP_CHECK(hipMalloc(&ptr, sizeof(char) * size_mult));
|
||||
|
||||
HIP_CHECK(hipFree(ptr));
|
||||
HIP_CHECK_ERROR(hipFree(ptr), hipErrorInvalidValue);
|
||||
}
|
||||
TEST_CASE("Unit_hipFreeDoubleHost") {
|
||||
size_t width = GENERATE(32, 512, 1024);
|
||||
char* ptr{};
|
||||
size_t size_mult = width;
|
||||
|
||||
HIP_CHECK(hipHostMalloc(&ptr, sizeof(char) * size_mult));
|
||||
|
||||
HIP_CHECK(hipHostFree(ptr));
|
||||
HIP_CHECK_ERROR(hipHostFree(ptr), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
TEST_CASE("Unit_hipFreeDoubleArrayFree") {
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-120");
|
||||
return;
|
||||
|
||||
size_t width = GENERATE(32, 512, 1024);
|
||||
size_t height = GENERATE(0, 32, 512, 1024);
|
||||
hipArray_t arrayPtr{};
|
||||
hipExtent extent{};
|
||||
extent.width = width;
|
||||
extent.height = height;
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<char>();
|
||||
|
||||
HIP_CHECK(hipMallocArray(&arrayPtr, &desc, extent.width, extent.height, hipArrayDefault));
|
||||
|
||||
HIP_CHECK(hipFreeArray(arrayPtr));
|
||||
HIP_CHECK_ERROR(hipFreeArray(arrayPtr), hipErrorContextIsDestroyed);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipFreeDoubleArrayDestroy") {
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-120");
|
||||
return;
|
||||
using vec_info = vector_info<char>;
|
||||
|
||||
size_t width = GENERATE(32, 512, 1024);
|
||||
size_t height = GENERATE(0, 32, 512, 1024);
|
||||
DriverContext ctx{};
|
||||
|
||||
hiparray ArrayPtr{};
|
||||
HIP_ARRAY_DESCRIPTOR cuDesc;
|
||||
cuDesc.Width = width;
|
||||
cuDesc.Height = height;
|
||||
cuDesc.Format = vec_info::format;
|
||||
cuDesc.NumChannels = vec_info::size;
|
||||
HIP_CHECK(hipArrayCreate(&ArrayPtr, &cuDesc));
|
||||
HIP_CHECK(hipArrayDestroy(ArrayPtr));
|
||||
HIP_CHECK_ERROR(hipArrayDestroy(ArrayPtr), hipErrorContextIsDestroyed);
|
||||
}
|
||||
|
||||
#else // AMD
|
||||
|
||||
TEST_CASE("Unit_hipFreeDoubleArray") {
|
||||
size_t width = GENERATE(32, 512, 1024);
|
||||
size_t height = GENERATE(0, 32, 512, 1024);
|
||||
hipArray_t arrayPtr{};
|
||||
hipExtent extent{};
|
||||
extent.width = width;
|
||||
extent.height = height;
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<char>();
|
||||
|
||||
HIP_CHECK(hipMallocArray(&arrayPtr, &desc, extent.width, extent.height, hipArrayDefault));
|
||||
|
||||
SECTION("ArrayFree") {
|
||||
HIP_CHECK(hipFreeArray(arrayPtr));
|
||||
HIP_CHECK_ERROR(hipFreeArray(arrayPtr), hipErrorContextIsDestroyed);
|
||||
}
|
||||
SECTION("ArrayDestroy") {
|
||||
HIP_CHECK(hipArrayDestroy(arrayPtr));
|
||||
HIP_CHECK_ERROR(hipArrayDestroy(arrayPtr), hipErrorContextIsDestroyed);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeMultiTDev", "", char, int, float2, float4) {
|
||||
std::vector<TestType*> ptrs(numAllocs);
|
||||
size_t allocSize = sizeof(TestType) * GENERATE(1, 32, 64, 128);
|
||||
|
||||
for (auto& ptr : ptrs) {
|
||||
HIP_CHECK(hipMalloc(&ptr, allocSize));
|
||||
}
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
for (auto ptr : ptrs) {
|
||||
threads.emplace_back(([ptr] {
|
||||
HIP_CHECK_THREAD(hipFree(ptr));
|
||||
HIP_CHECK_THREAD(hipStreamQuery(nullptr));
|
||||
}));
|
||||
}
|
||||
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
}
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeMultiTHost", "", char, int, float2, float4) {
|
||||
std::vector<TestType*> ptrs(numAllocs);
|
||||
size_t allocSize = sizeof(TestType) * GENERATE(1, 32, 64, 128);
|
||||
|
||||
for (auto& ptr : ptrs) {
|
||||
HIP_CHECK(hipHostMalloc(&ptr, allocSize));
|
||||
}
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
for (auto ptr : ptrs) {
|
||||
threads.emplace_back(([ptr] {
|
||||
HIP_CHECK_THREAD(hipHostFree(ptr));
|
||||
HIP_CHECK_THREAD(hipStreamQuery(nullptr));
|
||||
}));
|
||||
}
|
||||
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeMultiTArray", "", char, int, float2, float4) {
|
||||
using vec_info = vector_info<TestType>;
|
||||
|
||||
size_t width = GENERATE(32, 128, 256, 512, 1024);
|
||||
size_t height = GENERATE(32, 128, 256, 512, 1024);
|
||||
DriverContext ctx;
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
|
||||
SECTION("ArrayDestroy") {
|
||||
std::vector<hiparray> ptrs(numAllocs);
|
||||
HIP_ARRAY_DESCRIPTOR cuDesc;
|
||||
cuDesc.Width = width;
|
||||
cuDesc.Height = height;
|
||||
cuDesc.Format = vec_info::format;
|
||||
cuDesc.NumChannels = vec_info::size;
|
||||
for (auto& ptr : ptrs) {
|
||||
HIP_CHECK(hipArrayCreate(&ptr, &cuDesc));
|
||||
}
|
||||
|
||||
|
||||
for (auto& ptr : ptrs) {
|
||||
threads.emplace_back(([ptr] {
|
||||
HIP_CHECK_THREAD(hipArrayDestroy(ptr));
|
||||
HIP_CHECK_THREAD(hipStreamQuery(nullptr));
|
||||
}));
|
||||
}
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
}
|
||||
|
||||
SECTION("ArrayFree") {
|
||||
std::vector<hipArray_t> ptrs(numAllocs);
|
||||
hipExtent extent{};
|
||||
extent.width = width;
|
||||
extent.height = height;
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<TestType>();
|
||||
|
||||
for (auto& ptr : ptrs) {
|
||||
HIP_CHECK(hipMallocArray(&ptr, &desc, extent.width, extent.height, hipArrayDefault));
|
||||
}
|
||||
|
||||
for (auto ptr : ptrs) {
|
||||
SECTION("ArrayFree") {
|
||||
threads.emplace_back(([ptr] {
|
||||
HIP_CHECK_THREAD(hipFreeArray(ptr));
|
||||
HIP_CHECK_THREAD(hipStreamQuery(nullptr));
|
||||
}));
|
||||
}
|
||||
}
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipFreeMultiTArray", "", char, int, float2, float4) {
|
||||
using vec_info = vector_info<TestType>;
|
||||
|
||||
hipExtent extent{};
|
||||
extent.width = GENERATE(32, 128, 256, 512, 1024);
|
||||
extent.height = GENERATE(0, 32, 128, 256, 512, 1024);
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<TestType>();
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
SECTION("ArrayFree") {
|
||||
std::vector<hipArray_t> ptrs(numAllocs);
|
||||
for (auto& ptr : ptrs) {
|
||||
HIP_CHECK(hipMallocArray(&ptr, &desc, extent.width, extent.height, hipArrayDefault));
|
||||
threads.emplace_back([ptr] {
|
||||
HIP_CHECK_THREAD(hipFreeArray(ptr));
|
||||
HIP_CHECK_THREAD(hipStreamQuery(nullptr));
|
||||
});
|
||||
}
|
||||
}
|
||||
SECTION("ArrayDestroy") {
|
||||
std::vector<hiparray> cuArrayPtrs(numAllocs);
|
||||
|
||||
HIP_ARRAY_DESCRIPTOR cuDesc;
|
||||
cuDesc.Width = extent.width;
|
||||
cuDesc.Height = extent.height;
|
||||
cuDesc.Format = vec_info::format;
|
||||
cuDesc.NumChannels = vec_info::size;
|
||||
for (auto ptr : cuArrayPtrs) {
|
||||
HIP_CHECK(hipArrayCreate(&ptr, &cuDesc));
|
||||
|
||||
threads.emplace_back([ptr] {
|
||||
HIP_CHECK_THREAD(hipArrayDestroy(ptr));
|
||||
HIP_CHECK_THREAD(hipStreamQuery(nullptr));
|
||||
});
|
||||
}
|
||||
}
|
||||
for (auto& t : threads) {
|
||||
t.join();
|
||||
}
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,16 +1,13 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -28,12 +25,13 @@ THE SOFTWARE.
|
||||
/*
|
||||
* This testcase verifies hipMemGetInfo API
|
||||
* 1. Different memory chunk allocation
|
||||
* 1.1. hipMalloc
|
||||
* 1.1. hipMalloc - smallest memory chunck that can be allocated is 1024
|
||||
* 1.2. hipMallocArray
|
||||
* 1.3. hipMalloc3D
|
||||
* 1.3. hipMalloc3DArray
|
||||
* 2. Allocation using different threads
|
||||
* 3. Negative: Invalid args
|
||||
*
|
||||
*/
|
||||
|
||||
struct MinAlloc {
|
||||
@@ -71,9 +69,9 @@ struct MinAlloc {
|
||||
// if the memory being allocated is not divisible by the minimum allocation add an extra minimum
|
||||
// allocation AddedAllocation = InitialAllocation + (MinAllocation - divisionRemainer)
|
||||
void fixAllocSize(size_t& allocation) {
|
||||
REQUIRE(MinAlloc::Get() != 0);
|
||||
REQUIRE(MinAlloc::Get() >= 0);
|
||||
if (allocation % MinAlloc::Get() != 0) {
|
||||
auto adjustment = allocation % MinAlloc::Get();
|
||||
auto adjustment = allocation % MinAlloc::Get(); // FIXME This does mod by zero
|
||||
adjustment = MinAlloc::Get() - adjustment;
|
||||
allocation = allocation + adjustment;
|
||||
}
|
||||
@@ -88,6 +86,48 @@ void fixAllocSize(size_t& allocation) {
|
||||
<< "Memory assumed to be used: \t\t" << usedMem);
|
||||
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_DifferentMallocSmall") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
|
||||
unsigned int* A_mem{nullptr};
|
||||
size_t freeMemRet;
|
||||
size_t totalMemRet;
|
||||
// allocate smaller chunk than minimum
|
||||
size_t Malloc1Size = 2;
|
||||
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size));
|
||||
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, Malloc1Size);
|
||||
|
||||
auto assumedFreeMem = freeMemInit - Malloc1Size;
|
||||
// Free memory should be less than assumed for
|
||||
// single allocation smaller than min allocation chunk
|
||||
REQUIRE(freeMemRet < assumedFreeMem);
|
||||
// confirms that allocated memory is at least equal to smallest allocation
|
||||
assumedFreeMem = freeMemInit - MinAlloc::Get();
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
|
||||
// allocate smallest chunk of memory
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_mem), MinAlloc::Get()));
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, MinAlloc::Get());
|
||||
|
||||
assumedFreeMem = freeMemInit - MinAlloc::Get();
|
||||
// confirms that allocated memory is at least equal to smallest allocation
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
}
|
||||
|
||||
#if 0 // FIXME_jatinx Disabled for now because the formula to calulcate memget info is incorrect
|
||||
// To be enabled after correct formula is found.
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_DifferentMallocLarge") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
@@ -109,10 +149,12 @@ TEST_CASE("Unit_hipMemGetInfo_DifferentMallocLarge") {
|
||||
auto Malloc1Size = freeMemInit >> 1;
|
||||
// if the allocation is not divisible by the MinAllocation
|
||||
// take into account and add padding
|
||||
fixAllocSize(Malloc1Size);
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size));
|
||||
|
||||
// allocate an extra quarter of free mem
|
||||
auto Malloc2Size = Malloc1Size >> 1;
|
||||
fixAllocSize(Malloc2Size);
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_mem), Malloc2Size));
|
||||
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
@@ -129,29 +171,6 @@ TEST_CASE("Unit_hipMemGetInfo_DifferentMallocLarge") {
|
||||
HIP_CHECK(hipFree(B_mem));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_DifferentMallocSmall") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
|
||||
unsigned int* A_mem{nullptr};
|
||||
size_t freeMemRet;
|
||||
size_t totalMemRet;
|
||||
// allocate smaller chunk than minimum
|
||||
size_t Malloc1Size = 1;
|
||||
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size));
|
||||
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, Malloc1Size);
|
||||
|
||||
auto assumedFreeMem = freeMemInit - Malloc1Size;
|
||||
// Free memory should be less than assumed for
|
||||
// single allocation smaller than min allocation chunk
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_DifferentMallocMultiSmall") {
|
||||
size_t freeMemInit;
|
||||
@@ -175,13 +194,52 @@ TEST_CASE("Unit_hipMemGetInfo_DifferentMallocMultiSmall") {
|
||||
|
||||
|
||||
auto assumedFreeMem = freeMemInit - (MallocSize * 2);
|
||||
// freeMemRet should be FreeMem - (1 * MinAlloc)
|
||||
// instead of FreeMem - (MinAlloc * 2)
|
||||
// since MinAlloc > MallocSize*2
|
||||
REQUIRE(freeMemRet < assumedFreeMem);
|
||||
fixAllocSize(MallocSize);
|
||||
assumedFreeMem = freeMemInit - (MallocSize * 2);
|
||||
// Ensure memory allocated is less than 2 * minimum allocation
|
||||
REQUIRE(freeMemRet > assumedFreeMem);
|
||||
|
||||
// Confirm mem alocation results
|
||||
// confirms that allocated memory is at least equal to Min Allocation
|
||||
assumedFreeMem = freeMemInit - MinAlloc::Get();
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
HIP_CHECK(hipFree(B_mem));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_DifferentMallocNotDiv") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
|
||||
unsigned int* A_mem{nullptr};
|
||||
size_t freeMemRet;
|
||||
size_t totalMemRet;
|
||||
// Allocate memory that is just a bit larger than the min allocation
|
||||
// Expected behaviour is to allocate 2x min allocation size
|
||||
size_t MallocSize = MinAlloc::Get() + 1;
|
||||
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_mem), MallocSize));
|
||||
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, MallocSize);
|
||||
|
||||
|
||||
auto freeMemExpected = freeMemInit - MallocSize;
|
||||
// Free Memory after allocation should be less than
|
||||
// expected free memory
|
||||
REQUIRE(freeMemRet < freeMemExpected);
|
||||
// confirms that allocated memory is at least 2 x Min Allocaton
|
||||
fixAllocSize(MallocSize);
|
||||
freeMemExpected = freeMemInit - MallocSize;
|
||||
REQUIRE(freeMemRet <= freeMemExpected);
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
}
|
||||
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemGetInfo_MallocArray", "", int, int4, char) {
|
||||
// get initial mem data
|
||||
size_t freeMemInit;
|
||||
@@ -209,6 +267,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemGetInfo_MallocArray", "", int, int4, char) {
|
||||
size_t usedMem = bytesPerItem * extent.width * (extent.height != 0 ? extent.height : 1);
|
||||
|
||||
// ensure we allocate at least the min allocation for the array
|
||||
fixAllocSize(usedMem);
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, usedMem);
|
||||
|
||||
size_t assumedFreeMem = freeMemInit - usedMem;
|
||||
@@ -227,9 +286,9 @@ TEST_CASE("Unit_hipMemGetInfo_Malloc3D") {
|
||||
// Allocate 3D object
|
||||
hipExtent extent{};
|
||||
// extent is given in bytes for with
|
||||
extent.width = GENERATE(32, 128, 256);
|
||||
extent.height = GENERATE(32, 128, 256);
|
||||
extent.depth = GENERATE(32, 128, 256);
|
||||
extent.width = GENERATE(32, 128, 256, 512);
|
||||
extent.height = GENERATE(32, 128, 256, 512);
|
||||
extent.depth = GENERATE(32, 128, 256, 512);
|
||||
hipPitchedPtr A_mem{};
|
||||
HIP_CHECK(hipMalloc3D(&A_mem, extent));
|
||||
|
||||
@@ -240,6 +299,7 @@ TEST_CASE("Unit_hipMemGetInfo_Malloc3D") {
|
||||
|
||||
// Verify result
|
||||
size_t mallocSize = A_mem.pitch * extent.height * extent.depth;
|
||||
fixAllocSize(mallocSize);
|
||||
|
||||
size_t assumedFreeMem = freeMemInit - mallocSize;
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, mallocSize);
|
||||
@@ -313,6 +373,9 @@ TEMPLATE_TEST_CASE("Unit_hipMemGetInfo_Malloc3DArray", "", char, int, int4) {
|
||||
REQUIRE(mallocSize <= static_cast<size_t>(MinAlloc::Get()));
|
||||
|
||||
} else {
|
||||
// account for min allocation
|
||||
fixAllocSize(mallocSize);
|
||||
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, mallocSize);
|
||||
size_t assumedFreeMem = freeMemInit - mallocSize;
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
@@ -332,13 +395,15 @@ TEST_CASE("Unit_hipMemGetInfo_ParaLarge") {
|
||||
auto Malloc1Size = freeMemInit >> 1;
|
||||
// if the allocation is not divisible by the MinAllocation
|
||||
// take into account and add padding
|
||||
fixAllocSize(Malloc1Size);
|
||||
std::thread t1(
|
||||
[&] { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size)); });
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size)); });
|
||||
|
||||
// allocate an extra quarter of free mem
|
||||
auto Malloc2Size = Malloc1Size >> 1;
|
||||
fixAllocSize(Malloc2Size);
|
||||
std::thread t2(
|
||||
[&] { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&B_mem), Malloc2Size)); });
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&B_mem), Malloc2Size)); });
|
||||
|
||||
t1.join();
|
||||
t2.join();
|
||||
@@ -356,16 +421,18 @@ TEST_CASE("Unit_hipMemGetInfo_ParaLarge") {
|
||||
HIP_CHECK(hipFree(B_mem));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_ParaSmall") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
unsigned int* A_mem{nullptr};
|
||||
// allocate smaller chunk than minimum
|
||||
size_t Malloc1Size = 1;
|
||||
size_t Malloc1Size = 2;
|
||||
|
||||
std::thread t1(
|
||||
[&] { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size)) });
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size)) });
|
||||
t1.join();
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
size_t freeMemRet;
|
||||
@@ -377,13 +444,101 @@ TEST_CASE("Unit_hipMemGetInfo_ParaSmall") {
|
||||
auto assumedFreeMem = freeMemInit - Malloc1Size;
|
||||
// Free memory should be less than assumed for
|
||||
// single allocation smaller than min allocation chunk
|
||||
REQUIRE(freeMemRet < assumedFreeMem);
|
||||
// confirms that allocated memory is at least equal to smallest allocation allowed
|
||||
assumedFreeMem = freeMemInit - MinAlloc::Get();
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
|
||||
// allocate smallest chunck of memory
|
||||
std::thread t2(
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), MinAlloc::Get())); });
|
||||
t2.join();
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, MinAlloc::Get());
|
||||
|
||||
assumedFreeMem = freeMemInit - MinAlloc::Get();
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_ParaNonDiv") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
unsigned int* A_mem{nullptr};
|
||||
|
||||
// Allocate memory that is just 1 byte larger than the min allocation
|
||||
// Expected behaviour is to allocate 2x min allocation size
|
||||
size_t Malloc1Size = MinAlloc::Get() + 1;
|
||||
|
||||
std::thread t1(
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), Malloc1Size)); });
|
||||
t1.join();
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
|
||||
size_t freeMemRet;
|
||||
size_t totalMemRet;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, Malloc1Size);
|
||||
|
||||
|
||||
auto allocSize = freeMemInit - Malloc1Size;
|
||||
// should not be equal
|
||||
REQUIRE(freeMemRet != allocSize);
|
||||
// confirms that allocated memory is equal to 2 x Min Allocaton
|
||||
allocSize = MinAlloc::Get() * 2;
|
||||
auto assumedAllocSize = freeMemInit - allocSize;
|
||||
REQUIRE(freeMemRet <= assumedAllocSize);
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_ParaMultiSmall") {
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
unsigned int* A_mem{nullptr};
|
||||
unsigned int* B_mem{nullptr};
|
||||
|
||||
// Allocate memory that is a quarter of the min allocation
|
||||
// Expected behaviour is to reuse the min allocation memory
|
||||
size_t MallocSize = MinAlloc::Get() >> 2;
|
||||
|
||||
std::thread t1(
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&A_mem), MallocSize)); });
|
||||
std::thread t2(
|
||||
[&]() { HIP_CHECK_THREAD(hipMalloc(reinterpret_cast<void**>(&B_mem), MallocSize)); });
|
||||
|
||||
t1.join();
|
||||
t2.join();
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
|
||||
size_t freeMemRet;
|
||||
size_t totalMemRet;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, &totalMemRet));
|
||||
MEMINFO(totalMemRet, freeMemInit, freeMemRet, MallocSize * 2);
|
||||
|
||||
auto assumedFreeMem = freeMemInit - MallocSize * 2;
|
||||
// freeMemRet should be less than assumedFreeMem
|
||||
REQUIRE(freeMemRet < assumedFreeMem);
|
||||
// confirms that allocated memory is equal to Min Allocation
|
||||
assumedFreeMem = freeMemInit - MinAlloc::Get();
|
||||
REQUIRE(freeMemRet <= assumedFreeMem);
|
||||
HIP_CHECK(hipFree(A_mem));
|
||||
HIP_CHECK(hipFree(B_mem));
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("Unit_hipMemGetInfo_Negative") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST(" EXSWCPHIPT-61");
|
||||
return;
|
||||
#endif
|
||||
size_t freeMemInit;
|
||||
size_t totalMemInit;
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemInit, &totalMemInit));
|
||||
@@ -414,10 +569,6 @@ TEST_CASE("Unit_hipMemGetInfo_Negative") {
|
||||
HIP_CHECK(hipMemGetInfo(&freeMemRet, totalMemRet));
|
||||
}
|
||||
SECTION("Nullptr as both params passed to hipMemGetInfo") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-135");
|
||||
return;
|
||||
#endif
|
||||
size_t* freeMemRet = nullptr;
|
||||
size_t* totalMemRet = nullptr;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_mem), MallocSize));
|
||||
|
||||
@@ -0,0 +1,227 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of intge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "MemUtils.hh"
|
||||
|
||||
/*
|
||||
* These testcases verify that synchronization behaviour for memcpy functions with respect to
|
||||
* the host.
|
||||
*/
|
||||
|
||||
using namespace mem_utils;
|
||||
|
||||
// value used for memset operations
|
||||
constexpr int testValue = 0x11;
|
||||
|
||||
|
||||
/*
|
||||
* Set of helper functions handling the different cases for memcpy
|
||||
*/
|
||||
|
||||
static inline hipMemcpyKind getMemcpyType(allocType type, bool fromHost) {
|
||||
if (fromHost) {
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
return hipMemcpyHostToDevice;
|
||||
break;
|
||||
case allocType::devRegistered:
|
||||
return hipMemcpyHostToDevice;
|
||||
break;
|
||||
default: // host
|
||||
return hipMemcpyHostToHost;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
return hipMemcpyDeviceToDevice;
|
||||
break;
|
||||
case allocType::devRegistered:
|
||||
return hipMemcpyDeviceToDevice;
|
||||
break;
|
||||
default: // host
|
||||
return hipMemcpyDeviceToHost;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void memcpyCheck(allocType type, memType memType, char* aPtr, MultiDData& data,
|
||||
char* fillerData, bool async, hipStream_t stream, bool fromHost) {
|
||||
auto cpyType = getMemcpyType(type, fromHost);
|
||||
auto sizeInBytes = data.pitch * data.getH() * data.getD() * sizeof(char);
|
||||
switch (memType) {
|
||||
case memType::hipMem:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemcpyAsync(aPtr + data.offset, fillerData, sizeInBytes, cpyType, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpy(aPtr + data.offset, fillerData, sizeInBytes, cpyType));
|
||||
}
|
||||
break;
|
||||
case memType::hipMem2D:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemcpy2DAsync(aPtr + data.offset, data.pitch, fillerData, sizeInBytes,
|
||||
data.width, data.getH(), cpyType, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpy2D(aPtr + data.offset, data.pitch, fillerData, sizeInBytes, data.width,
|
||||
data.getH(), cpyType));
|
||||
}
|
||||
break;
|
||||
case memType::hipMem3D: {
|
||||
hipMemcpy3DParms params{};
|
||||
params.kind = cpyType;
|
||||
params.srcPos = make_hipPos(0, 0, 0);
|
||||
params.dstPos = make_hipPos(data.offset, data.offset, data.offset);
|
||||
params.srcPtr = make_hipPitchedPtr(fillerData, data.width, data.width, data.getH());
|
||||
params.dstPtr = make_hipPitchedPtr(aPtr, data.pitch, data.width, data.getH());
|
||||
hipExtent extent;
|
||||
extent.width = data.width * sizeof(char);
|
||||
extent.height = data.getH();
|
||||
extent.depth = data.getD();
|
||||
|
||||
params.extent = extent;
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemcpy3DAsync(¶ms, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static inline char* createFillerData(size_t count, size_t value, bool fromHost) {
|
||||
if (fromHost) {
|
||||
char* fillerData = new char[count];
|
||||
std::fill(fillerData, fillerData + count, value);
|
||||
return fillerData;
|
||||
} else {
|
||||
char* fillerData;
|
||||
HIP_CHECK(hipMalloc(&fillerData, count * sizeof(char)));
|
||||
HIP_CHECK(hipMemset(fillerData, value, count * sizeof(char)));
|
||||
return fillerData;
|
||||
}
|
||||
}
|
||||
|
||||
static void checkForSync(hipStream_t stream, bool async, allocType type, bool fromHost) {
|
||||
if (fromHost) {
|
||||
if (type == allocType::deviceMalloc) {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorNotReady);
|
||||
} else {
|
||||
HIP_CHECK(hipStreamQuery(stream));
|
||||
}
|
||||
} else {
|
||||
if (type != allocType::deviceMalloc && !async) {
|
||||
HIP_CHECK(hipStreamQuery(stream));
|
||||
} else {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorNotReady);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Helper function to run tests for hipMemset allocation types
|
||||
static void runMemcpyTests(hipStream_t stream, bool async, allocType type, memType memType,
|
||||
MultiDData data) {
|
||||
bool fromHost = GENERATE(true, false);
|
||||
|
||||
std::pair<char*, char*> aPtr = initMemory<char>(type, memType, data);
|
||||
size_t sizeInBytes = data.getCount();
|
||||
|
||||
// filler data for device memory created beforehand as it uses memset
|
||||
// which might interfere with synchronization testing
|
||||
auto fillerData = createFillerData(sizeInBytes, testValue, fromHost);
|
||||
CAPTURE(type, memType, data.width, data.height, data.depth, stream, async, fromHost, sizeInBytes);
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
const std::chrono::duration<uint64_t, std::milli> delay = 100ms;
|
||||
HipTest::runKernelForDuration(delay, stream);
|
||||
|
||||
memcpyCheck(type, memType, aPtr.first, data, fillerData, async, stream, fromHost);
|
||||
checkForSync(stream, async, type, fromHost);
|
||||
// verify
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
verifyData(aPtr.first, testValue, data, type, memType);
|
||||
if (type == allocType::devRegistered) {
|
||||
freeStuff(aPtr.second, type);
|
||||
} else {
|
||||
freeStuff(aPtr.first, type);
|
||||
}
|
||||
if (fromHost) {
|
||||
delete[] fillerData;
|
||||
} else {
|
||||
HIP_CHECK(hipFree(fillerData));
|
||||
}
|
||||
}
|
||||
|
||||
#if HT_AMD /* Disabled because frequency based wait is timing out on nvidia platforms */
|
||||
|
||||
TEST_CASE("Unit_hipMemcpySync") {
|
||||
#if HT_AMD // To be removed when EXSWCPHIPT-127 is fixed
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127 - Sync behaviour differs on AMD and Nvidia");
|
||||
return;
|
||||
#endif
|
||||
allocType type = GENERATE(allocType::deviceMalloc, allocType::hostMalloc, allocType::hostRegisted,
|
||||
allocType::devRegistered);
|
||||
memType memcpy_type = memType::hipMem;
|
||||
MultiDData data;
|
||||
data.width = 1;
|
||||
|
||||
doMemTest<char>(runMemcpyTests, type, memcpy_type, data); // Uses long running kernel
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DSync") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127 - Sync behaviour differs on AMD and Nvidia");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::deviceMalloc, allocType::hostMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
|
||||
memType memcpy_type = memType::hipMem2D;
|
||||
MultiDData data;
|
||||
data.width = 1;
|
||||
data.height = 1;
|
||||
|
||||
doMemTest<char>(runMemcpyTests, mallocType, memcpy_type, data);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy3DSync") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127 - Sync behaviour differs on AMD and Nvidia");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::deviceMalloc, allocType::hostMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
|
||||
memType memcpy_type = memType::hipMem3D;
|
||||
MultiDData data;
|
||||
data.width = 1;
|
||||
data.height = 1;
|
||||
data.depth = 1;
|
||||
|
||||
doMemTest<char>(runMemcpyTests, mallocType, memcpy_type, data);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of intge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "MemUtils.hh"
|
||||
|
||||
/*
|
||||
* This testcase verifies that asynchronous memset functions are asynchronous with respect to the
|
||||
* host except when the target is pinned host memory or a Unified Memory region
|
||||
*/
|
||||
|
||||
constexpr int testValue1 = 97;
|
||||
constexpr int testValue2 = 98;
|
||||
|
||||
|
||||
using namespace mem_utils;
|
||||
|
||||
// Helper function to run tests for hipMemset allocation types
|
||||
template <typename T>
|
||||
void runAsyncTests(hipStream_t stream, allocType type, memType memType, MultiDData data1,
|
||||
MultiDData data2) {
|
||||
std::pair<T*, T*> aPtr{};
|
||||
MultiDData totalRange;
|
||||
totalRange.width = data1.width + data2.width;
|
||||
totalRange.height = data1.height + data2.height;
|
||||
totalRange.depth = data1.depth + data2.depth;
|
||||
aPtr = initMemory<T>(type, memType, totalRange);
|
||||
data1.pitch = totalRange.pitch;
|
||||
data2.pitch = totalRange.pitch;
|
||||
|
||||
memsetCheck(aPtr.first, testValue1, memType, data1, stream);
|
||||
memsetCheck(aPtr.first, testValue2, memType, data2, stream);
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
verifyData(aPtr.first, testValue1, data1, type, memType);
|
||||
verifyData(aPtr.first, testValue2, data2, type, memType);
|
||||
|
||||
|
||||
if (type == allocType::devRegistered) {
|
||||
freeStuff(aPtr.second, type);
|
||||
} else {
|
||||
freeStuff(aPtr.first, type);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void doMemsetTest(allocType mallocType, memType memset_type, MultiDData data1,
|
||||
MultiDData data2) {
|
||||
enum StreamType { NULLSTR, CREATEDSTR };
|
||||
auto streamType = GENERATE(NULLSTR, CREATEDSTR);
|
||||
hipStream_t stream{nullptr};
|
||||
|
||||
if (streamType == CREATEDSTR) HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
runAsyncTests<T>(stream, mallocType, memset_type, data1, data2);
|
||||
|
||||
if (streamType == CREATEDSTR) HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/*
|
||||
* test 2 async hipMemset's on the same memory at different offsets
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemsetASyncMulti") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::hostMalloc, allocType::deviceMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
memType mem_type = memType::hipMemsetD8;
|
||||
MultiDData data1;
|
||||
data1.offset = 0;
|
||||
data1.width = GENERATE(1, 256);
|
||||
MultiDData data2;
|
||||
data2.width = data1.width;
|
||||
|
||||
data2.offset = data1.width;
|
||||
doMemsetTest<char>(mallocType, mem_type, data1, data2);
|
||||
}
|
||||
|
||||
/*
|
||||
* test 2 async hipMemsetD[8,16,32]'s on the same memory at different offsets
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemsetDASyncMulti", "", int8_t, int16_t, uint32_t) {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::hostRegisted, allocType::deviceMalloc,
|
||||
allocType::hostMalloc, allocType::devRegistered);
|
||||
memType memset_type;
|
||||
MultiDData data1;
|
||||
data1.offset = 0;
|
||||
data1.width = GENERATE(1, 256);
|
||||
MultiDData data2;
|
||||
data2.width = data1.width;
|
||||
data2.offset = data1.width;
|
||||
|
||||
if (std::is_same<int8_t, TestType>::value) {
|
||||
memset_type = memType::hipMemsetD8;
|
||||
} else if (std::is_same<int16_t, TestType>::value) {
|
||||
memset_type = memType::hipMemsetD16;
|
||||
} else if (std::is_same<uint32_t, TestType>::value) {
|
||||
memset_type = memType::hipMemsetD32;
|
||||
}
|
||||
doMemsetTest<TestType>(mallocType, memset_type, data1, data2);
|
||||
}
|
||||
|
||||
/*
|
||||
* test 2 async hipMemset2D's on the same memory at different offsets
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemset2DASyncMulti") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::deviceMalloc, allocType::hostMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
memType memset_type = memType::hipMem2D;
|
||||
MultiDData data1;
|
||||
data1.offset = 0;
|
||||
data1.width = GENERATE(1, 256);
|
||||
data1.height = data1.width;
|
||||
MultiDData data2;
|
||||
data2.width = data1.width;
|
||||
data2.height = data1.height;
|
||||
data2.offset = data1.width;
|
||||
|
||||
doMemsetTest<char>(mallocType, memset_type, data1, data2);
|
||||
}
|
||||
/*
|
||||
* test 2 async hipMemset3D's on the same memory at different offsets
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemset3DASyncMulti") {
|
||||
#if HT_AMD
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-127");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::deviceMalloc, allocType::hostMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
memType memset_type = memType::hipMem3D;
|
||||
MultiDData data1;
|
||||
data1.offset = 0;
|
||||
data1.width = GENERATE(1, 256);
|
||||
data1.height = data1.width;
|
||||
data1.depth = data1.width;
|
||||
MultiDData data2;
|
||||
data2.width = data1.width;
|
||||
data2.height = data1.width;
|
||||
data2.depth = data1.width;
|
||||
data2.offset = data1.width;
|
||||
|
||||
doMemsetTest<char>(mallocType, memset_type, data1, data2);
|
||||
}
|
||||
@@ -0,0 +1,505 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of intge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
/*
|
||||
* These testcases verify that synchronous memset functions are asynchronous with respect to the
|
||||
* host except when the target is pinned host memory or a Unified Memory region
|
||||
*/
|
||||
|
||||
// value used for memset operations
|
||||
constexpr int testValue = 0x11;
|
||||
|
||||
enum class allocType { deviceMalloc, hostMalloc, hostRegisted, devRegistered };
|
||||
enum class memSetType {
|
||||
hipMemset,
|
||||
hipMemsetD8,
|
||||
hipMemsetD16,
|
||||
hipMemsetD32,
|
||||
hipMemset2D,
|
||||
hipMemset3D
|
||||
};
|
||||
|
||||
// helper struct containing vars needed for 2D and 3D memset Testing
|
||||
struct MultiDData {
|
||||
size_t width{};
|
||||
// set to 0 for 1D
|
||||
size_t height{};
|
||||
// set to 0 for 2D
|
||||
size_t depth{};
|
||||
size_t pitch{};
|
||||
};
|
||||
|
||||
// set of helper functions to tidy the nested switch statements
|
||||
template <typename T>
|
||||
static std::pair<T*,T*> deviceMallocHelper(memSetType memType, size_t dataW, size_t dataH, size_t dataD,
|
||||
size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr{};
|
||||
switch (memType) {
|
||||
case memSetType::hipMemset3D: {
|
||||
hipPitchedPtr pitchedAPtr{};
|
||||
hipExtent extent;
|
||||
extent.width = dataW * elementSize;
|
||||
extent.height = dataH;
|
||||
extent.depth = dataD;
|
||||
|
||||
pitchedAPtr =
|
||||
make_hipPitchedPtr(aPtr, extent.width, extent.width / elementSize, extent.height);
|
||||
HIP_CHECK(hipMalloc3D(&pitchedAPtr, extent));
|
||||
aPtr = reinterpret_cast<T*>(pitchedAPtr.ptr);
|
||||
dataPitch = pitchedAPtr.pitch;
|
||||
break;
|
||||
}
|
||||
|
||||
case memSetType::hipMemset2D:
|
||||
HIP_CHECK(
|
||||
hipMallocPitch(reinterpret_cast<void**>(&aPtr), &dataPitch, dataW * elementSize, dataH));
|
||||
|
||||
dataPitch = dataW * elementSize;
|
||||
break;
|
||||
|
||||
default:
|
||||
HIP_CHECK(hipMalloc(&aPtr, sizeInBytes));
|
||||
dataPitch = dataW * elementSize;
|
||||
break;
|
||||
}
|
||||
return std::make_pair(aPtr, nullptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::pair<T*, T*> hostMallocHelper(size_t dataW, size_t dataH, size_t dataD, size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr;
|
||||
|
||||
HIP_CHECK(hipHostMalloc(&aPtr, sizeInBytes));
|
||||
dataPitch = dataW * elementSize;
|
||||
|
||||
return std::make_pair(aPtr, nullptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::pair<T*, T*> hostRegisteredHelper(size_t dataW, size_t dataH, size_t dataD, size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr = new T[dataW * dataH * dataD];
|
||||
|
||||
HIP_CHECK(hipHostRegister(aPtr, sizeInBytes, hipHostRegisterDefault));
|
||||
|
||||
dataPitch = dataW * elementSize;
|
||||
return std::make_pair(aPtr, nullptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::pair<T*, T*> devRegisteredHelper(size_t dataW, size_t dataH, size_t dataD,
|
||||
size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
T* aPtr = new T[dataW * dataH * dataD];
|
||||
T* retPtr;
|
||||
|
||||
HIP_CHECK(hipHostRegister(aPtr, sizeInBytes, hipHostRegisterDefault));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&retPtr), aPtr, 0));
|
||||
|
||||
dataPitch = dataW * elementSize;
|
||||
// keep the address of the host memory
|
||||
return std::make_pair(retPtr, aPtr);
|
||||
}
|
||||
|
||||
// helper function to allocate memory and set it to a value.
|
||||
// retunr a pair of pointers due to the device registered allocation case, we need to keep track of
|
||||
// the pointer to host memory to be able to unregister and free it
|
||||
template <typename T>
|
||||
static std::pair<T*, T*> initMemory(allocType type, memSetType memType, MultiDData& data) {
|
||||
size_t dataH = data.height == 0 ? 1 : data.height;
|
||||
size_t dataD = data.depth == 0 ? 1 : data.depth;
|
||||
std::pair<T*, T*> retPtr{};
|
||||
// check different types of allocation
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
retPtr = deviceMallocHelper<T>(memType, data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
|
||||
case allocType::hostMalloc:
|
||||
retPtr = hostMallocHelper<T>(data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
|
||||
case allocType::hostRegisted:
|
||||
retPtr = hostRegisteredHelper<T>(data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
|
||||
case allocType::devRegistered:
|
||||
retPtr = devRegisteredHelper<T>(data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
|
||||
default:
|
||||
REQUIRE(false);
|
||||
break;
|
||||
}
|
||||
return retPtr;
|
||||
}
|
||||
|
||||
// set of helper functions to tidy the nested switch statements
|
||||
template <typename T>
|
||||
static void deviceMallocCopy(memSetType memType, T* aPtr, T* hostMem, size_t dataW, size_t dataH,
|
||||
size_t dataD, size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
switch (memType) {
|
||||
case memSetType::hipMemset3D: {
|
||||
hipMemcpy3DParms params{};
|
||||
params.kind = hipMemcpyDeviceToHost;
|
||||
params.srcPos = make_hipPos(0, 0, 0);
|
||||
params.srcPtr = make_hipPitchedPtr(aPtr, dataPitch, dataW, dataH);
|
||||
params.dstPos = make_hipPos(0, 0, 0);
|
||||
params.dstPtr = make_hipPitchedPtr(hostMem, dataPitch, dataW, dataH);
|
||||
|
||||
hipExtent extent;
|
||||
extent.width = dataPitch;
|
||||
extent.height = dataH;
|
||||
extent.depth = dataD;
|
||||
|
||||
params.extent = extent;
|
||||
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
break;
|
||||
}
|
||||
|
||||
case memSetType::hipMemset2D:
|
||||
HIP_CHECK(hipMemcpy2D(hostMem, dataW * elementSize, aPtr, dataPitch, dataW, dataH,
|
||||
hipMemcpyDeviceToHost));
|
||||
break;
|
||||
|
||||
default:
|
||||
HIP_CHECK(hipMemcpy(hostMem, aPtr, sizeInBytes, hipMemcpyDeviceToHost));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void hostCopy(memSetType memType, T* aPtr, T* hostMem, size_t dataW, size_t dataH,
|
||||
size_t dataD, size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
hipMemcpy3DParms params{};
|
||||
switch (memType) {
|
||||
case memSetType::hipMemset3D: {
|
||||
params.kind = hipMemcpyHostToHost;
|
||||
params.srcPos = make_hipPos(0, 0, 0);
|
||||
params.dstPos = make_hipPos(0, 0, 0);
|
||||
params.srcPtr = make_hipPitchedPtr(aPtr, dataPitch, dataW, dataH);
|
||||
params.dstPtr = make_hipPitchedPtr(hostMem, dataW, dataW, dataH);
|
||||
|
||||
hipExtent extent;
|
||||
extent.width = dataW;
|
||||
extent.height = dataH;
|
||||
extent.depth = dataD;
|
||||
|
||||
params.extent = extent;
|
||||
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
break;
|
||||
}
|
||||
|
||||
case memSetType::hipMemset2D:
|
||||
HIP_CHECK(hipMemcpy2D(hostMem, dataW * elementSize, aPtr, dataPitch, dataW, dataH,
|
||||
hipMemcpyHostToHost));
|
||||
break;
|
||||
|
||||
default:
|
||||
HIP_CHECK(hipMemcpy(hostMem, aPtr, sizeInBytes, hipMemcpyHostToHost));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void devRegisteredCopy(memSetType memType, T* aPtr, T* hostMem, size_t dataW, size_t dataH,
|
||||
size_t dataD, size_t& dataPitch) {
|
||||
size_t elementSize = sizeof(T);
|
||||
|
||||
switch (memType) {
|
||||
case memSetType::hipMemset3D: {
|
||||
hipMemcpy3DParms params{};
|
||||
params.kind = hipMemcpyHostToHost;
|
||||
params.srcPos = make_hipPos(0, 0, 0);
|
||||
params.dstPos = make_hipPos(0, 0, 0);
|
||||
params.srcPtr = make_hipPitchedPtr(aPtr, dataPitch, dataW, dataH);
|
||||
params.dstPtr = make_hipPitchedPtr(hostMem, dataW, dataW, dataH);
|
||||
|
||||
hipExtent extent;
|
||||
extent.width = dataW;
|
||||
extent.height = dataH;
|
||||
extent.depth = dataD;
|
||||
|
||||
params.extent = extent;
|
||||
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
break;
|
||||
}
|
||||
|
||||
case memSetType::hipMemset2D:
|
||||
HIP_CHECK(hipMemcpy2D(hostMem, dataW * elementSize, aPtr, dataPitch, dataW, dataH,
|
||||
hipMemcpyDeviceToHost));
|
||||
break;
|
||||
|
||||
default: {
|
||||
size_t sizeInBytes = elementSize * dataW * dataH * dataD;
|
||||
HIP_CHECK(hipMemcpy(hostMem, aPtr, sizeInBytes, hipMemcpyDeviceToHost));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copies device data to host and checks that each element is equal to the
|
||||
// specified value
|
||||
template <typename T>
|
||||
void verifyData(T* aPtr, size_t value, MultiDData& data, allocType type, memSetType memType) {
|
||||
auto dataH = data.height == 0 ? 1 : data.height;
|
||||
auto dataD = data.depth == 0 ? 1 : data.depth;
|
||||
std::unique_ptr<T[]> hostPtr = std::make_unique<T[]>(data.pitch * dataH * dataD / sizeof(T));
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
deviceMallocCopy(memType, aPtr, hostPtr.get(), data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
case allocType::devRegistered:
|
||||
devRegisteredCopy(memType, aPtr, hostPtr.get(), data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
default: // host allocated or host registered memory
|
||||
hostCopy(memType, aPtr, hostPtr.get(), data.width, dataH, dataD, data.pitch);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
size_t idx;
|
||||
bool allMatch = true;
|
||||
|
||||
for (size_t k = 0; k < dataD; k++) {
|
||||
for (size_t j = 0; j < dataH; j++) {
|
||||
for (size_t i = 0; i < data.width; i++) {
|
||||
idx = data.pitch * dataH * k + data.pitch * j + i;
|
||||
allMatch = allMatch && static_cast<size_t>(hostPtr.get()[idx]) == value;
|
||||
if (!allMatch) REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// macro to allow reuse of functions for testing versions of hipMemset
|
||||
template <typename T>
|
||||
void memsetCheck(T* aPtr, size_t value, memSetType memsetType, MultiDData& data, bool async = false,
|
||||
hipStream_t stream = nullptr) {
|
||||
size_t dataW = data.width;
|
||||
size_t dataH = data.height == 0 ? 1 : data.height;
|
||||
size_t dataD = data.depth == 0 ? 1 : data.depth;
|
||||
size_t count = dataW * dataH * dataD;
|
||||
|
||||
switch (memsetType) {
|
||||
case memSetType::hipMemset:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetAsync(aPtr, value, count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemset(aPtr, value, count));
|
||||
}
|
||||
break;
|
||||
|
||||
case memSetType::hipMemsetD8:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetD8Async(reinterpret_cast<hipDeviceptr_t>(aPtr), value, count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemsetD8(reinterpret_cast<hipDeviceptr_t>(aPtr), value, count));
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case memSetType::hipMemsetD16:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetD16Async(reinterpret_cast<hipDeviceptr_t>(aPtr), value, count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemsetD16(reinterpret_cast<hipDeviceptr_t>(aPtr), value, count));
|
||||
}
|
||||
break;
|
||||
|
||||
case memSetType::hipMemsetD32:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemsetD32Async(reinterpret_cast<hipDeviceptr_t>(aPtr), value, count, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemsetD32(reinterpret_cast<hipDeviceptr_t>(aPtr), value, count));
|
||||
}
|
||||
break;
|
||||
|
||||
case memSetType::hipMemset2D:
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemset2DAsync(aPtr, data.pitch, value, data.width, data.height, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemset2D(aPtr, data.pitch, value, data.width, data.height));
|
||||
}
|
||||
break;
|
||||
|
||||
case memSetType::hipMemset3D:
|
||||
hipExtent extent;
|
||||
extent.width = data.width;
|
||||
extent.height = data.height;
|
||||
extent.depth = data.depth;
|
||||
if (async) {
|
||||
HIP_CHECK(hipMemset3DAsync(make_hipPitchedPtr(aPtr, data.pitch, data.width, data.height),
|
||||
value, extent, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemset3D(make_hipPitchedPtr(aPtr, data.pitch, data.width, data.height), value,
|
||||
extent));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
REQUIRE(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> void freeStuff(T* aPtr, allocType type) {
|
||||
switch (type) {
|
||||
case allocType::deviceMalloc:
|
||||
hipFree(aPtr);
|
||||
break;
|
||||
case allocType::hostMalloc:
|
||||
hipHostFree(aPtr);
|
||||
break;
|
||||
case allocType::hostRegisted:
|
||||
HIP_CHECK(hipHostUnregister(aPtr));
|
||||
delete[] aPtr;
|
||||
break;
|
||||
case allocType::devRegistered:
|
||||
HIP_CHECK(hipHostUnregister(aPtr));
|
||||
delete[] aPtr;
|
||||
break;
|
||||
default:
|
||||
REQUIRE(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to run tests for hipMemset allocation types
|
||||
template <typename T>
|
||||
void runTests(allocType type, memSetType memsetType, MultiDData data, hipStream_t stream) {
|
||||
bool async = GENERATE(true, false);
|
||||
CAPTURE(type, memsetType, data.width, data.height, data.depth, stream, async);
|
||||
std::pair<T*, T*> aPtr = initMemory<T>(type, memsetType, data);
|
||||
using namespace std::chrono_literals;
|
||||
const std::chrono::duration<uint64_t, std::milli> delay = 100ms;
|
||||
HipTest::runKernelForDuration(delay, stream);
|
||||
memsetCheck(aPtr.first, testValue, memsetType, data, async, stream);
|
||||
|
||||
if (async || type == allocType::deviceMalloc) {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorNotReady);
|
||||
} else {
|
||||
HIP_CHECK(hipStreamQuery(stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
verifyData(aPtr.first, testValue, data, type, memsetType);
|
||||
|
||||
if (type == allocType::devRegistered) {
|
||||
freeStuff(aPtr.second, type);
|
||||
} else {
|
||||
freeStuff(aPtr.first, type);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void doMemsetTest(allocType mallocType, memSetType memset_type, MultiDData data) {
|
||||
enum StreamType { NULLSTR, CREATEDSTR };
|
||||
auto streamType = GENERATE(NULLSTR, CREATEDSTR);
|
||||
hipStream_t stream{nullptr};
|
||||
|
||||
if (streamType == CREATEDSTR) HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
runTests<T>(mallocType, memset_type, data, stream);
|
||||
|
||||
if (streamType == CREATEDSTR) HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemsetSync") {
|
||||
#if HT_AMD || HT_NVIDIA
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-86");
|
||||
return;
|
||||
#endif
|
||||
allocType type = GENERATE(allocType::deviceMalloc, allocType::hostMalloc, allocType::hostRegisted,
|
||||
allocType::devRegistered);
|
||||
memSetType memset_type = memSetType::hipMemset;
|
||||
MultiDData data;
|
||||
data.width = GENERATE(1, 1024);
|
||||
doMemsetTest<char>(type, memset_type, data);
|
||||
}
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemsetDSync", "", int8_t, int16_t, uint32_t) {
|
||||
#if HT_AMD || HT_NVIDIA
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-86");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::hostRegisted, allocType::deviceMalloc,
|
||||
allocType::hostMalloc, allocType::devRegistered);
|
||||
memSetType memset_type;
|
||||
MultiDData data;
|
||||
data.width = GENERATE(1, 1024);
|
||||
|
||||
if (std::is_same<int8_t, TestType>::value) {
|
||||
memset_type = memSetType::hipMemsetD8;
|
||||
} else if (std::is_same<int16_t, TestType>::value) {
|
||||
memset_type = memSetType::hipMemsetD16;
|
||||
} else if (std::is_same<uint32_t, TestType>::value) {
|
||||
memset_type = memSetType::hipMemsetD32;
|
||||
}
|
||||
|
||||
doMemsetTest<TestType>(mallocType, memset_type, data);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemset2DSync") {
|
||||
#if HT_AMD || HT_NVIDIA
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-86");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::deviceMalloc, allocType::hostMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
memSetType memset_type = memSetType::hipMemset2D;
|
||||
MultiDData data;
|
||||
data.width = GENERATE(1, 1024);
|
||||
data.height = GENERATE(1, 1024);
|
||||
|
||||
doMemsetTest<char>(mallocType, memset_type, data);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemset3DSync") {
|
||||
#if HT_AMD || HT_NVIDIA
|
||||
HipTest::HIP_SKIP_TEST("EXSWCPHIPT-86");
|
||||
return;
|
||||
#endif
|
||||
allocType mallocType = GENERATE(allocType::deviceMalloc, allocType::hostMalloc,
|
||||
allocType::hostRegisted, allocType::devRegistered);
|
||||
memSetType memset_type = memSetType::hipMemset3D;
|
||||
MultiDData data;
|
||||
data.width = GENERATE(1, 256);
|
||||
data.height = GENERATE(1, 256);
|
||||
data.depth = GENERATE(1, 256);
|
||||
|
||||
doMemsetTest<char>(mallocType, memset_type, data);
|
||||
}
|
||||
@@ -28,7 +28,7 @@ Following scenarios are verified for hipPointerGetAttributes API
|
||||
4. Multi-threaded test with many simul allocs.
|
||||
|
||||
*/
|
||||
#include<hip_test_common.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
@@ -37,22 +37,18 @@ size_t Nbytes = 0;
|
||||
constexpr size_t N{1000000};
|
||||
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
// Utility Functions:
|
||||
//=================================================================================================
|
||||
|
||||
bool operator==(const hipPointerAttribute_t& lhs,
|
||||
const hipPointerAttribute_t& rhs) {
|
||||
return ((lhs.hostPointer == rhs.hostPointer) &&
|
||||
(lhs.devicePointer == rhs.devicePointer) &&
|
||||
(lhs.memoryType == rhs.memoryType) && (lhs.device == rhs.device) &&
|
||||
(lhs.allocationFlags == rhs.allocationFlags));
|
||||
bool operator==(const hipPointerAttribute_t& lhs, const hipPointerAttribute_t& rhs) {
|
||||
return ((lhs.hostPointer == rhs.hostPointer) && (lhs.devicePointer == rhs.devicePointer) &&
|
||||
(lhs.memoryType == rhs.memoryType) && (lhs.device == rhs.device) &&
|
||||
(lhs.allocationFlags == rhs.allocationFlags));
|
||||
}
|
||||
|
||||
|
||||
bool operator!=(const hipPointerAttribute_t& lhs,
|
||||
const hipPointerAttribute_t& rhs) {
|
||||
bool operator!=(const hipPointerAttribute_t& lhs, const hipPointerAttribute_t& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
@@ -70,53 +66,50 @@ const char* memoryTypeToString(hipMemoryType memoryType) {
|
||||
|
||||
|
||||
void resetAttribs(hipPointerAttribute_t* attribs) {
|
||||
attribs->hostPointer = reinterpret_cast<void*>(-1);
|
||||
attribs->devicePointer = reinterpret_cast<void*>(-1);
|
||||
attribs->memoryType = hipMemoryTypeHost;
|
||||
attribs->device = -2;
|
||||
attribs->isManaged = -1;
|
||||
attribs->allocationFlags = 0xffff;
|
||||
attribs->hostPointer = reinterpret_cast<void*>(-1);
|
||||
attribs->devicePointer = reinterpret_cast<void*>(-1);
|
||||
attribs->memoryType = hipMemoryTypeHost;
|
||||
attribs->device = -2;
|
||||
attribs->isManaged = -1;
|
||||
attribs->allocationFlags = 0xffff;
|
||||
}
|
||||
|
||||
|
||||
void printAttribs(const hipPointerAttribute_t* attribs) {
|
||||
printf(
|
||||
"hostPointer:%p devicePointer:%p memType:%s deviceId:%d isManaged:%d "
|
||||
"allocationFlags:%u\n",
|
||||
attribs->hostPointer, attribs->devicePointer,
|
||||
memoryTypeToString(attribs->memoryType),
|
||||
attribs->device, attribs->isManaged, attribs->allocationFlags);
|
||||
"hostPointer:%p devicePointer:%p memType:%s deviceId:%d isManaged:%d "
|
||||
"allocationFlags:%u\n",
|
||||
attribs->hostPointer, attribs->devicePointer, memoryTypeToString(attribs->memoryType),
|
||||
attribs->device, attribs->isManaged, attribs->allocationFlags);
|
||||
}
|
||||
|
||||
|
||||
inline int zrand(int max) { return rand() % max; }
|
||||
|
||||
|
||||
|
||||
// Store the hipPointer attrib and some extra info
|
||||
// so can later compare the looked-up info against
|
||||
// the reference expectation
|
||||
struct SuperPointerAttribute {
|
||||
void* _pointer;
|
||||
size_t _sizeBytes;
|
||||
hipPointerAttribute_t _attrib;
|
||||
void* _pointer;
|
||||
size_t _sizeBytes;
|
||||
hipPointerAttribute_t _attrib;
|
||||
};
|
||||
|
||||
|
||||
// Support function to check result against a reference:
|
||||
void checkPointer(const SuperPointerAttribute& ref, int major,
|
||||
int minor, void* pointer) {
|
||||
hipPointerAttribute_t attribs;
|
||||
resetAttribs(&attribs);
|
||||
void checkPointer(const SuperPointerAttribute& ref, int major, int minor, void* pointer) {
|
||||
hipPointerAttribute_t attribs;
|
||||
resetAttribs(&attribs);
|
||||
|
||||
hipError_t e = hipPointerGetAttributes(&attribs, pointer);
|
||||
if ((e != hipSuccess) || (attribs != ref._attrib)) {
|
||||
HIP_CHECK(e);
|
||||
REQUIRE(attribs != ref._attrib);
|
||||
} else {
|
||||
printf("#%4d.%d GOOD:%p getattr :: ", major, minor, pointer);
|
||||
printAttribs(&attribs);
|
||||
}
|
||||
hipError_t e = hipPointerGetAttributes(&attribs, pointer);
|
||||
if ((e != hipSuccess) || (attribs != ref._attrib)) {
|
||||
HIP_CHECK(e);
|
||||
REQUIRE(attribs != ref._attrib);
|
||||
} else {
|
||||
printf("#%4d.%d GOOD:%p getattr :: ", major, minor, pointer);
|
||||
printAttribs(&attribs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -129,8 +122,7 @@ void checkPointer(const SuperPointerAttribute& ref, int major,
|
||||
// we do this in the testMultiThreaded_1 test.
|
||||
void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) {
|
||||
Nbytes = N * sizeof(char);
|
||||
printf("clusterAllocs numAllocs=%d size=%lu..%lu\n",
|
||||
numAllocs, minSize, maxSize);
|
||||
printf("clusterAllocs numAllocs=%d size=%lu..%lu\n", numAllocs, minSize, maxSize);
|
||||
const int Max_Devices = 256;
|
||||
std::vector<SuperPointerAttribute> reference(numAllocs);
|
||||
|
||||
@@ -157,18 +149,15 @@ void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) {
|
||||
|
||||
void* ptr;
|
||||
if (isDevice) {
|
||||
totalDeviceAllocated[reference[i]._attrib.device] +=
|
||||
reference[i]._sizeBytes;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ptr),
|
||||
reference[i]._sizeBytes));
|
||||
totalDeviceAllocated[reference[i]._attrib.device] += reference[i]._sizeBytes;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ptr), reference[i]._sizeBytes));
|
||||
reference[i]._attrib.memoryType = hipMemoryTypeDevice;
|
||||
reference[i]._attrib.devicePointer = ptr;
|
||||
reference[i]._attrib.hostPointer = NULL;
|
||||
reference[i]._attrib.allocationFlags = 0;
|
||||
} else {
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&ptr),
|
||||
reference[i]._sizeBytes,
|
||||
hipHostMallocDefault));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&ptr), reference[i]._sizeBytes,
|
||||
hipHostMallocDefault));
|
||||
reference[i]._attrib.memoryType = hipMemoryTypeHost;
|
||||
reference[i]._attrib.devicePointer = ptr;
|
||||
reference[i]._attrib.hostPointer = ptr;
|
||||
@@ -182,32 +171,29 @@ void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
printf(
|
||||
" device#%d: hipMemGetInfo: "
|
||||
"free=%zu (%4.2fMB) totalDevice=%lu (%4.2fMB) total=%zu "
|
||||
"(%4.2fMB)\n",
|
||||
i, free, (free / 1024.0 / 1024.0), totalDeviceAllocated[i],
|
||||
(totalDeviceAllocated[i]) / 1024.0 / 1024.0, total,
|
||||
(total / 1024.0 / 1024.0));
|
||||
" device#%d: hipMemGetInfo: "
|
||||
"free=%zu (%4.2fMB) totalDevice=%lu (%4.2fMB) total=%zu "
|
||||
"(%4.2fMB)\n",
|
||||
i, free, (free / 1024.0 / 1024.0), totalDeviceAllocated[i],
|
||||
(totalDeviceAllocated[i]) / 1024.0 / 1024.0, total, (total / 1024.0 / 1024.0));
|
||||
REQUIRE(free + totalDeviceAllocated[i] <= total);
|
||||
}
|
||||
|
||||
// Now look up each pointer we inserted and verify we can find it:
|
||||
char * ptr;
|
||||
char* ptr;
|
||||
for (int i = 0; i < numAllocs; i++) {
|
||||
SuperPointerAttribute& ref = reference[i];
|
||||
ptr = static_cast<char *>(ref._pointer);
|
||||
ptr = static_cast<char*>(ref._pointer);
|
||||
checkPointer(ref, i, 0, ref._pointer);
|
||||
checkPointer(ref, i, 1, (ptr +
|
||||
ref._sizeBytes / 2));
|
||||
checkPointer(ref, i, 1, (ptr + ref._sizeBytes / 2));
|
||||
if (ref._sizeBytes > 1) {
|
||||
checkPointer(ref, i, 2, (ptr +
|
||||
ref._sizeBytes - 1));
|
||||
checkPointer(ref, i, 2, (ptr + ref._sizeBytes - 1));
|
||||
}
|
||||
|
||||
if (ref._attrib.memoryType == hipMemoryTypeDevice) {
|
||||
hipFree(ref._pointer);
|
||||
HIP_CHECK(hipFree(ref._pointer));
|
||||
} else {
|
||||
hipHostFree(ref._pointer);
|
||||
HIP_CHECK(hipHostFree(ref._pointer));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -231,15 +217,13 @@ TEST_CASE("Unit_hipPointerGetAttributes_Basic") {
|
||||
hipError_t e;
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_Pinned_h), Nbytes,
|
||||
hipHostMallocDefault));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_Pinned_h), Nbytes, hipHostMallocDefault));
|
||||
A_OSAlloc_h = reinterpret_cast<char*>(malloc(Nbytes));
|
||||
|
||||
size_t free, total;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
printf("hipMemGetInfo: free=%zu (%4.2f) Nbytes=%lu total=%zu (%4.2f)\n", free,
|
||||
(free / 1024.0 / 1024.0), Nbytes, total,
|
||||
(total / 1024.0 / 1024.0));
|
||||
(free / 1024.0 / 1024.0), Nbytes, total, (total / 1024.0 / 1024.0));
|
||||
REQUIRE(free + Nbytes <= total);
|
||||
|
||||
|
||||
@@ -253,23 +237,20 @@ TEST_CASE("Unit_hipPointerGetAttributes_Basic") {
|
||||
// Check pointer arithmetic cases:
|
||||
resetAttribs(&attribs2);
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + 100));
|
||||
char *ptr = reinterpret_cast<char *>(attribs.devicePointer);
|
||||
REQUIRE(ptr + 100 ==
|
||||
reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
char* ptr = reinterpret_cast<char*>(attribs.devicePointer);
|
||||
REQUIRE(ptr + 100 == reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
|
||||
// Corner case at end of array:
|
||||
resetAttribs(&attribs2);
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + Nbytes - 1));
|
||||
REQUIRE((ptr + Nbytes - 1) ==
|
||||
reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
REQUIRE((ptr + Nbytes - 1) == reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
|
||||
// Pointer just beyond array must be invalid or at least a different pointer
|
||||
resetAttribs(&attribs2);
|
||||
e = hipPointerGetAttributes(&attribs2, A_d + Nbytes + 1);
|
||||
if (e != hipErrorInvalidValue) {
|
||||
// We might have strayed into another pointer area.
|
||||
REQUIRE(reinterpret_cast<char*>(ptr) !=
|
||||
reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
REQUIRE(reinterpret_cast<char*>(ptr) != reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
}
|
||||
|
||||
|
||||
@@ -278,7 +259,7 @@ TEST_CASE("Unit_hipPointerGetAttributes_Basic") {
|
||||
if (e != hipErrorInvalidValue) {
|
||||
REQUIRE(attribs.devicePointer != attribs2.devicePointer);
|
||||
}
|
||||
hipFree(A_d);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
e = hipPointerGetAttributes(&attribs, A_d);
|
||||
REQUIRE(e == hipErrorInvalidValue);
|
||||
|
||||
@@ -288,12 +269,11 @@ TEST_CASE("Unit_hipPointerGetAttributes_Basic") {
|
||||
|
||||
resetAttribs(&attribs2);
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_Pinned_h + Nbytes / 2));
|
||||
char *ptr1 = reinterpret_cast<char *>(attribs.hostPointer);
|
||||
REQUIRE((ptr1 + Nbytes / 2)
|
||||
== reinterpret_cast<char*>(attribs2.hostPointer));
|
||||
char* ptr1 = reinterpret_cast<char*>(attribs.hostPointer);
|
||||
REQUIRE((ptr1 + Nbytes / 2) == reinterpret_cast<char*>(attribs2.hostPointer));
|
||||
|
||||
|
||||
hipHostFree(A_Pinned_h);
|
||||
HIP_CHECK(hipHostFree(A_Pinned_h));
|
||||
e = hipPointerGetAttributes(&attribs, A_Pinned_h);
|
||||
REQUIRE(e == hipErrorInvalidValue);
|
||||
|
||||
@@ -317,33 +297,37 @@ TEST_CASE("Unit_hipPointerGetAttributes_TinyClusterAlloc") {
|
||||
|
||||
// Multi-threaded test with many simul allocs.
|
||||
// IN : serialize will force the test to run in serial fashion.
|
||||
#if 0 // FIXME_jatinx These need to be ported to HIP_CHECK_THREAD. Disabling it for now
|
||||
TEST_CASE("Unit_hipPointerGetAttributes_MultiThread") {
|
||||
srand(0x300);
|
||||
auto serialize = 1;
|
||||
printf("\n=============================================\n");
|
||||
printf("MultiThreaded_1\n");
|
||||
if (serialize) printf("[SERIALIZE]\n");
|
||||
printf("===============================================\n");
|
||||
std::thread t1(clusterAllocs, 1000, 101, 1000);
|
||||
if (serialize) t1.join();
|
||||
srand(0x300);
|
||||
auto serialize = 1;
|
||||
printf("\n=============================================\n");
|
||||
printf("MultiThreaded_1\n");
|
||||
if (serialize) printf("[SERIALIZE]\n");
|
||||
printf("===============================================\n");
|
||||
std::thread t1(clusterAllocs, 1000, 101, 1000);
|
||||
if (serialize) t1.join();
|
||||
|
||||
std::thread t2(clusterAllocs, 1000, 11, 100);
|
||||
if (serialize) t2.join();
|
||||
std::thread t2(clusterAllocs, 1000, 11, 100);
|
||||
if (serialize) t2.join();
|
||||
|
||||
std::thread t3(clusterAllocs, 1000, 5, 10);
|
||||
if (serialize) t3.join();
|
||||
std::thread t3(clusterAllocs, 1000, 5, 10);
|
||||
if (serialize) t3.join();
|
||||
|
||||
std::thread t4(clusterAllocs, 1000, 1, 4);
|
||||
if (serialize) t4.join();
|
||||
std::thread t4(clusterAllocs, 1000, 1, 4);
|
||||
if (serialize) t4.join();
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST_CASE("Unit_hipPointerGetAttributes_Negative") {
|
||||
#if HT_AMD // Nvidia crashed in hipPointerGetAttributes on nullptr
|
||||
SECTION("Invalid Attributes Pointer") {
|
||||
int* dPtr{nullptr};
|
||||
HIP_CHECK(hipMalloc(&dPtr, sizeof(int)));
|
||||
HIP_CHECK_ERROR(hipPointerGetAttributes(nullptr, dPtr), hipErrorInvalidValue);
|
||||
HIP_CHECK(hipFree(dPtr));
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Invalid Device Pointer") {
|
||||
hipPointerAttribute_t attributes{};
|
||||
|
||||
@@ -78,8 +78,8 @@ TEST_CASE("Unit_hipStreamPerThread_StreamSynchronize") {
|
||||
constexpr unsigned int MAX_THREAD_CNT = 10;
|
||||
std::vector<std::thread> threads(MAX_THREAD_CNT);
|
||||
|
||||
for (auto &th : threads) {
|
||||
th = std::thread([](){HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));});
|
||||
for (auto& th : threads) {
|
||||
th = std::thread([]() { HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); });
|
||||
}
|
||||
|
||||
for (auto& th : threads) {
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user