/* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* hipMallocArray API test scenarios 1. Basic Functionality 2. Negative Scenarios 3. Allocating Small and big chunk data 4. Multithreaded scenario */ #include static constexpr auto NUM_W{4}; static constexpr auto BIGNUM_W{100}; static constexpr auto BIGNUM_H{100}; static constexpr auto NUM_H{4}; static constexpr auto ARRAY_LOOP{100}; /* * This API verifies memory allocations for small and * bigger chunks of data. * Two scenarios are verified in this API * 1. NUM_W(small Data): Allocates NUM_W*NUM_H in a loop and * releases the memory and verifies the meminfo. * 2. BIGNUM_W(big data): Allocates BIGNUM_W*BIGNUM_H in a loop and * releases the memory and verifies the meminfo * * In both cases, the memory info before allocation and * after releasing the memory should be the same * */ static void MallocArray_DiffSizes(int gpu) { HIP_CHECK(hipSetDevice(gpu)); std::vector array_size; array_size.push_back(NUM_W); array_size.push_back(BIGNUM_W); for (auto& size : array_size) { hipArray* A_d[ARRAY_LOOP]; size_t tot, avail, ptot, pavail; hipChannelFormatDesc desc = hipCreateChannelDesc(); HIP_CHECK(hipMemGetInfo(&pavail, &ptot)); for (int i = 0; i < ARRAY_LOOP; i++) { if (size == NUM_W) { HIP_CHECK(hipMallocArray(&A_d[i], &desc, NUM_W, NUM_H, hipArrayDefault)); } else { HIP_CHECK(hipMallocArray(&A_d[i], &desc, BIGNUM_W, BIGNUM_H, hipArrayDefault)); } } for (int i = 0; i < ARRAY_LOOP; i++) { HIP_CHECK(hipFreeArray(A_d[i])); } HIP_CHECK(hipMemGetInfo(&avail, &tot)); if ((pavail != avail)) { HIPASSERT(false); } } } /* * This testcase verifies the negative scenarios of * hipMallocArray API */ TEST_CASE("Unit_hipMallocArray_Negative") { hipArray* A_d; hipChannelFormatDesc desc = hipCreateChannelDesc(); #if HT_NVIDIA SECTION("NullPointer to Array") { REQUIRE(hipMallocArray(nullptr, &desc, NUM_W, NUM_H, hipArrayDefault) != hipSuccess); } SECTION("NullPointer to Channel Descriptor") { REQUIRE(hipMallocArray(&A_d, nullptr, NUM_W, NUM_H, hipArrayDefault) != hipSuccess); } #endif SECTION("Width 0 in hipMallocArray") { REQUIRE(hipMallocArray(&A_d, &desc, 0, NUM_H, hipArrayDefault) != hipSuccess); } SECTION("Height 0 in hipMallocArray") { REQUIRE(hipMallocArray(&A_d, &desc, NUM_W, 0, hipArrayDefault) == hipSuccess); } SECTION("Invalid Flag") { REQUIRE(hipMallocArray(&A_d, &desc, NUM_W, NUM_H, 100) != hipSuccess); } SECTION("Max int values") { REQUIRE(hipMallocArray(&A_d, &desc, std::numeric_limits::max(), std::numeric_limits::max(), hipArrayDefault) != hipSuccess); } } TEST_CASE("Unit_hipMallocArray_DiffSizes") { MallocArray_DiffSizes(0); } /* This testcase verifies the hipMallocArray API in multithreaded scenario by launching threads in parallel on multiple GPUs and verifies the hipMallocArray API with small and big chunks data */ TEST_CASE("Unit_hipMallocArray_MultiThread") { std::vector threadlist; int devCnt = 0; devCnt = HipTest::getDeviceCount(); size_t tot, avail, ptot, pavail; HIP_CHECK(hipMemGetInfo(&pavail, &ptot)); for (int i = 0; i < devCnt; i++) { threadlist.push_back(std::thread(MallocArray_DiffSizes, i)); } for (auto& t : threadlist) { t.join(); } HIP_CHECK(hipMemGetInfo(&avail, &tot)); if (pavail != avail) { WARN("Memory leak of hipMalloc3D API in multithreaded scenario"); REQUIRE(false); } } constexpr size_t BlockSize = 16; template struct type_and_size { using type = T; static constexpr size_t size = N; }; // scalars are interpreted as a vector of 1 length. // template using int_constant = std::integral_constant; template struct vector_info; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; template <> struct vector_info : type_and_size {}; // Kernels /////////////////////////////////////// // read from a texture using normalized coordinates constexpr size_t ChannelToRead = 1; template __global__ void readFromTexture(T* output, hipTextureObject_t texObj, size_t width, size_t height, bool textureGather) { // Calculate normalized texture coordinates const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; const float u = x / (float)width; // Read from texture and write to global memory if (height == 0) { output[x] = tex1D(texObj, u); } else { const float v = y / (float)height; output[y * width + x] = textureGather ? tex2Dgather(texObj, u, v, ChannelToRead) : tex2D(texObj, u, v); } } template __device__ void addOne(T* a) { using scalar_type = typename vector_info::type; auto as = reinterpret_cast(a); for (size_t i = 0; i < vector_info::size; ++i) { as[i] = as[i] + static_cast(1); } } // read from a surface and write to another template __global__ void incSurface(hipSurfaceObject_t surf, size_t height) { // Calculate surface coordinates unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; if (height == 0) { T data; surf1Dread(&data, surf, x * sizeof(T)); addOne(&data); // change the value to show that write works surf1Dwrite(data, surf, x * sizeof(T)); } else { T data; surf2Dread(&data, surf, x * sizeof(T), y); addOne(&data); // change the value to show that write works surf2Dwrite(data, surf, x * sizeof(T), y); } } // Helpers /////////////////////////////////////// template size_t getAllocSize(const size_t width, const size_t height) noexcept { return sizeof(T) * width * (height ? height : 1); } template void checkDataIsAscending(const std::vector& hostData) { bool allMatch = true; size_t i = 0; for (; i < hostData.size(); ++i) { allMatch = allMatch && hostData[i] == static_cast(i); if (!allMatch) break; } INFO("hostData[" << i << "] == " << static_cast(hostData[i])); REQUIRE(allMatch); } // Tests ///////////////////////////////////////// // Test the default array by generating a texture from it then reading from that texture. // Textures are read-only so write to the array then copy from the texture into normal device memory template void testArrayAsTexture(hipArray_t arrayPtr, const size_t width, const size_t height) { using scalar_type = typename vector_info::type; constexpr auto vec_size = vector_info::size; const auto h = height ? height : 1; const size_t pitch = width * sizeof(T); // no padding const auto size = pitch * h; // create an array to initialize the hip array, then later use it to hold the result std::vector hostData(width * h * vec_size); // Setup backing array // assign ascending values to the data array to show indexing is working. std::iota(std::begin(hostData), std::end(hostData), 0); HIP_CHECK( hipMemcpy2DToArray(arrayPtr, 0, 0, hostData.data(), pitch, pitch, h, hipMemcpyHostToDevice)); // create texture hipTextureObject_t textObj{}; hipResourceDesc resDesc{}; memset(&resDesc, 0, sizeof(hipResourceDesc)); // enum to store how to resDesc.res union is being used resDesc.resType = hipResourceTypeArray; resDesc.res.array.array = arrayPtr; hipTextureDesc textDesc{}; memset(&textDesc, 0, sizeof(hipTextureDesc)); textDesc.filterMode = hipFilterModePoint; // use the actual values in the texture, not normalized data textDesc.readMode = hipReadModeElementType; // don't convert the data to floats textDesc.normalizedCoords = 1; // use normalized coordinates (0.0-1.0) HIP_CHECK(hipCreateTextureObject(&textObj, &resDesc, &textDesc, nullptr)); // run kernel T* device_data{}; HIP_CHECK(hipMalloc(&device_data, size)); readFromTexture<<>>(device_data, textObj, width, height, false); HIP_CHECK(hipGetLastError()); // check for errors when running the kernel // copy data back and then test it std::fill(std::begin(hostData), std::end(hostData), 0); HIP_CHECK(hipMemcpy(hostData.data(), device_data, size, hipMemcpyDeviceToHost)); checkDataIsAscending(hostData); // clean up HIP_CHECK(hipDestroyTextureObject(textObj)); HIP_CHECK(hipFree(device_data)); } // Test the an array created with the SurfaceLoadStore flag by generating a surface and reading from // it and writing to it. template void testArrayAsSurface(hipArray_t arrayPtr, const size_t width, const size_t height) { using scalar_type = typename vector_info::type; constexpr auto vec_size = vector_info::size; const auto h = height ? height : 1; const size_t pitch = width * sizeof(T); // no padding const auto size = pitch * h; std::vector hostData(width * h * vec_size); // Setup backing array // assign ascending values to the data array to show indexing is working. std::iota(std::begin(hostData), std::end(hostData), 0); HIP_CHECK( hipMemcpy2DToArray(arrayPtr, 0, 0, hostData.data(), pitch, pitch, h, hipMemcpyHostToDevice)); // create surface hipSurfaceObject_t surfObj{}; hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(hipResourceDesc)); resDesc.resType = hipResourceTypeArray; resDesc.res.array.array = arrayPtr; HIP_CHECK(hipCreateSurfaceObject(&surfObj, &resDesc)); // run kernel T* device_data{}; HIP_CHECK(hipMalloc(&device_data, size)); // This will increment the values of the surface, so this is undone later incSurface<<>>(surfObj, height); HIP_CHECK(hipGetLastError()); // check for errors when running the kernel // copy data back and then test it std::fill(std::begin(hostData), std::end(hostData), 0); HIP_CHECK(hipMemcpy2DFromArray(hostData.data(), pitch, arrayPtr, 0, 0, pitch, h, hipMemcpyDeviceToHost)); // undo the increment std::for_each(std::begin(hostData), std::end(hostData), [](scalar_type& x) { x -= static_cast(1); }); checkDataIsAscending(hostData); // clean up HIP_CHECK(hipDestroySurfaceObject(surfObj)); HIP_CHECK(hipFree(device_data)); } size_t getFreeMem() { size_t free = 0, total = 0; HIP_CHECK(hipMemGetInfo(&free, &total)); return free; } // The happy path of a default array and a SurfaceLoadStore array should work // Selection of types chosen to reduce compile times TEMPLATE_TEST_CASE("Unit_hipMallocArray_happy", "", uint, int, int4, ushort, short2, char, uchar2, char4, float, float2, float4) { #if HT_AMD HipTest::HIP_SKIP_TEST("EXSWCPHIPT-62"); #endif hipChannelFormatDesc desc = hipCreateChannelDesc(); size_t init_free = getFreeMem(); // pointer to the array in device memory hipArray_t arrayPtr{}; size_t width = 1024; size_t height = GENERATE(0, 1024); SECTION("hipArrayDefault") { INFO("flag is hipArrayDefault"); INFO("height: " << height); HIP_CHECK(hipMallocArray(&arrayPtr, &desc, width, height, hipArrayDefault)); testArrayAsTexture(arrayPtr, width, height); } #if HT_NVIDIA // surfaces and texture gather not supported on AMD SECTION("hipArraySurfaceLoadStore") { INFO("flag is hipArraySurfaceLoadStore"); INFO("height: " << height); HIP_CHECK(hipMallocArray(&arrayPtr, &desc, width, height, hipArraySurfaceLoadStore)); testArrayAsSurface(arrayPtr, width, height); } #endif size_t final_free = getFreeMem(); const size_t alloc_size = getAllocSize(width, height); // alloc will be chunked, so this is not exact REQUIRE(init_free - final_free >= alloc_size); HIP_CHECK(hipFreeArray(arrayPtr)); }