/* * Copyright (C) Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include static __global__ void hadd_kernel(int* a, int* b, int* res, size_t size) { int i = threadIdx.x; if (i < size) { res[i] = __hadd(a[i], b[i]); } } static __global__ void rhadd_kernel(int* a, int* b, int* res, size_t size) { int i = threadIdx.x; if (i < size) { res[i] = __rhadd(a[i], b[i]); } } static __global__ void uhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) { int i = threadIdx.x; if (i < size) { res[i] = __uhadd(a[i], b[i]); } } static __global__ void urhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) { int i = threadIdx.x; if (i < size) { res[i] = __urhadd(a[i], b[i]); } } static auto get_hadd_inputs() -> std::pair, std::vector> { std::vector a, b; a.reserve(1024); b.reserve(1024); // Underflow // [{INT_MIN, INT_MIN}, {INT_MIN, INT_MIN + 1}, ...] for (int i = INT_MIN, count = 0; count < 10; i++, count++) { a.push_back(INT_MIN); b.push_back(i); } // [{INT_MIN, -10}, {INT_MIN, -9} ... , {INT_MIN, 10}] for (int i = -10; i <= 10; i++) { a.push_back(INT_MIN); b.push_back(i); } // [{INT_MAX, -10}, {INT_MAX, -9} ... , {INT_MAX, 10}] for (int i = -10; i <= 10; i++) { a.push_back(INT_MAX); b.push_back(i); } // [{INT_MIN, INT_MAX - 10}, {INT_MIN, INT_MAX - 9}, ... , [INT_MIN, INT_MAX]] for (int count = 10, i = INT_MAX - count; count >= 0; i++, count--) { a.push_back(INT_MIN); b.push_back(i); } // [{INT_MAX, INT_MIN + 10}, {INT_MAX, INT_MIN + 9}, ... , [INT_MAX, INT_MIN]] for (int count = 10, i = INT_MIN + count; count >= 0; i++, count--) { a.push_back(INT_MAX); b.push_back(i); } // [{INT_MAX, INT_MIN}, {INT_MAX - 1, INT_MIN + 1}, ...] for (int i = INT_MAX, j = INT_MIN, count = 0; count < 10; count++, i--, j++) { a.push_back(i); b.push_back(j); } // Overflow // [{INT_MAX, INT_MAX}, {INT_MAX, INT_MAX - 1}, ...] for (int i = INT_MAX, count = 0; count < 10; count++, i--) { a.push_back(INT_MAX); b.push_back(i); } // [-10, -10], [-10, -9], ... [10, 10] for (int i = -10; i <= 10; i++) { for (int j = -10; j <= 10; j++) { a.push_back(i); b.push_back(j); } } return std::make_pair(a, b); } static auto get_uadd_inputs() -> std::pair, std::vector> { std::vector a, b; a.reserve(1024); b.reserve(1024); for (unsigned i = 0; i <= 20; i++) { for (unsigned j = 0; j <= 20; j++) { a.push_back(i); b.push_back(j); } } // Overflow for (unsigned count_i = 0, i = UINT_MAX; count_i <= 20; count_i++, i--) { for (unsigned count_j = 0, j = UINT_MAX; count_j <= 20; count_j++, j--) { a.push_back(i); b.push_back(j); } } return std::make_pair(a, b); } TEST_CASE("Unit_hadd_int_varaint") { auto [a, b] = get_hadd_inputs(); REQUIRE(a.size() == b.size()); const size_t size = a.size(); REQUIRE(size <= 1024); // Manually calculated results on both platforms const std::vector hadd_expected{ -2147483648, -2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645, -2147483645, -2147483644, -2147483644, -1073741829, -1073741829, -1073741828, -1073741828, -1073741827, -1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824, -1073741824, -1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821, -1073741820, -1073741820, -1073741819, 1073741818, 1073741819, 1073741819, 1073741820, 1073741820, 1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823, 1073741824, 1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827, 1073741827, 1073741828, 1073741828, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644, 2147483644, 2147483643, 2147483643, 2147483642, -10, -10, -9, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, -10, -9, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, -9, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10}; int *d_a, *d_b, *d_res; HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); hadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); std::vector gpu_res(size, 0); HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); for (size_t i = 0; i < size; i++) { INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << hadd_expected[i] << " got: " << gpu_res[i]); CHECK(hadd_expected[i] == gpu_res[i]); } HIP_CHECK(hipFree(d_a)); HIP_CHECK(hipFree(d_b)); HIP_CHECK(hipFree(d_res)); } TEST_CASE("Unit_rhadd_int_varaint") { auto [a, b] = get_hadd_inputs(); REQUIRE(a.size() == b.size()); const size_t size = a.size(); REQUIRE(size <= 1024); // Manually calculated results on both platforms const std::vector rhadd_expected{ -2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645, -2147483645, -2147483644, -2147483644, -2147483643, -1073741829, -1073741828, -1073741828, -1073741827, -1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824, -1073741824, -1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821, -1073741820, -1073741820, -1073741819, -1073741819, 1073741819, 1073741819, 1073741820, 1073741820, 1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823, 1073741824, 1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827, 1073741827, 1073741828, 1073741828, 1073741829, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2147483647, 2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644, 2147483644, 2147483643, 2147483643, -10, -9, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, -9, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, -9, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, -8, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, -4, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, -3, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, -3, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, -1, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, -1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10}; int *d_a, *d_b, *d_res; HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); rhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); std::vector gpu_res(size, 0); HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); for (size_t i = 0; i < size; i++) { INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << rhadd_expected[i] << " got: " << gpu_res[i]); CHECK(rhadd_expected[i] == gpu_res[i]); } HIP_CHECK(hipFree(d_a)); HIP_CHECK(hipFree(d_b)); HIP_CHECK(hipFree(d_res)); } TEST_CASE("Unit_uhadd_int_varaint") { auto [a, b] = get_uadd_inputs(); REQUIRE(a.size() == b.size()); const size_t size = a.size(); REQUIRE(size <= 1024); // Manually calculated on both platforms const std::vector uhadd_expected{ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275, 4294967275}; unsigned *d_a, *d_b, *d_res; HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); uhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); std::vector gpu_res(size, 0); HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); for (size_t i = 0; i < size; i++) { INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i] << " got: " << gpu_res[i]); CHECK(uhadd_expected[i] == gpu_res[i]); } HIP_CHECK(hipFree(d_a)); HIP_CHECK(hipFree(d_b)); HIP_CHECK(hipFree(d_res)); } TEST_CASE("Unit_urhadd_int_varaint") { auto [a, b] = get_uadd_inputs(); REQUIRE(a.size() == b.size()); const size_t size = a.size(); REQUIRE(size <= 1024); // Manually calculated on both platforms const std::vector uhadd_expected{ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 4294967295, 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275}; unsigned *d_a, *d_b, *d_res; HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); urhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); std::vector gpu_res(size, 0); HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); for (size_t i = 0; i < size; i++) { INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i] << " got: " << gpu_res[i]); CHECK(uhadd_expected[i] == gpu_res[i]); } HIP_CHECK(hipFree(d_a)); HIP_CHECK(hipFree(d_b)); HIP_CHECK(hipFree(d_res)); }