diff --git a/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt b/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt index b43bf7c6d8..37774d4965 100644 --- a/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt @@ -58,6 +58,7 @@ set(TEST_SRC hipTestDeviceLimit.cc hipTestDeviceDouble.cc hipTestHost.cc + hadd.cc ) if(HIP_PLATFORM MATCHES "nvidia") set_source_files_properties(hipTestHost.cc PROPERTIES COMPILE_OPTIONS "--expt-relaxed-constexpr") diff --git a/projects/hip-tests/catch/unit/deviceLib/hadd.cc b/projects/hip-tests/catch/unit/deviceLib/hadd.cc new file mode 100644 index 0000000000..7be1e2af57 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/hadd.cc @@ -0,0 +1,655 @@ +#include +#include + +static __global__ void hadd_kernel(int* a, int* b, int* res, size_t size) { + int i = threadIdx.x; + if (i < size) { + res[i] = __hadd(a[i], b[i]); + } +} + +static __global__ void rhadd_kernel(int* a, int* b, int* res, size_t size) { + int i = threadIdx.x; + if (i < size) { + res[i] = __rhadd(a[i], b[i]); + } +} + +static __global__ void uhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) { + int i = threadIdx.x; + if (i < size) { + res[i] = __uhadd(a[i], b[i]); + } +} + +static __global__ void urhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) { + int i = threadIdx.x; + if (i < size) { + res[i] = __urhadd(a[i], b[i]); + } +} + + +static auto get_hadd_inputs() -> std::pair, std::vector> { + std::vector a, b; + a.reserve(1024); + b.reserve(1024); + // Underflow + // [{INT_MIN, INT_MIN}, {INT_MIN, INT_MIN + 1}, ...] + for (int i = INT_MIN, count = 0; count < 10; i++, count++) { + a.push_back(INT_MIN); + b.push_back(i); + } + + // [{INT_MIN, -10}, {INT_MIN, -9} ... , {INT_MIN, 10}] + for (int i = -10; i <= 10; i++) { + a.push_back(INT_MIN); + b.push_back(i); + } + + // [{INT_MAX, -10}, {INT_MAX, -9} ... , {INT_MAX, 10}] + for (int i = -10; i <= 10; i++) { + a.push_back(INT_MAX); + b.push_back(i); + } + + // [{INT_MIN, INT_MAX - 10}, {INT_MIN, INT_MAX - 9}, ... , [INT_MIN, INT_MAX]] + for (int count = 10, i = INT_MAX - count; count >= 0; i++, count--) { + a.push_back(INT_MIN); + b.push_back(i); + } + + // [{INT_MAX, INT_MIN + 10}, {INT_MAX, INT_MIN + 9}, ... , [INT_MAX, INT_MIN]] + for (int count = 10, i = INT_MIN + count; count >= 0; i++, count--) { + a.push_back(INT_MAX); + b.push_back(i); + } + + // [{INT_MAX, INT_MIN}, {INT_MAX - 1, INT_MIN + 1}, ...] + for (int i = INT_MAX, j = INT_MIN, count = 0; count < 10; count++, i--, j++) { + a.push_back(i); + b.push_back(j); + } + + // Overflow + // [{INT_MAX, INT_MAX}, {INT_MAX, INT_MAX - 1}, ...] + for (int i = INT_MAX, count = 0; count < 10; count++, i--) { + a.push_back(INT_MAX); + b.push_back(i); + } + + // [-10, -10], [-10, -9], ... [10, 10] + for (int i = -10; i <= 10; i++) { + for (int j = -10; j <= 10; j++) { + a.push_back(i); + b.push_back(j); + } + } + + return std::make_pair(a, b); +} + +static auto get_uadd_inputs() -> std::pair, std::vector> { + std::vector a, b; + a.reserve(1024); + b.reserve(1024); + + for (unsigned i = 0; i <= 20; i++) { + for (unsigned j = 0; j <= 20; j++) { + a.push_back(i); + b.push_back(j); + } + } + + // Overflow + for (unsigned count_i = 0, i = UINT_MAX; count_i <= 20; count_i++, i--) { + for (unsigned count_j = 0, j = UINT_MAX; count_j <= 20; count_j++, j--) { + a.push_back(i); + b.push_back(j); + } + } + + return std::make_pair(a, b); +} + +TEST_CASE("Unit_hadd_int_varaint") { + auto [a, b] = get_hadd_inputs(); + + REQUIRE(a.size() == b.size()); + const size_t size = a.size(); + REQUIRE(size <= 1024); + + // Manually calculated results on both platforms + const std::vector hadd_expected{ + -2147483648, -2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645, + -2147483645, -2147483644, -2147483644, -1073741829, -1073741829, -1073741828, -1073741828, + -1073741827, -1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824, + -1073741824, -1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821, + -1073741820, -1073741820, -1073741819, 1073741818, 1073741819, 1073741819, 1073741820, + 1073741820, 1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823, + 1073741824, 1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827, + 1073741827, 1073741828, 1073741828, -6, -5, -5, -4, + -4, -3, -3, -2, -2, -1, -1, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + 2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644, 2147483644, + 2147483643, 2147483643, 2147483642, -10, -10, -9, -9, + -8, -8, -7, -7, -6, -6, -5, + -5, -4, -4, -3, -3, -2, -2, + -1, -1, 0, -10, -9, -9, -8, + -8, -7, -7, -6, -6, -5, -5, + -4, -4, -3, -3, -2, -2, -1, + -1, 0, 0, -9, -9, -8, -8, + -7, -7, -6, -6, -5, -5, -4, + -4, -3, -3, -2, -2, -1, -1, + 0, 0, 1, -9, -8, -8, -7, + -7, -6, -6, -5, -5, -4, -4, + -3, -3, -2, -2, -1, -1, 0, + 0, 1, 1, -8, -8, -7, -7, + -6, -6, -5, -5, -4, -4, -3, + -3, -2, -2, -1, -1, 0, 0, + 1, 1, 2, -8, -7, -7, -6, + -6, -5, -5, -4, -4, -3, -3, + -2, -2, -1, -1, 0, 0, 1, + 1, 2, 2, -7, -7, -6, -6, + -5, -5, -4, -4, -3, -3, -2, + -2, -1, -1, 0, 0, 1, 1, + 2, 2, 3, -7, -6, -6, -5, + -5, -4, -4, -3, -3, -2, -2, + -1, -1, 0, 0, 1, 1, 2, + 2, 3, 3, -6, -6, -5, -5, + -4, -4, -3, -3, -2, -2, -1, + -1, 0, 0, 1, 1, 2, 2, + 3, 3, 4, -6, -5, -5, -4, + -4, -3, -3, -2, -2, -1, -1, + 0, 0, 1, 1, 2, 2, 3, + 3, 4, 4, -5, -5, -4, -4, + -3, -3, -2, -2, -1, -1, 0, + 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, -5, -4, -4, -3, + -3, -2, -2, -1, -1, 0, 0, + 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, -4, -4, -3, -3, + -2, -2, -1, -1, 0, 0, 1, + 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, -4, -3, -3, -2, + -2, -1, -1, 0, 0, 1, 1, + 2, 2, 3, 3, 4, 4, 5, + 5, 6, 6, -3, -3, -2, -2, + -1, -1, 0, 0, 1, 1, 2, + 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, -3, -2, -2, -1, + -1, 0, 0, 1, 1, 2, 2, + 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, -2, -2, -1, -1, + 0, 0, 1, 1, 2, 2, 3, + 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, -2, -1, -1, 0, + 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, -1, -1, 0, 0, + 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, -1, 0, 0, 1, + 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 0, 0, 1, 1, + 2, 2, 3, 3, 4, 4, 5, + 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10}; + + int *d_a, *d_b, *d_res; + HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); + + HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + + hadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); + std::vector gpu_res(size, 0); + + HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < size; i++) { + INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << hadd_expected[i] + << " got: " << gpu_res[i]); + CHECK(hadd_expected[i] == gpu_res[i]); + } + + HIP_CHECK(hipFree(d_a)); + HIP_CHECK(hipFree(d_b)); + HIP_CHECK(hipFree(d_res)); +} + +TEST_CASE("Unit_rhadd_int_varaint") { + auto [a, b] = get_hadd_inputs(); + + REQUIRE(a.size() == b.size()); + const size_t size = a.size(); + REQUIRE(size <= 1024); + + // Manually calculated results on both platforms + const std::vector rhadd_expected{ + -2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645, -2147483645, + -2147483644, -2147483644, -2147483643, -1073741829, -1073741828, -1073741828, -1073741827, + -1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824, -1073741824, + -1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821, -1073741820, + -1073741820, -1073741819, -1073741819, 1073741819, 1073741819, 1073741820, 1073741820, + 1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823, 1073741824, + 1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827, 1073741827, + 1073741828, 1073741828, 1073741829, -5, -5, -4, -4, + -3, -3, -2, -2, -1, -1, 0, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 2147483647, 2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644, + 2147483644, 2147483643, 2147483643, -10, -9, -9, -8, + -8, -7, -7, -6, -6, -5, -5, + -4, -4, -3, -3, -2, -2, -1, + -1, 0, 0, -9, -9, -8, -8, + -7, -7, -6, -6, -5, -5, -4, + -4, -3, -3, -2, -2, -1, -1, + 0, 0, 1, -9, -8, -8, -7, + -7, -6, -6, -5, -5, -4, -4, + -3, -3, -2, -2, -1, -1, 0, + 0, 1, 1, -8, -8, -7, -7, + -6, -6, -5, -5, -4, -4, -3, + -3, -2, -2, -1, -1, 0, 0, + 1, 1, 2, -8, -7, -7, -6, + -6, -5, -5, -4, -4, -3, -3, + -2, -2, -1, -1, 0, 0, 1, + 1, 2, 2, -7, -7, -6, -6, + -5, -5, -4, -4, -3, -3, -2, + -2, -1, -1, 0, 0, 1, 1, + 2, 2, 3, -7, -6, -6, -5, + -5, -4, -4, -3, -3, -2, -2, + -1, -1, 0, 0, 1, 1, 2, + 2, 3, 3, -6, -6, -5, -5, + -4, -4, -3, -3, -2, -2, -1, + -1, 0, 0, 1, 1, 2, 2, + 3, 3, 4, -6, -5, -5, -4, + -4, -3, -3, -2, -2, -1, -1, + 0, 0, 1, 1, 2, 2, 3, + 3, 4, 4, -5, -5, -4, -4, + -3, -3, -2, -2, -1, -1, 0, + 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, -5, -4, -4, -3, + -3, -2, -2, -1, -1, 0, 0, + 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, -4, -4, -3, -3, + -2, -2, -1, -1, 0, 0, 1, + 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, -4, -3, -3, -2, + -2, -1, -1, 0, 0, 1, 1, + 2, 2, 3, 3, 4, 4, 5, + 5, 6, 6, -3, -3, -2, -2, + -1, -1, 0, 0, 1, 1, 2, + 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, -3, -2, -2, -1, + -1, 0, 0, 1, 1, 2, 2, + 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, -2, -2, -1, -1, + 0, 0, 1, 1, 2, 2, 3, + 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, -2, -1, -1, 0, + 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, -1, -1, 0, 0, + 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, -1, 0, 0, 1, + 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 0, 0, 1, 1, + 2, 2, 3, 3, 4, 4, 5, + 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 0, 1, 1, 2, + 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, 7, 8, 8, 9, + 9, 10, 10}; + + int *d_a, *d_b, *d_res; + HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); + + HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + + rhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); + std::vector gpu_res(size, 0); + + HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < size; i++) { + INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << rhadd_expected[i] + << " got: " << gpu_res[i]); + CHECK(rhadd_expected[i] == gpu_res[i]); + } + + HIP_CHECK(hipFree(d_a)); + HIP_CHECK(hipFree(d_b)); + HIP_CHECK(hipFree(d_res)); +} + +TEST_CASE("Unit_uhadd_int_varaint") { + auto [a, b] = get_uadd_inputs(); + + REQUIRE(a.size() == b.size()); + const size_t size = a.size(); + REQUIRE(size <= 1024); + + // Manually calculated on both platforms + const std::vector uhadd_expected{ + 0, 0, 1, 1, 2, 2, 3, + 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, + 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, + 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, + 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, + 2, 2, 3, 3, 4, 4, 5, + 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, + 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, 7, 8, 8, 9, + 9, 10, 10, 11, 11, 12, 12, + 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, + 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, + 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, + 11, 12, 12, 13, 13, 14, 14, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, + 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, + 12, 13, 13, 14, 14, 15, 15, + 6, 6, 7, 7, 8, 8, 9, + 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, + 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, + 13, 14, 14, 15, 15, 16, 16, + 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, + 14, 14, 15, 15, 16, 16, 17, + 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, + 14, 15, 15, 16, 16, 17, 17, + 8, 8, 9, 9, 10, 10, 11, + 11, 12, 12, 13, 13, 14, 14, + 15, 15, 16, 16, 17, 17, 18, + 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, + 15, 16, 16, 17, 17, 18, 18, + 9, 9, 10, 10, 11, 11, 12, + 12, 13, 13, 14, 14, 15, 15, + 16, 16, 17, 17, 18, 18, 19, + 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, + 16, 17, 17, 18, 18, 19, 19, + 10, 10, 11, 11, 12, 12, 13, + 13, 14, 14, 15, 15, 16, 16, + 17, 17, 18, 18, 19, 19, 20, + 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, + 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, + 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, + 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, + 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, + 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, + 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, + 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, + 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, + 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, + 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, + 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, + 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, + 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, + 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, + 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, + 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, + 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, + 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, + 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, + 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, + 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, + 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, + 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, + 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, + 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, + 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, + 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, + 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, + 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, + 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, + 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, + 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, + 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, + 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, + 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, + 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, + 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, + 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, + 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, + 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, + 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, + 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, + 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, + 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, + 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, + 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, + 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, + 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, + 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, + 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, + 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, + 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, + 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, + 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, + 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, + 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, + 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, + 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, + 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275, + 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, + 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, + 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275, 4294967275}; + + unsigned *d_a, *d_b, *d_res; + HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); + + HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + + uhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); + std::vector gpu_res(size, 0); + + HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < size; i++) { + INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i] + << " got: " << gpu_res[i]); + CHECK(uhadd_expected[i] == gpu_res[i]); + } + + HIP_CHECK(hipFree(d_a)); + HIP_CHECK(hipFree(d_b)); + HIP_CHECK(hipFree(d_res)); +} + +TEST_CASE("Unit_urhadd_int_varaint") { + auto [a, b] = get_uadd_inputs(); + + REQUIRE(a.size() == b.size()); + const size_t size = a.size(); + REQUIRE(size <= 1024); + + // Manually calculated on both platforms + const std::vector uhadd_expected{ + 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, + 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, + 1, 2, 2, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, + 2, 2, 3, 3, 4, 4, 5, + 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, + 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, 7, 8, 8, 9, + 9, 10, 10, 11, 11, 12, 12, + 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, + 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, + 4, 4, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, + 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, + 11, 12, 12, 13, 13, 14, 14, + 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, + 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, + 12, 13, 13, 14, 14, 15, 15, + 6, 6, 7, 7, 8, 8, 9, + 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, + 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, + 13, 14, 14, 15, 15, 16, 16, + 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, + 14, 14, 15, 15, 16, 16, 17, + 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, + 14, 15, 15, 16, 16, 17, 17, + 8, 8, 9, 9, 10, 10, 11, + 11, 12, 12, 13, 13, 14, 14, + 15, 15, 16, 16, 17, 17, 18, + 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, + 15, 16, 16, 17, 17, 18, 18, + 9, 9, 10, 10, 11, 11, 12, + 12, 13, 13, 14, 14, 15, 15, + 16, 16, 17, 17, 18, 18, 19, + 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, + 16, 17, 17, 18, 18, 19, 19, + 10, 10, 11, 11, 12, 12, 13, + 13, 14, 14, 15, 15, 16, 16, + 17, 17, 18, 18, 19, 19, 20, + 10, 11, 11, 12, 12, 13, 13, + 14, 14, 15, 15, 16, 16, 17, + 17, 18, 18, 19, 19, 20, 20, + 4294967295, 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, + 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, + 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, + 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, + 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, + 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, + 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, + 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, + 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, + 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, + 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, + 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, + 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, + 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, + 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, + 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, + 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, + 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, + 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, + 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, + 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, + 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, + 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, + 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, + 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, + 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, + 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, + 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, + 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, + 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, + 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, + 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, + 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, + 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, + 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, + 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, + 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, + 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, + 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, + 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, + 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, + 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, + 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, + 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, + 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, + 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, + 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, + 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, + 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, + 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, + 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, + 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, + 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, + 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, + 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, + 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, + 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, + 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, + 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, + 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, + 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, + 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, + 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275}; + + unsigned *d_a, *d_b, *d_res; + HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size)); + HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size)); + + HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice)); + + urhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size); + std::vector gpu_res(size, 0); + + HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < size; i++) { + INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i] + << " got: " << gpu_res[i]); + CHECK(uhadd_expected[i] == gpu_res[i]); + } + + HIP_CHECK(hipFree(d_a)); + HIP_CHECK(hipFree(d_b)); + HIP_CHECK(hipFree(d_res)); +}