Files

677 lines
42 KiB
C++

/*
* Copyright (C) Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <vector>
static __global__ void hadd_kernel(int* a, int* b, int* res, size_t size) {
int i = threadIdx.x;
if (i < size) {
res[i] = __hadd(a[i], b[i]);
}
}
static __global__ void rhadd_kernel(int* a, int* b, int* res, size_t size) {
int i = threadIdx.x;
if (i < size) {
res[i] = __rhadd(a[i], b[i]);
}
}
static __global__ void uhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) {
int i = threadIdx.x;
if (i < size) {
res[i] = __uhadd(a[i], b[i]);
}
}
static __global__ void urhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) {
int i = threadIdx.x;
if (i < size) {
res[i] = __urhadd(a[i], b[i]);
}
}
static auto get_hadd_inputs() -> std::pair<std::vector<int>, std::vector<int>> {
std::vector<int> a, b;
a.reserve(1024);
b.reserve(1024);
// Underflow
// [{INT_MIN, INT_MIN}, {INT_MIN, INT_MIN + 1}, ...]
for (int i = INT_MIN, count = 0; count < 10; i++, count++) {
a.push_back(INT_MIN);
b.push_back(i);
}
// [{INT_MIN, -10}, {INT_MIN, -9} ... , {INT_MIN, 10}]
for (int i = -10; i <= 10; i++) {
a.push_back(INT_MIN);
b.push_back(i);
}
// [{INT_MAX, -10}, {INT_MAX, -9} ... , {INT_MAX, 10}]
for (int i = -10; i <= 10; i++) {
a.push_back(INT_MAX);
b.push_back(i);
}
// [{INT_MIN, INT_MAX - 10}, {INT_MIN, INT_MAX - 9}, ... , [INT_MIN, INT_MAX]]
for (int count = 10, i = INT_MAX - count; count >= 0; i++, count--) {
a.push_back(INT_MIN);
b.push_back(i);
}
// [{INT_MAX, INT_MIN + 10}, {INT_MAX, INT_MIN + 9}, ... , [INT_MAX, INT_MIN]]
for (int count = 10, i = INT_MIN + count; count >= 0; i++, count--) {
a.push_back(INT_MAX);
b.push_back(i);
}
// [{INT_MAX, INT_MIN}, {INT_MAX - 1, INT_MIN + 1}, ...]
for (int i = INT_MAX, j = INT_MIN, count = 0; count < 10; count++, i--, j++) {
a.push_back(i);
b.push_back(j);
}
// Overflow
// [{INT_MAX, INT_MAX}, {INT_MAX, INT_MAX - 1}, ...]
for (int i = INT_MAX, count = 0; count < 10; count++, i--) {
a.push_back(INT_MAX);
b.push_back(i);
}
// [-10, -10], [-10, -9], ... [10, 10]
for (int i = -10; i <= 10; i++) {
for (int j = -10; j <= 10; j++) {
a.push_back(i);
b.push_back(j);
}
}
return std::make_pair(a, b);
}
static auto get_uadd_inputs() -> std::pair<std::vector<unsigned>, std::vector<unsigned>> {
std::vector<unsigned int> a, b;
a.reserve(1024);
b.reserve(1024);
for (unsigned i = 0; i <= 20; i++) {
for (unsigned j = 0; j <= 20; j++) {
a.push_back(i);
b.push_back(j);
}
}
// Overflow
for (unsigned count_i = 0, i = UINT_MAX; count_i <= 20; count_i++, i--) {
for (unsigned count_j = 0, j = UINT_MAX; count_j <= 20; count_j++, j--) {
a.push_back(i);
b.push_back(j);
}
}
return std::make_pair(a, b);
}
TEST_CASE("Unit_hadd_int_varaint") {
auto [a, b] = get_hadd_inputs();
REQUIRE(a.size() == b.size());
const size_t size = a.size();
REQUIRE(size <= 1024);
// Manually calculated results on both platforms
const std::vector<int> hadd_expected{
-2147483648, -2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645,
-2147483645, -2147483644, -2147483644, -1073741829, -1073741829, -1073741828, -1073741828,
-1073741827, -1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824,
-1073741824, -1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821,
-1073741820, -1073741820, -1073741819, 1073741818, 1073741819, 1073741819, 1073741820,
1073741820, 1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823,
1073741824, 1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827,
1073741827, 1073741828, 1073741828, -6, -5, -5, -4,
-4, -3, -3, -2, -2, -1, -1,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1,
2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644, 2147483644,
2147483643, 2147483643, 2147483642, -10, -10, -9, -9,
-8, -8, -7, -7, -6, -6, -5,
-5, -4, -4, -3, -3, -2, -2,
-1, -1, 0, -10, -9, -9, -8,
-8, -7, -7, -6, -6, -5, -5,
-4, -4, -3, -3, -2, -2, -1,
-1, 0, 0, -9, -9, -8, -8,
-7, -7, -6, -6, -5, -5, -4,
-4, -3, -3, -2, -2, -1, -1,
0, 0, 1, -9, -8, -8, -7,
-7, -6, -6, -5, -5, -4, -4,
-3, -3, -2, -2, -1, -1, 0,
0, 1, 1, -8, -8, -7, -7,
-6, -6, -5, -5, -4, -4, -3,
-3, -2, -2, -1, -1, 0, 0,
1, 1, 2, -8, -7, -7, -6,
-6, -5, -5, -4, -4, -3, -3,
-2, -2, -1, -1, 0, 0, 1,
1, 2, 2, -7, -7, -6, -6,
-5, -5, -4, -4, -3, -3, -2,
-2, -1, -1, 0, 0, 1, 1,
2, 2, 3, -7, -6, -6, -5,
-5, -4, -4, -3, -3, -2, -2,
-1, -1, 0, 0, 1, 1, 2,
2, 3, 3, -6, -6, -5, -5,
-4, -4, -3, -3, -2, -2, -1,
-1, 0, 0, 1, 1, 2, 2,
3, 3, 4, -6, -5, -5, -4,
-4, -3, -3, -2, -2, -1, -1,
0, 0, 1, 1, 2, 2, 3,
3, 4, 4, -5, -5, -4, -4,
-3, -3, -2, -2, -1, -1, 0,
0, 1, 1, 2, 2, 3, 3,
4, 4, 5, -5, -4, -4, -3,
-3, -2, -2, -1, -1, 0, 0,
1, 1, 2, 2, 3, 3, 4,
4, 5, 5, -4, -4, -3, -3,
-2, -2, -1, -1, 0, 0, 1,
1, 2, 2, 3, 3, 4, 4,
5, 5, 6, -4, -3, -3, -2,
-2, -1, -1, 0, 0, 1, 1,
2, 2, 3, 3, 4, 4, 5,
5, 6, 6, -3, -3, -2, -2,
-1, -1, 0, 0, 1, 1, 2,
2, 3, 3, 4, 4, 5, 5,
6, 6, 7, -3, -2, -2, -1,
-1, 0, 0, 1, 1, 2, 2,
3, 3, 4, 4, 5, 5, 6,
6, 7, 7, -2, -2, -1, -1,
0, 0, 1, 1, 2, 2, 3,
3, 4, 4, 5, 5, 6, 6,
7, 7, 8, -2, -1, -1, 0,
0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7,
7, 8, 8, -1, -1, 0, 0,
1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, -1, 0, 0, 1,
1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 0, 0, 1, 1,
2, 2, 3, 3, 4, 4, 5,
5, 6, 6, 7, 7, 8, 8,
9, 9, 10};
int *d_a, *d_b, *d_res;
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
hadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
std::vector<int> gpu_res(size, 0);
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << hadd_expected[i]
<< " got: " << gpu_res[i]);
CHECK(hadd_expected[i] == gpu_res[i]);
}
HIP_CHECK(hipFree(d_a));
HIP_CHECK(hipFree(d_b));
HIP_CHECK(hipFree(d_res));
}
TEST_CASE("Unit_rhadd_int_varaint") {
auto [a, b] = get_hadd_inputs();
REQUIRE(a.size() == b.size());
const size_t size = a.size();
REQUIRE(size <= 1024);
// Manually calculated results on both platforms
const std::vector<int> rhadd_expected{
-2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645, -2147483645,
-2147483644, -2147483644, -2147483643, -1073741829, -1073741828, -1073741828, -1073741827,
-1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824, -1073741824,
-1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821, -1073741820,
-1073741820, -1073741819, -1073741819, 1073741819, 1073741819, 1073741820, 1073741820,
1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823, 1073741824,
1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827, 1073741827,
1073741828, 1073741828, 1073741829, -5, -5, -4, -4,
-3, -3, -2, -2, -1, -1, 0,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
2147483647, 2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644,
2147483644, 2147483643, 2147483643, -10, -9, -9, -8,
-8, -7, -7, -6, -6, -5, -5,
-4, -4, -3, -3, -2, -2, -1,
-1, 0, 0, -9, -9, -8, -8,
-7, -7, -6, -6, -5, -5, -4,
-4, -3, -3, -2, -2, -1, -1,
0, 0, 1, -9, -8, -8, -7,
-7, -6, -6, -5, -5, -4, -4,
-3, -3, -2, -2, -1, -1, 0,
0, 1, 1, -8, -8, -7, -7,
-6, -6, -5, -5, -4, -4, -3,
-3, -2, -2, -1, -1, 0, 0,
1, 1, 2, -8, -7, -7, -6,
-6, -5, -5, -4, -4, -3, -3,
-2, -2, -1, -1, 0, 0, 1,
1, 2, 2, -7, -7, -6, -6,
-5, -5, -4, -4, -3, -3, -2,
-2, -1, -1, 0, 0, 1, 1,
2, 2, 3, -7, -6, -6, -5,
-5, -4, -4, -3, -3, -2, -2,
-1, -1, 0, 0, 1, 1, 2,
2, 3, 3, -6, -6, -5, -5,
-4, -4, -3, -3, -2, -2, -1,
-1, 0, 0, 1, 1, 2, 2,
3, 3, 4, -6, -5, -5, -4,
-4, -3, -3, -2, -2, -1, -1,
0, 0, 1, 1, 2, 2, 3,
3, 4, 4, -5, -5, -4, -4,
-3, -3, -2, -2, -1, -1, 0,
0, 1, 1, 2, 2, 3, 3,
4, 4, 5, -5, -4, -4, -3,
-3, -2, -2, -1, -1, 0, 0,
1, 1, 2, 2, 3, 3, 4,
4, 5, 5, -4, -4, -3, -3,
-2, -2, -1, -1, 0, 0, 1,
1, 2, 2, 3, 3, 4, 4,
5, 5, 6, -4, -3, -3, -2,
-2, -1, -1, 0, 0, 1, 1,
2, 2, 3, 3, 4, 4, 5,
5, 6, 6, -3, -3, -2, -2,
-1, -1, 0, 0, 1, 1, 2,
2, 3, 3, 4, 4, 5, 5,
6, 6, 7, -3, -2, -2, -1,
-1, 0, 0, 1, 1, 2, 2,
3, 3, 4, 4, 5, 5, 6,
6, 7, 7, -2, -2, -1, -1,
0, 0, 1, 1, 2, 2, 3,
3, 4, 4, 5, 5, 6, 6,
7, 7, 8, -2, -1, -1, 0,
0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7,
7, 8, 8, -1, -1, 0, 0,
1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, -1, 0, 0, 1,
1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 0, 0, 1, 1,
2, 2, 3, 3, 4, 4, 5,
5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 0, 1, 1, 2,
2, 3, 3, 4, 4, 5, 5,
6, 6, 7, 7, 8, 8, 9,
9, 10, 10};
int *d_a, *d_b, *d_res;
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
rhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
std::vector<int> gpu_res(size, 0);
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << rhadd_expected[i]
<< " got: " << gpu_res[i]);
CHECK(rhadd_expected[i] == gpu_res[i]);
}
HIP_CHECK(hipFree(d_a));
HIP_CHECK(hipFree(d_b));
HIP_CHECK(hipFree(d_res));
}
TEST_CASE("Unit_uhadd_int_varaint") {
auto [a, b] = get_uadd_inputs();
REQUIRE(a.size() == b.size());
const size_t size = a.size();
REQUIRE(size <= 1024);
// Manually calculated on both platforms
const std::vector<unsigned> uhadd_expected{
0, 0, 1, 1, 2, 2, 3,
3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10,
0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10,
1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11,
1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 10, 11, 11,
2, 2, 3, 3, 4, 4, 5,
5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12,
2, 3, 3, 4, 4, 5, 5,
6, 6, 7, 7, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12,
3, 3, 4, 4, 5, 5, 6,
6, 7, 7, 8, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13,
3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10,
10, 11, 11, 12, 12, 13, 13,
4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10,
11, 11, 12, 12, 13, 13, 14,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11,
11, 12, 12, 13, 13, 14, 14,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13, 14, 14, 15,
5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12,
12, 13, 13, 14, 14, 15, 15,
6, 6, 7, 7, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16,
6, 7, 7, 8, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13,
13, 14, 14, 15, 15, 16, 16,
7, 7, 8, 8, 9, 9, 10,
10, 11, 11, 12, 12, 13, 13,
14, 14, 15, 15, 16, 16, 17,
7, 8, 8, 9, 9, 10, 10,
11, 11, 12, 12, 13, 13, 14,
14, 15, 15, 16, 16, 17, 17,
8, 8, 9, 9, 10, 10, 11,
11, 12, 12, 13, 13, 14, 14,
15, 15, 16, 16, 17, 17, 18,
8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13, 14, 14, 15,
15, 16, 16, 17, 17, 18, 18,
9, 9, 10, 10, 11, 11, 12,
12, 13, 13, 14, 14, 15, 15,
16, 16, 17, 17, 18, 18, 19,
9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16,
16, 17, 17, 18, 18, 19, 19,
10, 10, 11, 11, 12, 12, 13,
13, 14, 14, 15, 15, 16, 16,
17, 17, 18, 18, 19, 19, 20,
4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292,
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291,
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291,
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290,
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290,
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289,
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289,
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278,
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278,
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277,
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277,
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276,
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276,
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275,
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278,
4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275, 4294967275};
unsigned *d_a, *d_b, *d_res;
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
uhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
std::vector<int> gpu_res(size, 0);
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i]
<< " got: " << gpu_res[i]);
CHECK(uhadd_expected[i] == gpu_res[i]);
}
HIP_CHECK(hipFree(d_a));
HIP_CHECK(hipFree(d_b));
HIP_CHECK(hipFree(d_res));
}
TEST_CASE("Unit_urhadd_int_varaint") {
auto [a, b] = get_uadd_inputs();
REQUIRE(a.size() == b.size());
const size_t size = a.size();
REQUIRE(size <= 1024);
// Manually calculated on both platforms
const std::vector<unsigned> uhadd_expected{
0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10,
1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11,
1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 10, 11, 11,
2, 2, 3, 3, 4, 4, 5,
5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12,
2, 3, 3, 4, 4, 5, 5,
6, 6, 7, 7, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12,
3, 3, 4, 4, 5, 5, 6,
6, 7, 7, 8, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13,
3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10,
10, 11, 11, 12, 12, 13, 13,
4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10,
11, 11, 12, 12, 13, 13, 14,
4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11,
11, 12, 12, 13, 13, 14, 14,
5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13, 14, 14, 15,
5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12,
12, 13, 13, 14, 14, 15, 15,
6, 6, 7, 7, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16,
6, 7, 7, 8, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13,
13, 14, 14, 15, 15, 16, 16,
7, 7, 8, 8, 9, 9, 10,
10, 11, 11, 12, 12, 13, 13,
14, 14, 15, 15, 16, 16, 17,
7, 8, 8, 9, 9, 10, 10,
11, 11, 12, 12, 13, 13, 14,
14, 15, 15, 16, 16, 17, 17,
8, 8, 9, 9, 10, 10, 11,
11, 12, 12, 13, 13, 14, 14,
15, 15, 16, 16, 17, 17, 18,
8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13, 14, 14, 15,
15, 16, 16, 17, 17, 18, 18,
9, 9, 10, 10, 11, 11, 12,
12, 13, 13, 14, 14, 15, 15,
16, 16, 17, 17, 18, 18, 19,
9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16,
16, 17, 17, 18, 18, 19, 19,
10, 10, 11, 11, 12, 12, 13,
13, 14, 14, 15, 15, 16, 16,
17, 17, 18, 18, 19, 19, 20,
10, 11, 11, 12, 12, 13, 13,
14, 14, 15, 15, 16, 16, 17,
17, 18, 18, 19, 19, 20, 20,
4294967295, 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292,
4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289,
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292,
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291,
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291,
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290,
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290,
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289,
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289,
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278,
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278,
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277,
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277,
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276,
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276,
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275};
unsigned *d_a, *d_b, *d_res;
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
urhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
std::vector<int> gpu_res(size, 0);
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i]
<< " got: " << gpu_res[i]);
CHECK(uhadd_expected[i] == gpu_res[i]);
}
HIP_CHECK(hipFree(d_a));
HIP_CHECK(hipFree(d_b));
HIP_CHECK(hipFree(d_res));
}