677 lines
42 KiB
C++
677 lines
42 KiB
C++
/*
|
|
* Copyright (C) Advanced Micro Devices, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
|
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <hip_test_common.hh>
|
|
#include <vector>
|
|
|
|
static __global__ void hadd_kernel(int* a, int* b, int* res, size_t size) {
|
|
int i = threadIdx.x;
|
|
if (i < size) {
|
|
res[i] = __hadd(a[i], b[i]);
|
|
}
|
|
}
|
|
|
|
static __global__ void rhadd_kernel(int* a, int* b, int* res, size_t size) {
|
|
int i = threadIdx.x;
|
|
if (i < size) {
|
|
res[i] = __rhadd(a[i], b[i]);
|
|
}
|
|
}
|
|
|
|
static __global__ void uhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) {
|
|
int i = threadIdx.x;
|
|
if (i < size) {
|
|
res[i] = __uhadd(a[i], b[i]);
|
|
}
|
|
}
|
|
|
|
static __global__ void urhadd_kernel(unsigned* a, unsigned* b, unsigned* res, size_t size) {
|
|
int i = threadIdx.x;
|
|
if (i < size) {
|
|
res[i] = __urhadd(a[i], b[i]);
|
|
}
|
|
}
|
|
|
|
|
|
static auto get_hadd_inputs() -> std::pair<std::vector<int>, std::vector<int>> {
|
|
std::vector<int> a, b;
|
|
a.reserve(1024);
|
|
b.reserve(1024);
|
|
// Underflow
|
|
// [{INT_MIN, INT_MIN}, {INT_MIN, INT_MIN + 1}, ...]
|
|
for (int i = INT_MIN, count = 0; count < 10; i++, count++) {
|
|
a.push_back(INT_MIN);
|
|
b.push_back(i);
|
|
}
|
|
|
|
// [{INT_MIN, -10}, {INT_MIN, -9} ... , {INT_MIN, 10}]
|
|
for (int i = -10; i <= 10; i++) {
|
|
a.push_back(INT_MIN);
|
|
b.push_back(i);
|
|
}
|
|
|
|
// [{INT_MAX, -10}, {INT_MAX, -9} ... , {INT_MAX, 10}]
|
|
for (int i = -10; i <= 10; i++) {
|
|
a.push_back(INT_MAX);
|
|
b.push_back(i);
|
|
}
|
|
|
|
// [{INT_MIN, INT_MAX - 10}, {INT_MIN, INT_MAX - 9}, ... , [INT_MIN, INT_MAX]]
|
|
for (int count = 10, i = INT_MAX - count; count >= 0; i++, count--) {
|
|
a.push_back(INT_MIN);
|
|
b.push_back(i);
|
|
}
|
|
|
|
// [{INT_MAX, INT_MIN + 10}, {INT_MAX, INT_MIN + 9}, ... , [INT_MAX, INT_MIN]]
|
|
for (int count = 10, i = INT_MIN + count; count >= 0; i++, count--) {
|
|
a.push_back(INT_MAX);
|
|
b.push_back(i);
|
|
}
|
|
|
|
// [{INT_MAX, INT_MIN}, {INT_MAX - 1, INT_MIN + 1}, ...]
|
|
for (int i = INT_MAX, j = INT_MIN, count = 0; count < 10; count++, i--, j++) {
|
|
a.push_back(i);
|
|
b.push_back(j);
|
|
}
|
|
|
|
// Overflow
|
|
// [{INT_MAX, INT_MAX}, {INT_MAX, INT_MAX - 1}, ...]
|
|
for (int i = INT_MAX, count = 0; count < 10; count++, i--) {
|
|
a.push_back(INT_MAX);
|
|
b.push_back(i);
|
|
}
|
|
|
|
// [-10, -10], [-10, -9], ... [10, 10]
|
|
for (int i = -10; i <= 10; i++) {
|
|
for (int j = -10; j <= 10; j++) {
|
|
a.push_back(i);
|
|
b.push_back(j);
|
|
}
|
|
}
|
|
|
|
return std::make_pair(a, b);
|
|
}
|
|
|
|
static auto get_uadd_inputs() -> std::pair<std::vector<unsigned>, std::vector<unsigned>> {
|
|
std::vector<unsigned int> a, b;
|
|
a.reserve(1024);
|
|
b.reserve(1024);
|
|
|
|
for (unsigned i = 0; i <= 20; i++) {
|
|
for (unsigned j = 0; j <= 20; j++) {
|
|
a.push_back(i);
|
|
b.push_back(j);
|
|
}
|
|
}
|
|
|
|
// Overflow
|
|
for (unsigned count_i = 0, i = UINT_MAX; count_i <= 20; count_i++, i--) {
|
|
for (unsigned count_j = 0, j = UINT_MAX; count_j <= 20; count_j++, j--) {
|
|
a.push_back(i);
|
|
b.push_back(j);
|
|
}
|
|
}
|
|
|
|
return std::make_pair(a, b);
|
|
}
|
|
|
|
TEST_CASE("Unit_hadd_int_varaint") {
|
|
auto [a, b] = get_hadd_inputs();
|
|
|
|
REQUIRE(a.size() == b.size());
|
|
const size_t size = a.size();
|
|
REQUIRE(size <= 1024);
|
|
|
|
// Manually calculated results on both platforms
|
|
const std::vector<int> hadd_expected{
|
|
-2147483648, -2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645,
|
|
-2147483645, -2147483644, -2147483644, -1073741829, -1073741829, -1073741828, -1073741828,
|
|
-1073741827, -1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824,
|
|
-1073741824, -1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821,
|
|
-1073741820, -1073741820, -1073741819, 1073741818, 1073741819, 1073741819, 1073741820,
|
|
1073741820, 1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823,
|
|
1073741824, 1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827,
|
|
1073741827, 1073741828, 1073741828, -6, -5, -5, -4,
|
|
-4, -3, -3, -2, -2, -1, -1,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, 9, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644, 2147483644,
|
|
2147483643, 2147483643, 2147483642, -10, -10, -9, -9,
|
|
-8, -8, -7, -7, -6, -6, -5,
|
|
-5, -4, -4, -3, -3, -2, -2,
|
|
-1, -1, 0, -10, -9, -9, -8,
|
|
-8, -7, -7, -6, -6, -5, -5,
|
|
-4, -4, -3, -3, -2, -2, -1,
|
|
-1, 0, 0, -9, -9, -8, -8,
|
|
-7, -7, -6, -6, -5, -5, -4,
|
|
-4, -3, -3, -2, -2, -1, -1,
|
|
0, 0, 1, -9, -8, -8, -7,
|
|
-7, -6, -6, -5, -5, -4, -4,
|
|
-3, -3, -2, -2, -1, -1, 0,
|
|
0, 1, 1, -8, -8, -7, -7,
|
|
-6, -6, -5, -5, -4, -4, -3,
|
|
-3, -2, -2, -1, -1, 0, 0,
|
|
1, 1, 2, -8, -7, -7, -6,
|
|
-6, -5, -5, -4, -4, -3, -3,
|
|
-2, -2, -1, -1, 0, 0, 1,
|
|
1, 2, 2, -7, -7, -6, -6,
|
|
-5, -5, -4, -4, -3, -3, -2,
|
|
-2, -1, -1, 0, 0, 1, 1,
|
|
2, 2, 3, -7, -6, -6, -5,
|
|
-5, -4, -4, -3, -3, -2, -2,
|
|
-1, -1, 0, 0, 1, 1, 2,
|
|
2, 3, 3, -6, -6, -5, -5,
|
|
-4, -4, -3, -3, -2, -2, -1,
|
|
-1, 0, 0, 1, 1, 2, 2,
|
|
3, 3, 4, -6, -5, -5, -4,
|
|
-4, -3, -3, -2, -2, -1, -1,
|
|
0, 0, 1, 1, 2, 2, 3,
|
|
3, 4, 4, -5, -5, -4, -4,
|
|
-3, -3, -2, -2, -1, -1, 0,
|
|
0, 1, 1, 2, 2, 3, 3,
|
|
4, 4, 5, -5, -4, -4, -3,
|
|
-3, -2, -2, -1, -1, 0, 0,
|
|
1, 1, 2, 2, 3, 3, 4,
|
|
4, 5, 5, -4, -4, -3, -3,
|
|
-2, -2, -1, -1, 0, 0, 1,
|
|
1, 2, 2, 3, 3, 4, 4,
|
|
5, 5, 6, -4, -3, -3, -2,
|
|
-2, -1, -1, 0, 0, 1, 1,
|
|
2, 2, 3, 3, 4, 4, 5,
|
|
5, 6, 6, -3, -3, -2, -2,
|
|
-1, -1, 0, 0, 1, 1, 2,
|
|
2, 3, 3, 4, 4, 5, 5,
|
|
6, 6, 7, -3, -2, -2, -1,
|
|
-1, 0, 0, 1, 1, 2, 2,
|
|
3, 3, 4, 4, 5, 5, 6,
|
|
6, 7, 7, -2, -2, -1, -1,
|
|
0, 0, 1, 1, 2, 2, 3,
|
|
3, 4, 4, 5, 5, 6, 6,
|
|
7, 7, 8, -2, -1, -1, 0,
|
|
0, 1, 1, 2, 2, 3, 3,
|
|
4, 4, 5, 5, 6, 6, 7,
|
|
7, 8, 8, -1, -1, 0, 0,
|
|
1, 1, 2, 2, 3, 3, 4,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, -1, 0, 0, 1,
|
|
1, 2, 2, 3, 3, 4, 4,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 0, 0, 1, 1,
|
|
2, 2, 3, 3, 4, 4, 5,
|
|
5, 6, 6, 7, 7, 8, 8,
|
|
9, 9, 10};
|
|
|
|
int *d_a, *d_b, *d_res;
|
|
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
|
|
|
|
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
|
|
hadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
|
|
std::vector<int> gpu_res(size, 0);
|
|
|
|
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << hadd_expected[i]
|
|
<< " got: " << gpu_res[i]);
|
|
CHECK(hadd_expected[i] == gpu_res[i]);
|
|
}
|
|
|
|
HIP_CHECK(hipFree(d_a));
|
|
HIP_CHECK(hipFree(d_b));
|
|
HIP_CHECK(hipFree(d_res));
|
|
}
|
|
|
|
TEST_CASE("Unit_rhadd_int_varaint") {
|
|
auto [a, b] = get_hadd_inputs();
|
|
|
|
REQUIRE(a.size() == b.size());
|
|
const size_t size = a.size();
|
|
REQUIRE(size <= 1024);
|
|
|
|
// Manually calculated results on both platforms
|
|
const std::vector<int> rhadd_expected{
|
|
-2147483648, -2147483647, -2147483647, -2147483646, -2147483646, -2147483645, -2147483645,
|
|
-2147483644, -2147483644, -2147483643, -1073741829, -1073741828, -1073741828, -1073741827,
|
|
-1073741827, -1073741826, -1073741826, -1073741825, -1073741825, -1073741824, -1073741824,
|
|
-1073741823, -1073741823, -1073741822, -1073741822, -1073741821, -1073741821, -1073741820,
|
|
-1073741820, -1073741819, -1073741819, 1073741819, 1073741819, 1073741820, 1073741820,
|
|
1073741821, 1073741821, 1073741822, 1073741822, 1073741823, 1073741823, 1073741824,
|
|
1073741824, 1073741825, 1073741825, 1073741826, 1073741826, 1073741827, 1073741827,
|
|
1073741828, 1073741828, 1073741829, -5, -5, -4, -4,
|
|
-3, -3, -2, -2, -1, -1, 0,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 10, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0,
|
|
2147483647, 2147483647, 2147483646, 2147483646, 2147483645, 2147483645, 2147483644,
|
|
2147483644, 2147483643, 2147483643, -10, -9, -9, -8,
|
|
-8, -7, -7, -6, -6, -5, -5,
|
|
-4, -4, -3, -3, -2, -2, -1,
|
|
-1, 0, 0, -9, -9, -8, -8,
|
|
-7, -7, -6, -6, -5, -5, -4,
|
|
-4, -3, -3, -2, -2, -1, -1,
|
|
0, 0, 1, -9, -8, -8, -7,
|
|
-7, -6, -6, -5, -5, -4, -4,
|
|
-3, -3, -2, -2, -1, -1, 0,
|
|
0, 1, 1, -8, -8, -7, -7,
|
|
-6, -6, -5, -5, -4, -4, -3,
|
|
-3, -2, -2, -1, -1, 0, 0,
|
|
1, 1, 2, -8, -7, -7, -6,
|
|
-6, -5, -5, -4, -4, -3, -3,
|
|
-2, -2, -1, -1, 0, 0, 1,
|
|
1, 2, 2, -7, -7, -6, -6,
|
|
-5, -5, -4, -4, -3, -3, -2,
|
|
-2, -1, -1, 0, 0, 1, 1,
|
|
2, 2, 3, -7, -6, -6, -5,
|
|
-5, -4, -4, -3, -3, -2, -2,
|
|
-1, -1, 0, 0, 1, 1, 2,
|
|
2, 3, 3, -6, -6, -5, -5,
|
|
-4, -4, -3, -3, -2, -2, -1,
|
|
-1, 0, 0, 1, 1, 2, 2,
|
|
3, 3, 4, -6, -5, -5, -4,
|
|
-4, -3, -3, -2, -2, -1, -1,
|
|
0, 0, 1, 1, 2, 2, 3,
|
|
3, 4, 4, -5, -5, -4, -4,
|
|
-3, -3, -2, -2, -1, -1, 0,
|
|
0, 1, 1, 2, 2, 3, 3,
|
|
4, 4, 5, -5, -4, -4, -3,
|
|
-3, -2, -2, -1, -1, 0, 0,
|
|
1, 1, 2, 2, 3, 3, 4,
|
|
4, 5, 5, -4, -4, -3, -3,
|
|
-2, -2, -1, -1, 0, 0, 1,
|
|
1, 2, 2, 3, 3, 4, 4,
|
|
5, 5, 6, -4, -3, -3, -2,
|
|
-2, -1, -1, 0, 0, 1, 1,
|
|
2, 2, 3, 3, 4, 4, 5,
|
|
5, 6, 6, -3, -3, -2, -2,
|
|
-1, -1, 0, 0, 1, 1, 2,
|
|
2, 3, 3, 4, 4, 5, 5,
|
|
6, 6, 7, -3, -2, -2, -1,
|
|
-1, 0, 0, 1, 1, 2, 2,
|
|
3, 3, 4, 4, 5, 5, 6,
|
|
6, 7, 7, -2, -2, -1, -1,
|
|
0, 0, 1, 1, 2, 2, 3,
|
|
3, 4, 4, 5, 5, 6, 6,
|
|
7, 7, 8, -2, -1, -1, 0,
|
|
0, 1, 1, 2, 2, 3, 3,
|
|
4, 4, 5, 5, 6, 6, 7,
|
|
7, 8, 8, -1, -1, 0, 0,
|
|
1, 1, 2, 2, 3, 3, 4,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, -1, 0, 0, 1,
|
|
1, 2, 2, 3, 3, 4, 4,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 0, 0, 1, 1,
|
|
2, 2, 3, 3, 4, 4, 5,
|
|
5, 6, 6, 7, 7, 8, 8,
|
|
9, 9, 10, 0, 1, 1, 2,
|
|
2, 3, 3, 4, 4, 5, 5,
|
|
6, 6, 7, 7, 8, 8, 9,
|
|
9, 10, 10};
|
|
|
|
int *d_a, *d_b, *d_res;
|
|
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
|
|
|
|
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
|
|
rhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
|
|
std::vector<int> gpu_res(size, 0);
|
|
|
|
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << rhadd_expected[i]
|
|
<< " got: " << gpu_res[i]);
|
|
CHECK(rhadd_expected[i] == gpu_res[i]);
|
|
}
|
|
|
|
HIP_CHECK(hipFree(d_a));
|
|
HIP_CHECK(hipFree(d_b));
|
|
HIP_CHECK(hipFree(d_res));
|
|
}
|
|
|
|
TEST_CASE("Unit_uhadd_int_varaint") {
|
|
auto [a, b] = get_uadd_inputs();
|
|
|
|
REQUIRE(a.size() == b.size());
|
|
const size_t size = a.size();
|
|
REQUIRE(size <= 1024);
|
|
|
|
// Manually calculated on both platforms
|
|
const std::vector<unsigned> uhadd_expected{
|
|
0, 0, 1, 1, 2, 2, 3,
|
|
3, 4, 4, 5, 5, 6, 6,
|
|
7, 7, 8, 8, 9, 9, 10,
|
|
0, 1, 1, 2, 2, 3, 3,
|
|
4, 4, 5, 5, 6, 6, 7,
|
|
7, 8, 8, 9, 9, 10, 10,
|
|
1, 1, 2, 2, 3, 3, 4,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, 9, 10, 10, 11,
|
|
1, 2, 2, 3, 3, 4, 4,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 10, 10, 11, 11,
|
|
2, 2, 3, 3, 4, 4, 5,
|
|
5, 6, 6, 7, 7, 8, 8,
|
|
9, 9, 10, 10, 11, 11, 12,
|
|
2, 3, 3, 4, 4, 5, 5,
|
|
6, 6, 7, 7, 8, 8, 9,
|
|
9, 10, 10, 11, 11, 12, 12,
|
|
3, 3, 4, 4, 5, 5, 6,
|
|
6, 7, 7, 8, 8, 9, 9,
|
|
10, 10, 11, 11, 12, 12, 13,
|
|
3, 4, 4, 5, 5, 6, 6,
|
|
7, 7, 8, 8, 9, 9, 10,
|
|
10, 11, 11, 12, 12, 13, 13,
|
|
4, 4, 5, 5, 6, 6, 7,
|
|
7, 8, 8, 9, 9, 10, 10,
|
|
11, 11, 12, 12, 13, 13, 14,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, 9, 10, 10, 11,
|
|
11, 12, 12, 13, 13, 14, 14,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 10, 10, 11, 11,
|
|
12, 12, 13, 13, 14, 14, 15,
|
|
5, 6, 6, 7, 7, 8, 8,
|
|
9, 9, 10, 10, 11, 11, 12,
|
|
12, 13, 13, 14, 14, 15, 15,
|
|
6, 6, 7, 7, 8, 8, 9,
|
|
9, 10, 10, 11, 11, 12, 12,
|
|
13, 13, 14, 14, 15, 15, 16,
|
|
6, 7, 7, 8, 8, 9, 9,
|
|
10, 10, 11, 11, 12, 12, 13,
|
|
13, 14, 14, 15, 15, 16, 16,
|
|
7, 7, 8, 8, 9, 9, 10,
|
|
10, 11, 11, 12, 12, 13, 13,
|
|
14, 14, 15, 15, 16, 16, 17,
|
|
7, 8, 8, 9, 9, 10, 10,
|
|
11, 11, 12, 12, 13, 13, 14,
|
|
14, 15, 15, 16, 16, 17, 17,
|
|
8, 8, 9, 9, 10, 10, 11,
|
|
11, 12, 12, 13, 13, 14, 14,
|
|
15, 15, 16, 16, 17, 17, 18,
|
|
8, 9, 9, 10, 10, 11, 11,
|
|
12, 12, 13, 13, 14, 14, 15,
|
|
15, 16, 16, 17, 17, 18, 18,
|
|
9, 9, 10, 10, 11, 11, 12,
|
|
12, 13, 13, 14, 14, 15, 15,
|
|
16, 16, 17, 17, 18, 18, 19,
|
|
9, 10, 10, 11, 11, 12, 12,
|
|
13, 13, 14, 14, 15, 15, 16,
|
|
16, 17, 17, 18, 18, 19, 19,
|
|
10, 10, 11, 11, 12, 12, 13,
|
|
13, 14, 14, 15, 15, 16, 16,
|
|
17, 17, 18, 18, 19, 19, 20,
|
|
4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292,
|
|
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
|
|
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
|
|
4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291,
|
|
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
|
|
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
|
|
4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291,
|
|
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
|
|
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
|
|
4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290,
|
|
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
|
|
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
|
|
4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290,
|
|
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
|
|
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
|
|
4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289,
|
|
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
|
|
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
|
|
4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289,
|
|
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
|
|
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
|
|
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
|
|
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
|
|
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
|
|
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
|
|
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
|
|
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
|
|
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
|
|
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
|
|
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
|
|
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
|
|
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
|
|
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
|
|
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
|
|
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
|
|
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
|
|
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
|
|
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
|
|
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
|
|
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
|
|
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
|
|
4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278,
|
|
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
|
|
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
|
|
4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278,
|
|
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
|
|
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
|
|
4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277,
|
|
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
|
|
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
|
|
4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277,
|
|
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
|
|
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
|
|
4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276,
|
|
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
|
|
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
|
|
4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276,
|
|
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
|
|
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
|
|
4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275,
|
|
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
|
|
4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278,
|
|
4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275, 4294967275};
|
|
|
|
unsigned *d_a, *d_b, *d_res;
|
|
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
|
|
|
|
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
|
|
uhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
|
|
std::vector<int> gpu_res(size, 0);
|
|
|
|
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i]
|
|
<< " got: " << gpu_res[i]);
|
|
CHECK(uhadd_expected[i] == gpu_res[i]);
|
|
}
|
|
|
|
HIP_CHECK(hipFree(d_a));
|
|
HIP_CHECK(hipFree(d_b));
|
|
HIP_CHECK(hipFree(d_res));
|
|
}
|
|
|
|
TEST_CASE("Unit_urhadd_int_varaint") {
|
|
auto [a, b] = get_uadd_inputs();
|
|
|
|
REQUIRE(a.size() == b.size());
|
|
const size_t size = a.size();
|
|
REQUIRE(size <= 1024);
|
|
|
|
// Manually calculated on both platforms
|
|
const std::vector<unsigned> uhadd_expected{
|
|
0, 1, 1, 2, 2, 3, 3,
|
|
4, 4, 5, 5, 6, 6, 7,
|
|
7, 8, 8, 9, 9, 10, 10,
|
|
1, 1, 2, 2, 3, 3, 4,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, 9, 10, 10, 11,
|
|
1, 2, 2, 3, 3, 4, 4,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 10, 10, 11, 11,
|
|
2, 2, 3, 3, 4, 4, 5,
|
|
5, 6, 6, 7, 7, 8, 8,
|
|
9, 9, 10, 10, 11, 11, 12,
|
|
2, 3, 3, 4, 4, 5, 5,
|
|
6, 6, 7, 7, 8, 8, 9,
|
|
9, 10, 10, 11, 11, 12, 12,
|
|
3, 3, 4, 4, 5, 5, 6,
|
|
6, 7, 7, 8, 8, 9, 9,
|
|
10, 10, 11, 11, 12, 12, 13,
|
|
3, 4, 4, 5, 5, 6, 6,
|
|
7, 7, 8, 8, 9, 9, 10,
|
|
10, 11, 11, 12, 12, 13, 13,
|
|
4, 4, 5, 5, 6, 6, 7,
|
|
7, 8, 8, 9, 9, 10, 10,
|
|
11, 11, 12, 12, 13, 13, 14,
|
|
4, 5, 5, 6, 6, 7, 7,
|
|
8, 8, 9, 9, 10, 10, 11,
|
|
11, 12, 12, 13, 13, 14, 14,
|
|
5, 5, 6, 6, 7, 7, 8,
|
|
8, 9, 9, 10, 10, 11, 11,
|
|
12, 12, 13, 13, 14, 14, 15,
|
|
5, 6, 6, 7, 7, 8, 8,
|
|
9, 9, 10, 10, 11, 11, 12,
|
|
12, 13, 13, 14, 14, 15, 15,
|
|
6, 6, 7, 7, 8, 8, 9,
|
|
9, 10, 10, 11, 11, 12, 12,
|
|
13, 13, 14, 14, 15, 15, 16,
|
|
6, 7, 7, 8, 8, 9, 9,
|
|
10, 10, 11, 11, 12, 12, 13,
|
|
13, 14, 14, 15, 15, 16, 16,
|
|
7, 7, 8, 8, 9, 9, 10,
|
|
10, 11, 11, 12, 12, 13, 13,
|
|
14, 14, 15, 15, 16, 16, 17,
|
|
7, 8, 8, 9, 9, 10, 10,
|
|
11, 11, 12, 12, 13, 13, 14,
|
|
14, 15, 15, 16, 16, 17, 17,
|
|
8, 8, 9, 9, 10, 10, 11,
|
|
11, 12, 12, 13, 13, 14, 14,
|
|
15, 15, 16, 16, 17, 17, 18,
|
|
8, 9, 9, 10, 10, 11, 11,
|
|
12, 12, 13, 13, 14, 14, 15,
|
|
15, 16, 16, 17, 17, 18, 18,
|
|
9, 9, 10, 10, 11, 11, 12,
|
|
12, 13, 13, 14, 14, 15, 15,
|
|
16, 16, 17, 17, 18, 18, 19,
|
|
9, 10, 10, 11, 11, 12, 12,
|
|
13, 13, 14, 14, 15, 15, 16,
|
|
16, 17, 17, 18, 18, 19, 19,
|
|
10, 10, 11, 11, 12, 12, 13,
|
|
13, 14, 14, 15, 15, 16, 16,
|
|
17, 17, 18, 18, 19, 19, 20,
|
|
10, 11, 11, 12, 12, 13, 13,
|
|
14, 14, 15, 15, 16, 16, 17,
|
|
17, 18, 18, 19, 19, 20, 20,
|
|
4294967295, 4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292,
|
|
4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289,
|
|
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
|
|
4294967295, 4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292,
|
|
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
|
|
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
|
|
4294967294, 4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291,
|
|
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
|
|
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
|
|
4294967294, 4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291,
|
|
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
|
|
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
|
|
4294967293, 4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290,
|
|
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
|
|
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
|
|
4294967293, 4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290,
|
|
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
|
|
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
|
|
4294967292, 4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289,
|
|
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
|
|
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
|
|
4294967292, 4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289,
|
|
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
|
|
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
|
|
4294967291, 4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288,
|
|
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
|
|
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
|
|
4294967291, 4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288,
|
|
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
|
|
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
|
|
4294967290, 4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287,
|
|
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
|
|
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
|
|
4294967290, 4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287,
|
|
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
|
|
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
|
|
4294967289, 4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286,
|
|
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
|
|
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
|
|
4294967289, 4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286,
|
|
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
|
|
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
|
|
4294967288, 4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285,
|
|
4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282,
|
|
4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278,
|
|
4294967288, 4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285,
|
|
4294967284, 4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281,
|
|
4294967281, 4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278,
|
|
4294967287, 4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284,
|
|
4294967284, 4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281,
|
|
4294967280, 4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277,
|
|
4294967287, 4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284,
|
|
4294967283, 4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280,
|
|
4294967280, 4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277,
|
|
4294967286, 4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283,
|
|
4294967283, 4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280,
|
|
4294967279, 4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276,
|
|
4294967286, 4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283,
|
|
4294967282, 4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279,
|
|
4294967279, 4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276,
|
|
4294967285, 4294967285, 4294967284, 4294967284, 4294967283, 4294967283, 4294967282,
|
|
4294967282, 4294967281, 4294967281, 4294967280, 4294967280, 4294967279, 4294967279,
|
|
4294967278, 4294967278, 4294967277, 4294967277, 4294967276, 4294967276, 4294967275};
|
|
|
|
unsigned *d_a, *d_b, *d_res;
|
|
HIP_CHECK(hipMalloc(&d_a, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_b, sizeof(int) * size));
|
|
HIP_CHECK(hipMalloc(&d_res, sizeof(int) * size));
|
|
|
|
HIP_CHECK(hipMemcpy(d_a, a.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipMemcpy(d_b, b.data(), sizeof(int) * size, hipMemcpyHostToDevice));
|
|
|
|
urhadd_kernel<<<1, size>>>(d_a, d_b, d_res, size);
|
|
std::vector<int> gpu_res(size, 0);
|
|
|
|
HIP_CHECK(hipMemcpy(gpu_res.data(), d_res, sizeof(int) * size, hipMemcpyDeviceToHost));
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
INFO("iter: " << i << " in: " << a[i] << ", " << b[i] << " expected: " << uhadd_expected[i]
|
|
<< " got: " << gpu_res[i]);
|
|
CHECK(uhadd_expected[i] == gpu_res[i]);
|
|
}
|
|
|
|
HIP_CHECK(hipFree(d_a));
|
|
HIP_CHECK(hipFree(d_b));
|
|
HIP_CHECK(hipFree(d_res));
|
|
}
|