d37af80d7e
* add gfx1100 support
Add support for Radeon 7900 GPUs (RX and PRO), and 7800 PRO.
I was contemplating to add gfx1101 and gfx1102 GPUs as well, but those are the lower end models that are more unlikely to be used for compute intensive jobs. In addition, I do not have access to them to test the support.
* update WF_SIZe for different options
Radeon systems use a WarpSize of 32, unlike current Instinct systems,
which use a warp size of 64. For the device side, a gfx specific ifdef
is sufficient. For the host side, we need to query the device
properties.
* adjust functional tests to wf_size of 32
* update unit tests to handle wf_size of 32
* address reviewer comments
[ROCm/rocshmem commit: d0c2845031]
1523 righe
42 KiB
C++
1523 righe
42 KiB
C++
/******************************************************************************
|
|
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal in the Software without restriction, including without limitation the
|
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
* sell copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*****************************************************************************/
|
|
|
|
#include "bitwise_gtest.hpp"
|
|
|
|
using namespace rocshmem;
|
|
|
|
/*****************************************************************************
|
|
*************************** Kernel Definitions ******************************
|
|
*****************************************************************************/
|
|
__global__ void lowest_active_lane_kernel(BitwiseDeviceMethods *device_methods,
|
|
WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->lowest_active_lane(warp_matrix, activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void is_lowest_active_lane_kernel(
|
|
BitwiseDeviceMethods *device_methods, WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->is_lowest_active_lane(warp_matrix, activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void active_logical_lane_id_2_kernel(
|
|
BitwiseDeviceMethods *device_methods, WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->active_logical_lane_id_2(warp_matrix,
|
|
activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void lane_id_kernel(BitwiseDeviceMethods *device_methods,
|
|
WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->lane_id(warp_matrix, activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void number_active_lanes_kernel(BitwiseDeviceMethods *device_methods,
|
|
WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->number_active_lanes(warp_matrix, activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void broadcast_up_value_42_kernel(
|
|
BitwiseDeviceMethods *device_methods, WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->broadcast_up_value_42(warp_matrix, activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void fetch_incr_lowest_active_lane_kernel(
|
|
BitwiseDeviceMethods *device_methods, WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->fetch_incr_lowest_active_lane(warp_matrix,
|
|
activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void fetch_incr_active_logical_lane_1_kernel(
|
|
BitwiseDeviceMethods *device_methods, WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
device_methods->fetch_incr_active_logical_lane_1(warp_matrix,
|
|
activate_lanes_bitfield);
|
|
}
|
|
|
|
__global__ void activate_lane_helper_kernel(
|
|
BitwiseDeviceMethods *device_methods, WarpMatrix *warp_matrix,
|
|
uint64_t activate_lanes_bitfield) {
|
|
bool is_an_active_lane =
|
|
device_methods->activate_lane_helper(activate_lanes_bitfield);
|
|
|
|
/*
|
|
* Index into warp matrix to save return value to be read by host.
|
|
*/
|
|
if (is_an_active_lane) {
|
|
size_t warp_index = hipThreadIdx_x / WF_SIZE;
|
|
size_t block_index = hipBlockIdx_x;
|
|
auto *elem = warp_matrix->access(warp_index, block_index);
|
|
*elem = hipThreadIdx_x;
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
****************************** Fixture Tests ********************************
|
|
*****************************************************************************/
|
|
|
|
/*****************************************************************************
|
|
******************************* Setup Tests *********************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, setup_fixture_only_1_1) {
|
|
setup_fixture({1, 1, 1}, {1, 1, 1});
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, setup_fixture_check_matrix_size_1_1) {
|
|
setup_fixture({1, 1, 1}, {1, 1, 1});
|
|
|
|
ASSERT_EQ(_warp_matrix->rows(), 1);
|
|
ASSERT_EQ(_warp_matrix->columns(), 1);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, setup_fixture_check_matrix_size_64_1) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
ASSERT_EQ(_warp_matrix->rows(), 1);
|
|
ASSERT_EQ(_warp_matrix->columns(), 1);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, setup_fixture_check_matrix_size_128_1) {
|
|
setup_fixture({128, 1, 1}, {1, 1, 1});
|
|
|
|
ASSERT_EQ(_warp_matrix->rows(), 2);
|
|
ASSERT_EQ(_warp_matrix->columns(), 1);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, setup_fixture_check_matrix_size_128_2) {
|
|
setup_fixture({128, 1, 1}, {2, 1, 1});
|
|
|
|
ASSERT_EQ(_warp_matrix->rows(), 2);
|
|
ASSERT_EQ(_warp_matrix->columns(), 2);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, verify_host_warp_matrix_init_1024_8) {
|
|
setup_fixture({1024, 1, 1}, {8, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
verify_zeroed_warp_matrix();
|
|
}
|
|
|
|
/*****************************************************************************
|
|
************************** Activate Lane Helper******************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_0) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000001;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 0);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_1) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000002;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 1);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_2) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000004;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 2);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_3) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000008;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 3);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_4) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000010;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 4);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_8) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000100;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 8);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_12) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000001000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 12);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_28) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000010000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 28);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_29) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000020000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 29);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_30) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000040000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 30);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_31) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000080000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 31);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_32) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000100000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 32);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_33) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000200000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 33);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_34) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000400000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 34);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_35) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000800000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 35);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_44) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000100000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 44);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_48) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0001000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 48);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_52) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0010000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 52);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_56) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0100000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 56);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_60) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x1000000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 60);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_61) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x2000000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 61);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_62) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x4000000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 62);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, activate_lane_helper_64_1_lane_63) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x8000000000000000;
|
|
|
|
host_run_device_kernel(activate_lane_helper_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 63);
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*************************** Lowest Active Lane ******************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_0) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffffffffffff;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 0);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_1) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfffffffffffffffe;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 1);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_2) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfffffffffffffffc;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 2);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_3) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfffffffffffffff8;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 3);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_4) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfffffffffffffff0;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 4);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_8) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffffffffff00;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 8);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_16) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffffffff0000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 16);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_16_1_lane_15) {
|
|
setup_fixture({16, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffffffff8000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 15);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_31) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffff80000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 31);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_32_1_lane_31) {
|
|
setup_fixture({32, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffff80000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 31);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_32) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffff00000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 32);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_36) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfffffff000000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 36);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_47) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffff800000000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 47);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_48) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffff000000000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 48);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_49) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfffe000000000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 49);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_64_1_lane_60) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xf000000000000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 60);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_128_1) {
|
|
setup_fixture({128, 1, 1}, {1, 1, 1});
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000080000000;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
auto *elem = _warp_matrix->access(0, 0);
|
|
ASSERT_EQ(*elem, 31);
|
|
|
|
elem = _warp_matrix->access(1, 0);
|
|
ASSERT_EQ(*elem, 31);
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lowest_active_lane_1024_80) {
|
|
setup_fixture({1024, 1, 1}, {80, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000010;
|
|
|
|
host_run_device_kernel(lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 4);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
************************** Is Lowest Active Lane ****************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, is_lowest_active_lane_64_1_lane_1) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xf0f0f10000000002;
|
|
|
|
host_run_device_kernel(is_lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, is_lowest_active_lane_64_1_lane_2) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xf0f0f10000000004;
|
|
|
|
host_run_device_kernel(is_lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 2);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, is_lowest_active_lane_64_1_lane_3) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xf0f0f10000000008;
|
|
|
|
host_run_device_kernel(is_lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 3);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, is_lowest_active_lane_64_1_lane_4) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x00f0fd8f350f0010;
|
|
|
|
host_run_device_kernel(is_lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 4);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, is_lowest_active_lane_256_8_lane_40) {
|
|
setup_fixture({256, 1, 1}, {8, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xf0f0f10000000000;
|
|
|
|
host_run_device_kernel(is_lowest_active_lane_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 40);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
********************************* Lane ID ***********************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, lane_id_1_1_lane_15_inactive_left_zero) {
|
|
setup_fixture({1, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000008000;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lane_id_32_1_lane_15) {
|
|
setup_fixture({32, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000008000;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 15);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lane_id_64_1_lane_32) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000100000000;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 32);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lane_id_64_1_lane_49) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0002000000000000;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 49);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lane_id_128_2_lane_2) {
|
|
setup_fixture({128, 1, 1}, {2, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000002;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lane_id_128_2_lane_61) {
|
|
setup_fixture({128, 1, 1}, {2, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x2000000000000000;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 61);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, lane_id_128_2_lane_63) {
|
|
setup_fixture({128, 1, 1}, {2, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x8000000000000000;
|
|
|
|
host_run_device_kernel(lane_id_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 63);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*************************** Number Active Lanes *****************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, number_active_lanes_kernel_128_2_num_active_1) {
|
|
setup_fixture({256, 1, 1}, {2, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0004000000000000;
|
|
|
|
host_run_device_kernel(number_active_lanes_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, number_active_lanes_kernel_128_2_num_active_52) {
|
|
setup_fixture({256, 1, 1}, {2, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xfff0ffff0ffff0ff;
|
|
|
|
host_run_device_kernel(number_active_lanes_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 52);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, number_active_lanes_kernel_128_2_num_active_64) {
|
|
setup_fixture({128, 1, 1}, {2, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xffffffffffffffff;
|
|
|
|
host_run_device_kernel(number_active_lanes_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 64);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
************************* Active Logical Lane ID ****************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, active_logical_lane_id_kernel_64_1_logic_2_is_29) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0FC0000020002004;
|
|
|
|
host_run_device_kernel(active_logical_lane_id_2_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 29);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, active_logical_lane_id_kernel_64_1_logic_2_is_62) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x4000000000000024;
|
|
|
|
host_run_device_kernel(active_logical_lane_id_2_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 62);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, active_logical_lane_id_kernel_768_3_logic_2_is_32) {
|
|
setup_fixture({768, 1, 1}, {3, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xFFFF000100001100;
|
|
|
|
host_run_device_kernel(active_logical_lane_id_2_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 32);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
****************************** Broadcast Up *********************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, broadcast_up_value_42_kernel_64_1_lane_0) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000001;
|
|
|
|
host_run_device_kernel(broadcast_up_value_42_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 42);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, broadcast_up_value_42_kernel_64_1_lane_1) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000000002;
|
|
|
|
host_run_device_kernel(broadcast_up_value_42_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 42);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, broadcast_up_value_42_kernel_64_1_lane_16) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0000000000010000;
|
|
|
|
host_run_device_kernel(broadcast_up_value_42_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 42);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, broadcast_up_value_42_kernel_64_1_odd_index) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x0101010101010101;
|
|
|
|
host_run_device_kernel(broadcast_up_value_42_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 42);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, broadcast_up_value_42_kernel_64_1_even_index) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x1010101010101010;
|
|
|
|
host_run_device_kernel(broadcast_up_value_42_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 42);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, broadcast_up_value_42_kernel_64_1_all) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xFFFFFFFFFFFFFFFF;
|
|
|
|
host_run_device_kernel(broadcast_up_value_42_kernel, activate_lanes_bitfield);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem, 42);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
******************************* Fetch Incr **********************************
|
|
*****************************************************************************/
|
|
TEST_F(BitwiseTestFixture, fetch_incr_kernel_4_1) {
|
|
setup_fixture({4, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0x000000000000000A;
|
|
|
|
host_run_device_kernel(fetch_incr_lowest_active_lane_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
ASSERT_EQ(*_device_methods->_fetch_value, 2);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem % this->_wf_size, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, fetch_incr_kernel_64_1) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xFFFFFFFFFFFFFFFF;
|
|
|
|
host_run_device_kernel(fetch_incr_lowest_active_lane_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
ASSERT_EQ(*_device_methods->_fetch_value, 64);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem % this->_wf_size, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, fetch_incr_kernel_64_1_top_half) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xFFFFFFFF00000000;
|
|
|
|
host_run_device_kernel(fetch_incr_lowest_active_lane_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
ASSERT_EQ(*_device_methods->_fetch_value, 32);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem % 32, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, fetch_incr_kernel_64_1_alternating) {
|
|
setup_fixture({64, 1, 1}, {1, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xF0F0F0F0F0F0F0F0;
|
|
|
|
host_run_device_kernel(fetch_incr_lowest_active_lane_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
ASSERT_EQ(*_device_methods->_fetch_value, 32);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem % 32, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, fetch_incr_kernel_1024_1024) {
|
|
setup_fixture({1024, 1, 1}, {1024, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xFFFFFFFFFFFFFFFF;
|
|
|
|
host_run_device_kernel(fetch_incr_lowest_active_lane_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
ASSERT_EQ(*_device_methods->_fetch_value, 1024 * 1024);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem % this->_wf_size, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(BitwiseTestFixture, fetch_incr_logical_1_kernel_1024_1024) {
|
|
setup_fixture({1024, 1, 1}, {1024, 1, 1});
|
|
|
|
zero_warp_matrix();
|
|
|
|
/*
|
|
* index (tens): 6554443322211000
|
|
* (ones): 0628406284062840
|
|
*/
|
|
uint64_t activate_lanes_bitfield = 0xFFFFFFFFFFFFFFFF;
|
|
|
|
host_run_device_kernel(fetch_incr_active_logical_lane_1_kernel,
|
|
activate_lanes_bitfield);
|
|
|
|
ASSERT_EQ(*_device_methods->_fetch_value, 1024 * 1024);
|
|
|
|
for (size_t i = 0; i < _warp_matrix->rows(); i++) {
|
|
for (size_t j = 0; j < _warp_matrix->columns(); j++) {
|
|
auto *elem = _warp_matrix->access(i, j);
|
|
ASSERT_EQ(*elem % this->_wf_size, 1);
|
|
}
|
|
}
|
|
}
|